# Pandas Introduction

`pandas` is a Python package that provides powerful tools for data analysis and
manipulation. It provides:

1. Data structures for convenient storage of univariate and multivariate data.
2. Tools for manipulating that data.

`pandas` is built on top of NumPy.

`pandas` is conventionally imported under the alias `pd`, alongside NumPy as `np`:

In [5]:
import pandas as pd
import numpy as np

`Series` and `DataFrame` are the two main data structures provided by pandas.

In [9]:
# Seed the random generator so that y -- and every output derived from it
# further down -- is identical on each Restart Kernel -> Run All.
SEED = 42
rng = np.random.default_rng(SEED)

x = [1, 1.2, 1.4, 1.6, 1.8, 2.0]  # plain Python list mixing an int with floats
y = rng.standard_normal(10)       # 10 draws from the standard normal distribution


In [10]:
# Echo the plain Python list to confirm its contents.
x

[1, 1.2, 1.4, 1.6, 1.8, 2.0]

In [11]:
# Echo the NumPy array of random draws.
y

array([ 0.77717188,  1.28518959, -1.10388511, -1.42482689, -1.19000142,
        0.16255635,  0.24522565, -0.34449356, -2.2431106 ,  1.24480955])

In [12]:
# Wrap the Python list in a pandas Series; since no index is given,
# a default integer index (0, 1, 2, ...) is assigned.
x1 = pd.Series(x)

In [13]:
# Display the Series: index labels on the left, values on the right, dtype at the bottom.
x1

0    1.0
1    1.2
2    1.4
3    1.6
4    1.8
5    2.0
dtype: float64

In [14]:
# Build a Series directly from a list literal (default integer index),
# then display it as the cell's result.
x2 = pd.Series(
    data=[2.7, 2.1, 3.2, 2.1, 2.0, 3.2, 3.6],
)
x2

0    2.7
1    2.1
2    3.2
3    2.1
4    2.0
5    3.2
6    3.6
dtype: float64

In [15]:
# .values exposes the Series' data as a NumPy array, without the index.
x2.values

array([2.7, 2.1, 3.2, 2.1, 2. , 3.2, 3.6])

In [16]:
# .size is the number of elements in the Series.
x2.size

7

In [17]:
# Replace the default integer index with one single-letter label per element.
x2.index = list('abcdefg')

In [18]:
# Display x2 again to confirm the string labels replaced the default integer index.
x2

a    2.7
b    2.1
c    3.2
d    2.1
e    2.0
f    3.2
g    3.6
dtype: float64

In [19]:
# The index labels can also be supplied at construction time.
x3 = pd.Series(
    data=[10, 15, 13, 18, 11],
    index=list('ABCDE'),
)
x3

A    10
B    15
C    13
D    18
E    11
dtype: int64

In [20]:
# Label-based slice: unlike positional slicing, the end label 'E' is included.
x3['B':'E']

B    15
C    13
D    18
E    11
dtype: int64

In [21]:
# Positional slice (end position excluded). .iloc makes it explicit that
# 1 and 3 are positions, not index labels.
x3.iloc[1:3]

B    15
C    13
dtype: int64

In [32]:
# Select the elements at positions 0, 2 and 4. x3 has string labels, so plain
# x3[[0, 2, 4]] relies on a deprecated positional fallback; .iloc is the
# supported way to index by position.
x3.iloc[[0, 2, 4]]

A    10
C    13
E    11
dtype: int64

In [33]:
# The selections above returned new objects; x3 itself is unchanged.
x3

A    10
B    15
C    13
D    18
E    11
dtype: int64

In [34]:
# Boolean mask: keep only the entries whose value is at least 15.
x3.loc[x3 >= 15]

B    15
D    18
dtype: int64

In [35]:
# A Series can also be built from a NumPy array; it gets the default integer index.
y1 = pd.Series(y)
y1

0    0.777172
1    1.285190
2   -1.103885
3   -1.424827
4   -1.190001
5    0.162556
6    0.245226
7   -0.344494
8   -2.243111
9    1.244810
dtype: float64

In [36]:
# y1 has the default integer index, so 1 is looked up as an index label here.
y1[1]

1.2851895885750795

In [37]:
# Integer slice: the first three elements (end position excluded).
y1[:3]

0    0.777172
1    1.285190
2   -1.103885
dtype: float64

In [38]:
# A list of integers selects several entries by index label.
y1[[2, 6, 8]]

2   -1.103885
6    0.245226
8   -2.243111
dtype: float64

In [39]:
# A label may be repeated in the selection; the result then carries
# duplicate index labels (2 appears twice).
y2 = y1[[2, 6, 2]]
y2

2   -1.103885
6    0.245226
2   -1.103885
dtype: float64

In [42]:
# .iloc is purely positional: position 1 is the second element (label 6),
# regardless of what the index labels are.
y2.iloc[1]

0.24522565253531464

In [43]:
# Build a Series from a dict: the keys become the index labels,
# the values become the data.
x4 = pd.Series(dict(A=34, C=38, E=32))
x4

A    34
C    38
E    32
dtype: int64

In [44]:
# .loc selects by index label.
x4.loc['A']

34

In [45]:
# Display x3 in full before it is shortened in the next cell.
x3

A    10
B    15
C    13
D    18
E    11
dtype: int64

In [46]:
# Keep only the first three entries (by position) and rebind x3 to the result.
# .iloc makes the positional intent explicit on this label-indexed Series.
# Note: from here on x3 no longer contains 'D' and 'E'.
x3 = x3.iloc[:3]
x3

A    10
B    15
C    13
dtype: int64

In [47]:
# Rebind x4 to a new Series; the index follows the dict's insertion order (C, B, A).
x4 = pd.Series(dict(C=34, B=38, A=32))
x4

C    34
B    38
A    32
dtype: int64

In [49]:
# Arithmetic is applied element-wise; the index is preserved.
x4*2

C    68
B    76
A    64
dtype: int64

In [50]:
# Both the Series and its index can carry a name; both show up in the printed output.
x3.name = 'Scores'
x3.index.name = 'Player'
x3

Player
A    10
B    15
C    13
Name: Scores, dtype: int64

In [51]:
# First element by position. x3 is indexed by string labels, so plain x3[0]
# relies on a deprecated positional fallback; .iloc is the supported way
# to index by position.
x3.iloc[0]

10

In [52]:
# Build a DataFrame from a dict of equal-length columns. y holds 10 values but
# x only 6, so y is trimmed to len(x) instead of hard-coding the length.
df1 = pd.DataFrame({'x': x, 'y': y[:len(x)]})

In [53]:
# Display the DataFrame: one row per element, one column per dict key.
df1

Unnamed: 0,x,y
0,1.0,0.777172
1,1.2,1.28519
2,1.4,-1.103885
3,1.6,-1.424827
4,1.8,-1.190001
5,2.0,0.162556


In [54]:
# Row labels: a default RangeIndex, since no index was given.
df1.index

RangeIndex(start=0, stop=6, step=1)

In [55]:
# Column labels, taken from the dict keys.
df1.columns

Index(['x', 'y'], dtype='object')

In [56]:
# Attribute access returns the column as a Series
# (only possible when the column name is a valid Python identifier).
df1.x

0    1.0
1    1.2
2    1.4
3    1.6
4    1.8
5    2.0
Name: x, dtype: float64

In [57]:
# Bracket access returns the same column and works for any column name.
df1['x']

0    1.0
1    1.2
2    1.4
3    1.6
4    1.8
5    2.0
Name: x, dtype: float64

In [58]:
# Assigning to .columns renames every column at once; the list must supply
# one name per existing column, in order. This mutates df1 in place.
df1.columns = ['x values', 'y values']
df1

Unnamed: 0,x values,y values
0,1.0,0.777172
1,1.2,1.28519
2,1.4,-1.103885
3,1.6,-1.424827
4,1.8,-1.190001
5,2.0,0.162556


In [59]:
# Replace the default RangeIndex with one single-letter label per row.
df1.index = list('abcdef')

In [60]:
# Display df1 to confirm the new row labels.
df1

Unnamed: 0,x values,y values
a,1.0,0.777172
b,1.2,1.28519
c,1.4,-1.103885
d,1.6,-1.424827
e,1.8,-1.190001
f,2.0,0.162556


In [61]:
# The column name contains a space, so bracket access is required here
# (attribute access cannot express it).
df1['x values']

a    1.0
b    1.2
c    1.4
d    1.6
e    1.8
f    2.0
Name: x values, dtype: float64

In [62]:
# Column-wise mean; the result is a Series indexed by column name.
df1.mean()

x values    1.500000
y values   -0.248966
dtype: float64

In [26]:
# Add a string-valued column; assigning to a new key appends it as the last column.
df1['id'] = [f'i{n}' for n in range(1, 7)]
df1

Unnamed: 0,x values,y values,id
a,1.0,0.309078,i1
b,1.2,0.048749,i2
c,1.4,-1.399838,i3
d,1.6,0.066904,i4
e,1.8,-0.357084,i5
f,2.0,-0.240697,i6


In [27]:
# df1 now contains the string column 'id'. Restrict the reduction to numeric
# columns explicitly: pandas >= 2.0 no longer drops non-numeric columns
# silently and raises TypeError instead.
df1.mean(numeric_only=True)

x values    1.500000
y values   -0.262148
dtype: float64

In [28]:
# Sample standard deviation per numeric column. numeric_only=True skips the
# string column 'id', which would otherwise raise TypeError on pandas >= 2.0.
df1.std(numeric_only=True)

x values    0.374166
y values    0.605812
dtype: float64

In [29]:
# Sample variance per numeric column. numeric_only=True skips the string
# column 'id', which would otherwise raise TypeError on pandas >= 2.0.
df1.var(numeric_only=True)

x values    0.140000
y values    0.367008
dtype: float64