# Pandas Introduction

`pandas` is a Python package that provides powerful tools for data analysis and manipulation.
It provides:

1. Data structures for convenient storage of univariate and multivariate data.
2. Tools for manipulating that data.

`pandas` is built on top of NumPy.

`pandas` is typically imported under the alias `pd`:

In [1]:
import pandas as pd

`Series` and `DataFrame` are the two most important data structures provided by `pandas`.

In [2]:
import numpy as np

In [3]:
# Fixed sample values reused by the Series and DataFrame examples.
x = [1, 1.2, 1.4, 1.6, 1.8, 2.0]
# Draw the ten standard-normal values from a seeded generator so the sample
# (and every output derived from it) is identical on each Restart & Run All;
# an unseeded draw produces different numbers on every execution.
y = np.random.default_rng(42).standard_normal(10)

In [4]:
x

[1, 1.2, 1.4, 1.6, 1.8, 2.0]

In [5]:
y

array([-1.23600623, -0.94111523, -0.26536483, -0.52568792,  0.30009324,
        0.7214413 ,  0.54532709,  0.36939349, -0.32734405, -0.45218398])

In [6]:
x1 = pd.Series(x)

In [7]:
x1

0    1.0
1    1.2
2    1.4
3    1.6
4    1.8
5    2.0
dtype: float64

In [8]:
# Build a Series directly from a list literal; with no `index` argument
# pandas labels the values 0..n-1.
x2 = pd.Series(
    data=[2.7, 2.1, 3.2, 2.1, 2.0, 3.2, 3.6],
)
x2

0    2.7
1    2.1
2    3.2
3    2.1
4    2.0
5    3.2
6    3.6
dtype: float64

In [9]:
# `.values` hands back just the data as a NumPy array; the index is not included.
x2.values

array([2.7, 2.1, 3.2, 2.1, 2. , 3.2, 3.6])

In [10]:
type(x2.values)

numpy.ndarray

In [11]:
x2.size

7

In [12]:
x2.index = ['a', 'b', 'c', 'd', 'e', 'f', 'g']

In [13]:
x2

a    2.7
b    2.1
c    3.2
d    2.1
e    2.0
f    3.2
g    3.6
dtype: float64

In [14]:
# Pass explicit labels through `index` instead of accepting the default
# integer positions.
x3 = pd.Series(
    data=[10, 15, 13, 18, 11],
    index=list('ABCDE'),
)
x3

A    10
B    15
C    13
D    18
E    11
dtype: int64

In [15]:
# Label-based slice: unlike a positional slice, the end label 'E' is included.
x3['B':'E']

B    15
C    13
D    18
E    11
dtype: int64

In [16]:
# Integer slices select by position (end excluded); `.iloc` states that
# intent explicitly on a Series whose labels are strings.
x3.iloc[1:3]

B    15
C    13
dtype: int64

In [17]:
# Select the 1st, 3rd and 5th elements by position. `x3` is labelled with
# strings, so plain `x3[[0, 2, 4]]` depends on the deprecated
# "integer key falls back to position" behaviour of `Series.__getitem__`;
# `.iloc` is the supported positional indexer.
x3.iloc[[0, 2, 4]]

A    10
C    13
E    11
dtype: int64

In [18]:
# `x3>=15` yields a boolean Series; indexing with it keeps only the rows where it is True.
x3[x3>=15]

B    15
D    18
dtype: int64

In [19]:
# Same filter as the previous cell, but with the boolean mask stored under a
# descriptive name before it is used for selection.
eligible = (x3 >= 15)
x3.loc[eligible]

B    15
D    18
dtype: int64

In [20]:
y1 = pd.Series(y)
y1

0   -1.236006
1   -0.941115
2   -0.265365
3   -0.525688
4    0.300093
5    0.721441
6    0.545327
7    0.369393
8   -0.327344
9   -0.452184
dtype: float64

In [21]:
y1[1]

-0.9411152251824977

In [22]:
y1[:3]

0   -1.236006
1   -0.941115
2   -0.265365
dtype: float64

In [23]:
y1[[2, 6, 8]]

2   -0.265365
6    0.545327
8   -0.327344
dtype: float64

In [24]:
# A list of labels may repeat a label; the result then contains that row more
# than once and its index is no longer unique.
y2 = y1[[2, 6, 2]]
y2

2   -0.265365
6    0.545327
2   -0.265365
dtype: float64

In [25]:
# `.iloc[1]` means "second element by position"; y2's labels are 2, 6, 2, so
# there is no label 1 to look up.
y2.iloc[1]

0.5453270879547824

In [26]:
# A dict becomes a Series whose index is the dict's keys, in insertion order.
x4 = pd.Series(dict(A=34, C=38, E=32))
x4

A    34
C    38
E    32
dtype: int64

In [27]:
x4.loc['A']

34

In [28]:
x4.iloc[0]

34

In [29]:
# First element by position. `x4` has string labels, so plain `x4[0]` only
# worked through the deprecated positional fallback of `Series.__getitem__`
# (FutureWarning in recent pandas, KeyError once the fallback is removed).
# Use `.loc` for labels and `.iloc` for positions.
x4.iloc[0]

34

In [30]:
x3

A    10
B    15
C    13
D    18
E    11
dtype: int64

In [31]:
# Keep only the first three entries (positional slice) and rebind `x3` to the
# shortened Series; later cells work with this 3-element version.
x3 = x3.iloc[:3]
x3

A    10
B    15
C    13
dtype: int64

In [32]:
# Rebuild `x4` with the labels in C, B, A order; the Series keeps the dict's
# insertion order rather than sorting the labels.
x4 = pd.Series(dict(C=34, B=38, A=32))
x4

C    34
B    38
A    32
dtype: int64

In [33]:
# First element by position (the entry labelled 'C'). Plain `x4[0]` on a
# string-labelled Series depends on the deprecated positional fallback of
# `Series.__getitem__`; `.iloc` is the supported positional indexer.
x4.iloc[0]

34

In [34]:
x4*2 

C    68
B    76
A    64
dtype: int64

In [35]:
# Second labelled Series with the same labels as `x4`, listed in a different order.
x5 = pd.Series(dict(A=14, B=18, C=12))
x5

A    14
B    18
C    12
dtype: int64

In [36]:
# Arithmetic between Series aligns on index labels, not on position:
# x4 is ordered C, B, A and x5 is ordered A, B, C, yet 'A' is added to 'A', etc.
x4+x5

A    46
B    56
C    46
dtype: int64

In [37]:
x3.name = 'Scores'
x3.index.name = 'Player'
x3

Player
A    10
B    15
C    13
Name: Scores, dtype: int64

In [38]:
# First element by position. `x3` is indexed by the string labels 'A'..'C',
# so plain `x3[0]` depends on the deprecated positional fallback of
# `Series.__getitem__`; `.iloc` is the supported positional indexer.
x3.iloc[0]

10

In [39]:
# Each dict key becomes a column. `x` has 6 values while `y` has 10, so `y`
# is trimmed to its first 6 values to give both columns the same length.
df1 = pd.DataFrame({'x':x, 'y':y[:6]})

In [40]:
df1

Unnamed: 0,x,y
0,1.0,-1.236006
1,1.2,-0.941115
2,1.4,-0.265365
3,1.6,-0.525688
4,1.8,0.300093
5,2.0,0.721441


In [41]:
df1.index

RangeIndex(start=0, stop=6, step=1)

In [42]:
df1.columns

Index(['x', 'y'], dtype='object')

In [43]:
df1.x

0    1.0
1    1.2
2    1.4
3    1.6
4    1.8
5    2.0
Name: x, dtype: float64

In [44]:
df1['x']

0    1.0
1    1.2
2    1.4
3    1.6
4    1.8
5    2.0
Name: x, dtype: float64

In [45]:
# Assigning a list to `.columns` relabels every column at once, in place.
# The list is matched to the existing columns by position.
df1.columns = ['x values', 'y values']
df1

Unnamed: 0,x values,y values
0,1.0,-1.236006
1,1.2,-0.941115
2,1.4,-0.265365
3,1.6,-0.525688
4,1.8,0.300093
5,2.0,0.721441


In [46]:
df1.index = ['a', 'b', 'c', 'd', 'e', 'f']

In [47]:
df1

Unnamed: 0,x values,y values
a,1.0,-1.236006
b,1.2,-0.941115
c,1.4,-0.265365
d,1.6,-0.525688
e,1.8,0.300093
f,2.0,0.721441


In [48]:
df1['x values']

a    1.0
b    1.2
c    1.4
d    1.6
e    1.8
f    2.0
Name: x values, dtype: float64

In [49]:
df1.mean()

x values    1.50000
y values   -0.32444
dtype: float64

In [50]:
# Assigning to a column name that does not exist yet adds that column in place.
# From here on `df1` also contains a non-numeric (string) column.
df1['id'] = ['i1', 'i2', 'i3', 'i4', 'i5', 'i6']
df1

Unnamed: 0,x values,y values,id
a,1.0,-1.236006,i1
b,1.2,-0.941115,i2
c,1.4,-0.265365,i3
d,1.6,-0.525688,i4
e,1.8,0.300093,i5
f,2.0,0.721441,i6


In [51]:
# `df1` now holds the string column 'id'. Restrict the reduction to numeric
# columns explicitly: silently dropping non-numeric columns was deprecated,
# and pandas 2.0+ raises a TypeError for `df1.mean()` on this frame.
df1.mean(numeric_only=True)

  df1.mean()


x values    1.50000
y values   -0.32444
dtype: float64

In [52]:
# Standard deviation of the numeric columns only. `df1` contains the string
# column 'id', which pandas 2.0+ no longer drops silently, so it is excluded
# explicitly.
df1.std(numeric_only=True)

  df1.std()


x values    0.374166
y values    0.740154
dtype: float64

In [53]:
# Variance of the numeric columns only. `df1` contains the string column
# 'id', which pandas 2.0+ no longer drops silently, so it is excluded
# explicitly.
df1.var(numeric_only=True)

  df1.var()


x values    0.140000
y values    0.547828
dtype: float64