In [1]:
import numpy as np
import pandas as pd

# Series

In [2]:
# Sample inputs reused by the Series examples that follow.
l1 = list(range(1, 7))                       # the integers 1 through 6
labels = list("abcdef")                      # one single-letter label per value
d1 = dict(zip("ABCDE", range(10, 60, 10)))   # "A" -> 10, "B" -> 20, ... "E" -> 50

In [3]:
# Build a Series from the plain list; no index is given, so the rows are labelled 0..5.
s1 = pd.Series(l1)
s1

0    1
1    2
2    3
3    4
4    5
5    6
dtype: int64

In [4]:
# With the default integer index, key 4 selects the fifth element.
s1[4]

5

In [5]:
# A Series of strings; the output below reports its dtype as object.
s2 = pd.Series(labels)
s2

0    a
1    b
2    c
3    d
4    e
5    f
dtype: object

In [6]:
# Same lookup as for s1: key 4 on the default integer index.
s2[4]

'e'

In [7]:
# Pair each value in l1 with the label at the same position in labels.
s3 = pd.Series(data=l1, index=labels)
s3

a    1
b    2
c    3
d    4
e    5
f    6
dtype: int64

In [8]:
# Look up a value by its index label.
s3['a']

1

In [9]:
# Positional lookup: s3 is indexed by string labels, so the first element is
# requested explicitly with .iloc. A bare s3[0] relies on pandas' deprecated
# fallback that treats integer keys as positions on a non-integer index.
s3.iloc[0]

1

In [10]:
# A dict becomes a Series whose index is the dict's keys.
pd.Series(d1)

A    10
B    20
C    30
D    40
E    50
dtype: int64

# DataFrame

In [12]:
# 5x6 array of random integers drawn from [1, 100).
# NOTE(review): no seed is set, so re-running this cell produces different
# values from the ones displayed in this notebook.
arr = np.random.randint(low=1, high=100, size=(5, 6))
arr

array([[81, 99, 99, 26, 24, 59],
       [83, 86, 95, 38, 29, 31],
       [41, 46, 46, 83, 89, 60],
       [45, 44, 13, 84, 40, 45],
       [27, 74, 55, 27, 89, 55]])

In [13]:
# Confirm that randint returned a NumPy ndarray.
type(arr)

numpy.ndarray

In [14]:
# Wrap the array without labels: rows and columns both get default integer labels.
pd.DataFrame(arr)

Unnamed: 0,0,1,2,3,4,5
0,81,99,99,26,24,59
1,83,86,95,38,29,31
2,41,46,46,83,89,60
3,45,44,13,84,40,45
4,27,74,55,27,89,55


In [17]:
# Same data, now with explicit labels: rows A-E and columns U-Z.
df = pd.DataFrame(
    data=arr,
    index=list("ABCDE"),
    columns=list("UVWXYZ"),
)
df

Unnamed: 0,U,V,W,X,Y,Z
A,81,99,99,26,24,59
B,83,86,95,38,29,31
C,41,46,46,83,89,60
D,45,44,13,84,40,45
E,27,74,55,27,89,55


In [18]:
# Confirm the object type of the labelled frame.
type(df)

pandas.core.frame.DataFrame

## Grabbing Columns

In [19]:
# Selecting with a single column name returns that column as a Series.
df["X"]

A    26
B    38
C    83
D    84
E    27
Name: X, dtype: int64

In [21]:
# A list of column names returns a DataFrame with the columns in the requested order.
df[["X", "Z", "V"]]

Unnamed: 0,X,Z,V
A,26,59,99
B,38,31,86
C,83,60,46
D,84,45,44
E,27,55,74


## Grabbing Rows

In [23]:
# .loc selects a row by its index label; the row comes back as a Series.
df.loc["C"]

U    41
V    46
W    46
X    83
Y    89
Z    60
Name: C, dtype: int64

In [24]:
# A list of labels returns the matching rows as a DataFrame.
df.loc[["A", "B", "E"]]

Unnamed: 0,U,V,W,X,Y,Z
A,81,99,99,26,24,59
B,83,86,95,38,29,31
E,27,74,55,27,89,55


In [25]:
# .iloc selects by integer position; position 2 is the row labelled C.
df.iloc[2]

U    41
V    46
W    46
X    83
Y    89
Z    60
Name: C, dtype: int64

## Adding a New Column

In [26]:
# The frame before a new column is added.
df

Unnamed: 0,U,V,W,X,Y,Z
A,81,99,99,26,24,59
B,83,86,95,38,29,31
C,41,46,46,83,89,60
D,45,44,13,84,40,45
E,27,74,55,27,89,55


In [27]:
# Assigning to a column name that does not exist yet adds it as a new column.
df['New'] = [10, 20, 30, 40, 50]

In [28]:
# New now appears as the last column.
df

Unnamed: 0,U,V,W,X,Y,Z,New
A,81,99,99,26,24,59,10
B,83,86,95,38,29,31,20
C,41,46,46,83,89,60,30
D,45,44,13,84,40,45,40
E,27,74,55,27,89,55,50


In [29]:
# Assigning to an existing column name overwrites its values.
df['New'] = [100, 200, 300, 400, 500]

In [30]:
# New now holds the replacement values.
df

Unnamed: 0,U,V,W,X,Y,Z,New
A,81,99,99,26,24,59,100
B,83,86,95,38,29,31,200
C,41,46,46,83,89,60,300
D,45,44,13,84,40,45,400
E,27,74,55,27,89,55,500


## Deleting a Column

In [31]:
# The frame before the column is dropped.
df

Unnamed: 0,U,V,W,X,Y,Z,New
A,81,99,99,26,24,59,100
B,83,86,95,38,29,31,200
C,41,46,46,83,89,60,300
D,45,44,13,84,40,45,400
E,27,74,55,27,89,55,500


In [33]:
# drop returns a new frame without the column; df itself is left unchanged.
df.drop(columns='New')

Unnamed: 0,U,V,W,X,Y,Z
A,81,99,99,26,24,59
B,83,86,95,38,29,31
C,41,46,46,83,89,60
D,45,44,13,84,40,45
E,27,74,55,27,89,55


In [35]:
# Make the removal stick by rebinding df to the result of drop, rather than
# mutating the frame with inplace=True.
# NOTE: re-running this cell raises KeyError, because New is already gone.
df = df.drop(columns='New')

In [36]:
# The New column is no longer present.
df

Unnamed: 0,U,V,W,X,Y,Z
A,81,99,99,26,24,59
B,83,86,95,38,29,31
C,41,46,46,83,89,60
D,45,44,13,84,40,45
E,27,74,55,27,89,55


## Conditional Selection

In [37]:
# The frame used for the filtering examples.
df

Unnamed: 0,U,V,W,X,Y,Z
A,81,99,99,26,24,59
B,83,86,95,38,29,31
C,41,46,46,83,89,60
D,45,44,13,84,40,45
E,27,74,55,27,89,55


In [38]:
# The column that the conditions below are built on.
df['X']

A    26
B    38
C    83
D    84
E    27
Name: X, dtype: int64

In [39]:
# Element-wise test: True where the value in X is even.
df['X'] % 2 == 0

A     True
B     True
C    False
D     True
E    False
Name: X, dtype: bool

In [40]:
# A boolean Series used as the indexer keeps only the rows where it is True.
df[df['X'] % 2 == 0]

Unnamed: 0,U,V,W,X,Y,Z
A,81,99,99,26,24,59
B,83,86,95,38,29,31
D,45,44,13,84,40,45


In [41]:
# Column Y for the rows where X is even, selected in one .loc call
# (row mask, column label) instead of two chained lookups.
df.loc[df['X'] % 2 == 0, 'Y']

A    24
B    29
D    40
Name: Y, dtype: int64

In [42]:
# Combine two conditions with & (element-wise "and"); each condition is
# wrapped in its own parentheses.
(df['X'] % 2 == 0) & (df['X'] > 50)

A    False
B    False
C    False
D     True
E    False
Name: X, dtype: bool

In [43]:
# Keep only the rows where X is both even and greater than 50.
df[(df['X'] % 2 == 0) & (df['X'] > 50)]

Unnamed: 0,U,V,W,X,Y,Z
D,45,44,13,84,40,45


## Setting an Index

In [44]:
# The frame before its index is reset.
df

Unnamed: 0,U,V,W,X,Y,Z
A,81,99,99,26,24,59
B,83,86,95,38,29,31
C,41,46,46,83,89,60
D,45,44,13,84,40,45
E,27,74,55,27,89,55


In [45]:
# Preview: the current index moves into a column named "index" and the rows
# are renumbered from 0. The result is not assigned, so df is unchanged.
df.reset_index()

Unnamed: 0,index,U,V,W,X,Y,Z
0,A,81,99,99,26,24,59
1,B,83,86,95,38,29,31
2,C,41,46,46,83,89,60
3,D,45,44,13,84,40,45
4,E,27,74,55,27,89,55


In [46]:
# Keep the reset frame by rebinding df to the result, rather than mutating
# the frame with inplace=True. The old A-E labels become the "index" column.
df = df.reset_index()

In [47]:
# df now has a 0..4 integer index and an "index" column.
df

Unnamed: 0,index,U,V,W,X,Y,Z
0,A,81,99,99,26,24,59
1,B,83,86,95,38,29,31
2,C,41,46,46,83,89,60
3,D,45,44,13,84,40,45
4,E,27,74,55,27,89,55


In [48]:
# split() with no arguments breaks the string on whitespace, giving one code per row.
df['States'] = "PB RJ DL CHD J&K".split()

In [51]:
# Show the list produced by the split used for the States column.
"PB RJ DL CHD J&K".split()

['PB', 'RJ', 'DL', 'CHD', 'J&K']

In [49]:
# States now appears as the last column.
df

Unnamed: 0,index,U,V,W,X,Y,Z,States
0,A,81,99,99,26,24,59,PB
1,B,83,86,95,38,29,31,RJ
2,C,41,46,46,83,89,60,DL
3,D,45,44,13,84,40,45,CHD
4,E,27,74,55,27,89,55,J&K


In [50]:
# Use the States column as the row index. The result is only displayed;
# it is not assigned back to df.
df.set_index('States')

Unnamed: 0_level_0,index,U,V,W,X,Y,Z
States,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
PB,A,81,99,99,26,24,59
RJ,B,83,86,95,38,29,31
DL,C,41,46,46,83,89,60
CHD,D,45,44,13,84,40,45
J&K,E,27,74,55,27,89,55


## Missing Values

In [57]:
# Toy frame for the missing-value examples: A has one NaN, B has three,
# C is complete, and D is entirely NaN.
d = dict(
    A=[1, 2, 3, np.nan],
    B=[5, np.nan, np.nan, np.nan],
    C=[10, 20, 30, 40],
    D=[np.nan] * 4,
)

df = pd.DataFrame(data=d)
df

Unnamed: 0,A,B,C,D
0,1.0,5.0,10,
1,2.0,,20,
2,3.0,,30,
3,,,40,


In [58]:
# True marks each missing (NaN) cell.
df.isnull()

Unnamed: 0,A,B,C,D
0,False,False,False,True
1,False,True,False,True
2,False,True,False,True
3,True,True,False,True


In [59]:
# Summing the boolean frame counts the missing values in each column.
df.isnull().sum()

A    1
B    3
C    0
D    4
dtype: int64

In [61]:
# axis=1 drops columns: any column containing a NaN is removed, leaving only C.
df.dropna(axis=1)

Unnamed: 0,C
0,10
1,20
2,30
3,40


In [62]:
# thresh=2 keeps a column only if it has at least two non-missing values,
# so A and C are kept while B and D are dropped.
df.dropna(axis=1, thresh=2)

Unnamed: 0,A,C
0,1.0,10
1,2.0,20
2,3.0,30
3,,40


In [63]:
# The dropna calls above were not assigned, so df still has all four columns.
df

Unnamed: 0,A,B,C,D
0,1.0,5.0,10,
1,2.0,,20,
2,3.0,,30,
3,,,40,


In [64]:
# Replace every NaN with the string "FILL"; the filled columns now mix
# numbers and strings.
df.fillna("FILL")

Unnamed: 0,A,B,C,D
0,1.0,5.0,10,FILL
1,2.0,FILL,20,FILL
2,3.0,FILL,30,FILL
3,FILL,FILL,40,FILL


In [65]:
# Fill each column's NaNs with that column's mean.
# D has no values at all, so its mean is NaN and the column stays empty.
df.fillna(df.mean())

Unnamed: 0,A,B,C,D
0,1.0,5.0,10,
1,2.0,5.0,20,
2,3.0,5.0,30,
3,2.0,5.0,40,


In [66]:
# Replace every NaN with 0, including the all-NaN column D.
df.fillna(0)

Unnamed: 0,A,B,C,D
0,1.0,5.0,10,0.0
1,2.0,0.0,20,0.0
2,3.0,0.0,30,0.0
3,0.0,0.0,40,0.0
