___



# Missing Data

Let's look at a few convenient methods for dealing with missing data in pandas:

In [1]:
import numpy as np
import pandas as pd

In [2]:
# Small demo frame: column A has one NaN, B has two, and C has none.
df = pd.DataFrame(
    dict(
        A=[1, 2, np.nan],
        B=[5, np.nan, np.nan],
        C=[1, 2, 3],
    )
)

In [3]:
# Display the frame: the NaN entries in A and B are the missing values handled below.
df

Unnamed: 0,A,B,C
0,1.0,5.0,1
1,2.0,,2
2,,,3


In [4]:
# Drop every row containing at least one NaN (axis=0 and how='any' are the defaults).
# Returns a new frame; `df` itself is not modified.
df.dropna(axis=0, how='any')

Unnamed: 0,A,B,C
0,1.0,5.0,1


In [5]:
# Drop every column containing at least one NaN; only the fully populated column C remains.
df.dropna(axis='columns')

Unnamed: 0,C
0,1
1,2
2,3


In [6]:
# Keep only rows with at least 2 non-NaN values; row 2 has a single one (C) and is dropped.
df.dropna(axis=0, thresh=2)

Unnamed: 0,A,B,C
0,1.0,5.0,1
1,2.0,,2


In [9]:
# Replace every NaN in the frame with the placeholder string 'VALUE'.
# Returns a new frame; `df` itself is not modified.
df.fillna('VALUE')

Unnamed: 0,A,B,C
0,1.0,5.0,1
1,2.0,VALUE,2
2,VALUE,VALUE,3


In [8]:
# Impute the missing entry in column A with the mean of its non-missing
# values: (1 + 2) / 2 = 1.5.
df['A'].fillna(df['A'].mean())

0    1.0
1    2.0
2    1.5
Name: A, dtype: float64

# Great Job!

In [48]:
# Second demo frame: W, X, Y and Z each contain NaN; S is fully populated.
data = pd.DataFrame(
    dict(
        W=[3, np.nan, np.nan, 7],
        X=[7, 5, np.nan, 9],
        Y=[8, 9, np.nan, 3],
        Z=[4, np.nan, 7, 6],
        S=[6, 5, 8, 1],
    )
)

In [49]:
# Display the frame; W, X, Y and Z contain NaN while S is fully populated.
data

Unnamed: 0,W,X,Y,Z,S
0,3.0,7.0,8.0,4.0,6
1,,5.0,9.0,,5
2,,,,7.0,8
3,7.0,9.0,3.0,6.0,1


In [50]:
# Drop every row containing at least one NaN (axis=0 and how='any' are the defaults);
# only rows 0 and 3 are complete.
data.dropna(axis=0, how='any')

Unnamed: 0,W,X,Y,Z,S
0,3.0,7.0,8.0,4.0,6
3,7.0,9.0,3.0,6.0,1


In [51]:
# Drop every column containing at least one NaN; only the fully populated column S remains.
data.dropna(axis='columns')

Unnamed: 0,S
0,6
1,5
2,8
3,1


In [57]:
# Keep only rows with at least 3 non-NaN values; row 2 has just two (Z and S) and is dropped.
data.dropna(axis=0, thresh=3)

Unnamed: 0,W,X,Y,Z,S
0,3.0,7.0,8.0,4.0,6
1,,5.0,9.0,,5
3,7.0,9.0,3.0,6.0,1


In [58]:
# Replace every NaN with 0. Returns a new frame; `data` itself is not modified.
data.fillna(0)

Unnamed: 0,W,X,Y,Z,S
0,3.0,7.0,8.0,4.0,6
1,0.0,5.0,9.0,0.0,5
2,0.0,0.0,0.0,7.0,8
3,7.0,9.0,3.0,6.0,1


In [59]:
# The dropna/fillna calls above were not assigned back, so `data` still holds its NaNs.
data

Unnamed: 0,W,X,Y,Z,S
0,3.0,7.0,8.0,4.0,6
1,,5.0,9.0,,5
2,,,,7.0,8
3,7.0,9.0,3.0,6.0,1


In [61]:
# Double brackets select a one-column DataFrame (not a Series). Its .mean() is a
# Series keyed by column name, so fillna applies each column's mean to that column.
# Here the NaN in X becomes (7 + 5 + 9) / 3 = 7.0.
data[['X']].fillna(data[['X']].mean())

Unnamed: 0,X
0,7.0
1,5.0
2,7.0
3,9.0
