In [56]:
import pandas as pd
import numpy as np

In [57]:
# Toy frame for the missing-data examples, written column by column.
# A has one observed value, C has none, and the last row is entirely NaN.
df = pd.DataFrame({
    'A': [np.nan, 3, np.nan, np.nan, np.nan],
    'B': [2, 4, np.nan, 3, np.nan],
    'C': [np.nan] * 5,
    'D': [0, 1, 5, 4, np.nan],
})
df

Unnamed: 0,A,B,C,D
0,,2.0,,0.0
1,3.0,4.0,,1.0
2,,,,5.0
3,,3.0,,4.0
4,,,,


In [58]:
# Number of missing entries in each column (isna is the same check as isnull).
df.isna().sum()

A    4
B    2
C    5
D    1
dtype: int64

In [59]:
# Boolean mask of the whole frame: True wherever a value is missing.
pd.isna(df)

Unnamed: 0,A,B,C,D
0,True,False,True,False
1,False,False,True,False
2,True,True,True,False
3,True,False,True,False
4,True,True,True,True


In [60]:
# Missing-value mask for the single column A.
df['A'].isna()

0     True
1    False
2     True
3     True
4     True
Name: A, dtype: bool

In [61]:
# Inverse mask: True wherever a value is present.
pd.notna(df)

Unnamed: 0,A,B,C,D
0,False,True,False,True
1,True,True,False,True
2,False,False,False,True
3,False,True,False,True
4,False,False,False,False


In [62]:
# Show df again before the fillna examples; the isna/notna checks above only
# returned masks, so the frame still contains its original NaNs.
df

Unnamed: 0,A,B,C,D
0,,2.0,,0.0
1,3.0,4.0,,1.0
2,,,,5.0
3,,3.0,,4.0
4,,,,


In [63]:
# Replace every NaN with the constant 0; the result is a new frame, df is untouched.
df.fillna(value=0)

Unnamed: 0,A,B,C,D
0,0.0,2.0,0.0,0.0
1,3.0,4.0,0.0,1.0
2,0.0,0.0,0.0,5.0
3,0.0,3.0,0.0,4.0
4,0.0,0.0,0.0,0.0


In [64]:
# Fill each column with its own mean. Column C has no observed values, so its
# mean is NaN and the column stays empty in the result.
df.fillna(value=df.mean(axis='index'))

Unnamed: 0,A,B,C,D
0,3.0,2.0,,0.0
1,3.0,4.0,,1.0
2,3.0,3.0,,5.0
3,3.0,3.0,,4.0
4,3.0,3.0,,2.5


In [65]:
# Mean-fill only columns A through B (label slice, both ends included);
# C and D are not in the fill values, so their NaNs are left alone.
df.fillna(value=df.loc[:, 'A':'B'].mean())

Unnamed: 0,A,B,C,D
0,3.0,2.0,,0.0
1,3.0,4.0,,1.0
2,3.0,3.0,,5.0
3,3.0,3.0,,4.0
4,3.0,3.0,,


In [66]:
# None of the fillna calls above were assigned back or run in place,
# so df still shows the original missing values.
df

Unnamed: 0,A,B,C,D
0,,2.0,,0.0
1,3.0,4.0,,1.0
2,,,,5.0
3,,3.0,,4.0
4,,,,


In [67]:
# Forward-fill: carry the last observed value down each column.
# DataFrame.ffill() is the supported spelling; the `method=` keyword of fillna
# is deprecated in recent pandas and appears to be what produced the warning
# echoed in this cell's saved output.
# Values with nothing above them (row 0 of A, all of C) remain NaN.
df.ffill()

  df.fillna(method='ffill')


Unnamed: 0,A,B,C,D
0,,2.0,,0.0
1,3.0,4.0,,1.0
2,3.0,4.0,,5.0
3,3.0,3.0,,4.0
4,3.0,3.0,,4.0


In [68]:
# Per-column fill values, with limit=1 capping the fill at one NaN per column:
# only the first gap in each column is replaced, later ones stay NaN.
df.fillna(value=dict(A=0, B=1, C=2, D=3), limit=1)

Unnamed: 0,A,B,C,D
0,0.0,2.0,2.0,0.0
1,3.0,4.0,,1.0
2,,1.0,,5.0
3,,3.0,,4.0
4,,,,3.0


In [69]:
# Same per-column fill values without a limit: every gap in a column
# receives that column's replacement.
df.fillna(
    value={
        'A': 0,
        'B': 1,
        'C': 2,
        'D': 3,
    }
)

Unnamed: 0,A,B,C,D
0,0.0,2.0,2.0,0.0
1,3.0,4.0,2.0,1.0
2,0.0,1.0,2.0,5.0
3,0.0,3.0,2.0,4.0
4,0.0,1.0,2.0,3.0


In [70]:
# Original frame once more, as the starting point for the dropna examples.
df

Unnamed: 0,A,B,C,D
0,,2.0,,0.0
1,3.0,4.0,,1.0
2,,,,5.0
3,,3.0,,4.0
4,,,,


In [71]:
# Drop every row containing at least one NaN. Column C is empty in all rows,
# so nothing survives and the result has no rows.
df.dropna(how='any')

Unnamed: 0,A,B,C,D


In [72]:
# Row-wise drop spelled with the named axis; same empty result as the default call.
df.dropna(axis='index')

Unnamed: 0,A,B,C,D


In [73]:
# Column-wise drop: remove every column containing a NaN. Row 4 is entirely
# NaN, so all four columns go and only the index is left.
df.dropna(axis='columns')

0
1
2
3
4


In [74]:
# Drop the missing entries of column B alone; the surviving values keep
# their original row labels.
df.loc[:, 'B'].dropna()

0    2.0
1    4.0
3    3.0
Name: B, dtype: float64

In [75]:
# dropna returned new objects above; df itself still has all five rows.
df

Unnamed: 0,A,B,C,D
0,,2.0,,0.0
1,3.0,4.0,,1.0
2,,,,5.0
3,,3.0,,4.0
4,,,,


In [76]:
# Keep only rows that have at least 2 non-NaN values; rows with fewer are dropped.
df.dropna(axis='index', thresh=2)

Unnamed: 0,A,B,C,D
0,,2.0,,0.0
1,3.0,4.0,,1.0
3,,3.0,,4.0


In [77]:
# Drop a row only when every one of its columns is NaN (here: row 4).
df.dropna(axis='index', how='all')

Unnamed: 0,A,B,C,D
0,,2.0,,0.0
1,3.0,4.0,,1.0
2,,,,5.0
3,,3.0,,4.0


In [78]:
# Unmodified frame again, for comparison with the subset-based drop below.
df

Unnamed: 0,A,B,C,D
0,,2.0,,0.0
1,3.0,4.0,,1.0
2,,,,5.0
3,,3.0,,4.0
4,,,,


In [79]:
# Drop rows based on column B only: a row is removed when its B value is NaN,
# regardless of what the other columns contain.
df.dropna(axis='index', subset=['B'])

Unnamed: 0,A,B,C,D
0,,2.0,,0.0
1,3.0,4.0,,1.0
3,,3.0,,4.0


In [80]:
# Imputing missing values with scikit-learn's SimpleImputer (column means)

In [81]:
from sklearn.impute import SimpleImputer

# Mean-impute each column. Fit and transform on the same DataFrame: the
# earlier version fitted on `df` but transformed `df.values`, and scikit-learn
# warns when an estimator fitted with feature names is given a bare ndarray.
# Column C has no observed values, so no mean can be learned for it and it is
# absent from the result (the returned array holds columns A, B and D only).
imr = SimpleImputer(strategy='mean')
imr = imr.fit(df)
imputed_data = imr.transform(df)



In [82]:
# The imputer wrote its result to a separate array; df still has its NaNs.
df

Unnamed: 0,A,B,C,D
0,,2.0,,0.0
1,3.0,4.0,,1.0
2,,,,5.0
3,,3.0,,4.0
4,,,,


In [83]:
# Mean-imputed values as a NumPy array. It has three columns (A, B, D):
# the all-NaN column C does not appear in the imputer's output.
imputed_data

array([[3. , 2. , 0. ],
       [3. , 4. , 1. ],
       [3. , 3. , 5. ],
       [3. , 3. , 4. ],
       [3. , 3. , 2.5]])