In [1]:
import numpy as np
import pandas as pd

In [2]:
# Toy data set: column B is complete, while A, C and D contain
# one, two and three missing values (np.nan) respectively.
data = dict(
    A=[1, 2, np.nan, 4, 5],
    B=[1, 2, 3, 4, 5],
    C=[1, 2, 3, np.nan, np.nan],
    D=[1, np.nan, np.nan, np.nan, 5],
)
df = pd.DataFrame(data)

In [3]:
# Show the whole frame (it has only five rows) so the NaN positions are visible.
df

Unnamed: 0,A,B,C,D
0,1.0,1,1.0,1.0
1,2.0,2,2.0,
2,,3,3.0,
3,4.0,4,,
4,5.0,5,,5.0


Finding Missing Data

In [4]:
# Boolean mask with the same shape as df: True wherever a value is missing.
df.isna()

Unnamed: 0,A,B,C,D
0,False,False,False,False
1,False,False,False,True
2,True,False,False,True
3,False,False,True,True
4,False,False,True,False


In [5]:
# Count missing values per column: True counts as 1 when summed down the rows.
df.isna().sum(axis=0)

A    1
B    0
C    2
D    3
dtype: int64

In [6]:
# Flag each column that contains at least one missing value.
df.isna().any(axis=0)

A     True
B    False
C     True
D     True
dtype: bool

In [7]:
# Total number of missing cells in the whole frame, computed as a single
# reduction over the underlying boolean array.
df.isna().to_numpy().sum()

np.int64(6)

Removing Missing Data

In [8]:
# Drop every row that has at least one missing value (defaults spelled out).
# Returns a new frame; df itself is not modified.
df.dropna(axis=0, how='any')

Unnamed: 0,A,B,C,D
0,1.0,1,1.0,1.0


In [9]:
# Drop every column that has at least one missing value; only B is complete.
df.dropna(axis='columns')

Unnamed: 0,B
0,1
1,2
2,3
3,4
4,5


In [10]:
# Keep only rows with at least 3 non-missing values (thresh counts valid cells,
# not missing ones).
df.dropna(axis='index', thresh=3)

Unnamed: 0,A,B,C,D
0,1.0,1,1.0,1.0
1,2.0,2,2.0,
4,5.0,5,,5.0


Filling Missing Data

In [11]:
# Re-display the original frame: the dropna calls above returned new frames
# and left df unchanged.
df

Unnamed: 0,A,B,C,D
0,1.0,1,1.0,1.0
1,2.0,2,2.0,
2,,3,3.0,
3,4.0,4,,
4,5.0,5,,5.0


In [12]:
# Replace every missing value with the constant 0.
df.fillna(value=0)

Unnamed: 0,A,B,C,D
0,1.0,1,1.0,1.0
1,2.0,2,2.0,0.0
2,0.0,3,3.0,0.0
3,4.0,4,0.0,0.0
4,5.0,5,0.0,5.0


In [13]:
# Fill the gaps in a single column with that column's mean
# (mean() skips NaN, so it is computed from the valid values only).
df['A'].fillna(value=df['A'].mean())

0    1.0
1    2.0
2    3.0
3    4.0
4    5.0
Name: A, dtype: float64

In [14]:
# Fill every column's gaps with that column's own mean: the Series of
# per-column means is matched to the frame by column label.
df.fillna(value=df.mean(axis=0))

Unnamed: 0,A,B,C,D
0,1.0,1,1.0,1.0
1,2.0,2,2.0,3.0
2,3.0,3,3.0,3.0
3,4.0,4,2.0,3.0
4,5.0,5,2.0,5.0


In [15]:
# Fill each column's gaps with that column's standard deviation.
# NOTE(review): std measures spread rather than a typical value, so this is
# presumably here only to show that any per-column Series can be used as the
# fill value -- confirm intent before using it as an imputation strategy.
df.fillna(df.std())

Unnamed: 0,A,B,C,D
0,1.0,1,1.0,1.0
1,2.0,2,2.0,2.828427
2,1.825742,3,3.0,2.828427
3,4.0,4,1.0,2.828427
4,5.0,5,1.0,5.0


In [16]:
# Forward-fill: each missing value takes the last valid value above it in the
# same column. df.ffill() already returns the filled frame, so wrapping it in
# df.fillna(...) only added a redundant second pass with an identical result.
df.ffill()

Unnamed: 0,A,B,C,D
0,1.0,1,1.0,1.0
1,2.0,2,2.0,1.0
2,2.0,3,3.0,1.0
3,4.0,4,3.0,1.0
4,5.0,5,3.0,5.0


In [17]:
# Backward-fill: each missing value takes the next valid value below it in the
# same column. Trailing gaps (column C, rows 3-4) stay NaN because there is no
# later value to copy. df.bfill() already returns the filled frame, so the
# df.fillna(...) wrapper was redundant.
df.bfill()

Unnamed: 0,A,B,C,D
0,1.0,1,1.0,1.0
1,2.0,2,2.0,5.0
2,4.0,3,3.0,5.0
3,4.0,4,,5.0
4,5.0,5,,5.0


In [18]:
# Per-column fill values: each key names a column and maps to the replacement
# used for that column's missing entries.
values = dict(A=10, B=0, C=50, D=100)
df.fillna(values)

Unnamed: 0,A,B,C,D
0,1.0,1,1.0,1.0
1,2.0,2,2.0,100.0
2,10.0,3,3.0,100.0
3,4.0,4,50.0,100.0
4,5.0,5,50.0,5.0
