In [1]:
import numpy as np 
import pandas as pd

In [2]:

# Small demo frame: columns A, C and D contain NaNs in different rows,
# while column B is complete and serves as the "nothing missing" baseline.
data = dict(
    A=[1, 2, np.nan, 4, 5],
    B=[1, 2, 3, 4, 5],
    C=[1, 2, 3, np.nan, np.nan],
    D=[1, np.nan, np.nan, np.nan, 5],
)
df = pd.DataFrame(data=data)
df

Unnamed: 0,A,B,C,D
0,1.0,1,1.0,1.0
1,2.0,2,2.0,
2,,3,3.0,
3,4.0,4,,
4,5.0,5,,5.0


### Finding Missing Data

In [3]:
df.isna()  # Boolean mask of the same shape as df: True wherever a value is missing (NaN)

Unnamed: 0,A,B,C,D
0,False,False,False,False
1,False,False,False,True
2,True,False,False,True
3,False,False,True,True
4,False,False,True,False


In [4]:
df.isna().sum()  # Number of missing values per column (each True in the mask counts as 1)

A    1
B    0
C    2
D    3
dtype: int64

In [5]:
df.isna().any()  # Per column: True if the column contains at least one missing value

A     True
B    False
C     True
D     True
dtype: bool

### Removing Missing Data

In [6]:
df  # Original frame again, as the starting point for the removal examples


Unnamed: 0,A,B,C,D
0,1.0,1,1.0,1.0
1,2.0,2,2.0,
2,,3,3.0,
3,4.0,4,,
4,5.0,5,,5.0


In [7]:
df.dropna()  # Drop every row that has at least one missing value; returns a new frame, df itself is not modified

Unnamed: 0,A,B,C,D
0,1.0,1,1.0,1.0


In [9]:
df  # Still has all 5 rows: dropna() above returned a new DataFrame instead of changing df

Unnamed: 0,A,B,C,D
0,1.0,1,1.0,1.0
1,2.0,2,2.0,
2,,3,3.0,
3,4.0,4,,
4,5.0,5,,5.0


In [10]:
df.dropna(thresh=1)  # Keep rows with at least 1 non-missing value (only all-NaN rows are dropped; none here)

Unnamed: 0,A,B,C,D
0,1.0,1,1.0,1.0
1,2.0,2,2.0,
2,,3,3.0,
3,4.0,4,,
4,5.0,5,,5.0


### Filling Missing Data

In [11]:
df  # Original frame again, as the starting point for the filling examples

Unnamed: 0,A,B,C,D
0,1.0,1,1.0,1.0
1,2.0,2,2.0,
2,,3,3.0,
3,4.0,4,,
4,5.0,5,,5.0


In [12]:
df.fillna(0)  # Replace every missing value with 0; returns a new frame, df itself is not modified

Unnamed: 0,A,B,C,D
0,1.0,1,1.0,1.0
1,2.0,2,2.0,0.0
2,0.0,3,3.0,0.0
3,4.0,4,0.0,0.0
4,5.0,5,0.0,5.0


In [13]:
# Replacement value per column; an entry is only used where that column has a NaN
# (column B has no missing values, so 99 never appears in the result).
values = dict(A=0, B=99, C=100, D=200)
df.fillna(values)  # Fill each column's missing values with its own replacement

Unnamed: 0,A,B,C,D
0,1.0,1,1.0,1.0
1,2.0,2,2.0,200.0
2,0.0,3,3.0,200.0
3,4.0,4,100.0,200.0
4,5.0,5,100.0,5.0


In [14]:
df  # NaNs are still present: the fillna() calls above returned new frames instead of changing df

Unnamed: 0,A,B,C,D
0,1.0,1,1.0,1.0
1,2.0,2,2.0,
2,,3,3.0,
3,4.0,4,,
4,5.0,5,,5.0


In [15]:
df.fillna(df.mean())  # Fill each column's missing values with that column's mean (computed over its non-missing values)

Unnamed: 0,A,B,C,D
0,1.0,1,1.0,1.0
1,2.0,2,2.0,3.0
2,3.0,3,3.0,3.0
3,4.0,4,2.0,3.0
4,5.0,5,2.0,5.0
