In [2]:
import pandas as pd
import numpy as np

In [3]:
# Toy data set: columns A, C and D contain NaNs, column B is complete.
data = dict(
    A=[1, 2, np.nan, 4, 5],
    B=[6, 2, 3, 4, 5],
    C=[1, 2, 3, np.nan, np.nan],
    D=[1, np.nan, np.nan, np.nan, 5],
)
df = pd.DataFrame(data)
df

Unnamed: 0,A,B,C,D
0,1.0,6,1.0,1.0
1,2.0,2,2.0,
2,,3,3.0,
3,4.0,4,,
4,5.0,5,,5.0


# Finding Missing Data

## Check Missing Values

In [4]:
# Element-wise mask: True marks a missing (NaN) cell, False a present one.
na_mask = df.isna()
na_mask

Unnamed: 0,A,B,C,D
0,False,False,False,False
1,False,False,False,True
2,True,False,False,True
3,False,False,True,True
4,False,False,True,False


## Count Missing Values

In [5]:
# Summing the boolean mask down each column counts its NaNs (True counts as 1).
df.isna().sum(axis=0)

A    1
B    0
C    2
D    3
dtype: int64

## Finding Whether Any Value Is Missing

Per column (does the column contain any missing value?)

In [6]:
# Reduce down the rows (axis=0): one flag per column, True if that column has any NaN.
df.isna().any(axis=0)

A     True
B    False
C     True
D     True
dtype: bool

Per row (does the row contain any missing value?)

In [7]:
# Reduce across the columns: one flag per row, True if that row has any NaN.
df.isna().any(axis="columns")

0    False
1     True
2     True
3     True
4     True
dtype: bool

# Removing Missing Data

## Drop rows with missing values

In [8]:
# Defaults spelled out: drop every row (axis=0) that has at least one NaN.
# Returns a new frame; df itself is left untouched.
df.dropna(axis=0, how="any")

Unnamed: 0,A,B,C,D
0,1.0,6,1.0,1.0


## Drop columns with missing values

In [9]:
# Drop every column that has at least one NaN; only the complete column B survives.
df.dropna(axis="columns")

Unnamed: 0,B
0,6
1,2
2,3
3,4
4,5


## Drop rows using `thresh` (minimum number of non-missing values a row needs to be kept)

In [10]:
# Keep only rows with at least 3 non-missing values; rows 2 and 3 have just 2 and are dropped.
df.dropna(axis=0, thresh=3)

Unnamed: 0,A,B,C,D
0,1.0,6,1.0,1.0
1,2.0,2,2.0,
4,5.0,5,,5.0


# Filling Missing Data

## Fill missing values with a constant (most common approach)

In [11]:
# Replace every NaN with the constant 0 (returns a new frame; df is unchanged).
df.fillna(value=0)

Unnamed: 0,A,B,C,D
0,1.0,6,1.0,1.0
1,2.0,2,2.0,0.0
2,0.0,3,3.0,0.0
3,4.0,4,0.0,0.0
4,5.0,5,0.0,5.0


## Fill with column mean (Data Science style 🔥)

In [12]:
# Per-column means (NaNs are skipped by default); fillna aligns the Series
# on column labels, so each column's NaNs receive that column's own mean.
column_means = df.mean()
df.fillna(column_means)

Unnamed: 0,A,B,C,D
0,1.0,6,1.0,1.0
1,2.0,2,2.0,3.0
2,3.0,3,3.0,3.0
3,4.0,4,2.0,3.0
4,5.0,5,2.0,5.0


## Fill with a different value per column (dict passed as `value`)

In [14]:
# Column-specific replacements: each key names a column, each value fills that column's NaNs.
val = dict(A=3, B=2, C=6, D=1.5)
df.fillna(val)

Unnamed: 0,A,B,C,D
0,1.0,6,1.0,1.0
1,2.0,2,2.0,1.5
2,3.0,3,3.0,1.5
3,4.0,4,6.0,1.5
4,5.0,5,6.0,5.0
