In [2]:
import pandas as pd
import numpy as np

In [4]:
# Build a small sample frame in which every column has at least one
# missing entry (np.nan), so there is something to detect, drop and fill.
data = dict(
    Name=['Alice', 'Bob', 'Charlie', np.nan, 'Emily'],
    Age=[24, np.nan, 22, 25, np.nan],
    Score=[85, 90, np.nan, 88, 95],
)
df = pd.DataFrame(data)


In [5]:
# Display the frame to see which cells are missing before handling them
df

Unnamed: 0,Name,Age,Score
0,Alice,24.0,85.0
1,Bob,,90.0
2,Charlie,22.0,
3,,25.0,88.0
4,Emily,,95.0


In [6]:
# .isnull() returns a boolean frame of the same shape:
# True where a value is missing, False where it is present
df.isnull()

Unnamed: 0,Name,Age,Score
0,False,False,False
1,False,True,False
2,False,False,True
3,True,False,False
4,False,True,False


In [7]:
# .notnull() is the inverse mask of .isnull():
# True where a value is present, False where it is missing
df.notnull()

Unnamed: 0,Name,Age,Score
0,True,True,True
1,True,False,True
2,True,True,False
3,False,True,True
4,True,False,True


In [9]:
# Count the missing entries in each column: summing the boolean mask
# down the rows counts every True as 1
df.isnull().sum(axis=0)

Name     1
Age      2
Score    1
dtype: int64

In [10]:
# Drop every row that contains at least one missing value
df.dropna(axis=0, how='any')

Unnamed: 0,Name,Age,Score
0,Alice,24.0,85.0


In [11]:
# Drop every column that contains at least one missing value.
# Each column in this frame has a NaN, so only the index remains.
df.dropna(axis='columns', how='any')

0
1
2
3
4


In [12]:
# Replace every NaN in the frame with the string 'Unknown'.
# This applies to the numeric columns too, which then hold a mix of
# numbers and strings.
df.fillna(value='Unknown')

Unnamed: 0,Name,Age,Score
0,Alice,24.0,85.0
1,Bob,Unknown,90.0
2,Charlie,22.0,Unknown
3,Unknown,25.0,88.0
4,Emily,Unknown,95.0


In [13]:
# Forward fill: replace each NaN with the closest earlier non-missing value
# in the same column.
# DataFrame.ffill() is used because fillna(method='ffill') is deprecated
# and emits a FutureWarning.
df.ffill()

  df.fillna(method='ffill')


Unnamed: 0,Name,Age,Score
0,Alice,24.0,85.0
1,Bob,24.0,90.0
2,Charlie,22.0,90.0
3,Charlie,25.0,88.0
4,Emily,25.0,95.0


In [15]:
# Backward fill: replace each NaN with the next non-missing value below it
# in the same column. A NaN with no value after it (the last Age here)
# stays NaN.
# DataFrame.bfill() is used because fillna(method='bfill') is deprecated
# and emits a FutureWarning.
df.bfill()

  df.fillna(method='bfill')


Unnamed: 0,Name,Age,Score
0,Alice,24.0,85.0
1,Bob,22.0,90.0
2,Charlie,22.0,88.0
3,Emily,25.0,88.0
4,Emily,,95.0


In [18]:
# Compute the mean of the known ages; NaN entries are skipped when averaging.
# This value is used to fill the missing ages.
mean_age = df['Age'].mean(skipna=True)
mean_age

np.float64(23.666666666666668)

In [20]:
# Replace the missing ages with mean_age.
# NOTE: this overwrites df['Age'] itself, so every cell run after this one
# sees the filled column rather than the original NaN values.
df['Age'] = df['Age'].fillna(mean_age)
df

Unnamed: 0,Name,Age,Score
0,Alice,24.0,85.0
1,Bob,23.666667,90.0
2,Charlie,22.0,
3,,25.0,88.0
4,Emily,23.666667,95.0


In [21]:
# Fill each column with its own replacement by passing a dict to fillna:
# a placeholder string for Name, the mean for Age and the median for Score.
# The result is stored in a new frame instead of being assigned back to df.
fill_values = {
    'Name': 'NoName',
    'Age': df['Age'].mean(),
    'Score': df['Score'].median(),
}
df_filled_dict = df.fillna(value=fill_values)

df_filled_dict

Unnamed: 0,Name,Age,Score
0,Alice,24.0,85.0
1,Bob,23.666667,90.0
2,Charlie,22.0,89.0
3,NoName,25.0,88.0
4,Emily,23.666667,95.0
