In [1]:
import pandas as pd
import numpy as np

In [2]:
data = {
    'Name': ['John', 'Alice', 'Bob', 'NaN', 'Eve'],
    'Age': [25, np.nan, 22, 23, np.nan],
    'City': ['New York', 'Los Angeles', np.nan, 'Chicago', 'Houston']
}

df = pd.DataFrame(data)

print("Original DataFrame with Missing Values:")
print(df)

Original DataFrame with Missing Values:
    Name   Age         City
0   John  25.0     New York
1  Alice   NaN  Los Angeles
2    Bob  22.0          NaN
3    NaN  23.0      Chicago
4    Eve   NaN      Houston


In [3]:
print("Check for missing values using isnull():")
print(df.isnull())

Check for missing values using isnull():
    Name    Age   City
0  False  False  False
1  False   True  False
2  False  False   True
3  False  False  False
4  False   True  False


In [4]:
print("Count of missing values in each column:")
print(df.isnull().sum())

Count of missing values in each column:
Name    0
Age     2
City    1
dtype: int64


In [5]:
df_dropna = df.dropna()

print("DataFrame after dropping rows with missing values:")
print(df_dropna)

DataFrame after dropping rows with missing values:
   Name   Age      City
0  John  25.0  New York
3   NaN  23.0   Chicago


In [6]:
df_fillna = df.fillna({
    'Name': 'Unknown',
    'Age': df['Age'].mean(),  # Filling Age with average
    'City': 'Unknown City'
})

print("DataFrame after filling missing values:")
print(df_fillna)

DataFrame after filling missing values:
    Name        Age          City
0   John  25.000000      New York
1  Alice  23.333333   Los Angeles
2    Bob  22.000000  Unknown City
3    NaN  23.000000       Chicago
4    Eve  23.333333       Houston


In [7]:
# Replace string "NaN" with actual np.nan
df['Name'] = df['Name'].replace("NaN", np.nan)

# Interpolate Age column
df_interpolated = df.copy()
df_interpolated['Age'] = df_interpolated['Age'].interpolate()

print("DataFrame after interpolating missing 'Age' values:")
print(df_interpolated)

DataFrame after interpolating missing 'Age' values:
    Name   Age         City
0   John  25.0     New York
1  Alice  23.5  Los Angeles
2    Bob  22.0          NaN
3    NaN  23.0      Chicago
4    Eve  23.0      Houston


In [8]:
# Forward fill using the updated method
df_ffill = df.ffill()

# Backward fill using the updated method
df_bfill = df.bfill()

print("Forward Fill (ffill):")
print(df_ffill)

print("\nBackward Fill (bfill):")
print(df_bfill)

Forward Fill (ffill):
    Name   Age         City
0   John  25.0     New York
1  Alice  25.0  Los Angeles
2    Bob  22.0  Los Angeles
3    Bob  23.0      Chicago
4    Eve  23.0      Houston

Backward Fill (bfill):
    Name   Age         City
0   John  25.0     New York
1  Alice  22.0  Los Angeles
2    Bob  22.0      Chicago
3    Eve  23.0      Chicago
4    Eve   NaN      Houston
