In [1]:
import pandas as pd
import numpy as np

In [2]:
# Creating a DataFrame for Game of Thrones characters with missing data
data = {
    'Name': ['Jon Snow', 'Daenerys Targaryen', 'Tyrion Lannister', 'Arya Stark', 'Cersei Lannister'],
    'House': ['Stark', 'Targaryen', np.nan, 'Stark', 'Lannister'],
    'Allegiance': ['Night\'s Watch', np.nan, 'House Lannister', 'House Stark', 'House Lannister'],
    'Status': ['Alive', 'Deceased', 'Alive', np.nan, 'Deceased'],
    'Age': [30, np.nan, 38, 20, 42]
}

In [3]:
got_df = pd.DataFrame(data)
print("Game of Thrones Characters DataFrame with Missing Data:")
print(got_df)

Game of Thrones Characters DataFrame with Missing Data:
                 Name      House       Allegiance    Status   Age
0            Jon Snow      Stark    Night's Watch     Alive  30.0
1  Daenerys Targaryen  Targaryen              NaN  Deceased   NaN
2    Tyrion Lannister        NaN  House Lannister     Alive  38.0
3          Arya Stark      Stark      House Stark       NaN  20.0
4    Cersei Lannister  Lannister  House Lannister  Deceased  42.0


In [4]:
# Example 1: Checking for missing values in the DataFrame
print("\nExample 1: Checking for missing values in the DataFrame:")
print(got_df.isnull())


Example 1: Checking for missing values in the DataFrame:
    Name  House  Allegiance  Status    Age
0  False  False       False   False  False
1  False  False        True   False   True
2  False   True       False   False  False
3  False  False       False    True  False
4  False  False       False   False  False


In [5]:
# Example 2: Counting the number of missing values in each column
print("\nExample 2: Counting the number of missing values in each column:")
print(got_df.isnull().sum())


Example 2: Counting the number of missing values in each column:
Name          0
House         1
Allegiance    1
Status        1
Age           1
dtype: int64


In [6]:
# Example 3: Counting the total number of missing values in the DataFrame
print("\nExample 3: Counting the total number of missing values in the DataFrame:")
print(got_df.isnull().sum().sum())


Example 3: Counting the total number of missing values in the DataFrame:
4


In [7]:
# Example 4: Dropping rows with any missing values
print("\nExample 4: Dropping rows with any missing values:")
got_df_dropped = got_df.dropna()
print(got_df_dropped)


Example 4: Dropping rows with any missing values:
               Name      House       Allegiance    Status   Age
0          Jon Snow      Stark    Night's Watch     Alive  30.0
4  Cersei Lannister  Lannister  House Lannister  Deceased  42.0


In [8]:
# Example 5: Dropping rows with all missing values
print("\nExample 5: Dropping rows with all missing values:")
got_df_dropped_all = got_df.dropna(how='all')
print(got_df_dropped_all)



Example 5: Dropping rows with all missing values:
                 Name      House       Allegiance    Status   Age
0            Jon Snow      Stark    Night's Watch     Alive  30.0
1  Daenerys Targaryen  Targaryen              NaN  Deceased   NaN
2    Tyrion Lannister        NaN  House Lannister     Alive  38.0
3          Arya Stark      Stark      House Stark       NaN  20.0
4    Cersei Lannister  Lannister  House Lannister  Deceased  42.0


In [9]:
# Example 6: Dropping columns with any missing values
print("\nExample 6: Dropping columns with any missing values:")
got_df_dropped_columns = got_df.dropna(axis=1)
print(got_df_dropped_columns)



Example 6: Dropping columns with any missing values:
                 Name
0            Jon Snow
1  Daenerys Targaryen
2    Tyrion Lannister
3          Arya Stark
4    Cersei Lannister


In [10]:
# Example 7: Dropping columns with all missing values
print("\nExample 7: Dropping columns with all missing values:")
got_df_dropped_columns_all = got_df.dropna(axis=1, how='all')
print(got_df_dropped_columns_all)


Example 7: Dropping columns with all missing values:
                 Name      House       Allegiance    Status   Age
0            Jon Snow      Stark    Night's Watch     Alive  30.0
1  Daenerys Targaryen  Targaryen              NaN  Deceased   NaN
2    Tyrion Lannister        NaN  House Lannister     Alive  38.0
3          Arya Stark      Stark      House Stark       NaN  20.0
4    Cersei Lannister  Lannister  House Lannister  Deceased  42.0


In [11]:
# Example 8: Filling missing values with a specific value
print("\nExample 8: Filling missing values with a specific value (House with 'Unknown'):")
got_df_filled = got_df.fillna(value='Unknown')
print(got_df_filled)


Example 8: Filling missing values with a specific value (House with 'Unknown'):
                 Name      House       Allegiance    Status      Age
0            Jon Snow      Stark    Night's Watch     Alive     30.0
1  Daenerys Targaryen  Targaryen          Unknown  Deceased  Unknown
2    Tyrion Lannister    Unknown  House Lannister     Alive     38.0
3          Arya Stark      Stark      House Stark   Unknown     20.0
4    Cersei Lannister  Lannister  House Lannister  Deceased     42.0


In [12]:
# Example 9: Filling missing values with the mean of the column
print("\nExample 9: Filling missing values with the mean of the column (Age with mean age):")
mean_age = got_df['Age'].mean()
got_df_filled_age = got_df.fillna(value={'Age': mean_age})
print(got_df_filled_age)



Example 9: Filling missing values with the mean of the column (Age with mean age):
                 Name      House       Allegiance    Status   Age
0            Jon Snow      Stark    Night's Watch     Alive  30.0
1  Daenerys Targaryen  Targaryen              NaN  Deceased  32.5
2    Tyrion Lannister        NaN  House Lannister     Alive  38.0
3          Arya Stark      Stark      House Stark       NaN  20.0
4    Cersei Lannister  Lannister  House Lannister  Deceased  42.0


In [13]:
# Example 10: Filling missing values with the forward fill method (ffill)
print("\nExample 10: Filling missing values with the forward fill method (ffill):")
got_df_ffill = got_df.fillna(method='ffill')
print(got_df_ffill)


Example 10: Filling missing values with the forward fill method (ffill):
                 Name      House       Allegiance    Status   Age
0            Jon Snow      Stark    Night's Watch     Alive  30.0
1  Daenerys Targaryen  Targaryen    Night's Watch  Deceased  30.0
2    Tyrion Lannister  Targaryen  House Lannister     Alive  38.0
3          Arya Stark      Stark      House Stark     Alive  20.0
4    Cersei Lannister  Lannister  House Lannister  Deceased  42.0


In [14]:
# Example 11: Filling missing values with the backward fill method (bfill)
print("\nExample 11: Filling missing values with the backward fill method (bfill):")
got_df_bfill = got_df.fillna(method='bfill')
print(got_df_bfill)



Example 11: Filling missing values with the backward fill method (bfill):
                 Name      House       Allegiance    Status   Age
0            Jon Snow      Stark    Night's Watch     Alive  30.0
1  Daenerys Targaryen  Targaryen  House Lannister  Deceased  38.0
2    Tyrion Lannister      Stark  House Lannister     Alive  38.0
3          Arya Stark      Stark      House Stark  Deceased  20.0
4    Cersei Lannister  Lannister  House Lannister  Deceased  42.0


In [15]:
# Example 12: Interpolate missing values using linear interpolation method
print("\nExample 12: Interpolate missing values using linear interpolation method:")
got_df_linear_interpolate = got_df.interpolate(method='linear')
print(got_df_linear_interpolate)


Example 12: Interpolate missing values using linear interpolation method:
                 Name      House       Allegiance    Status   Age
0            Jon Snow      Stark    Night's Watch     Alive  30.0
1  Daenerys Targaryen  Targaryen              NaN  Deceased  34.0
2    Tyrion Lannister        NaN  House Lannister     Alive  38.0
3          Arya Stark      Stark      House Stark       NaN  20.0
4    Cersei Lannister  Lannister  House Lannister  Deceased  42.0


In [16]:
# Example 13: Interpolate missing values using polynomial interpolation method
print("\nExample 13: Interpolate missing values using polynomial interpolation method:")
got_df_poly_interpolate = got_df.interpolate(method='polynomial', order=2)
print(got_df_poly_interpolate)


Example 13: Interpolate missing values using polynomial interpolation method:
                 Name      House       Allegiance    Status        Age
0            Jon Snow      Stark    Night's Watch     Alive  30.000000
1  Daenerys Targaryen  Targaryen              NaN  Deceased  44.210526
2    Tyrion Lannister        NaN  House Lannister     Alive  38.000000
3          Arya Stark      Stark      House Stark       NaN  20.000000
4    Cersei Lannister  Lannister  House Lannister  Deceased  42.000000


In [17]:
# Example 14: Interpolate missing values using time-based interpolation method
print("\nExample 14: Interpolate missing values using time-based interpolation method:")
got_df_time_interpolate = got_df.interpolate(method='time')
print(got_df_time_interpolate)


Example 14: Interpolate missing values using time-based interpolation method:


ValueError: time-weighted interpolation only works on Series or DataFrames with a DatetimeIndex

In [18]:
# Example 15: Replacing missing values with the mean of each House
print("\nExample 15: Replacing missing values with the mean of each House:")
got_df_mean_by_house = got_df.groupby('House').transform(lambda x: x.fillna(x.mean()))
print(got_df_mean_by_house)


Example 15: Replacing missing values with the mean of each House:
    Age
0  30.0
1   NaN
2   NaN
3  20.0
4  42.0


  got_df_mean_by_house = got_df.groupby('House').transform(lambda x: x.fillna(x.mean()))


In [19]:
# Example 16: Using forward fill method within each House group
print("\nExample 16: Using forward fill method within each House group:")
got_df_ffill_by_house = got_df.groupby('House').fillna(method='ffill')
print(got_df_ffill_by_house)


Example 16: Using forward fill method within each House group:
                 Name       Allegiance    Status   Age
0  Daenerys Targaryen              NaN  Deceased   NaN
1    Cersei Lannister  House Lannister  Deceased  42.0
2                 NaN              NaN       NaN   NaN
3          Arya Stark      House Stark     Alive  20.0
4            Jon Snow    Night's Watch     Alive  30.0


In [20]:
# Example 17: Using backward fill method within each House group
print("\nExample 17: Using backward fill method within each House group:")
got_df_bfill_by_house = got_df.groupby('House').fillna(method='bfill')
print(got_df_bfill_by_house)


Example 17: Using backward fill method within each House group:
                 Name       Allegiance    Status   Age
0  Daenerys Targaryen              NaN  Deceased   NaN
1    Cersei Lannister  House Lannister  Deceased  42.0
2                 NaN              NaN       NaN   NaN
3          Arya Stark      House Stark       NaN  20.0
4            Jon Snow    Night's Watch     Alive  30.0


In [21]:
# Example 18: Dropping rows with any missing values in specific columns
print("\nExample 18: Dropping rows with any missing values in specific columns:")
got_df_dropped_specific_cols = got_df.dropna(subset=['House', 'Allegiance'])
print(got_df_dropped_specific_cols)



Example 18: Dropping rows with any missing values in specific columns:
               Name      House       Allegiance    Status   Age
0          Jon Snow      Stark    Night's Watch     Alive  30.0
3        Arya Stark      Stark      House Stark       NaN  20.0
4  Cersei Lannister  Lannister  House Lannister  Deceased  42.0


In [22]:
# Example 19: Dropping rows with all missing values in specific columns
print("\nExample 19: Dropping rows with all missing values in specific columns:")
got_df_dropped_all_specific_cols = got_df.dropna(subset=['House', 'Allegiance'], how='all')
print(got_df_dropped_all_specific_cols)


Example 19: Dropping rows with all missing values in specific columns:
                 Name      House       Allegiance    Status   Age
0            Jon Snow      Stark    Night's Watch     Alive  30.0
1  Daenerys Targaryen  Targaryen              NaN  Deceased   NaN
2    Tyrion Lannister        NaN  House Lannister     Alive  38.0
3          Arya Stark      Stark      House Stark       NaN  20.0
4    Cersei Lannister  Lannister  House Lannister  Deceased  42.0


In [23]:
# Example 20: Filling missing values with the median of the column
print("\nExample 20: Filling missing values with the median of the column (Age with median age):")
median_age = got_df['Age'].median()
got_df_filled_median_age = got_df.fillna(value={'Age': median_age})
print(got_df_filled_median_age)



Example 20: Filling missing values with the median of the column (Age with median age):
                 Name      House       Allegiance    Status   Age
0            Jon Snow      Stark    Night's Watch     Alive  30.0
1  Daenerys Targaryen  Targaryen              NaN  Deceased  34.0
2    Tyrion Lannister        NaN  House Lannister     Alive  38.0
3          Arya Stark      Stark      House Stark       NaN  20.0
4    Cersei Lannister  Lannister  House Lannister  Deceased  42.0


In [24]:
# Example 21: Filling missing values with the mode of the column
print("\nExample 21: Filling missing values with the mode of the column (House with mode House):")
mode_house = got_df['House'].mode().iloc[0]
got_df_filled_mode_house = got_df.fillna(value={'House': mode_house})
print(got_df_filled_mode_house)


Example 21: Filling missing values with the mode of the column (House with mode House):
                 Name      House       Allegiance    Status   Age
0            Jon Snow      Stark    Night's Watch     Alive  30.0
1  Daenerys Targaryen  Targaryen              NaN  Deceased   NaN
2    Tyrion Lannister      Stark  House Lannister     Alive  38.0
3          Arya Stark      Stark      House Stark       NaN  20.0
4    Cersei Lannister  Lannister  House Lannister  Deceased  42.0


In [25]:
# Example 22: Filling missing values with the previous and next values using pad and backfill methods
print("\nExample 22: Filling missing values with the previous and next values using pad and backfill methods:")
got_df_pad_backfill = got_df.fillna(method='pad').fillna(method='backfill')
print(got_df_pad_backfill)


Example 22: Filling missing values with the previous and next values using pad and backfill methods:
                 Name      House       Allegiance    Status   Age
0            Jon Snow      Stark    Night's Watch     Alive  30.0
1  Daenerys Targaryen  Targaryen    Night's Watch  Deceased  30.0
2    Tyrion Lannister  Targaryen  House Lannister     Alive  38.0
3          Arya Stark      Stark      House Stark     Alive  20.0
4    Cersei Lannister  Lannister  House Lannister  Deceased  42.0


In [26]:
# Example 23: Using the interpolate method with time index for missing values
print("\nExample 23: Using the interpolate method with time index for missing values:")
got_df_time_index = got_df.set_index(pd.date_range(start='2023-01-01', periods=5, freq='Y'))
got_df_interpolated_time_index = got_df_time_index.interpolate(method='time')
print(got_df_interpolated_time_index)


Example 23: Using the interpolate method with time index for missing values:
                          Name      House       Allegiance    Status  \
2023-12-31            Jon Snow      Stark    Night's Watch     Alive   
2024-12-31  Daenerys Targaryen  Targaryen              NaN  Deceased   
2025-12-31    Tyrion Lannister        NaN  House Lannister     Alive   
2026-12-31          Arya Stark      Stark      House Stark       NaN   
2027-12-31    Cersei Lannister  Lannister  House Lannister  Deceased   

                  Age  
2023-12-31  30.000000  
2024-12-31  34.005472  
2025-12-31  38.000000  
2026-12-31  20.000000  
2027-12-31  42.000000  


In [27]:
# Example 24: Using the interpolate method with limit parameter for limiting consecutive interpolations
print("\nExample 24: Using the interpolate method with limit parameter for limiting consecutive interpolations:")
got_df_interpolate_limit = got_df.interpolate(limit=1)
print(got_df_interpolate_limit)


Example 24: Using the interpolate method with limit parameter for limiting consecutive interpolations:
                 Name      House       Allegiance    Status   Age
0            Jon Snow      Stark    Night's Watch     Alive  30.0
1  Daenerys Targaryen  Targaryen              NaN  Deceased  34.0
2    Tyrion Lannister        NaN  House Lannister     Alive  38.0
3          Arya Stark      Stark      House Stark       NaN  20.0
4    Cersei Lannister  Lannister  House Lannister  Deceased  42.0


In [28]:
# Example 25: Using the interpolate method with limit_direction parameter for controlling interpolation direction
print("\nExample 25: Using the interpolate method with limit_direction parameter for controlling interpolation direction:")
got_df_interpolate_direction = got_df.interpolate(limit_direction='backward')
print(got_df_interpolate_direction)


Example 25: Using the interpolate method with limit_direction parameter for controlling interpolation direction:
                 Name      House       Allegiance    Status   Age
0            Jon Snow      Stark    Night's Watch     Alive  30.0
1  Daenerys Targaryen  Targaryen              NaN  Deceased  34.0
2    Tyrion Lannister        NaN  House Lannister     Alive  38.0
3          Arya Stark      Stark      House Stark       NaN  20.0
4    Cersei Lannister  Lannister  House Lannister  Deceased  42.0
