In [None]:
# Question 1: Handling Missing Values with Conditional Filling
# Description: Fill missing values in a specific column based on a condition from another column.

import pandas as pd

def conditional_fill(df, target_col, condition_col, condition_value, fill_value):
    """Fill gaps in one column for rows selected by another column's value.

    Args:
        df: DataFrame to update. It is modified in place.
        target_col: Column whose missing entries may be filled.
        condition_col: Column compared against ``condition_value``.
        condition_value: Value ``condition_col`` must equal for a row to qualify.
        fill_value: Replacement written into the qualifying missing entries.

    Returns:
        The same ``df`` object that was passed in, after the update.
    """
    condition_met = df[condition_col] == condition_value
    target_missing = df[target_col].isna()
    # Only rows that satisfy BOTH tests are touched; everything else is kept.
    rows_to_fill = condition_met & target_missing
    df.loc[rows_to_fill, target_col] = fill_value
    return df


In [None]:
# Question 2: Removing Outliers by Rescaling
# Description: Remove outlier rows by computing z-scores for a numerical column and dropping rows whose z-score lies outside the range [-3, 3].

import pandas as pd
import numpy as np

def remove_outliers_zscore(df, col, threshold=3):
    """Drop rows whose z-score in ``col`` lies outside ``[-threshold, threshold]``.

    The z-score is ``(value - mean) / std`` using the column's own mean and
    standard deviation. Rows where ``col`` is missing are dropped as well,
    because a missing value never satisfies the range comparison.

    Args:
        df: Input DataFrame. It is not modified.
        col: Name of the numeric column to screen.
        threshold: Largest absolute z-score to keep (inclusive). Defaults to 3.

    Returns:
        A new DataFrame containing only the rows that were kept.
    """
    values = df[col]
    std = values.std()

    # A constant column has std == 0 and a single-row column has std == NaN.
    # Dividing by either yields NaN z-scores, which would discard every row.
    # With no spread there are no outliers, so keep all non-missing rows.
    if pd.isna(std) or std == 0:
        return df[values.notna()]

    z_scores = (values - values.mean()) / std
    return df[z_scores.abs() <= threshold]


In [None]:
# Question 3: Applying Data Type Conversion
# Description: Convert the 'Age' column to integers after filling missing values.

def convert_age_to_int(df):
    """Replace missing ``'Age'`` entries with 0 and cast the column to ``int``.

    Args:
        df: DataFrame containing an ``'Age'`` column. It is modified in place.

    Returns:
        The same ``df`` object, with ``'Age'`` stored as integers.
    """
    # Missing values must be filled first: NaN cannot be cast to int.
    age_filled = df['Age'].fillna(0)
    df['Age'] = age_filled.astype(int)
    return df


In [None]:
# Question 4: Automating Data Cleaning with Functions
# Description: Create a function that automates the process of filling missing values, removing duplicates, and standardizing column names.
def clean_data(df):
    """Forward-fill gaps, drop duplicate rows, and standardize column names.

    Steps, in order:
      1. Forward-fill missing values down each column. Leading missing
         values have nothing above them and stay missing.
      2. Remove rows that are exact duplicates of an earlier row.
      3. Lower-case string column names and replace spaces with underscores.
         Non-string labels (e.g. integers) are left unchanged.

    Args:
        df: Input DataFrame. It is not modified.

    Returns:
        A new, cleaned DataFrame.
    """
    # DataFrame.ffill() is the supported spelling; fillna(method='ffill')
    # is deprecated in recent pandas releases.
    df = df.ffill()
    df = df.drop_duplicates()
    # Only strings have .lower(); guard so integer/tuple labels do not raise.
    df.columns = [
        col.lower().replace(' ', '_') if isinstance(col, str) else col
        for col in df.columns
    ]
    return df



In [None]:
# Question 5: Complex Data Normalization
# Description: Normalize a numeric column to a range using min-max scaling.

def min_max_normalize(df, column):
    """Rescale ``column`` to the range [0, 1] using min-max scaling.

    Each value ``x`` becomes ``(x - min) / (max - min)``. If every non-missing
    value is identical (``max == min``) the formula would be 0/0, so the
    column is set to 0.0 instead. Missing values remain missing.

    Args:
        df: DataFrame containing ``column``. It is modified in place.
        column: Name of the numeric column to normalize.

    Returns:
        The same ``df`` object, with ``column`` replaced by its scaled values.
    """
    min_val = df[column].min()
    max_val = df[column].max()
    value_range = max_val - min_val
    shifted = df[column] - min_val

    if value_range == 0:
        # Constant column: `shifted` is already all zeros (NaN where missing).
        # Cast to float so the dtype matches the normal division path.
        df[column] = shifted.astype(float)
    else:
        df[column] = shifted / value_range
    return df
