### Task 1: Handling Missing Values - Simple Imputation
**Description**: Given a dataset with missing values, impute the missing values using the mean for numerical features and the mode for categorical features.

In [2]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler, StandardScaler

# -------------------------------
# Core Functions
# -------------------------------

def impute_missing_values(df):
    """Return a copy of ``df`` with every missing value filled in.

    Columns backed by a NumPy numeric dtype (any integer, float or complex
    width, not only ``int64``/``float64``) are filled with the column mean.
    All other columns (strings, objects, booleans, pandas extension dtypes)
    are filled with their most frequent value; ties resolve to the first
    entry returned by ``Series.mode()``.

    Parameters
    ----------
    df : pandas.DataFrame
        Input frame. It is not modified.

    Returns
    -------
    pandas.DataFrame
        A new frame with the same columns and no missing values.

    Raises
    ------
    ValueError
        If ``df`` is empty, or if a column has no observed values at all
        (there is nothing to derive a mean or mode from).
    """
    if df.empty:
        raise ValueError("DataFrame is empty.")
    df_imputed = df.copy()
    for col in df.columns:
        series = df[col]
        # Restrict the mean path to plain NumPy numeric dtypes: nullable
        # extension dtypes (e.g. Int64, boolean) cannot always hold a
        # fractional mean, so they keep using the mode as before.
        is_numeric = (
            isinstance(series.dtype, np.dtype)
            and pd.api.types.is_numeric_dtype(series)
            and not pd.api.types.is_bool_dtype(series)
        )
        if series.dropna().empty:
            # Without this guard a numeric column would be "filled" with NaN
            # and silently returned with its gaps intact.
            if is_numeric:
                raise ValueError(f"Cannot compute mean for empty numerical column: {col}")
            raise ValueError(f"Cannot compute mode for empty categorical column: {col}")
        fill_value = series.mean() if is_numeric else series.mode()[0]
        df_imputed[col] = series.fillna(fill_value)
    return df_imputed

def apply_minmax_scaling(df, feature):
    """Return a copy of ``df`` with an extra ``<feature>_minmax`` column.

    The new column holds ``feature`` after it has been passed through a
    freshly fitted ``MinMaxScaler``. The input frame is not modified.

    Raises
    ------
    ValueError
        If ``feature`` is not a column of ``df``, or if the column has fewer
        than two distinct non-missing values (the scaling range would be zero).
    """
    # Guard clauses first, so bad input fails before any scaler is built.
    if feature not in df.columns:
        raise ValueError(f"Column '{feature}' not found.")
    distinct_values = df[feature].nunique()
    if distinct_values < 2:
        raise ValueError(f"Cannot scale feature '{feature}' with zero or one unique value.")

    rescaled = MinMaxScaler().fit_transform(df[[feature]])
    result = df.copy()
    result[feature + '_minmax'] = rescaled
    return result

def drop_missing_rows(df):
    """Return a new frame holding only the rows of ``df`` that have no missing value."""
    complete_rows = df.dropna(axis=0, how='any')
    return complete_rows

def apply_standard_scaling(df, feature):
    """Return a copy of ``df`` with an extra ``<feature>_std`` column.

    The new column holds ``feature`` after it has been passed through a
    freshly fitted ``StandardScaler``. The input frame is not modified.

    Raises
    ------
    ValueError
        If ``feature`` is not a column of ``df``, or if the column has fewer
        than two distinct non-missing values (its spread would be zero).
    """
    # Guard clauses first, so bad input fails before any scaler is built.
    if feature not in df.columns:
        raise ValueError(f"Column '{feature}' not found.")
    distinct_values = df[feature].nunique()
    if distinct_values < 2:
        raise ValueError(f"Cannot standardize feature '{feature}' with zero or one unique value.")

    standardized = StandardScaler().fit_transform(df[[feature]])
    result = df.copy()
    result[feature + '_std'] = standardized
    return result

# -------------------------------
# Test Cases
# -------------------------------

def test_imputation():
    """Imputing a frame with one numeric and one categorical gap leaves no missing values."""
    sample = pd.DataFrame({
        'age': [25, np.nan, 35],
        'gender': ['M', 'F', np.nan],
    })
    imputed = impute_missing_values(sample)
    gaps_remain = imputed.isna().to_numpy().any()
    assert not gaps_remain, "Imputation failed."

def test_minmax_scaling():
    """The min-max scaled column must span exactly 0.0 to 1.0."""
    salaries = pd.DataFrame({'salary': [40000, 50000, 60000]})
    scaled_column = apply_minmax_scaling(salaries, 'salary')['salary_minmax']
    assert scaled_column.min() == 0.0
    assert scaled_column.max() == 1.0

def test_dropna():
    """After dropping incomplete rows, no cell of the result may be missing."""
    frame = pd.DataFrame({'a': [1, np.nan, 3], 'b': ['x', 'y', np.nan]})
    cleaned = drop_missing_rows(frame)
    missing_cells = cleaned.isna().sum().sum()
    assert missing_cells == 0

def test_standardization():
    """The standardized column must have zero mean and unit (population) std."""
    df = pd.DataFrame({'age': [20, 30, 40]})
    result = apply_standard_scaling(df, 'age')
    scaled = result['age_std']
    # StandardScaler divides by the population standard deviation (ddof=0),
    # while pandas' Series.std() defaults to the sample estimate (ddof=1).
    # For this input the sample std is ~1.22, so comparing it to 1 only
    # "worked" after rounding. Check the matching statistic with a float
    # tolerance instead of rounding to whole numbers.
    mean_ok = np.isclose(scaled.mean(), 0.0)
    std_ok = np.isclose(scaled.std(ddof=0), 1.0)
    assert mean_ok and std_ok, "Standardization failed."

# -------------------------------
# Demonstration on Sample Data
# -------------------------------

if __name__ == "__main__":
    # Small mixed-type sample with gaps in every column.
    data = dict(
        age=[25, np.nan, 35, 45, np.nan],
        salary=[50000, 60000, np.nan, 80000, 90000],
        gender=['Male', 'Female', np.nan, 'Female', 'Male'],
    )
    df = pd.DataFrame(data)
    print("🔹 Original DataFrame:\n", df)

    # Task 1: fill gaps (mean for numeric columns, mode for the rest).
    df_imputed = impute_missing_values(df)
    print("\n✅ After Imputation:\n", df_imputed)

    # Task 2: min-max scale the imputed salary column.
    df_minmax = apply_minmax_scaling(df_imputed, 'salary')
    print("\n✅ Min-Max Scaled 'salary':\n", df_minmax.loc[:, ['salary', 'salary_minmax']])

    # Task 3: drop incomplete rows, starting again from the raw frame.
    df_dropped = drop_missing_rows(df)
    print("\n✅ After Dropping Missing Rows:\n", df_dropped)

    # Task 4: standardize the imputed age column.
    df_standard = apply_standard_scaling(df_imputed, 'age')
    print("\n✅ Standardized 'age':\n", df_standard.loc[:, ['age', 'age_std']])

    # Self-checks: each raises AssertionError on failure, so reaching the
    # final print means all of them passed.
    test_imputation()
    test_minmax_scaling()
    test_dropna()
    test_standardization()
    print("\n✅ All tests passed.")

🔹 Original DataFrame:
     age   salary  gender
0  25.0  50000.0    Male
1   NaN  60000.0  Female
2  35.0      NaN     NaN
3  45.0  80000.0  Female
4   NaN  90000.0    Male

✅ After Imputation:
     age   salary  gender
0  25.0  50000.0    Male
1  35.0  60000.0  Female
2  35.0  70000.0  Female
3  45.0  80000.0  Female
4  35.0  90000.0    Male

✅ Min-Max Scaled 'salary':
     salary  salary_minmax
0  50000.0           0.00
1  60000.0           0.25
2  70000.0           0.50
3  80000.0           0.75
4  90000.0           1.00

✅ After Dropping Missing Rows:
     age   salary  gender
0  25.0  50000.0    Male
3  45.0  80000.0  Female

✅ Standardized 'age':
     age   age_std
0  25.0 -1.581139
1  35.0  0.000000
2  35.0  0.000000
3  45.0  1.581139
4  35.0  0.000000

✅ All tests passed.


In [1]:
# write your code from here

import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler, StandardScaler

# Toy dataset: two numeric columns and one categorical column, each with gaps.
data = dict(
    age=[25, np.nan, 35, 45, np.nan],
    salary=[50000, 60000, np.nan, 80000, 90000],
    gender=['Male', 'Female', np.nan, 'Female', 'Male'],
)
df = pd.DataFrame(data)
print("🔹 Original DataFrame:\n", df)

# --- Task 1: Simple Imputation ---
def impute_missing_values(df):
    """Return a copy of ``df`` with missing values filled in.

    Columns backed by a NumPy numeric dtype (any integer, float or complex
    width, not only ``int64``/``float64``) are filled with the column mean.
    All other columns are filled with their most frequent value; ties
    resolve to the first entry returned by ``Series.mode()``.

    Parameters
    ----------
    df : pandas.DataFrame
        Input frame. It is not modified.

    Returns
    -------
    pandas.DataFrame
        A new frame with the same columns and the gaps filled.

    Raises
    ------
    ValueError
        If a column contains rows but no observed values, so no mean or mode
        can be derived from it.
    """
    df_imputed = df.copy()
    for column in df.columns:
        values = df[column]
        # Restrict the mean path to plain NumPy numeric dtypes: nullable
        # extension dtypes (e.g. Int64, boolean) cannot always hold a
        # fractional mean, so they keep using the mode.
        use_mean = (
            isinstance(values.dtype, np.dtype)
            and pd.api.types.is_numeric_dtype(values)
            and not pd.api.types.is_bool_dtype(values)
        )
        if len(values) and values.dropna().empty:
            # Fail with a clear message instead of an opaque KeyError from
            # `mode()[0]`, or a numeric column silently left full of NaN.
            statistic = "mean" if use_mean else "mode"
            raise ValueError(
                f"Cannot compute {statistic} for column with no observed values: {column}"
            )
        if len(values) == 0:
            # Nothing to fill in a zero-row column.
            continue
        fill_value = values.mean() if use_mean else values.mode()[0]
        df_imputed[column] = values.fillna(fill_value)
    return df_imputed

# Fill the gaps of the raw frame. The helper works on a copy, so `df` keeps
# its missing values and is reused below for the drop-rows task.
df_imputed = impute_missing_values(df)
print("\n✅ After Imputation:\n", df_imputed)

# --- Task 2: Min-Max Normalization ---
def apply_minmax_scaling(df, feature):
    """Return a copy of ``df`` with an added ``<feature>_minmax`` column.

    The new column is ``feature`` passed through a freshly fitted
    ``MinMaxScaler``; the input frame is left unchanged.
    """
    rescaled = MinMaxScaler().fit_transform(df[[feature]])
    output = df.copy()
    output[feature + '_minmax'] = rescaled
    return output

# Scale the imputed frame rather than the raw one, so 'salary' has no gaps
# when the scaler is fitted. Only the original and scaled columns are shown.
df_minmax = apply_minmax_scaling(df_imputed, 'salary')
print("\n✅ After Min-Max Scaling on 'salary':\n", df_minmax[['salary', 'salary_minmax']])

# --- Task 3: Drop Rows with Missing Values ---
# Starts again from the raw `df` (not the imputed copy). The result is bound
# to a new name, so `df` itself still contains its missing values.
df_dropped = df.dropna()
print("\n✅ After Dropping Rows with Missing Values:\n", df_dropped)

# --- Task 4: Standardization ---
def apply_standard_scaling(df, feature):
    """Return a copy of ``df`` with an added ``<feature>_std`` column.

    The new column is ``feature`` passed through a freshly fitted
    ``StandardScaler``; the input frame is left unchanged.
    """
    standardized = StandardScaler().fit_transform(df[[feature]])
    output = df.copy()
    output[feature + '_std'] = standardized
    return output

# Standardize the imputed 'age' column (the raw column still has gaps) and
# show it next to the original values.
df_standard = apply_standard_scaling(df_imputed, 'age')
print("\n✅ After Standardization on 'age':\n", df_standard[['age', 'age_std']])

🔹 Original DataFrame:
     age   salary  gender
0  25.0  50000.0    Male
1   NaN  60000.0  Female
2  35.0      NaN     NaN
3  45.0  80000.0  Female
4   NaN  90000.0    Male

✅ After Imputation:
     age   salary  gender
0  25.0  50000.0    Male
1  35.0  60000.0  Female
2  35.0  70000.0  Female
3  45.0  80000.0  Female
4  35.0  90000.0    Male

✅ After Min-Max Scaling on 'salary':
     salary  salary_minmax
0  50000.0           0.00
1  60000.0           0.25
2  70000.0           0.50
3  80000.0           0.75
4  90000.0           1.00

✅ After Dropping Rows with Missing Values:
     age   salary  gender
0  25.0  50000.0    Male
3  45.0  80000.0  Female

✅ After Standardization on 'age':
     age   age_std
0  25.0 -1.581139
1  35.0  0.000000
2  35.0  0.000000
3  45.0  1.581139
4  35.0  0.000000


### Task 2: Feature Scaling - Min-Max Normalization
**Description**: Normalize a numerical feature using Min-Max scaling to a range [0, 1].

In [None]:
# write your code from here

### Task 3: Handling Missing Values - Drop Missing Values
**Description**: Remove rows with missing values from a dataset.

In [None]:
# write your code from here

### Task 4: Feature Scaling - Standardization
**Description**: Standardize a numerical feature to have zero mean and unit variance.

In [None]:
# write your code from here