In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler, MinMaxScaler, RobustScaler, PowerTransformer

# Input and output CSV paths. Both are still the placeholder '.csv' here and
# need to be set to real paths before running. A cell below writes `df` to
# op_file, so pointing both names at the same file would overwrite the input.
ip_file = '.csv'
op_file = '.csv'
# Load the raw data; the cells below read and reassign this `df`.
df = pd.read_csv(ip_file)

In [None]:
# Sanity check of the frame: its dimensions first, then the count of missing
# entries in each column.
print("Initial Shape:", df.shape)
print("\nMissing Values:\n", df.isna().sum())

In [None]:
# Impute missing values in every numeric column with that column's mean.
#
# The filled Series is assigned back to the column rather than calling
# fillna(..., inplace=True) on df[col]: that form is chained assignment through
# an inplace method, which raises a FutureWarning in recent pandas 2.x releases
# and leaves `df` untouched once Copy-on-Write is active (default in pandas 3).
#
# A column that is entirely NaN has a NaN mean and therefore stays NaN.
for col in df.select_dtypes(include=np.number).columns:
    df[col] = df[col].fillna(df[col].mean())

In [None]:
def remove_outliers_iqr(data, columns, factor=1.5):
    """Drop rows whose value in any of ``columns`` falls outside the IQR fences.

    For each column the fences are ``Q1 - factor * IQR`` and
    ``Q3 + factor * IQR`` (both inclusive). Columns are processed one after
    another in the order given, and each column's quartiles are computed on
    the rows that survived the previous columns, so the result can depend on
    the column order.

    Rows where the inspected column is NaN are kept: a missing value is not an
    outlier, and a column that is entirely NaN (NaN quartiles) would otherwise
    discard every row of the frame.

    Prints one line per column with the number of rows removed.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame to filter. It is not modified.
    columns : iterable of column labels
        Numeric columns to check, in processing order.
    factor : float, default 1.5
        Multiplier applied to the IQR to place the fences.

    Returns
    -------
    pandas.DataFrame
        An independent copy holding only the surviving rows (original index
        labels preserved), safe to assign into without copy warnings.
    """
    filtered = data
    for col in columns:
        values = filtered[col]
        q1 = values.quantile(0.25)
        q3 = values.quantile(0.75)
        iqr = q3 - q1
        lower = q1 - factor * iqr
        upper = q3 + factor * iqr
        # NaN fails both comparisons inside between(), so it is whitelisted
        # explicitly instead of being counted as an outlier.
        keep = values.between(lower, upper) | values.isna()
        before = len(filtered)
        filtered = filtered[keep]
        print(f"{col}: Removed {before - len(filtered)} outliers")
    # Explicit copy: callers assign new column values into the result.
    return filtered.copy()

# Labels of the numeric columns; the transformation cell below reuses `num_cols`.
num_cols = df.select_dtypes(include=np.number).columns
# Filter outliers column by column and rebind `df` to the surviving rows.
df = remove_outliers_iqr(data=df, columns=num_cols)

In [None]:
# Transformation applied to the numeric columns (`num_cols`) of `df`.
# One of "standard", "minmax", "robust", "log", "power"; leave as "" to skip
# the step. Note that this cell overwrites `df` in place, so running it twice
# applies the transformation twice.
transformation = ""

# The scikit-learn options differ only in the object they build and the message
# they print, so they are driven from one table: name -> (factory, message).
sklearn_transforms = {
    "standard": (StandardScaler, "Applied StandardScaler (Z-score)"),
    "minmax": (MinMaxScaler, "Applied MinMaxScaler (0–1 normalization)"),
    "robust": (RobustScaler, "Applied RobustScaler (good for outliers)"),
    "power": (
        lambda: PowerTransformer(method='yeo-johnson'),
        "Applied PowerTransformer (Yeo-Johnson)",
    ),
}
valid_transformations = sorted([*sklearn_transforms, "log"])

if not transformation:
    # Make the no-op explicit instead of silently exporting unscaled data.
    print("No transformation selected; numeric columns left unchanged")

elif transformation not in valid_transformations:
    # A typo such as "Standard" used to fall through every branch unnoticed.
    raise ValueError(
        f"Unknown transformation {transformation!r}; "
        f"expected one of {valid_transformations} or '' to skip"
    )

elif transformation == "log":
    for col in num_cols:
        # log is undefined for values <= 0: shift such a column so that its
        # minimum becomes exactly 1 before taking the logarithm.
        if (df[col] <= 0).any():
            df[col] = df[col] + abs(df[col].min()) + 1
        df[col] = np.log(df[col])
    print("Applied Log Transformation")

else:
    build_transformer, applied_message = sklearn_transforms[transformation]
    # The fitted object stays reachable under both names so that other cells
    # can use it (e.g. to invert the transformation) as `scaler` or `transformer`.
    scaler = transformer = build_transformer()
    df[num_cols] = scaler.fit_transform(df[num_cols])
    print(applied_message)

In [None]:
# Write the processed frame to op_file as CSV; index=False keeps the row index
# out of the file.
df.to_csv(op_file, index=False)