In [4]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder, StandardScaler

# =========================
# 1. LOAD DATA
# =========================
# Read the raw churn dataset from the current working directory and report
# its dimensions.
df = pd.read_csv("Churn_Modelling.csv")
print("Original Data Shape:", df.shape)


# =========================
# 2. DROP USELESS COLUMNS
# =========================
# These three columns are removed before any encoding or scaling happens.
# NOTE(review): presumably they are per-row identifiers with no predictive
# value -- confirm against the dataset description.
id_columns = ["RowNumber", "CustomerId", "Surname"]
df = df.drop(columns=id_columns)
print("After Dropping Columns Shape:", df.shape)


# =========================
# 3. ENCODE CATEGORICAL DATA
# =========================

# Gender: learn the distinct labels, then overwrite the column with their
# integer codes. The fitted encoder stays available as `le` so the mapping
# can be inspected or inverted later.
le = LabelEncoder()
le.fit(df["Gender"])
df["Gender"] = le.transform(df["Gender"])

# Geography: replace the column with one-hot indicator columns.
# drop_first=True omits the indicator for the first category, so the
# remaining indicators are not redundant with each other.
df = pd.get_dummies(data=df, columns=["Geography"], drop_first=True)


# =========================
# 4. FEATURE SCALING
# =========================
# Separate the target column from the features; only the features are scaled.
y = df["Exited"]
X = df.drop(columns="Exited")

# Standardise every feature column. The fitted scaler stays available as
# `scaler` for reuse.
# NOTE(review): the scaler is fitted on the full dataset. If a train/test
# split happens downstream, confirm whether it should be fitted on the
# training portion only.
scaler = StandardScaler()
scaler.fit(X)
X_scaled = scaler.transform(X)

# Wrap the scaled values in a DataFrame again, restoring the feature column
# names. The new frame gets a fresh default index.
X_scaled_df = pd.DataFrame(data=X_scaled, columns=X.columns)


# =========================
# 5. COMBINE & SAVE
# =========================
# Put the scaled features and the target side by side. y's index is reset
# with drop=True so that concat lines the rows up against X_scaled_df,
# which concat aligns on index labels.
final_df = pd.concat([X_scaled_df, y.reset_index(drop=True)], axis=1)

# Single source of truth for the output location: the same variable is used
# for writing and for the status message. The message previously reported
# "data/churn_preprocessed.csv" although the file is written to the current
# working directory.
output_path = "churn_preprocessed.csv"
final_df.to_csv(output_path, index=False)


print(f"✅ Preprocessing completed and file saved as {output_path}")


Original Data Shape: (10000, 14)
After Dropping Columns Shape: (10000, 11)
✅ Preprocessing completed and file saved as data/churn_preprocessed.csv
