In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from xgboost import XGBClassifier  # pip install xgboost if needed

In [3]:
# Load the churn dataset and discard the identifier columns in one step.
df = pd.read_csv('Churn_Modelling.csv').drop(
    columns=['RowNumber', 'CustomerId', 'Surname']
)

# Features and target: 'Exited' is the label, every remaining column is an input.
y = df['Exited']
X = df.drop(columns='Exited')

# Column groups consumed by the preprocessing step.
categorical_features = ['Geography', 'Gender']
numerical_features = [
    'CreditScore',
    'Age',
    'Tenure',
    'Balance',
    'NumOfProducts',
    'HasCrCard',
    'IsActiveMember',
    'EstimatedSalary',
]

In [5]:
# Standardize the numeric columns; one-hot encode the categorical columns,
# dropping the first level of each.
numeric_step = ('num', StandardScaler(), numerical_features)
categorical_step = ('cat', OneHotEncoder(drop='first'), categorical_features)
preprocessor = ColumnTransformer(transformers=[numeric_step, categorical_step])

# 80/20 hold-out split, stratified on the target, with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

In [7]:
#function to train and evaluate
def train_evaluate(model, name):
    """Fit `model` behind the shared preprocessor and print test-set metrics.

    Builds a two-step pipeline (the notebook-level `preprocessor` followed by
    `model`), fits it on `X_train`/`y_train`, predicts on `X_test`, and prints
    accuracy, the classification report and the confusion matrix.

    Parameters
    ----------
    model : estimator
        Classifier placed in the pipeline's 'classifier' step.
    name : str
        Label shown in the printed results header.

    Returns
    -------
    None
        Results are printed only.
    """
    steps = [('preprocessor', preprocessor), ('classifier', model)]
    fitted = Pipeline(steps=steps).fit(X_train, y_train)
    predictions = fitted.predict(X_test)

    print(f"\n=== {name} Results ===")
    print("Accuracy:", accuracy_score(y_test, predictions))
    print(classification_report(y_test, predictions))
    print("Confusion Matrix:\n", confusion_matrix(y_test, predictions))

In [9]:
# Logistic regression; class_weight='balanced' is set to counter the class imbalance.
lr = LogisticRegression(
    class_weight='balanced',
    random_state=42,
)
train_evaluate(model=lr, name="Logistic Regression")


=== Logistic Regression Results ===
Accuracy: 0.714
              precision    recall  f1-score   support

           0       0.90      0.72      0.80      1593
           1       0.39      0.70      0.50       407

    accuracy                           0.71      2000
   macro avg       0.65      0.71      0.65      2000
weighted avg       0.80      0.71      0.74      2000

Confusion Matrix:
 [[1143  450]
 [ 122  285]]


#### Highest recall (70%) → catches most churners
#### But 450 false positives and only 39% precision → predicts churn too often
#### Will annoy many loyal customers → bad customer experience
#### Only use this if missing a churner is extremely costly and you’re okay with over-alerting.

In [11]:
# Random forest: 100 trees, balanced class weights, fixed seed.
rf = RandomForestClassifier(
    n_estimators=100,
    class_weight='balanced',
    random_state=42,
)
train_evaluate(model=rf, name="Random Forest")


=== Random Forest Results ===
Accuracy: 0.861
              precision    recall  f1-score   support

           0       0.87      0.97      0.92      1593
           1       0.78      0.44      0.56       407

    accuracy                           0.86      2000
   macro avg       0.83      0.70      0.74      2000
weighted avg       0.85      0.86      0.85      2000

Confusion Matrix:
 [[1543   50]
 [ 228  179]]


#### Highest accuracy (86.1%) and best precision (78%)
#### Only 50 false positives → only 50 loyal customers wrongly flagged as "will churn"
#### This is critical in real life: you don’t want to spam loyal customers with retention offers
#### Catches 179 out of 407 actual churners (44%) — acceptable trade-off for low noise

#### Best choice if the goal is: “Target high-risk customers with confidence and avoid annoying good customers.”

In [15]:
# XGBoost; scale_pos_weight is the ratio of class-0 to class-1 counts in the
# training labels, used to handle the imbalance.
class_counts = y_train.value_counts()
xgb = XGBClassifier(
    random_state=42,
    scale_pos_weight=class_counts[0] / class_counts[1],
)
train_evaluate(model=xgb, name="XGBoost")


=== XGBoost Results ===
Accuracy: 0.8225
              precision    recall  f1-score   support

           0       0.90      0.87      0.89      1593
           1       0.56      0.62      0.59       407

    accuracy                           0.82      2000
   macro avg       0.73      0.75      0.74      2000
weighted avg       0.83      0.82      0.83      2000

Confusion Matrix:
 [[1393  200]
 [ 155  252]]


#### Highest F1-score on churn class (0.59) → best balance between precision and recall
#### Catches 252 out of 407 churners (62%) — well above Random Forest (44%), though below Logistic Regression (70%)
#### Still reasonable precision (only 200 false positives)
#### Best choice if the goal is: “Catch most churners while keeping false alarms moderate” (200 false positives vs. 450 for Logistic Regression).