In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

file_path = "Churn_Modelling.csv"
df = pd.read_csv(file_path)

df = df.drop(columns=["RowNumber", "CustomerId", "Surname"], errors="ignore")

label_encoders = {}
for col in ["Geography", "Gender"]:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    label_encoders[col] = le

X = df.drop(columns=["Exited"])  
y = df["Exited"] 

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

models = {
    "Logistic Regression": LogisticRegression(),
    "Random Forest": RandomForestClassifier(),
    "Gradient Boosting": GradientBoostingClassifier()
}

for name, model in models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    print(f"\n{name} Results:")
    print("Accuracy:", accuracy_score(y_test, y_pred))
    print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
    print("Classification Report:\n", classification_report(y_test, y_pred))

def predict_churn():
    print("\nEnter customer details for churn prediction:")
    input_data = {}
    for col in X.columns:
        if col in label_encoders: 
            value = input(f"Enter {col} ({list(label_encoders[col].classes_)}): ")
            value = label_encoders[col].transform([value])[0]
        else:
            value = float(input(f"Enter {col}: "))
        input_data[col] = value
    
    input_df = pd.DataFrame([input_data])
    input_scaled = scaler.transform(input_df)

    print("\nChurn Predictions:")
    for name, model in models.items():
        prediction = model.predict(input_scaled)[0]
        result = "Churn" if prediction == 1 else "No Churn"
        print(f"{name}: {result}")

predict_churn()



Logistic Regression Results:
Accuracy: 0.8155
Confusion Matrix:
 [[1559   48]
 [ 321   72]]
Classification Report:
               precision    recall  f1-score   support

           0       0.83      0.97      0.89      1607
           1       0.60      0.18      0.28       393

    accuracy                           0.82      2000
   macro avg       0.71      0.58      0.59      2000
weighted avg       0.78      0.82      0.77      2000


Random Forest Results:
Accuracy: 0.869
Confusion Matrix:
 [[1553   54]
 [ 208  185]]
Classification Report:
               precision    recall  f1-score   support

           0       0.88      0.97      0.92      1607
           1       0.77      0.47      0.59       393

    accuracy                           0.87      2000
   macro avg       0.83      0.72      0.75      2000
weighted avg       0.86      0.87      0.86      2000


Gradient Boosting Results:
Accuracy: 0.866
Confusion Matrix:
 [[1547   60]
 [ 208  185]]
Classification Report:
      