# 📊 EngageTrack AI – Churn Prediction Notebook

# In [None]:

# Churn Prediction with XGBoost and SHAP – EngageTrack AI

## Step 1: Import Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import accuracy_score, roc_auc_score, classification_report, confusion_matrix

import xgboost as xgb
import shap

# Setup
# Plot theme first, then lift pandas' column-display limit so wide frames
# are shown in full rather than truncated.
sns.set(style="whitegrid")
pd.options.display.max_columns = None

## Step 2: Load Dataset
# The path is relative to the current working directory of the kernel.
DATA_PATH = '../data/customer_churn_dataset-testing-master.csv'
df = pd.read_csv(DATA_PATH)

## Step 3: Preprocessing
# Remove the identifier column before building the feature matrix.
df = df.drop('CustomerID', axis=1)

# Encode categoricals
# Each listed column is replaced by LabelEncoder's integer codes. The fitted
# encoder is kept per column in `label_encoders` for later reuse.
categorical_cols = ['Gender', 'Subscription Type', 'Contract Length']
label_encoders = {name: LabelEncoder().fit(df[name]) for name in categorical_cols}
for name, encoder in label_encoders.items():
    df[name] = encoder.transform(df[name])

# Split X and y
X = df.drop(columns=['Churn'])
y = df['Churn']

# Train/test split
# The split happens BEFORE scaling: fitting the scaler on the full matrix
# would let the held-out rows influence the mean/variance used to transform
# the training data (data leakage), which inflates the evaluation metrics.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Scale features
# StandardScaler is applied to every column of X (including the
# integer-encoded categoricals). Statistics are learned from the training
# rows only and then re-used, unchanged, for the test rows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Full feature matrix transformed with the train-fitted scaler; kept so that
# any later cell that still refers to `X_scaled` continues to work.
X_scaled = scaler.transform(X)

## Step 4: Train XGBoost Model
# Only these two settings are given explicitly; every other hyperparameter
# is left at the XGBClassifier default.
xgb_params = {'use_label_encoder': False, 'eval_metric': 'logloss'}
model = xgb.XGBClassifier(**xgb_params)
model.fit(X_train, y_train)

## Step 5: Evaluation
# Hard class predictions feed accuracy, the report and the confusion matrix;
# the second probability column of predict_proba feeds ROC AUC.
y_pred = model.predict(X_test)
y_proba = model.predict_proba(X_test)[:, 1]

accuracy = accuracy_score(y_test, y_pred)
auc_score = roc_auc_score(y_test, y_proba)
report = classification_report(y_test, y_pred)

print("Accuracy:", accuracy)
print("AUC Score:", auc_score)
print("\nClassification Report:\n", report)

# Confusion matrix
# Rendered as an annotated heatmap with integer cell counts.
cm = confusion_matrix(y_test, y_pred)
sns.heatmap(cm, annot=True, fmt='d')
plt.title("Confusion Matrix")
plt.show()

## Step 6: SHAP Explainability
# Explain the fitted model on the held-out rows.
explainer = shap.Explainer(model)
shap_values = explainer(X_test)

# Summary plot
# The feature matrix handed to the plot must be the same rows the SHAP values
# were computed for. Passing the full dataset `X` (different row count, and
# unscaled) either fails the plot's shape check or colours each point with
# the feature value of an unrelated row. X_test carries no column labels
# here, so the names are taken from X.
shap.summary_plot(shap_values, X_test, feature_names=X.columns.tolist())
