# 📊 EngageTrack AI – Churn Prediction with XGBoost + SHAP

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import accuracy_score, roc_auc_score, classification_report, confusion_matrix

import xgboost as xgb
import shap

# Notebook-wide display configuration.
# Show every DataFrame column when printing (None removes the column cap).
pd.options.display.max_columns = None
# Use seaborn's white-grid theme for all subsequent plots.
sns.set(style="whitegrid")


In [None]:
# Location of the churn dataset, relative to the notebook's working
# directory; adjust if the notebook is run from somewhere else.
DATA_PATH = '../data/churn.csv'

df = pd.read_csv(DATA_PATH)
# Preview the first rows as the cell output.
df.head()


In [None]:
# Preprocessing: encode categoricals, split into train/test, then scale.
#
# The scaler is fitted on the training rows only. Fitting it on the full
# dataset before splitting would let test-set statistics (mean/variance)
# leak into the training features and make the hold-out metrics optimistic.

# Encode categorical columns as integer codes. Each fitted encoder is kept
# so the mapping can be inverted or reused on new data.
label_encoders = {}
categorical_cols = ['Gender', 'Contract Length']
for col in categorical_cols:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    label_encoders[col] = le

# Define X and y
X = df.drop(columns=['Churn'])
y = df['Churn']

# Train/test split on the unscaled features. The same seed and row count
# produce the same row assignment as splitting an already-scaled matrix.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Scale: learn mean/std from the training rows, then apply to both splits.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Full feature matrix transformed with the train-fitted scaler; kept so any
# later cell that references X_scaled continues to work.
X_scaled = scaler.transform(X)


In [None]:
# XGBoost classifier configuration, collected in one mapping so the
# hyperparameters can be inspected or logged alongside the fitted model.
# NOTE(review): `use_label_encoder` is deprecated in newer XGBoost releases
# and may only trigger a warning there — confirm against the installed version.
xgb_params = {
    'use_label_encoder': False,
    'eval_metric': 'logloss',
    'max_depth': 4,
    'learning_rate': 0.1,
    'reg_lambda': 1.0,
    'reg_alpha': 0.2,
    'subsample': 0.8,
    'colsample_bytree': 0.8,
    'n_estimators': 100,
}

model = xgb.XGBClassifier(**xgb_params)
# Fit on the training split; the fitted estimator is the cell's output.
model.fit(X_train, y_train)


In [None]:
# Hold-out evaluation of the fitted model on the test split.

# Hard class predictions, plus the probability in column 1 of
# predict_proba, which is the score passed to the AUC computation.
y_pred = model.predict(X_test)
y_proba = model.predict_proba(X_test)[:, 1]

# Each metric is computed and printed in turn.
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

auc = roc_auc_score(y_test, y_proba)
print("AUC Score:", auc)

report = classification_report(y_test, y_pred)
print("\nClassification Report:\n", report)

# Confusion matrix rendered as an annotated heatmap with integer counts.
cm = confusion_matrix(y_test, y_pred)
sns.heatmap(cm, annot=True, fmt='d', cmap="Blues")
plt.title("Confusion Matrix")
plt.show()


In [None]:
# SHAP explanation of the fitted model on the test split.
explainer = shap.Explainer(model)
shap_values = explainer(X_test)

# X_test is the scaled matrix produced by the scaler, so the column labels
# are taken from the original feature frame X.
feature_labels = X.columns
shap.summary_plot(
    shap_values,
    features=X_test,
    feature_names=feature_labels,
)
