In [1]:
import os
import pickle

import pandas as pd
from sklearn.metrics import (
    accuracy_score, roc_auc_score, precision_score,
    recall_score, f1_score, matthews_corrcoef,
    confusion_matrix, classification_report
)
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from xgboost import XGBClassifier

# Step 1: Read the raw dataset from the CSV file in the working directory
df = pd.read_csv("Customer Churn.csv")

# Step 2: Separate the "Churn" label column from the remaining feature columns
y = df["Churn"]
X = df.drop(columns=["Churn"])

# Step 3: Hold out 20% of the rows for testing.
# stratify=y asks for the same label proportions in both splits, and the
# fixed random_state makes the split repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

# Step 4: Standardize features
# The scaler is fitted on the training split only and then applied to both
# splits, so the test rows never influence the fitted scaling parameters.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Step 5: Train XGBoost Classifier
# NOTE: `use_label_encoder` is deliberately not passed. The installed XGBoost
# does not recognise it and only reports
# 'Parameters: { "use_label_encoder" } are not used.' when it is supplied,
# so dropping it leaves the fitted model unchanged and removes the warning.
xgb_model = XGBClassifier(
    n_estimators=200,        # number of boosting rounds
    learning_rate=0.1,       # step size shrinkage
    max_depth=5,             # depth of trees
    subsample=0.8,           # subsample ratio
    colsample_bytree=0.8,    # feature subsample ratio
    random_state=42,         # fixed seed so the row/feature subsampling is repeatable
    eval_metric="logloss",   # evaluation metric, set explicitly
)
xgb_model.fit(X_train_scaled, y_train)

# Step 6: Predictions
# y_proba takes column index 1 of predict_proba (the probability assigned to
# the class labelled 1); y_pred holds the hard class labels.
y_proba = xgb_model.predict_proba(X_test_scaled)[:, 1]
y_pred = xgb_model.predict(X_test_scaled)

# Step 7: Metrics
# AUC is computed from the probabilities; every other score is computed from
# the hard label predictions.
accuracy = accuracy_score(y_test, y_pred)
auc = roc_auc_score(y_test, y_proba)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
mcc = matthews_corrcoef(y_test, y_pred)

# Report the scalar scores in a fixed order, one per line.
for metric_label, metric_value in (
    ("Accuracy:", accuracy),
    ("AUC Score:", auc),
    ("Precision:", precision),
    ("Recall:", recall),
    ("F1 Score:", f1),
    ("Matthews Correlation Coefficient:", mcc),
):
    print(metric_label, metric_value)

# Tabular summaries of the same label predictions.
print("\nConfusion Matrix:\n", confusion_matrix(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))

# Step 8: Save model and scaler
# open(..., "wb") does not create missing parent directories, so make sure the
# output folder exists first; otherwise a fresh checkout fails here with
# FileNotFoundError after all the training work has been done.
os.makedirs("model", exist_ok=True)

# The fitted scaler is saved alongside the model: the model was trained on
# scaled features, so new data must go through the same scaler before predict.
with open("model/xgboost_churn_model.pkl", "wb") as f:
    pickle.dump(xgb_model, f)

with open("model/scaler_xgb.pkl", "wb") as f:
    pickle.dump(scaler, f)

print("XGBoost model and scaler saved as pickle files.")

Accuracy: 0.9714285714285714
AUC Score: 0.9922197492818962
Precision: 0.900990099009901
Recall: 0.9191919191919192
F1 Score: 0.91
Matthews Correlation Coefficient: 0.8930839807695614

Confusion Matrix:
 [[521  10]
 [  8  91]]

Classification Report:
               precision    recall  f1-score   support

           0       0.98      0.98      0.98       531
           1       0.90      0.92      0.91        99

    accuracy                           0.97       630
   macro avg       0.94      0.95      0.95       630
weighted avg       0.97      0.97      0.97       630

XGBoost model and scaler saved as pickle files.


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
