In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import roc_auc_score, classification_report
from xgboost import XGBClassifier
from sklearn.model_selection import RandomizedSearchCV


# ------------------------------
# 1. Load dataset
# ------------------------------
df = pd.read_csv("Churn.csv")

# ------------------------------
# 2. Encode categorical columns
# ------------------------------
label_encoders = {}
for col in df.select_dtypes(include=['object']).columns:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col].astype(str))
    label_encoders[col] = le

# ------------------------------
# 3. Define features & target
# ------------------------------
target = "Closed"
df = df.dropna(subset=[target])
X = df.drop(columns=[target])
y = df[target]

# ------------------------------
# 4. Train-Test Split
# ------------------------------
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)

# ------------------------------
# 5. Train XGBoost Model
# ------------------------------
scale_pos_weight = (y_train == 0).sum() / (y_train == 1).sum()


params = {
    'max_depth': [3, 5, 7, 9,11],
    'learning_rate': [0.01, 0.05, 0.1, 0.2],
    'n_estimators': [100, 300, 500,700,1000],
    'subsample': [0.7, 0.8, 1.0],
    'colsample_bytree': [0.7, 0.8, 1.0],
    'scale_pos_weight': [1, 2, 5, 10]
}

model = RandomizedSearchCV(
    XGBClassifier(objective='binary:logistic', eval_metric='auc', use_label_encoder=False),
    param_distributions=params,
    scoring='roc_auc',
    n_iter=20,
    cv=3,
    verbose=1,
    random_state=42
)

model.fit(X_train, y_train)

# ------------------------------
# 6. Predictions & Probabilities
# ------------------------------
y_pred = model.predict(X_test)
y_prob = model.predict_proba(X_test)[:, 1]

# ------------------------------
# 7. Evaluation
# ------------------------------
print("AUC Score:", roc_auc_score(y_test, y_prob))
print("\nClassification Report:\n", classification_report(y_test, y_pred))


Fitting 3 folds for each of 20 candidates, totalling 60 fits


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.


AUC Score: 0.8435555278418656

Classification Report:
               precision    recall  f1-score   support

         0.0       0.95      0.59      0.73      1580
         1.0       0.36      0.88      0.51       406

    accuracy                           0.65      1986
   macro avg       0.65      0.73      0.62      1986
weighted avg       0.83      0.65      0.68      1986



In [4]:
import joblib

# Save model and scaler/encoder
joblib.dump(model, "churn_model.pkl")      # if you used scaling
joblib.dump(label_encoders, "encoder.pkl")     # if you used label encoding/one-hot

['encoder.pkl']