In [1]:
!pip install catboost mlflow

Collecting catboost
  Downloading catboost-1.2.8-cp313-cp313-win_amd64.whl.metadata (1.5 kB)
Collecting mlflow
  Downloading mlflow-3.7.0-py3-none-any.whl.metadata (31 kB)
Collecting graphviz (from catboost)
  Downloading graphviz-0.21-py3-none-any.whl.metadata (12 kB)
Collecting mlflow-skinny==3.7.0 (from mlflow)
  Downloading mlflow_skinny-3.7.0-py3-none-any.whl.metadata (31 kB)
Collecting mlflow-tracing==3.7.0 (from mlflow)
  Downloading mlflow_tracing-3.7.0-py3-none-any.whl.metadata (19 kB)
Collecting Flask-CORS<7 (from mlflow)
  Downloading flask_cors-6.0.2-py3-none-any.whl.metadata (5.3 kB)
Collecting docker<8,>=4.0.0 (from mlflow)
  Downloading docker-7.1.0-py3-none-any.whl.metadata (3.8 kB)
Collecting graphene<4 (from mlflow)
  Downloading graphene-3.4.3-py2.py3-none-any.whl.metadata (6.9 kB)
Collecting huey<3,>=2.5.0 (from mlflow)
  Downloading huey-2.5.5-py3-none-any.whl.metadata (4.8 kB)
Collecting waitress<4 (from mlflow)
  Downloading waitress-3.0.2-py3-none-any.whl.metada


[notice] A new release of pip is available: 25.2 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip


In [None]:
import os
import json
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score,
    f1_score, roc_auc_score, confusion_matrix
)

from catboost import CatBoostClassifier
import mlflow
import mlflow.catboost
import joblib


# ---------------- CONFIG ----------------
# Input CSVs. main() reads both and concatenates them before making its own splits.
TRAIN_CSV = "customer_churn_dataset-training-master.csv"
TEST_CSV  = "customer_churn_dataset-testing-master.csv"

# MLflow experiment name and the local directory where artifacts are written.
EXPERIMENT_NAME = "customer-churn-merged-split"
OUT_DIR = "artifacts"

# Seed shared by both train_test_split calls and by CatBoost (random_seed).
RANDOM_STATE = 42
# Fraction of all rows held out as the test set.
TEST_SIZE = 0.20
# Fraction of the remaining (non-test) rows used as the validation / early-stopping set.
VAL_SIZE  = 0.20

# Label column, and the identifier column that split_xy() removes from the features.
TARGET = "Churn"
ID_COL = "CustomerID"

# Columns cast to pandas "string" dtype and passed to CatBoost as cat_features.
CAT_COLS = ["Gender", "Subscription Type", "Contract Length"]
# Columns coerced with pd.to_numeric; values that fail to parse become NaN.
NUM_COLS = [
    "Age","Tenure","Usage Frequency",
    "Support Calls","Payment Delay",
    "Total Spend","Last Interaction"
]

# Extra feature columns that split_xy() drops from X when present.
DROP_FEATURES = []   # optionally add ["Total Spend"] etc.


# ---------------- HELPERS ----------------
def clean_target(df, target=None):
    """Normalize churn labels to {0,1} and drop rows without a valid label.

    Accepted label spellings (text is matched after stripping whitespace and
    lower-casing): "yes"/"true" -> 1, "no"/"false" -> 0, booleans, and anything
    that parses as the number 0 or 1 (e.g. 1, 1.0, "1").

    Rows whose label is missing, unparseable, or any number other than exactly
    0 or 1 are removed. Fractional values such as 0.7 are rejected rather than
    truncated to 0.

    Args:
        df: Input frame containing the label column. It is not modified.
        target: Name of the label column. Defaults to the module-level TARGET.

    Returns:
        A new DataFrame holding only the valid rows (original index preserved),
        with the label column cast to int.
    """
    col = TARGET if target is None else target
    text_labels = {"yes": 1.0, "no": 0.0, "true": 1.0, "false": 0.0}

    def _normalize(value):
        # Known text labels become 0.0/1.0; other strings are passed on in
        # stripped/lower-cased form so numeric text like " 1 " can still parse.
        if isinstance(value, str):
            key = value.strip().lower()
            return text_labels.get(key, key)
        if isinstance(value, (bool, np.bool_)):
            return float(value)
        return value

    out = df.copy()
    labels = out[col]
    if not pd.api.types.is_numeric_dtype(labels):
        # Only object/string columns need per-element text normalization.
        labels = labels.astype(object).map(_normalize)

    numeric = pd.to_numeric(labels, errors="coerce").astype("float64")

    # Filter BEFORE the int cast: casting first would truncate e.g. 0.7 -> 0
    # and let an invalid label through as a legitimate class.
    keep = numeric.isin([0.0, 1.0])
    out = out.loc[keep].copy()
    out[col] = numeric.loc[keep].astype(int)
    return out

def clean_features(df):
    """Return a copy of ``df`` with feature columns coerced to their expected dtypes.

    Columns listed in CAT_COLS become pandas "string" dtype; columns listed in
    NUM_COLS are converted with ``pd.to_numeric`` (unparseable values -> NaN).
    Listed columns that are absent from ``df`` are skipped. The input frame is
    left untouched.
    """
    cleaned = df.copy()

    categorical_present = [name for name in CAT_COLS if name in cleaned.columns]
    numeric_present = [name for name in NUM_COLS if name in cleaned.columns]

    for name in categorical_present:
        cleaned[name] = cleaned[name].astype("string")

    for name in numeric_present:
        cleaned[name] = pd.to_numeric(cleaned[name], errors="coerce")

    return cleaned

def split_xy(df):
    """Split ``df`` into a feature frame and a label array.

    Returns:
        (X, y) where ``y`` is the TARGET column as an array and ``X`` is ``df``
        without TARGET, ID_COL (if present) and any DROP_FEATURES columns that
        exist in the frame.
    """
    labels = df[TARGET].values
    features = df.drop(columns=[TARGET, ID_COL], errors="ignore")

    # Nothing extra configured to drop: return the frame as it is.
    if not DROP_FEATURES:
        return features, labels

    removable = [name for name in DROP_FEATURES if name in features.columns]
    return features.drop(columns=removable), labels

def evaluate(y_true, proba, threshold=0.5):
    """Compute binary classification metrics for predicted probabilities.

    Args:
        y_true: Ground-truth labels.
        proba: Predicted positive-class probabilities (array-like supporting
            ``>=`` and ``.astype``).
        threshold: Probabilities at or above this value count as positive.

    Returns:
        Dict with accuracy, precision, recall, f1, roc_auc, the share of
        positive predictions, and the confusion matrix as nested lists.
    """
    hard_pred = (proba >= threshold).astype(int)

    report = {}
    report["accuracy"] = accuracy_score(y_true, hard_pred)
    report["precision"] = precision_score(y_true, hard_pred, zero_division=0)
    report["recall"] = recall_score(y_true, hard_pred, zero_division=0)
    report["f1"] = f1_score(y_true, hard_pred, zero_division=0)
    # ROC AUC is threshold-free, so it is scored on the raw probabilities.
    report["roc_auc"] = roc_auc_score(y_true, proba)
    report["pred_pos_rate"] = hard_pred.mean()
    report["confusion_matrix"] = confusion_matrix(y_true, hard_pred).tolist()
    return report


# ---------------- MAIN ----------------
def main():
    """Train, evaluate and track a CatBoost churn classifier end to end.

    Steps:
      1. Read TRAIN_CSV and TEST_CSV and pool them into a single frame.
      2. Normalize the target labels and the feature dtypes.
      3. Make a stratified test split (TEST_SIZE of all rows), then a
         stratified validation split (VAL_SIZE of the remaining rows).
      4. Fit CatBoost with early stopping on the validation split.
      5. Score the test split with evaluate()'s default threshold, print the
         metrics, and log params, metrics, the JSON report, the .cbm model
         file, the metadata file and the MLflow model to the run.

    Side effects: creates OUT_DIR, writes three files into it, and records an
    MLflow run under EXPERIMENT_NAME. Returns None.
    """
    os.makedirs(OUT_DIR, exist_ok=True)

    # ---- Load & merge datasets ----
    # The two provided files are pooled; the splits below are made from the pool.
    df_train = pd.read_csv(TRAIN_CSV)
    df_test  = pd.read_csv(TEST_CSV)
    df = pd.concat([df_train, df_test], ignore_index=True)

    df = clean_target(df)
    df = clean_features(df)

    # ---- Train / Val / Test split ----
    X, y = split_xy(df)

    X_tmp, X_test, y_tmp, y_test = train_test_split(
        X, y,
        test_size=TEST_SIZE,
        random_state=RANDOM_STATE,
        stratify=y
    )

    # VAL_SIZE is applied to what is left after the test split, not to all rows.
    X_train, X_val, y_train, y_val = train_test_split(
        X_tmp, y_tmp,
        test_size=VAL_SIZE,
        random_state=RANDOM_STATE,
        stratify=y_tmp
    )

    cat_cols = [c for c in CAT_COLS if c in X_train.columns]

    # ---- Model ----
    # Single source of truth: the same dict builds the model and is logged to
    # MLflow, so the tracked params cannot drift from what was actually trained.
    model_params = {
        "loss_function": "Logloss",
        "eval_metric": "AUC",
        "iterations": 2000,
        "learning_rate": 0.05,
        "depth": 6,
        "l2_leaf_reg": 5,
        "random_seed": RANDOM_STATE,
        "early_stopping_rounds": 100,
    }
    # verbose only controls console logging, so it is kept out of the tracked params.
    model = CatBoostClassifier(**model_params, verbose=200)

    # ---- MLflow ----
    mlflow.set_experiment(EXPERIMENT_NAME)
    with mlflow.start_run():
        mlflow.log_params({
            "model": "CatBoostClassifier",
            **model_params,
            "test_size": TEST_SIZE,
            "val_size": VAL_SIZE,
            "drop_features": ",".join(DROP_FEATURES) if DROP_FEATURES else "NONE"
        })

        # Train with validation
        model.fit(
            X_train, y_train,
            eval_set=(X_val, y_val),
            cat_features=cat_cols,
            use_best_model=True
        )

        # Record where early stopping landed; needed to reproduce the model size.
        best_iteration = model.get_best_iteration()
        if best_iteration is not None:
            mlflow.log_metric("best_iteration", best_iteration)

        # Test evaluation
        test_proba = model.predict_proba(X_test)[:, 1]
        metrics = evaluate(y_test, test_proba)

        print("\nTEST METRICS @0.5")
        for k, v in metrics.items():
            if k != "confusion_matrix":
                print(f"{k}: {v}")
        print("Confusion matrix:")
        print(np.array(metrics["confusion_matrix"]))

        # Log metrics (the confusion matrix is not a scalar; it goes into the JSON report)
        for k, v in metrics.items():
            if k != "confusion_matrix":
                mlflow.log_metric(k, v)

        # Save artifacts
        report_path = os.path.join(OUT_DIR, "test_report.json")
        with open(report_path, "w", encoding="utf-8") as f:
            json.dump(metrics, f, indent=2)
        mlflow.log_artifact(report_path)

        model_path = os.path.join(OUT_DIR, "catboost_model.cbm")
        model.save_model(model_path)
        mlflow.log_artifact(model_path)

        meta_path = os.path.join(OUT_DIR, "model_meta.joblib")
        joblib.dump({
            "cat_cols": cat_cols,
            "drop_features": DROP_FEATURES,
            "target": TARGET
        }, meta_path)
        mlflow.log_artifact(meta_path)

        mlflow.catboost.log_model(model, name="model")

    print("\nDONE. Clean IID split + sane metrics + MLflow tracking.")

# Entry point: runs the full pipeline when this cell/script is executed directly
# (the recorded output below shows the guard is satisfied inside the notebook).
if __name__ == "__main__":
    main()


0:	test: 0.9316922	best: 0.9316922 (0)	total: 183ms	remaining: 6m 5s
200:	test: 0.9537471	best: 0.9537717 (179)	total: 26.8s	remaining: 3m 59s
Stopped by overfitting detector  (100 iterations wait)

bestTest = 0.9539116604
bestIteration = 267

Shrink model to first 268 iterations.

TEST METRICS @0.5
accuracy: 0.9312167217592684
precision: 0.8986713388815886
recall: 0.9874507566979803
f1: 0.9409716324103958
roc_auc: 0.953458482064828
pred_pos_rate: 0.6100532451851706
Confusion matrix:
[[38697  6246]
 [  704 55395]]

DONE. Clean IID split + sane metrics + MLflow tracking.
