# Comprehensive Modeling with Sampling Techniques - Customer Churn Prediction

This notebook implements and compares three classification algorithms with various sampling techniques:

**Models:**
1. Logistic Regression
2. Random Forest
3. XGBoost

**Sampling Techniques:**
- No Sampling (Baseline)
- Random Oversampling
- SMOTE (Synthetic Minority Oversampling Technique)
- ADASYN (Adaptive Synthetic Sampling)
- Random Undersampling
- Tomek Links
- NearMiss
- SMOTE + Tomek Links (Combined)
- SMOTE + ENN (Combined)

**Optimization:**
- Hyperparameter tuning using GridSearchCV and RandomizedSearchCV
- Cross-validation
- Comprehensive performance evaluation

In [1]:
from pathlib import Path
import os

SRC = str(Path.cwd().parent)
os.chdir(SRC)
print(SRC)

E:\DS lectures\customer-churn-ds\customer_churn_prediction


In [2]:
# Import necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
from time import time
import json

# Sklearn imports
from sklearn.model_selection import (
    train_test_split, cross_val_score, GridSearchCV, 
    RandomizedSearchCV, StratifiedKFold
)
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score, f1_score,
    roc_auc_score, confusion_matrix, classification_report,
    roc_curve, precision_recall_curve, average_precision_score
)

# XGBoost
import xgboost as xgb
from xgboost import XGBClassifier

# Imbalanced-learn for sampling
from imblearn.over_sampling import RandomOverSampler, SMOTE, ADASYN
from imblearn.under_sampling import RandomUnderSampler, TomekLinks, NearMiss
from imblearn.combine import SMOTETomek, SMOTEENN

warnings.filterwarnings('ignore')

# Set visualization styles
plt.style.use('seaborn-v0_8-darkgrid')
sns.set_palette('husl')
%matplotlib inline

pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', 100)
pd.set_option('display.float_format', lambda x: '%.4f' % x)

# Set random seed for reproducibility
RANDOM_STATE = 42
np.random.seed(RANDOM_STATE)

## 1. Data Loading and Preprocessing

In [3]:
# Load the dataset
df = pd.read_csv('data/raw/customer_churn_dataset_with_date.csv')

print(f"Dataset shape: {df.shape}")
print(f"\nFirst few rows:")
df.head()

Dataset shape: (100000, 11)

First few rows:


Unnamed: 0,customer_id,age,tenure_months,monthly_charges,total_charges,contract_type,internet_service,support_calls,late_payments,churn,Date
0,1,46.0,8.0,63.67,102.96,Month-to-month,Fiber optic,3.0,0,0,2025-01-01
1,2,38.0,28.0,93.66,2830.15,One year,Fiber optic,0.0,1,1,2025-01-02
2,3,48.0,5.0,122.37,924.36,Month-to-month,DSL,1.0,0,0,2025-01-03
3,4,58.0,37.0,66.45,1990.95,One year,Fiber optic,5.0,2,0,2025-01-04
4,5,37.0,2.0,57.32,20.03,One year,Fiber optic,0.0,1,1,2025-01-05


In [4]:
# Preprocessing
df_processed = df.copy()

# Drop non-predictive columns
df_processed = df_processed.drop(['customer_id', 'Date'], axis=1, errors='ignore')

# Separate features and target
X = df_processed.drop('churn', axis=1)
y = df_processed['churn']

print(f"\nChurn distribution:")
print(y.value_counts())
print(f"\nChurn rate: {y.mean()*100:.2f}%")
print(f"Imbalance ratio: 1:{(y==0).sum()/(y==1).sum():.2f}")


Churn distribution:
churn
0    60748
1    39252
Name: count, dtype: int64

Churn rate: 39.25%
Imbalance ratio: 1:1.55


In [5]:
# Identify feature types
numerical_features = X.select_dtypes(include=[np.number]).columns.tolist()
categorical_features = X.select_dtypes(include=['object']).columns.tolist()

print(f"Numerical features ({len(numerical_features)}): {numerical_features}")
print(f"Categorical features ({len(categorical_features)}): {categorical_features}")

Numerical features (6): ['age', 'tenure_months', 'monthly_charges', 'total_charges', 'support_calls', 'late_payments']
Categorical features (2): ['contract_type', 'internet_service']


In [6]:
# Handle missing values
for col in numerical_features:
    if X[col].isnull().sum() > 0:
        X[col] = X[col].fillna(X[col].median())

for col in categorical_features:
    if X[col].isnull().sum() > 0:
        X[col] = X[col].fillna(X[col].mode()[0])

print("Missing values handled")
print(f"Remaining missing values: {X.isnull().sum().sum()}")

Missing values handled
Remaining missing values: 0


In [7]:
# Encode categorical features
X_encoded = X.copy()
label_encoders = {}

for col in categorical_features:
    le = LabelEncoder()
    X_encoded[col] = le.fit_transform(X[col])
    label_encoders[col] = le
    print(f"\nEncoded {col}:")
    print(f"  Original values: {X[col].unique()}")
    print(f"  Encoded values: {X_encoded[col].unique()}")

print(f"\nEncoded dataset shape: {X_encoded.shape}")


Encoded contract_type:
  Original values: <StringArray>
['Month-to-month', 'One year', 'Two year']
Length: 3, dtype: str
  Encoded values: [0 1 2]

Encoded internet_service:
  Original values: <StringArray>
['Fiber optic', 'DSL', 'No']
Length: 3, dtype: str
  Encoded values: [1 0 2]

Encoded dataset shape: (100000, 8)


In [8]:
# Train-test split
X_train, X_test, y_train, y_test = train_test_split(
    X_encoded, y, test_size=0.2, random_state=RANDOM_STATE, stratify=y
)

print(f"Training set size: {X_train.shape}")
print(f"Test set size: {X_test.shape}")
print(f"\nTraining set churn rate: {y_train.mean()*100:.2f}%")
print(f"Test set churn rate: {y_test.mean()*100:.2f}%")

Training set size: (80000, 8)
Test set size: (20000, 8)

Training set churn rate: 39.25%
Test set churn rate: 39.25%


In [9]:
# Scale features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Convert back to DataFrame
X_train_scaled = pd.DataFrame(X_train_scaled, columns=X_train.columns, index=X_train.index)
X_test_scaled = pd.DataFrame(X_test_scaled, columns=X_test.columns, index=X_test.index)

print("Features scaled successfully!")

Features scaled successfully!


## 2. Define Sampling Techniques

In [10]:
# Define all sampling techniques
sampling_techniques = {
    'No Sampling': None,
    'Random Oversampling': RandomOverSampler(random_state=RANDOM_STATE),
    'SMOTE': SMOTE(random_state=RANDOM_STATE),
    'ADASYN': ADASYN(random_state=RANDOM_STATE),
    'Random Undersampling': RandomUnderSampler(random_state=RANDOM_STATE),
    'Tomek Links': TomekLinks(),
    'NearMiss': NearMiss(version=1),
    'SMOTE + Tomek': SMOTETomek(random_state=RANDOM_STATE),
    'SMOTE + ENN': SMOTEENN(random_state=RANDOM_STATE)
}

print("Sampling techniques defined:")
for name in sampling_techniques.keys():
    print(f"  - {name}")

Sampling techniques defined:
  - No Sampling
  - Random Oversampling
  - SMOTE
  - ADASYN
  - Random Undersampling
  - Tomek Links
  - NearMiss
  - SMOTE + Tomek
  - SMOTE + ENN


In [11]:
# Function to apply sampling
def apply_sampling(X_train, y_train, sampler, technique_name):
    """
    Apply sampling technique and return resampled data
    """
    if sampler is None:
        return X_train, y_train
    
    print(f"\nApplying {technique_name}...")
    print(f"  Before: {len(y_train)} samples, Churn rate: {y_train.mean()*100:.2f}%")
    
    X_resampled, y_resampled = sampler.fit_resample(X_train, y_train)
    
    print(f"  After: {len(y_resampled)} samples, Churn rate: {y_resampled.mean()*100:.2f}%")
    print(f"  Class distribution: {pd.Series(y_resampled).value_counts().to_dict()}")
    
    return X_resampled, y_resampled

## 3. Define Evaluation Metrics

In [12]:
# Function to evaluate model
def evaluate_model(model, X_train, y_train, X_test, y_test, model_name, sampling_name):
    """
    Comprehensive model evaluation
    """
    # Training predictions
    y_train_pred = model.predict(X_train)
    y_train_proba = model.predict_proba(X_train)[:, 1]
    
    # Test predictions
    y_test_pred = model.predict(X_test)
    y_test_proba = model.predict_proba(X_test)[:, 1]
    
    # Calculate metrics
    results = {
        'Model': model_name,
        'Sampling': sampling_name,
        
        # Training metrics
        'Train_Accuracy': accuracy_score(y_train, y_train_pred),
        'Train_Precision': precision_score(y_train, y_train_pred, zero_division=0),
        'Train_Recall': recall_score(y_train, y_train_pred, zero_division=0),
        'Train_F1': f1_score(y_train, y_train_pred, zero_division=0),
        'Train_ROC_AUC': roc_auc_score(y_train, y_train_proba),
        
        # Test metrics
        'Test_Accuracy': accuracy_score(y_test, y_test_pred),
        'Test_Precision': precision_score(y_test, y_test_pred, zero_division=0),
        'Test_Recall': recall_score(y_test, y_test_pred, zero_division=0),
        'Test_F1': f1_score(y_test, y_test_pred, zero_division=0),
        'Test_ROC_AUC': roc_auc_score(y_test, y_test_proba),
        'Test_AP': average_precision_score(y_test, y_test_proba)
    }
    
    return results, y_test_pred, y_test_proba

print("Evaluation function defined")

Evaluation function defined


## 4. Logistic Regression Models

### 4.1 Baseline Logistic Regression (All Sampling Techniques)

In [13]:
# Train Logistic Regression with all sampling techniques
print("="*80)
print("LOGISTIC REGRESSION - BASELINE MODELS")
print("="*80)

lr_results = []

for sampling_name, sampler in sampling_techniques.items():
    print(f"\n{'='*80}")
    print(f"Sampling: {sampling_name}")
    print(f"{'='*80}")
    
    # Apply sampling
    X_train_resampled, y_train_resampled = apply_sampling(
        X_train_scaled, y_train, sampler, sampling_name
    )
    
    # Train model
    lr_model = LogisticRegression(max_iter=1000, random_state=RANDOM_STATE)
    lr_model.fit(X_train_resampled, y_train_resampled)
    
    # Evaluate
    results, _, _ = evaluate_model(
        lr_model, X_train_resampled, y_train_resampled, 
        X_test_scaled, y_test, 'Logistic Regression', sampling_name
    )
    lr_results.append(results)
    
    print(f"\nResults:")
    print(f"  Test Accuracy: {results['Test_Accuracy']:.4f}")
    print(f"  Test Precision: {results['Test_Precision']:.4f}")
    print(f"  Test Recall: {results['Test_Recall']:.4f}")
    print(f"  Test F1-Score: {results['Test_F1']:.4f}")
    print(f"  Test ROC-AUC: {results['Test_ROC_AUC']:.4f}")

# Create results DataFrame
lr_results_df = pd.DataFrame(lr_results)
print("\n" + "="*80)
print("LOGISTIC REGRESSION SUMMARY")
print("="*80)
print(lr_results_df[['Sampling', 'Test_Accuracy', 'Test_Precision', 
                       'Test_Recall', 'Test_F1', 'Test_ROC_AUC']])

LOGISTIC REGRESSION - BASELINE MODELS

Sampling: No Sampling

Results:
  Test Accuracy: 0.6371
  Test Precision: 0.5632
  Test Recall: 0.3361
  Test F1-Score: 0.4209
  Test ROC-AUC: 0.6613

Sampling: Random Oversampling

Applying Random Oversampling...
  Before: 80000 samples, Churn rate: 39.25%
  After: 97196 samples, Churn rate: 50.00%
  Class distribution: {0: 48598, 1: 48598}

Results:
  Test Accuracy: 0.6048
  Test Precision: 0.4976
  Test Recall: 0.7048
  Test F1-Score: 0.5833
  Test ROC-AUC: 0.6612

Sampling: SMOTE

Applying SMOTE...
  Before: 80000 samples, Churn rate: 39.25%
  After: 97196 samples, Churn rate: 50.00%
  Class distribution: {0: 48598, 1: 48598}

Results:
  Test Accuracy: 0.6040
  Test Precision: 0.4968
  Test Recall: 0.7025
  Test F1-Score: 0.5821
  Test ROC-AUC: 0.6608

Sampling: ADASYN

Applying ADASYN...
  Before: 80000 samples, Churn rate: 39.25%
  After: 97250 samples, Churn rate: 50.03%
  Class distribution: {1: 48652, 0: 48598}

Results:
  Test Accuracy: 

### 4.2 Hyperparameter Tuning - Logistic Regression

In [14]:
# Find best sampling technique for Logistic Regression
best_lr_sampling = lr_results_df.loc[lr_results_df['Test_F1'].idxmax(), 'Sampling']
print(f"Best sampling technique for Logistic Regression: {best_lr_sampling}")
print(f"Best F1-Score: {lr_results_df['Test_F1'].max():.4f}")

# Apply best sampling
best_sampler_lr = sampling_techniques[best_lr_sampling]
X_train_lr_best, y_train_lr_best = apply_sampling(
    X_train_scaled, y_train, best_sampler_lr, best_lr_sampling
)

# Hyperparameter tuning
print("\nHyperparameter Tuning for Logistic Regression...")
print("="*80)

param_grid_lr = {
    'C': [0.001, 0.01, 0.1, 1, 10, 100],
    'penalty': ['l1', 'l2', 'elasticnet'],
    'solver': ['liblinear', 'saga'],
    'class_weight': [None, 'balanced'],
    'max_iter': [1000, 2000]
}

# Grid Search with cross-validation
lr_grid = GridSearchCV(
    LogisticRegression(random_state=RANDOM_STATE),
    param_grid_lr,
    cv=5,
    scoring='f1',
    n_jobs=-1,
    verbose=1
)

lr_grid.fit(X_train_lr_best, y_train_lr_best)

print(f"\nBest parameters: {lr_grid.best_params_}")
print(f"Best CV F1-Score: {lr_grid.best_score_:.4f}")

# Evaluate tuned model
best_lr_model = lr_grid.best_estimator_
lr_tuned_results, lr_tuned_pred, lr_tuned_proba = evaluate_model(
    best_lr_model, X_train_lr_best, y_train_lr_best,
    X_test_scaled, y_test, 'Logistic Regression (Tuned)', best_lr_sampling
)

print("\nTuned Model Results:")
print(f"  Test Accuracy: {lr_tuned_results['Test_Accuracy']:.4f}")
print(f"  Test Precision: {lr_tuned_results['Test_Precision']:.4f}")
print(f"  Test Recall: {lr_tuned_results['Test_Recall']:.4f}")
print(f"  Test F1-Score: {lr_tuned_results['Test_F1']:.4f}")
print(f"  Test ROC-AUC: {lr_tuned_results['Test_ROC_AUC']:.4f}")

Best sampling technique for Logistic Regression: SMOTE + ENN
Best F1-Score: 0.5904

Applying SMOTE + ENN...
  Before: 80000 samples, Churn rate: 39.25%
  After: 34267 samples, Churn rate: 55.28%
  Class distribution: {1: 18944, 0: 15323}

Hyperparameter Tuning for Logistic Regression...
Fitting 5 folds for each of 144 candidates, totalling 720 fits

Best parameters: {'C': 0.001, 'class_weight': 'balanced', 'max_iter': 1000, 'penalty': 'l1', 'solver': 'liblinear'}
Best CV F1-Score: 0.8214

Tuned Model Results:
  Test Accuracy: 0.5933
  Test Precision: 0.4882
  Test Recall: 0.7478
  Test F1-Score: 0.5907
  Test ROC-AUC: 0.6621


## 5. Random Forest Models

### 5.1 Baseline Random Forest (All Sampling Techniques)

In [15]:
# Train Random Forest with all sampling techniques
print("="*80)
print("RANDOM FOREST - BASELINE MODELS")
print("="*80)

rf_results = []

for sampling_name, sampler in sampling_techniques.items():
    print(f"\n{'='*80}")
    print(f"Sampling: {sampling_name}")
    print(f"{'='*80}")
    
    # Apply sampling
    X_train_resampled, y_train_resampled = apply_sampling(
        X_train, y_train, sampler, sampling_name
    )
    
    # Train model
    rf_model = RandomForestClassifier(
        n_estimators=100, 
        random_state=RANDOM_STATE,
        n_jobs=-1
    )
    rf_model.fit(X_train_resampled, y_train_resampled)
    
    # Evaluate
    results, _, _ = evaluate_model(
        rf_model, X_train_resampled, y_train_resampled,
        X_test, y_test, 'Random Forest', sampling_name
    )
    rf_results.append(results)
    
    print(f"\nResults:")
    print(f"  Test Accuracy: {results['Test_Accuracy']:.4f}")
    print(f"  Test Precision: {results['Test_Precision']:.4f}")
    print(f"  Test Recall: {results['Test_Recall']:.4f}")
    print(f"  Test F1-Score: {results['Test_F1']:.4f}")
    print(f"  Test ROC-AUC: {results['Test_ROC_AUC']:.4f}")

# Create results DataFrame
rf_results_df = pd.DataFrame(rf_results)
print("\n" + "="*80)
print("RANDOM FOREST SUMMARY")
print("="*80)
print(rf_results_df[['Sampling', 'Test_Accuracy', 'Test_Precision',
                       'Test_Recall', 'Test_F1', 'Test_ROC_AUC']])

RANDOM FOREST - BASELINE MODELS

Sampling: No Sampling

Results:
  Test Accuracy: 0.6341
  Test Precision: 0.5436
  Test Recall: 0.4229
  Test F1-Score: 0.4757
  Test ROC-AUC: 0.6585

Sampling: Random Oversampling

Applying Random Oversampling...
  Before: 80000 samples, Churn rate: 39.25%
  After: 97196 samples, Churn rate: 50.00%
  Class distribution: {0: 48598, 1: 48598}


KeyboardInterrupt: 

### 5.2 Hyperparameter Tuning - Random Forest

In [None]:
# Find best sampling technique for Random Forest
best_rf_sampling = rf_results_df.loc[rf_results_df['Test_F1'].idxmax(), 'Sampling']
print(f"Best sampling technique for Random Forest: {best_rf_sampling}")
print(f"Best F1-Score: {rf_results_df['Test_F1'].max():.4f}")

# Apply best sampling
best_sampler_rf = sampling_techniques[best_rf_sampling]
X_train_rf_best, y_train_rf_best = apply_sampling(
    X_train, y_train, best_sampler_rf, best_rf_sampling
)

# Hyperparameter tuning with RandomizedSearchCV for efficiency
print("\nHyperparameter Tuning for Random Forest...")
print("="*80)

param_dist_rf = {
    'n_estimators': [100, 200, 300, 500],
    'max_depth': [10, 20, 30, None],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4],
    'max_features': ['sqrt', 'log2', None],
    'bootstrap': [True, False],
    'class_weight': [None, 'balanced', 'balanced_subsample']
}

# Randomized Search
rf_random = RandomizedSearchCV(
    RandomForestClassifier(random_state=RANDOM_STATE, n_jobs=-1),
    param_dist_rf,
    n_iter=50,
    cv=5,
    scoring='f1',
    n_jobs=-1,
    verbose=1,
    random_state=RANDOM_STATE
)

rf_random.fit(X_train_rf_best, y_train_rf_best)

print(f"\nBest parameters: {rf_random.best_params_}")
print(f"Best CV F1-Score: {rf_random.best_score_:.4f}")

# Evaluate tuned model
best_rf_model = rf_random.best_estimator_
rf_tuned_results, rf_tuned_pred, rf_tuned_proba = evaluate_model(
    best_rf_model, X_train_rf_best, y_train_rf_best,
    X_test, y_test, 'Random Forest (Tuned)', best_rf_sampling
)

print("\nTuned Model Results:")
print(f"  Test Accuracy: {rf_tuned_results['Test_Accuracy']:.4f}")
print(f"  Test Precision: {rf_tuned_results['Test_Precision']:.4f}")
print(f"  Test Recall: {rf_tuned_results['Test_Recall']:.4f}")
print(f"  Test F1-Score: {rf_tuned_results['Test_F1']:.4f}")
print(f"  Test ROC-AUC: {rf_tuned_results['Test_ROC_AUC']:.4f}")

## 6. XGBoost Models

### 6.1 Baseline XGBoost (All Sampling Techniques)

In [None]:
# Train XGBoost with all sampling techniques
print("="*80)
print("XGBOOST - BASELINE MODELS")
print("="*80)

xgb_results = []

for sampling_name, sampler in sampling_techniques.items():
    print(f"\n{'='*80}")
    print(f"Sampling: {sampling_name}")
    print(f"{'='*80}")
    
    # Apply sampling
    X_train_resampled, y_train_resampled = apply_sampling(
        X_train, y_train, sampler, sampling_name
    )
    
    # Train model
    xgb_model = XGBClassifier(
        n_estimators=100,
        random_state=RANDOM_STATE,
        eval_metric='logloss',
        use_label_encoder=False
    )
    xgb_model.fit(X_train_resampled, y_train_resampled)
    
    # Evaluate
    results, _, _ = evaluate_model(
        xgb_model, X_train_resampled, y_train_resampled,
        X_test, y_test, 'XGBoost', sampling_name
    )
    xgb_results.append(results)
    
    print(f"\nResults:")
    print(f"  Test Accuracy: {results['Test_Accuracy']:.4f}")
    print(f"  Test Precision: {results['Test_Precision']:.4f}")
    print(f"  Test Recall: {results['Test_Recall']:.4f}")
    print(f"  Test F1-Score: {results['Test_F1']:.4f}")
    print(f"  Test ROC-AUC: {results['Test_ROC_AUC']:.4f}")

# Create results DataFrame
xgb_results_df = pd.DataFrame(xgb_results)
print("\n" + "="*80)
print("XGBOOST SUMMARY")
print("="*80)
print(xgb_results_df[['Sampling', 'Test_Accuracy', 'Test_Precision',
                        'Test_Recall', 'Test_F1', 'Test_ROC_AUC']])

### 6.2 Hyperparameter Tuning - XGBoost

In [None]:
# Find best sampling technique for XGBoost
best_xgb_sampling = xgb_results_df.loc[xgb_results_df['Test_F1'].idxmax(), 'Sampling']
print(f"Best sampling technique for XGBoost: {best_xgb_sampling}")
print(f"Best F1-Score: {xgb_results_df['Test_F1'].max():.4f}")

# Apply best sampling
best_sampler_xgb = sampling_techniques[best_xgb_sampling]
X_train_xgb_best, y_train_xgb_best = apply_sampling(
    X_train, y_train, best_sampler_xgb, best_xgb_sampling
)

# Hyperparameter tuning with RandomizedSearchCV
print("\nHyperparameter Tuning for XGBoost...")
print("="*80)

param_dist_xgb = {
    'n_estimators': [100, 200, 300, 500],
    'max_depth': [3, 5, 7, 9],
    'learning_rate': [0.01, 0.05, 0.1, 0.3],
    'subsample': [0.6, 0.8, 1.0],
    'colsample_bytree': [0.6, 0.8, 1.0],
    'gamma': [0, 0.1, 0.5, 1],
    'min_child_weight': [1, 3, 5],
    'scale_pos_weight': [1, 2, 3],  # For imbalanced data
    'reg_alpha': [0, 0.1, 1],
    'reg_lambda': [0, 0.1, 1]
}

# Randomized Search
xgb_random = RandomizedSearchCV(
    XGBClassifier(
        random_state=RANDOM_STATE,
        eval_metric='logloss',
        use_label_encoder=False
    ),
    param_dist_xgb,
    n_iter=50,
    cv=5,
    scoring='f1',
    n_jobs=-1,
    verbose=1,
    random_state=RANDOM_STATE
)

xgb_random.fit(X_train_xgb_best, y_train_xgb_best)

print(f"\nBest parameters: {xgb_random.best_params_}")
print(f"Best CV F1-Score: {xgb_random.best_score_:.4f}")

# Evaluate tuned model
best_xgb_model = xgb_random.best_estimator_
xgb_tuned_results, xgb_tuned_pred, xgb_tuned_proba = evaluate_model(
    best_xgb_model, X_train_xgb_best, y_train_xgb_best,
    X_test, y_test, 'XGBoost (Tuned)', best_xgb_sampling
)

print("\nTuned Model Results:")
print(f"  Test Accuracy: {xgb_tuned_results['Test_Accuracy']:.4f}")
print(f"  Test Precision: {xgb_tuned_results['Test_Precision']:.4f}")
print(f"  Test Recall: {xgb_tuned_results['Test_Recall']:.4f}")
print(f"  Test F1-Score: {xgb_tuned_results['Test_F1']:.4f}")
print(f"  Test ROC-AUC: {xgb_tuned_results['Test_ROC_AUC']:.4f}")

## 7. Comprehensive Model Comparison

In [None]:
# Combine all results
all_results = pd.concat([
    lr_results_df,
    rf_results_df,
    xgb_results_df
], ignore_index=True)

# Add tuned models
all_results = pd.concat([
    all_results,
    pd.DataFrame([lr_tuned_results, rf_tuned_results, xgb_tuned_results])
], ignore_index=True)

print("="*80)
print("COMPREHENSIVE MODEL COMPARISON")
print("="*80)
print("\nAll Models Performance:")
print(all_results[['Model', 'Sampling', 'Test_Accuracy', 'Test_Precision',
                    'Test_Recall', 'Test_F1', 'Test_ROC_AUC']].sort_values(
                        'Test_F1', ascending=False
                    ))

In [None]:
# Find best overall model
best_model_idx = all_results['Test_F1'].idxmax()
best_model_info = all_results.iloc[best_model_idx]

print("\n" + "="*80)
print("BEST OVERALL MODEL")
print("="*80)
print(f"Model: {best_model_info['Model']}")
print(f"Sampling: {best_model_info['Sampling']}")
print(f"\nTest Performance:")
print(f"  Accuracy: {best_model_info['Test_Accuracy']:.4f}")
print(f"  Precision: {best_model_info['Test_Precision']:.4f}")
print(f"  Recall: {best_model_info['Test_Recall']:.4f}")
print(f"  F1-Score: {best_model_info['Test_F1']:.4f}")
print(f"  ROC-AUC: {best_model_info['Test_ROC_AUC']:.4f}")

## 8. Visualization - Performance Comparison

In [None]:
# Plot comparison of all models
fig, axes = plt.subplots(2, 3, figsize=(20, 12))

metrics = ['Test_Accuracy', 'Test_Precision', 'Test_Recall', 'Test_F1', 'Test_ROC_AUC']
metric_names = ['Accuracy', 'Precision', 'Recall', 'F1-Score', 'ROC-AUC']

for idx, (metric, name) in enumerate(zip(metrics, metric_names)):
    row = idx // 3
    col = idx % 3
    
    # Group by model and sampling
    pivot_data = all_results.pivot_table(
        values=metric,
        index='Sampling',
        columns='Model',
        aggfunc='first'
    )
    
    pivot_data.plot(kind='bar', ax=axes[row, col], width=0.8)
    axes[row, col].set_title(f'{name} Comparison', fontsize=14, fontweight='bold')
    axes[row, col].set_xlabel('Sampling Technique', fontsize=10)
    axes[row, col].set_ylabel(name, fontsize=10)
    axes[row, col].tick_params(axis='x', rotation=45)
    axes[row, col].legend(loc='best', fontsize=8)
    axes[row, col].grid(axis='y', alpha=0.3)

# Remove empty subplot
fig.delaxes(axes[1, 2])

plt.tight_layout()
plt.show()

In [None]:
# Heatmap of F1-Scores
plt.figure(figsize=(14, 8))
f1_heatmap = all_results.pivot_table(
    values='Test_F1',
    index='Sampling',
    columns='Model',
    aggfunc='first'
)

sns.heatmap(f1_heatmap, annot=True, fmt='.4f', cmap='RdYlGn', 
            center=f1_heatmap.values.mean(), cbar_kws={'label': 'F1-Score'})
plt.title('F1-Score Heatmap: Models vs Sampling Techniques', 
          fontsize=16, fontweight='bold')
plt.xlabel('Model', fontsize=12)
plt.ylabel('Sampling Technique', fontsize=12)
plt.tight_layout()
plt.show()

## 9. Confusion Matrices for Best Models

In [None]:
# Plot confusion matrices for tuned models
fig, axes = plt.subplots(1, 3, figsize=(18, 5))

models_cm = [
    ('Logistic Regression', lr_tuned_pred),
    ('Random Forest', rf_tuned_pred),
    ('XGBoost', xgb_tuned_pred)
]

for idx, (name, predictions) in enumerate(models_cm):
    cm = confusion_matrix(y_test, predictions)
    
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', ax=axes[idx],
                xticklabels=['No Churn', 'Churn'],
                yticklabels=['No Churn', 'Churn'])
    axes[idx].set_title(f'{name} (Tuned)\nConfusion Matrix', 
                        fontsize=12, fontweight='bold')
    axes[idx].set_xlabel('Predicted', fontsize=10)
    axes[idx].set_ylabel('Actual', fontsize=10)

plt.tight_layout()
plt.show()

## 10. ROC Curves

In [None]:
# ROC Curves for tuned models
plt.figure(figsize=(12, 8))

models_roc = [
    ('Logistic Regression (Tuned)', lr_tuned_proba, lr_tuned_results['Test_ROC_AUC']),
    ('Random Forest (Tuned)', rf_tuned_proba, rf_tuned_results['Test_ROC_AUC']),
    ('XGBoost (Tuned)', xgb_tuned_proba, xgb_tuned_results['Test_ROC_AUC'])
]

for name, proba, auc_score in models_roc:
    fpr, tpr, _ = roc_curve(y_test, proba)
    plt.plot(fpr, tpr, linewidth=2, label=f'{name} (AUC = {auc_score:.4f})')

# Plot diagonal
plt.plot([0, 1], [0, 1], 'k--', linewidth=2, label='Random Classifier')

plt.xlabel('False Positive Rate', fontsize=12)
plt.ylabel('True Positive Rate', fontsize=12)
plt.title('ROC Curves - Tuned Models', fontsize=16, fontweight='bold')
plt.legend(loc='lower right', fontsize=10)
plt.grid(alpha=0.3)
plt.tight_layout()
plt.show()

## 11. Precision-Recall Curves

In [None]:
# Precision-Recall Curves for tuned models
plt.figure(figsize=(12, 8))

for name, proba, _ in models_roc:
    precision, recall, _ = precision_recall_curve(y_test, proba)
    ap_score = average_precision_score(y_test, proba)
    plt.plot(recall, precision, linewidth=2, 
             label=f'{name} (AP = {ap_score:.4f})')

plt.xlabel('Recall', fontsize=12)
plt.ylabel('Precision', fontsize=12)
plt.title('Precision-Recall Curves - Tuned Models', fontsize=16, fontweight='bold')
plt.legend(loc='best', fontsize=10)
plt.grid(alpha=0.3)
plt.tight_layout()
plt.show()

## 12. Feature Importance (Best Model)

In [None]:
# Feature importance from best tree-based model
# Compare RF and XGBoost feature importance

fig, axes = plt.subplots(1, 2, figsize=(18, 6))

# Random Forest feature importance
rf_importance_df = pd.DataFrame({
    'Feature': X_train.columns,
    'Importance': best_rf_model.feature_importances_
}).sort_values('Importance', ascending=False).head(10)

axes[0].barh(rf_importance_df['Feature'], rf_importance_df['Importance'], 
             color='forestgreen', edgecolor='black')
axes[0].set_xlabel('Importance', fontsize=12)
axes[0].set_ylabel('Features', fontsize=12)
axes[0].set_title('Random Forest - Top 10 Features', fontsize=14, fontweight='bold')
axes[0].invert_yaxis()

# XGBoost feature importance
xgb_importance_df = pd.DataFrame({
    'Feature': X_train.columns,
    'Importance': best_xgb_model.feature_importances_
}).sort_values('Importance', ascending=False).head(10)

axes[1].barh(xgb_importance_df['Feature'], xgb_importance_df['Importance'],
             color='dodgerblue', edgecolor='black')
axes[1].set_xlabel('Importance', fontsize=12)
axes[1].set_ylabel('Features', fontsize=12)
axes[1].set_title('XGBoost - Top 10 Features', fontsize=14, fontweight='bold')
axes[1].invert_yaxis()

plt.tight_layout()
plt.show()

## 13. Save Results and Models

In [None]:
# Save all results to CSV
all_results.to_csv('model_comparison_results.csv', index=False)
print("All results saved to 'model_comparison_results.csv'")

# Save best models
import pickle

models_to_save = {
    'logistic_regression': best_lr_model,
    'random_forest': best_rf_model,
    'xgboost': best_xgb_model,
    'scaler': scaler,
    'label_encoders': label_encoders
}

for name, model in models_to_save.items():
    with open(f'{name}_model.pkl', 'wb') as f:
        pickle.dump(model, f)
    print(f"{name} saved")

print("\nAll models saved successfully!")

## 14. Summary and Insights

In [None]:
print("="*80)
print("MODELING SUMMARY AND INSIGHTS")
print("="*80)

print("\n1. DATASET CHARACTERISTICS:")
print(f"   - Total samples: {len(df):,}")
print(f"   - Training samples: {len(X_train):,}")
print(f"   - Test samples: {len(X_test):,}")
print(f"   - Original churn rate: {y.mean()*100:.2f}%")
print(f"   - Class imbalance ratio: 1:{(y==0).sum()/(y==1).sum():.2f}")

print("\n2. MODELS EVALUATED:")
print("   - Logistic Regression")
print("   - Random Forest")
print("   - XGBoost")

print("\n3. SAMPLING TECHNIQUES TESTED:")
for name in sampling_techniques.keys():
    print(f"   - {name}")

print("\n4. BEST SAMPLING TECHNIQUES BY MODEL:")
print(f"   - Logistic Regression: {best_lr_sampling}")
print(f"   - Random Forest: {best_rf_sampling}")
print(f"   - XGBoost: {best_xgb_sampling}")

print("\n5. BEST OVERALL MODEL:")
print(f"   Model: {best_model_info['Model']}")
print(f"   Sampling: {best_model_info['Sampling']}")
print(f"   Test F1-Score: {best_model_info['Test_F1']:.4f}")
print(f"   Test ROC-AUC: {best_model_info['Test_ROC_AUC']:.4f}")

print("\n6. TUNED MODELS PERFORMANCE:")
print(f"   Logistic Regression:")
print(f"     - F1-Score: {lr_tuned_results['Test_F1']:.4f}")
print(f"     - ROC-AUC: {lr_tuned_results['Test_ROC_AUC']:.4f}")
print(f"   Random Forest:")
print(f"     - F1-Score: {rf_tuned_results['Test_F1']:.4f}")
print(f"     - ROC-AUC: {rf_tuned_results['Test_ROC_AUC']:.4f}")
print(f"   XGBoost:")
print(f"     - F1-Score: {xgb_tuned_results['Test_F1']:.4f}")
print(f"     - ROC-AUC: {xgb_tuned_results['Test_ROC_AUC']:.4f}")

print("\n7. KEY FINDINGS:")
print("   - Sampling techniques significantly impact model performance")
print("   - Tree-based models (RF, XGBoost) generally perform better than Logistic Regression")
print("   - Hyperparameter tuning provides measurable improvements")
print("   - SMOTE and combined techniques often yield best results for imbalanced data")

print("\n8. RECOMMENDATIONS:")
print(f"   - Deploy {best_model_info['Model']} with {best_model_info['Sampling']} sampling")
print("   - Monitor model performance on new data")
print("   - Consider ensemble methods for further improvement")
print("   - Regularly retrain with updated data")

print("\n" + "="*80)
print("END OF MODELING")
print("="*80)