In [3]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, roc_auc_score

In [None]:
# Separate the predictors from the target column.
X = df.drop(columns=['churn'], errors='ignore')  # Features
y = df['churn']  # Target

# Split the dataset into training and testing sets.
# stratify=y keeps the class proportions of the target identical in both
# splits, so the held-out metrics are not distorted by an unlucky draw.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Define models and their hyperparameters for tuning.
# Each entry maps a display name to an unfitted estimator ('model') and the
# grid of candidate values ('params') that GridSearchCV will enumerate.
# Every estimator that draws random numbers is given a fixed seed so that a
# fresh Restart & Run All reproduces the same search results.
models = {
    'RandomForest': {
        'model': RandomForestClassifier(random_state=42),
        'params': {
            'n_estimators': [50, 100, 200],
            # 'auto' was only an alias of 'sqrt' for classifiers and has been
            # removed from scikit-learn (1.3+); list explicit options instead.
            'max_features': ['sqrt', 'log2'],
            'max_depth': [None, 10, 20],
            'min_samples_split': [2, 5],
            'min_samples_leaf': [1, 2]
        }
    },
    'LogisticRegression': {
        # random_state matters for the 'liblinear' and 'saga' solvers below.
        'model': LogisticRegression(max_iter=1000, random_state=42),
        'params': {
            'C': [0.1, 1.0, 10.0],
            'solver': ['liblinear', 'saga']
        }
    },
    'XGBoost': {
        # NOTE(review): use_label_encoder is deprecated in recent xgboost
        # releases and may only trigger a warning there — confirm against the
        # installed version before removing it.
        'model': XGBClassifier(use_label_encoder=False, eval_metric='logloss',
                               random_state=42),
        'params': {
            'n_estimators': [50, 100],
            'learning_rate': [0.01, 0.1],
            'max_depth': [3, 5]
        }
    },
    'GradientBoosting': {
        'model': GradientBoostingClassifier(random_state=42),
        'params': {
            'n_estimators': [50, 100],
            'learning_rate': [0.01, 0.1],
            'max_depth': [3, 5]
        }
    },
    'SVC': {
        # Enable probability estimates for AUC-ROC calculation; the internal
        # calibration is randomized, hence the explicit seed.
        'model': SVC(probability=True, random_state=42),
        'params': {
            'C': [0.1, 1.0, 10.0],
            'kernel': ['linear', 'rbf'],
            'gamma': ['scale', 'auto']
        }
    },
    'KNeighbors': {
        'model': KNeighborsClassifier(),
        'params': {
            'n_neighbors': [3, 5, 10],
            'weights': ['uniform', 'distance']
        }
    }
}

# Function to perform GridSearchCV for hyperparameter tuning
def tune_model(model_name, model_instance, param_grid):
    """Run an exhaustive 5-fold grid search for a single estimator.

    Parameters
    ----------
    model_name : str
        Label of the model being tuned. Accepted for the caller's
        convenience; it is not used inside the function.
    model_instance : estimator
        Unfitted estimator handed to GridSearchCV.
    param_grid : dict
        Mapping of parameter names to the candidate values to try.

    Returns
    -------
    GridSearchCV
        The search object after fitting on the notebook-level ``X_train`` /
        ``y_train``, with candidates scored by accuracy.
    """
    search_options = {
        'estimator': model_instance,
        'param_grid': param_grid,
        'scoring': 'accuracy',
        'cv': 5,
        'verbose': 1,   # print one progress summary per search
        'n_jobs': -1,   # use every available CPU core
    }
    searcher = GridSearchCV(**search_options)
    searcher.fit(X_train, y_train)
    return searcher

# Dictionary to store the fitted search object for each model name
best_models = {}

# Tune every configured model in turn; each value returned by tune_model is
# kept so its best parameters and predictions can be inspected afterwards.
for model_name, model_info in models.items():
    print(f"Tuning {model_name}...")
    best_model = tune_model(model_name, model_info['model'], model_info['params'])
    best_models[model_name] = best_model

# Evaluate the best models on the test set
for model_name, best_model in best_models.items():
    # Hard class predictions drive accuracy / precision / recall.
    y_pred = best_model.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred)

    # Assuming binary classification for AUC-ROC calculation:
    # column 1 of predict_proba is taken as the positive-class score.
    probas = best_model.predict_proba(X_test)[:, 1]
    auc_roc = roc_auc_score(y_test, probas)

    # All statements below must share the loop body's 4-space indent; a
    # mismatched dedent here makes the whole cell fail to parse.
    print(f"{model_name} Results:")
    print(f"Best Parameters: {best_model.best_params_}")
    print(f"Accuracy: {accuracy:.2f}")
    print(f"Precision: {precision:.2f}")
    print(f"Recall: {recall:.2f}")
    print(f"AUC-ROC: {auc_roc:.2f}\n")