In [3]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import roc_curve, auc
import pickle
import joblib
import lightgbm as lgb
from catboost import CatBoostClassifier
import os
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier

In [None]:
def load_models(model_dir):
    """Load every saved classifier found in ``model_dir``.

    Each model is loaded independently: if one file is missing or cannot be
    read, the error is printed and that model is skipped, so the remaining
    models are still returned. The result may therefore be empty.

    Args:
        model_dir: Directory containing the serialized model files.

    Returns:
        dict mapping a display name ('CatBoost', 'LightGBM',
        'Logistic Regression', 'Random Forest') to the loaded model, in that
        order, containing only the models that loaded successfully.
    """
    def _load_catboost(path):
        # Keep the instance itself rather than whatever load_model() returns,
        # so the result does not depend on that method's return value.
        model = CatBoostClassifier()
        model.load_model(path)
        return model

    # NOTE(review): joblib.load unpickles its input, which can execute
    # arbitrary code -- only point model_dir at trusted files.
    # NOTE(review): "LighGBM_model.pkl" is spelled exactly as in the original
    # code; presumably it matches the file on disk -- confirm before renaming.
    loaders = [
        ('CatBoost', "catboost_model.cbm", _load_catboost),
        ('LightGBM', "LighGBM_model.pkl", joblib.load),
        ('Logistic Regression', "LR_model.pkl", joblib.load),
        ('Random Forest', "random_forest_model.pkl", joblib.load),
    ]

    models = {}
    for name, filename, loader in loaders:
        path = os.path.join(model_dir, filename)
        try:
            models[name] = loader(path)
        except Exception as e:
            # Best-effort: report and continue so one bad file does not
            # prevent the other models from being compared.
            print(f"Error loading {name} model from {path}: {e}")

    return models

def plot_roc_curves(models, X_test, y_test,
                    output_path='../plots/roc_curves_comparison.png'):
    """Draw the ROC curve of every model on one figure and save it to disk.

    Args:
        models: Mapping of display name -> fitted classifier exposing
            ``predict_proba``; column 1 of its output is used as the score.
        X_test: Test features, passed unchanged to ``predict_proba``.
        y_test: True labels, passed unchanged to ``roc_curve``.
        output_path: File the figure is written to. Defaults to the location
            this notebook has always used. Missing parent directories are
            created.

    Returns:
        None. A model whose curve cannot be computed is reported on stdout
        and skipped, so the remaining models are still plotted.
    """
    plt.figure(figsize=(10, 8))
    colors = ['blue', 'red', 'green', 'purple']

    for idx, (name, model) in enumerate(models.items()):
        # Cycle through the palette by index; zipping the models with the
        # palette would silently drop every model after the fourth.
        color = colors[idx % len(colors)]
        try:
            # Probability of the positive class (second column).
            y_pred_proba = model.predict_proba(X_test)[:, 1]

            # Calculate ROC curve and AUC
            fpr, tpr, _ = roc_curve(y_test, y_pred_proba)
            roc_auc = auc(fpr, tpr)

            # Plot ROC curve
            plt.plot(fpr, tpr, color=color, lw=2,
                     label=f'{name} (AUC = {roc_auc:.3f})')

        except Exception as e:
            print(f"Error plotting ROC curve for {name}: {e}")

    # Diagonal reference line (the curve of a random classifier)
    plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')

    # Customize plot
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('Receiver Operating Characteristic (ROC) Curves')
    plt.legend(loc="lower right")
    plt.grid(True)

    # Make sure the target directory exists so savefig does not fail on a
    # fresh checkout; a bare filename has no directory part to create.
    output_dir = os.path.dirname(output_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    # Save plot, then close the figure to release it
    plt.savefig(output_path)
    plt.close()



In [5]:
def plot_AUC(model_dir="../model",
             x_test_path="../data/test/X_test.csv",
             y_test_path="../data/test/y_test.csv"):
    """Load the saved models and test set, save the ROC plot, print AUCs.

    Args:
        model_dir: Directory handed to ``load_models``.
        x_test_path: CSV file holding the test features.
        y_test_path: CSV file holding the test labels.

    Returns:
        None. Returns early, after printing a message, when the test data
        cannot be read or when ``load_models`` returns nothing.
    """
    # Load the test data
    try:
        X_test = pd.read_csv(x_test_path)
        y_test = pd.read_csv(y_test_path)
    except Exception as e:
        print(f"Error loading test data: {e}")
        return

    # Load models
    models = load_models(model_dir)

    if not models:
        print("No models were loaded successfully.")
        return

    # Plot ROC curves
    plot_roc_curves(models, X_test, y_test)

    # Print AUC scores in a table
    print("\nAUC Scores:")
    print("-" * 40)
    print(f"{'Model':<20} {'AUC Score':<10}")
    print("-" * 40)

    for name, model in models.items():
        try:
            # Probability of the positive class (second column).
            y_pred_proba = model.predict_proba(X_test)[:, 1]

            # Compute the curve once and reuse both arrays for the area.
            fpr, tpr, _ = roc_curve(y_test, y_pred_proba)
            roc_auc = auc(fpr, tpr)
            print(f"{name:<20} {roc_auc:.4f}")
        except Exception as e:
            # Keep the table going: show the failure on this model's row.
            print(f"{name:<20} Error: {e}")

    print("-" * 40)


# Run the comparison: saves the ROC figure and prints the AUC table below.
plot_AUC()


AUC Scores:
----------------------------------------
Model                AUC Score 
----------------------------------------
CatBoost             0.8198
LightGBM             0.8193
Logistic Regression  0.8150
Random Forest        0.8083
----------------------------------------
