# LIBRARIES

In [None]:
import glob
import os

import joblib
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from sklearn.metrics import confusion_matrix, roc_auc_score, roc_curve

# READ FILES

In [None]:
# Load the held-out features and labels; the first CSV column becomes the index.
X_test, y_test = (
    pd.read_csv(csv_path, index_col=0)
    for csv_path in ('../dataset/X_test.csv', '../dataset/y_test.csv')
)

# READ MODELS

In [None]:
# Map each model name (file name without its extension) to the loaded object
models = {}

# Set the folder path
folder_path = '../models/'

# List all visible subdirectories in the folder. Sorted so the load order (and
# therefore which file wins if two share a stem) is reproducible across runs;
# hidden directories such as `.ipynb_checkpoints` are skipped.
subdirectories = sorted(
    subdir for subdir in os.listdir(folder_path)
    if not subdir.startswith('.')
    and os.path.isdir(os.path.join(folder_path, subdir))
)

# Iterate over the subdirectories
for subdir in subdirectories:
    subdirectory_path = os.path.join(folder_path, subdir)

    # List all entries in the subdirectory
    files = sorted(os.listdir(subdirectory_path))

    # Iterate over the entries
    for file in files:
        file_path = os.path.join(subdirectory_path, file)

        # Only regular, non-hidden files can be model dumps; passing a nested
        # directory or a dotfile to joblib.load would abort the whole cell.
        if file.startswith('.') or not os.path.isfile(file_path):
            continue

        # Extract the file name without extension
        model_name = os.path.splitext(file)[0]

        # NOTE(security): joblib.load unpickles the file, which can execute
        # arbitrary code - only load model files from a trusted source.
        models[model_name] = joblib.load(file_path)

# GENERALIZATION

The confusion matrix is a table that summarizes the performance of a binary classification model by showing the number of true positive (TP), true negative (TN), false positive (FP), and false negative (FN) predictions on the test set.

## Confusion Matrix

In [None]:
def plot_confusion_matrix(model_name):
    """Show the test-set confusion matrix of one loaded model as a heatmap.

    Args:
        model_name: Key of the model in the module-level ``models`` dict.
            For 'TRACK' and 'TRUST' the model receives only the ``X_test``
            column of the same name (reshaped to a single-feature 2-D array);
            any other model receives ``X_test`` without those two columns.
    """
    single_column_models = ('TRACK', 'TRUST')

    # Build the feature matrix this particular model is evaluated on
    if model_name not in single_column_models:
        features = X_test.drop(['TRACK','TRUST'], axis=1)
    else:
        features = X_test[model_name].values.reshape(-1, 1)

    # Predict on the test set and tabulate actual vs. predicted labels
    predictions = models[model_name].predict(features)
    matrix = confusion_matrix(y_test, predictions)

    # Draw the annotated heatmap
    sns.heatmap(matrix, annot=True, fmt='g', cmap='Blues')
    plt.title(f'Heatmap of Confusion Matrix - {model_name}')
    plt.xlabel('Predicted')
    plt.ylabel('Actual')
    plt.show()

In [None]:
# Draw the confusion-matrix heatmap of every evaluated model, in turn
for evaluated_model in ('SVM', 'RF', 'LR', 'MLP', 'TRACK', 'TRUST'):
    plot_confusion_matrix(evaluated_model)

## ROC

In [None]:
def plot_roc_curve(model_name):
    """Plot the test-set ROC curve of one loaded model, with its AUC in the legend.

    Args:
        model_name: Key of the model in the module-level ``models`` dict.
            For 'TRACK' and 'TRUST' the model receives only the ``X_test``
            column of the same name (reshaped to a single-feature 2-D array);
            any other model receives ``X_test`` without those two columns.

    Raises:
        KeyError: If ``model_name`` is not a key of ``models``.
        AttributeError: If the model exposes neither ``predict_proba`` nor
            ``decision_function``.
    """
    if model_name in ['TRACK','TRUST']:
        X_test_model = X_test[model_name].values.reshape(-1, 1)
    else:
        X_test_model = X_test.drop(['TRACK','TRUST'],axis=1)

    model = models[model_name]

    # Prefer the probability of the second class (column 1). Models without
    # probability estimates (e.g. an SVM fitted without them) fall back to
    # decision_function, whose scores roc_curve / roc_auc_score also accept.
    if hasattr(model, 'predict_proba'):
        scores = model.predict_proba(X_test_model)[:, 1]
    else:
        scores = model.decision_function(X_test_model)

    fpr, tpr, _ = roc_curve(y_test, scores)
    auc = roc_auc_score(y_test, scores)

    plt.plot(fpr, tpr, label=f'ROC Curve (AUC={auc:.2f})')
    # Dashed diagonal: reference line from (0, 0) to (1, 1)
    plt.plot([0, 1], [0, 1], 'k--')
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.legend()
    plt.title('ROC Curve - ' + model_name)
    plt.show()

In [None]:
# Draw the ROC curve of every evaluated model, in turn
for evaluated_model in ('SVM', 'RF', 'LR', 'MLP', 'TRACK', 'TRUST'):
    plot_roc_curve(evaluated_model)