Plot multiple ROC curves together.

In [13]:
import os
import pandas as pd
import matplotlib.pyplot as plt

def plot_multiple_roc_curves(csv_files, model_names, save_path, title=None):
    """
    Plots multiple ROC curves from saved FPR/TPR values on a single figure.

    Each CSV is loaded with pandas. Its 'fpr' and 'tpr' columns are drawn as
    one curve, and the first value of its 'auc' column is shown in the legend
    next to the model name. A dashed red diagonal is added as the
    random-guessing baseline. The figure is written to ``save_path`` and then
    closed, even if loading or saving fails.

    :param csv_files: List of CSV file paths containing 'fpr', 'tpr' and 'auc' columns.
    :param model_names: List of model names to display in the legend, in the
        same order as ``csv_files``.
    :param save_path: Path to save the combined ROC plot. A missing parent
        directory is created; a bare file name is saved in the current
        working directory.
    :param title: Optional plot title. When omitted, the original
        fine-tuned BERT experiment title is used.
    :raises ValueError: If ``csv_files`` and ``model_names`` differ in length.
    """
    if len(csv_files) != len(model_names):
        raise ValueError("Number of CSV files and model names must match.")

    if title is None:
        title = 'Comparison of ROC Curves for FineTuned Bert with Cross Entropy, n=10,000'

    fig, ax = plt.subplots(figsize=(8, 6))
    try:
        # One curve per saved ROC CSV
        for csv_file, model_name in zip(csv_files, model_names):
            df = pd.read_csv(csv_file)

            # Only the first row's AUC is read; the file is expected to
            # repeat the same value on every row.
            auc_value = df['auc'].values[0]

            ax.plot(
                df['fpr'].values,
                df['tpr'].values,
                label=f'{model_name} (AUC = {auc_value:.2f})',
                linewidth=2,
            )

        # Diagonal line for random guessing
        ax.plot([0, 1], [0, 1], 'r--', label="Random (AUC = 0.50)")

        # Format plot
        ax.set_xlim([0, 1])
        ax.set_ylim([0, 1])
        ax.set_xlabel('False Positive Rate')
        ax.set_ylabel('True Positive Rate')
        ax.set_title(title)
        ax.legend(loc="lower right")

        # os.path.dirname() returns '' for a bare file name, and
        # os.makedirs('') raises FileNotFoundError, so only create the
        # directory when there is one to create.
        output_dir = os.path.dirname(save_path)
        if output_dir:
            os.makedirs(output_dir, exist_ok=True)
        fig.savefig(save_path)
    finally:
        # Always release the figure so failed runs do not leak open figures.
        plt.close(fig)


In [14]:
# Each entry pairs a legend label with the saved ROC CSV for that model
# (adjust paths as needed). Keeping label and path side by side makes it
# harder for the two lists below to drift out of order.
roc_sources = [
    ("XGBoost", "finetunedBert1/_Boosting_with_our_bert_features.csv"),
    ("Decision Tree", "finetunedBert1/_Decision_Tree_with_our_bert_features.csv"),
    ("Logistic Regression", "finetunedBert1/Logistic_Regression_reg1_roc_logistic_finetune1run2.csv"),
    ("Random Forest", "finetunedBert1/_Random_Forest_with_our_bert_features.csv"),
    ("2-layer Neural Network", "finetunedBert1/NeuralNet_reg0.0025_roc.csv"),
]

# Parallel lists in the shape the plotting function expects
csv_files = [csv_path for _, csv_path in roc_sources]
model_names = [label for label, _ in roc_sources]

# Output location for the combined ROC figure
save_path = "results/combined_roc_curve_Random_Forest.png"

# Draw every model's ROC curve on one figure and write it to disk
plot_multiple_roc_curves(csv_files, model_names, save_path)
