In [None]:
import pandas as pd  # Import pandas library for data manipulation
from sklearn.metrics import roc_curve, auc  # Import functions for ROC curve and AUC calculation
import matplotlib.pyplot as plt  # Import matplotlib for plotting

# Read the CSV into a DataFrame. The code below uses the 'Clinvar' column as
# the labels and every other column as one tool's scores.
data = pd.read_csv("roc-final-data.csv")

# Define tools and labels
label_column = 'Clinvar'
labels = data[label_column]  # Passed to roc_curve as the true labels
# Select tool columns by name rather than by position, so the label column is
# never scored against itself when it is not the first column of the CSV.
tools = [column for column in data.columns if column != label_column]

# Define colors for each tool
colors = ['slategray', 'skyblue', 'goldenrod']  # Define colors for plotting

# Calculate ROC curves and AUCs
# roc_curve returns (fpr, tpr, thresholds); only the first two are kept.
roc_points = [roc_curve(labels, data[tool])[:2] for tool in tools]
# Built explicitly (instead of unpacking zip(*...)) so that an empty tool list
# gives empty results rather than a ValueError on unpacking.
fpr = tuple(rates[0] for rates in roc_points)  # One FPR array per tool
tpr = tuple(rates[1] for rates in roc_points)  # One TPR array per tool
aucs = [auc(x, y) for x, y in roc_points]  # Compute AUC for each tool

# Plot ROC curves
plt.figure(figsize=(6, 6))  # Create a new figure with specified size

# Plot each ROC curve with its AUC value in the label. The loop iterates over
# the tools themselves (not over zip(..., colors)), so no tool is silently
# dropped when there are more tools than predefined colors; curves without a
# predefined color are drawn without an explicit color argument and therefore
# use matplotlib's default color cycle.
for i, (tool, auc_, curve_fpr, curve_tpr) in enumerate(zip(tools, aucs, fpr, tpr)):
    color_kwargs = {'color': colors[i]} if i < len(colors) else {}
    plt.plot(curve_fpr, curve_tpr, label=f'{tool} (AUC: {auc_:0.4f})', linewidth=2, **color_kwargs)

plt.plot([0, 1], [0, 1], linestyle='--', color='red', label='baseline', linewidth=2)  # Plot the baseline diagonal line

plt.xlabel('False Positive Rate (FPR)')  # Set label for x-axis
plt.ylabel('True Positive Rate (TPR)')  # Set label for y-axis
