In [1]:
#===========================IMPORT LIBRARIES===========================#

import matplotlib.pyplot as plt # for plotting
import numpy as np # for numerical operations
import pandas as pd # for data manipulation in dataframes

import sklearn.metrics as metrics 

from mpl_toolkits.axes_grid1.inset_locator import zoomed_inset_axes # for zoomed in plots
from mpl_toolkits.axes_grid1.inset_locator import mark_inset # for zoomed in plots

#===============SET PARAMETERS FOR CUSTOM PLOTTING====================#

# Colour cycle: the Olympic-ring colours first, then softer accent tones
custom_colors = [
    '#0081C8',  # Olympic Blue
    '#EE334E',  # Olympic Red
    '#00A651',  # Olympic Green
    '#FCB131',  # Olympic Yellow
    '#ba55d3',  # Medium Orchid
    '#00ced1',  # Dark Turquoise
    '#a89078',  # Pastel Brown
    '#e9967a',  # Dark Salmon
    '#66cdaa',  # Medium Aquamarine
    '#db7093',  # Pale Violet Red
]

# Apply all plotting defaults in a single call
plt.rcParams.update({
    # Ticks point into the axes, and minor ticks are drawn as well
    'xtick.direction': 'in',
    'ytick.direction': 'in',
    'xtick.minor.visible': True,
    'ytick.minor.visible': True,
    # Font size of the tick labels on both axes
    'xtick.labelsize': 12,
    'ytick.labelsize': 12,
    # Lines cycle through the custom colours defined above
    'axes.prop_cycle': plt.cycler(color=custom_colors),
})

In [3]:
#===========================FUNCTIONS===========================#

def load_classification_results(file_path):
    """Read a classification-result CSV and split it into truth and predictions.

    Parameters
    ----------
    file_path : str
        Path of the CSV file to read.

    Returns
    -------
    tuple
        ``(true_labels, predicted_labels)``: the ``'pid'`` column, and a
        DataFrame holding every column except the first one and the last two.
    """
    frame = pd.read_csv(file_path)
    # Positional selection: skip the first column and the two trailing ones.
    # NOTE(review): presumably those are an event id and the truth/bookkeeping
    # columns -- confirm against the CSV writer.
    prediction_columns = slice(1, -2)
    return frame['pid'], frame.iloc[:, prediction_columns]

def plot_confusion_matrix(true_labels, predicted_labels, name, save_path):
    """Plot, save and show the confusion matrix of the particle classifier.

    The predicted class of an event is the prediction column with the highest
    value in its row. The true class is found by mapping the integer particle
    code in ``true_labels`` onto the name of the matching prediction column.
    Every cell is annotated with its count and with its share of the row total.

    Parameters
    ----------
    true_labels : pandas.Series
        Integer particle code of each event.
    predicted_labels : pandas.DataFrame
        One column of scores per class; the mapping below expects the columns
        'pid_neutrino_pred', 'pid_muon_pred' and 'pid_noise_pred'.
    name : str
        Tag placed at the start of the output file name.
    save_path : str
        Prefix joined to the file name by plain string concatenation, so a
        directory must be given with its trailing separator.
    """
    # Predicted class = name of the column holding the largest value per row
    max_pred_names = predicted_labels.idxmax(axis=1)

    # Particle codes grouped by the prediction column of their class
    codes_by_column = {
        'pid_neutrino_pred': [-12, 12, -14, 14, -16, 16],
        'pid_muon_pred': [-13, 13],
        'pid_noise_pred': [-1, 7],
    }

    # Dictionary converting particle codes to prediction-column names
    particle_dict = {
        code: column
        for column, codes in codes_by_column.items()
        for code in codes
    }

    # Codes that are not in the dictionary map to NaN
    true_labels_name = true_labels.map(particle_dict)

    # Rows are true classes, columns are predicted classes
    confusion_matrix = pd.crosstab(true_labels_name, max_pred_names)

    # Row totals are loop-invariant, so compute them once. They are read with
    # .iloc because the Series is indexed by class name, not by position.
    row_totals = confusion_matrix.sum(axis=1)
    n_rows, n_cols = confusion_matrix.shape

    # Plot the matrix and print both absolute and percentage values in the cells
    fig, ax = plt.subplots(figsize=(10, 6))
    ax.matshow(confusion_matrix, cmap='Blues')
    for i in range(n_rows):
        for j in range(n_cols):
            count = confusion_matrix.iloc[i, j]
            percentage = count / row_totals.iloc[i] * 100
            ax.text(j, i, f'{count} ({percentage:.2f}%)', ha='center', va='center', color='black')

    # Pin one tick per row/column and label it from the matrix's own axes, so
    # the labels stay correct even when a class is absent from the data.
    display_names = {
        'pid_muon_pred': 'Muon',
        'pid_neutrino_pred': 'Neutrino',
        'pid_noise_pred': 'Noise',
    }
    ax.set_xticks(list(range(n_cols)))
    ax.set_xticklabels([display_names.get(column, column) for column in confusion_matrix.columns])
    ax.set_yticks(list(range(n_rows)))
    ax.set_yticklabels([display_names.get(row, row) for row in confusion_matrix.index])
    ax.set_xlabel('Predicted label')
    ax.set_ylabel('True label')

    # Save the plot
    plt.savefig(save_path + name + '_confusion_matrix.png', dpi=300)
    plt.show()

def plot_roc_curve(true_labels, predicted_labels, name, save_path, zoom=True):
    """Plot, save and show one-vs-rest ROC curves for the three classes.

    For each class the truth is "event belongs to this class" and the score is
    the class's prediction column. The AUC of every curve is shown in the legend.

    Parameters
    ----------
    true_labels : pandas.Series
        Integer particle code of each event.
    predicted_labels : pandas.DataFrame
        Must contain the columns 'pid_neutrino_pred', 'pid_muon_pred' and
        'pid_noise_pred'.
    name : str
        Tag placed at the start of the output file name.
    save_path : str
        Prefix joined to the file name by plain string concatenation.
    zoom : bool, optional
        When true, add an inset that magnifies the top-left corner of the plot.
    """
    # Particle codes grouped by the prediction column of their class
    codes_by_column = {
        'pid_neutrino_pred': [-12, 12, -14, 14, -16, 16],
        'pid_muon_pred': [-13, 13],
        'pid_noise_pred': [-1, 7],
    }
    code_to_column = {
        code: column
        for column, codes in codes_by_column.items()
        for code in codes
    }

    # Name of the prediction column each event truly belongs to
    truth_columns = true_labels.map(code_to_column)

    # (legend name, prediction column) in drawing order
    classes = (
        ('Neutrino', 'pid_neutrino_pred'),
        ('Muon', 'pid_muon_pred'),
        ('Noise', 'pid_noise_pred'),
    )

    # Collect (fpr, tpr, legend text) for every class
    curves = []
    for legend_name, column in classes:
        is_class = truth_columns == column
        fpr, tpr, _ = metrics.roc_curve(is_class, predicted_labels[column])
        area = metrics.auc(fpr, tpr)
        curves.append((fpr, tpr, f'{legend_name} (AUC = {area:.4f})'))

    # Draw the full ROC curves
    fig, ax = plt.subplots(figsize=(10, 6))
    for fpr, tpr, legend_text in curves:
        ax.plot(fpr, tpr, label=legend_text)

    ax.set_xlabel('False Positive Rate')
    ax.set_ylabel('True Positive Rate')
    ax.legend()

    # Optional inset magnifying the low-FPR / high-TPR corner
    if zoom:
        zoom_xlim = (0.0, 0.03)
        zoom_ylim = (0.9, 1.0)
        axins = ax.inset_axes([0.3, 0.3, 0.5, 0.5], xlim=zoom_xlim, ylim=zoom_ylim)
        mark_inset(ax, axins, loc1=1, loc2=3, fc="none", ec="0.5")

        # Same curves in the same order, so the inset colours match the main axes
        for fpr, tpr, legend_text in curves:
            axins.plot(fpr, tpr, label=legend_text)

    # Write the figure to disk, then display it
    plt.savefig(save_path + name + '_roc_curve.png', dpi=300)
    plt.show()

def plot_neutrino_prob_distribution(true_labels, predicted_labels, name, save_path):
    """Plot, save and show the predicted neutrino score split by true class.

    One step histogram (100 bins on [0, 1], logarithmic count axis) of the
    'pid_neutrino_pred' column is drawn for each of the true classes
    neutrino, muon and noise.

    Parameters
    ----------
    true_labels : pandas.Series
        Integer particle code of each event.
    predicted_labels : pandas.DataFrame
        Must contain the column 'pid_neutrino_pred'.
    name : str
        Tag placed at the start of the output file name.
    save_path : str
        Prefix joined to the file name by plain string concatenation.
    """
    # Particle codes grouped by the prediction column of their class
    codes_by_column = {
        'pid_neutrino_pred': [-12, 12, -14, 14, -16, 16],
        'pid_muon_pred': [-13, 13],
        'pid_noise_pred': [-1, 7],
    }
    code_to_column = {
        code: column
        for column, codes in codes_by_column.items()
        for code in codes
    }

    # Name of the prediction column each event truly belongs to
    truth_columns = true_labels.map(code_to_column)

    # Boolean selection of the events of each true class, in drawing order
    selections = [
        ('Neutrino', truth_columns == 'pid_neutrino_pred'),
        ('Muon', truth_columns == 'pid_muon_pred'),
        ('Noise', truth_columns == 'pid_noise_pred'),
    ]

    neutrino_scores = predicted_labels['pid_neutrino_pred']

    # Overlay the neutrino-score histogram of every true class
    fig, ax = plt.subplots(figsize=(10, 6))
    for legend_name, selected in selections:
        ax.hist(neutrino_scores[selected], bins=100, range=(0, 1), histtype='step', label=legend_name)

    ax.set_xlabel('Predicted Neutrino Probability')
    ax.set_ylabel('Counts')
    ax.set_yscale('log')
    ax.legend()

    # Write the figure to disk, then display it
    plt.savefig(save_path + name + '_neutrino_prob_distribution.png', dpi=300)
    plt.show()


In [None]:
# Validation set: load the classifier output and produce all three diagnostic plots
validation_path = '/groups/icecube/luc/Workspace/GraphNet/work/LE_3_classification_Frederik/Result_CSVs/validation_results.csv'
validation_plot_dir = '/groups/icecube/luc/Workspace/GraphNet/work/LE_3_classification_Frederik/Plots/'

true_labels, predicted_labels = load_classification_results(validation_path)

# Same arguments for every plot; order is confusion matrix, ROC, score distribution
for make_plot in (plot_confusion_matrix, plot_roc_curve, plot_neutrino_prob_distribution):
    make_plot(true_labels, predicted_labels, 'validation', validation_plot_dir)

In [None]:
# Test set: load the classifier output and produce all three diagnostic plots
test_path = '/groups/icecube/luc/Workspace/GraphNet/work/LE_3_classification_Frederik/Result_CSVs/test_results.csv'
test_plot_dir = '/groups/icecube/luc/Workspace/GraphNet/work/LE_3_classification_Frederik/Plots/'

true_labels, predicted_labels = load_classification_results(test_path)

# Same arguments for every plot; order is confusion matrix, ROC, score distribution
for make_plot in (plot_confusion_matrix, plot_roc_curve, plot_neutrino_prob_distribution):
    make_plot(true_labels, predicted_labels, 'test', test_plot_dir)