In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
import warnings
from itertools import product
from scipy import stats
import histlite as hl
import sys

In [None]:
# Global plotting style used by every figure in this notebook.
import matplotlib.style as style
#style.use('tableau-colorblind10')
style.use('seaborn-v0_8-bright')
# NOTE(review): sns.set() also modifies matplotlib's rcParams, so it may override
# parts of the style selected on the line above — confirm the call order is intended.
sns.set(style="ticks")

In [None]:
# Cluster-specific configuration: where the plots are written (save_path), where
# the project helper package lives (added to sys.path) and where the input data
# is stored (path).
cluster = "woody"

# Every directory carries a trailing slash so that both os.path.join(...) and the
# plain string concatenations used further down (path + "nnfit_reco",
# save_path + "<file name>") resolve to a location *inside* the directory.
if cluster == "woody":
    save_path = "/home/saturn/capn/mppi133h/master_thesis/tau_appearance/reconstruction_perfomance/plots/"
    sys.path.append("/home/saturn/capn/mppi133h/master_thesis/tau_appearance/reconstruction_perfomance")
    path = "/home/wecapstor3/capn/mppi133h/ANTARES/mc/"
elif cluster == "lyon":
    save_path = "/sps/km3net/users/mchadoli/master_thesis/tau_appearance/reconstuction_perfomance/plots/"
    sys.path.append("/sps/km3net/users/mchadoli/master_thesis/tau_appearance/reconstuction_perfomance")
    path = "/sps/km3net/users/mchadoli/ANTARES/"
else:
    # Fail here instead of with a NameError on save_path/path much later.
    raise ValueError(f"Unknown cluster {cluster!r}; expected 'woody' or 'lyon'")

# Project-local helpers; they are importable only after the sys.path entry above.
import scripts.file_management as fm
# NOTE(review): the star import hides where the get_*mask helpers used below come
# from; consider importing them by name once the full list of used names is known.
from scripts.lib_masks import *

In [None]:
# Default figure size, passed as figsize to plt.subplots by the plotting functions.
normal_size = (15, 10)
#plt.rcParams['figure.dpi'] = 100
# Resolution applied whenever a figure is written with fig.savefig.
plt.rcParams['savefig.dpi'] = 1000

In [None]:
# Set the font dictionaries (for plot title and axis titles)
# suptitle_font: titles of the individual panels (used with ax.set_title).
suptitle_font = {'fontname':'sans-serif', 'size':'16', 'color':'black', 'weight':'normal',
              'verticalalignment':'bottom'}

# title_font: figure-level title (used with fig.suptitle).
title_font = {'fontname':'sans-serif', 'size':'20', 'color':'black', 'weight':'normal',}

# axis_font: figure-level axis labels (used with fig.supxlabel / fig.supylabel).
axis_font = {'fontname':'sans-serif', 'size':'14'}

In [None]:
# Columns requested from the nutau ROOT file (passed as columns= to
# fm.rootfile_to_df). The first three are the keys later used to merge the
# ROOT table with the NNFit table.
columns = [
    "run_id",
    "frame_index",
    "event_trigger_counter",
    "interaction_type",
    "is_cc",
    "type",
    "energy_true",
    "cos_zenith_true"
]

In [None]:
def rename_h5_df_cols(
    df,
    mapper={
        "RunID": "run_id",
        "EventID": "frame_index", 
        "TrigCount": "event_trigger_counter",
    },
):
    """Return a copy of ``df`` whose columns are renamed according to ``mapper``.

    Parameters
    ----------
    df : pandas.DataFrame
        Table whose column labels should be translated.
    mapper : dict
        Old column name -> new column name. Columns not listed keep their
        name. The default maps "RunID", "EventID" and "TrigCount" onto
        "run_id", "frame_index" and "event_trigger_counter".

    Returns
    -------
    pandas.DataFrame
        New frame with the renamed columns; ``df`` itself is left untouched.
    """
    renamed = df.rename(mapper, axis="columns")
    return renamed

In [None]:
def check_negative_energy(df, column):
    """Replace negative entries of ``df[column]`` by NaN, in place.

    Despite the name nothing is merely "checked": the column is overwritten
    inside ``df`` and the function returns None. Entries that are zero,
    positive or already NaN are kept as they are.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame that is modified in place.
    column : str
        Name of the column to clean.
    """
    current = df[column]
    is_negative = current < 0
    df[column] = np.where(is_negative, np.nan, current)

In [None]:
# Load the data
print("Loading the data...")

# Load the dataframes
print("Importing into dataframes...")

# Load nnfit reco files
# Presumably lists the files inside the "nnfit_reco" folder that match "*taus*".
# NOTE(review): the folder name is appended to `path` by plain string
# concatenation, so `path` has to end with a path separator — confirm.
nnfit_files = fm.list_files_with_pattern(path+"nnfit_reco", "*taus*")

In [None]:
# Read the listed NNFit files into one table (via the project helper).
# NOTE(review): folder_path is again built by plain string concatenation with `path`.
df_nnfit = fm.load_dataframes(nnfit_files, folder_path=path+"nnfit_reco")

In [None]:
# Read the selected columns of the nutau ROOT file into a dataframe.
rootfile = os.path.join(path, "nnfit_sample/nutau.root")
dfnu = fm.rootfile_to_df(rootfile, columns=columns)

# Keep only the rows selected by the low-energy mask from scripts.lib_masks.
# NOTE(review): the exact energy cut is defined in get_low_energymask — not visible here.
dfnu_low_ma = get_low_energymask(dfnu)
dfnu = dfnu.loc[dfnu_low_ma]

In [None]:
# Give the NNFit table the same key column names as the ROOT table
# (RunID/EventID/TrigCount -> run_id/frame_index/event_trigger_counter) so the
# two tables can be merged on them.
print("Renaming the columns...")
df_nnfit = rename_h5_df_cols(df_nnfit)

In [None]:
# Combine truth (ROOT) and reconstruction (NNFit) information: an inner join on
# the three event identifiers keeps only events present in both tables.
df = pd.merge(
    dfnu,
    df_nnfit,
    how="inner",
    on=["run_id", "frame_index", "event_trigger_counter"],
)

# Row counts before and after the join, to see how many events were matched.
print("Number of events in the merged dataframe: ", len(df))
print("Number of NNfit events:", len(df_nnfit))
print("Number of nutau events:", len(dfnu))

In [None]:
# Create the masks for the flavour types
# Each mask comes from a helper in scripts.lib_masks and is used to write a
# label into the "Flavour type" column; rows matched by no mask stay unlabelled.
# Later assignments overwrite earlier ones should the masks overlap.
df_el_ma = get_nuemask(df)
df.loc[df_el_ma, "Flavour type"] = "electron"

df_muon_ma = get_numumask(df)
df.loc[df_muon_ma, "Flavour type"] = "muon"

df_tau_ma = get_nutaumask(df)
df.loc[df_tau_ma, "Flavour type"] = "tau"

In [None]:
# Create the masks for the event types
# Same scheme as for the flavours: one mask per event type, written as a label
# into the "Event type" column. The masks themselves are reused below (they are
# stored in label_dict), the labels are used to name the plots.
df_track_ma = get_trackmask(df)
df.loc[df_track_ma, "Event type"] = "tracks"

df_shower_nc_ma = get_showermask_nc(df)
df.loc[df_shower_nc_ma, "Event type"] = "showers_nc"

df_shower_cc_ma = get_showermask_cc(df)
df.loc[df_shower_cc_ma, "Event type"] = "showers_cc"

In [None]:
# Lookup table shared by the plotting functions. It mixes three kinds of entries:
label_dict = {
    # task name -> dataframe column holding the reconstructed quantity
    "Shower Energy": "NNFitShower_Energy",
    "Track Energy": "NNFitTrack_Energy",
    "Track Direction": "NNFitTrack_cos_zenith",
    "Shower Direction": "NNFitShower_cos_zenith",
    # particle/selection name -> row mask of `df`
    "Shower CC": df_shower_cc_ma,
    "Track": df_track_ma,
    "Shower NC":df_shower_nc_ma,
    # "Event type" label -> human readable text for plot titles
    "showers_cc":"Shower CC Events",
    "showers_nc":"Shower NC Events",
    "tracks":"Track Events",
}

In [None]:
# NNFit stores log10 of the energy; convert it back to a linear energy.
# The power is evaluated on the whole column at once instead of calling a
# Python lambda per row; the float base keeps the result floating point.
df["NNFitTrack_Energy"] = 10.0 ** df["NNFitTrack_Log10Energy"]
df["NNFitShower_Energy"] = 10.0 ** df["NNFitShower_Log10Energy"]

In [None]:
def cos_zenith_from_theta(df, reco_algo):
    """Return ``-cos(theta)`` for the angle column ``reco_algo`` of ``df``.

    The column values are converted from degrees to radians (np.radians) before
    the cosine is taken, and the sign of the result is flipped.

    Parameters
    ----------
    df : pandas.DataFrame
        Table containing the angle column.
    reco_algo : str
        Name of the column holding the angle in degrees.

    Returns
    -------
    pandas.Series
        ``-cos(theta)`` with the same index (and name) as the input column.
    """
    # numpy ufuncs work on the whole Series at once; no per-row lambda needed.
    return -np.cos(np.radians(df[reco_algo]))

In [None]:
# Derive the reconstructed cos(zenith) columns from the NNFit theta columns
# (cos_zenith_from_theta treats the values as degrees and returns -cos(theta)).
df["NNFitTrack_cos_zenith"] = cos_zenith_from_theta(df, "NNFitTrack_Theta")
df["NNFitShower_cos_zenith"] = cos_zenith_from_theta(df, "NNFitShower_Theta")

In [None]:
def plot_reco_events(df, 
                     recos, 
                     mask,
                     task,
                     log_scale=(True, True), # (x, y)
                     xlim=None,
                     ylim=None, 
                     bins=100, 
                     suffix=""):
    """Plot true vs. reconstructed 2D histograms, one panel per reconstruction.

    The figure is saved in ``save_path`` as
    ``<event type>_<energy_reco_2dhisto|dir_reco_2dhisto>_<suffix>.png``, where
    the event type is the first "Event type" value of the selected rows.

    Parameters
    ----------
    df : pandas.DataFrame
        Event table. Needs the "Event type" column, the true quantity
        ("energy_true" or "cos_zenith_true") and every column in ``recos``.
    recos : sequence of str
        Columns with the reconstructed quantity; each gets its own row of panels.
    mask : boolean mask
        Row selection applied to ``df`` before plotting.
    task : {"energy", "direction"}
        Chooses the true quantity on the x axis, the axis labels and the
        file-name stem.
    log_scale : tuple of bool
        (x, y) log-scale flags forwarded to seaborn.histplot.
    xlim, ylim : tuple or None
        Axis limits applied to every panel when given.
    bins : int
        Forwarded to seaborn.histplot.
    suffix : str
        Last part of the output file name.

    Raises
    ------
    ValueError
        If ``task`` is unknown or ``mask`` selects no rows.
    """
    # Task to be plotted
    if task == "energy":
        x_data = "energy_true"
        xlabel = "True Energy [GeV]"
        ylabel = "Reconstructed Energy [GeV]"
        presuffix = "energy_reco_2dhisto"
    elif task == "direction":
        x_data = "cos_zenith_true"
        xlabel = r"True zenith $\cos(\theta)$"
        ylabel = r"Reconstructed zenith $\cos(\theta)$"
        presuffix = "dir_reco_2dhisto"
    else:
        raise ValueError(f"task must be 'energy' or 'direction', got {task!r}")

    # Filter once; the selection is needed for every panel and for the labels.
    selected = df[mask]
    if selected.empty:
        raise ValueError("mask selects no events; nothing to plot")
    event_type = selected["Event type"].unique()[0]

    # squeeze=False always yields a 2D array of axes, also for a single reco.
    fig, axes = plt.subplots(nrows=len(recos), ncols=1, figsize=normal_size, squeeze=False)

    for panel, reco in zip(axes.flatten(), recos):
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            sns.histplot(data=selected, x=x_data, y=reco,
                         bins=bins, log_scale=log_scale, ax=panel,
                         cbar=True, cbar_kws=dict(shrink=.65), cmap="cividis"
                         )
            if task == "energy":
                # Reference diagonal E_reco = E_true (red, dashed). The colour is
                # given only once; a "k--" format string would contradict color="red".
                panel.plot((0, 1e2), (0, 1e2), linestyle="--", color="red",
                           alpha=0.75, zorder=2, linewidth=4)

            panel.tick_params(axis='both', which='major', labelsize=12)
            panel.tick_params(axis='both', which='minor', rotation=20, labelsize=10)
            panel.set(xlabel=None, ylabel=None)
            panel.set_title(f"{reco}", fontdict=suptitle_font)
            if xlim:
                panel.set_xlim(xlim)
            if ylim:
                panel.set_ylim(ylim)

    # Font settings go in as keyword arguments: for figure-level labels the
    # size/weight given through fontdict= can be overridden by rcParams defaults.
    fig.suptitle(f"True vs Reco for {event_type}", **title_font)
    fig.supxlabel(xlabel, **axis_font)
    fig.supylabel(ylabel, **axis_font)
    fig.tight_layout()
    fig.savefig(os.path.join(save_path, f"{event_type}_{presuffix}_{suffix}.png"))

In [None]:
# Direction reconstruction of both NNFit algorithms for the shower CC selection
# (linear axes, cos(zenith) restricted to [-1, 0]).
plot_reco_events(df, ["NNFitTrack_cos_zenith", 'NNFitShower_cos_zenith'] , df_shower_cc_ma, "direction", log_scale=(False,False), ylim=(-1,0), xlim=(-1,0), bins=100, suffix="nnfit")

In [None]:
# Energy reconstruction of both NNFit algorithms for the shower CC selection
# (default log-log axes; true energy 10-100, reconstructed energy 10-1000).
plot_reco_events(df, ["NNFitTrack_Energy", "NNFitShower_Energy"], df_shower_cc_ma, task= "energy", xlim=(1e1, 1e2), ylim=(10, 1e3), suffix="nnfit")

In [None]:
def normalize_along_x_axis(
    hist
):
    """Normalize a 2D histogram so that every x bin sums to one.

    Each row ``hist.values[x_bin, :]`` is divided by the total content of that
    x bin, taken from ``hist.sum(1).get_values()``.

    Parameters
    ----------
    hist : 2D histogram object
        Must provide ``values`` (indexed as [x_bin, y_bin]) and
        ``sum(1).get_values()`` (one total per x bin).

    Returns
    -------
    numpy.ndarray
        The normalized values, transposed to shape (n_y_bins, n_x_bins).
        x bins without any content are NaN.
    """
    counts = np.asarray(hist.values, dtype=float)
    # Per-x-bin totals are the same for the whole histogram: compute them once.
    column_totals = np.asarray(hist.sum(1).get_values(), dtype=float)

    # Empty x bins give 0/0 = NaN; that is the intended result, so keep numpy quiet.
    with np.errstate(divide="ignore", invalid="ignore"):
        normalized_values = counts / column_totals[:, np.newaxis]

    return normalized_values.T

In [None]:
def plot_2dhisto(
    hist,
    values,
    type = "energy",
    ax = None,
):  
    """Draw a matrix of (normalized) histogram values as a heat map.

    Parameters
    ----------
    hist : 2D histogram object
        Source of the binning: ``n_bins`` and ``bins`` are used for the ticks.
        Assumes ``hist.bins[i]`` holds the n_bins[i] + 1 bin edges of axis i —
        TODO confirm against the histlite version in use.
    values : 2D array
        Matrix to display, indexed [y_bin, x_bin]; the colour scale is capped at 0.15.
    type : {"energy_track", "energy_shower", "direction"}
        Selects the tick spacing; for these three types ticks and the
        reconstructed = true diagonal are drawn. Any other value (including the
        default "energy") only draws the heat map. The name shadows the
        builtin but is kept because callers may pass it by keyword.
    ax : matplotlib Axes, optional
        Axes to draw on. Defaults to pyplot's current axes, which is what the
        function always used before this parameter existed.

    Returns
    -------
    matplotlib Axes
        The axes that were drawn on.
    """
    print(f"Plotting normalized distribution of {type}...")

    if ax is None:
        ax = plt.gca()

    # Plot heatmap
    image = ax.imshow(values, cmap='cividis', origin='lower', aspect='auto', vmax=.15)

    # Add colorbar
    ax.figure.colorbar(image, ax=ax, label='Probability')

    # type -> (x tick step, y tick step, decimals of the y tick labels), in bins
    tick_layout = {
        "energy_track": (4, 8, 0),
        "energy_shower": (6, 12, 0),
        "direction": (8, 8, 2),
    }
    if type in tick_layout:
        x_step, y_step, y_decimals = tick_layout[type]
        x_edges = np.asarray(hist.bins[0])
        y_edges = np.asarray(hist.bins[1])

        # imshow centres pixel i on coordinate i, so bin edge k sits at k - 0.5.
        ax.set_xticks(np.arange(0, hist.n_bins[0] + 1, x_step) - 0.5)
        ax.set_xticklabels(np.round(x_edges[::x_step], 2))
        ax.set_yticks(np.arange(0, hist.n_bins[1] + 1, y_step) - 0.5)
        ax.set_yticklabels(np.round(y_edges[::y_step], y_decimals))

        # Diagonal "reconstructed = true" over the value range covered by both
        # axes, translated from data values to pixel coordinates through the bin
        # edges (exact for uniformly spaced bins).
        low = max(x_edges[0], y_edges[0])
        high = min(x_edges[-1], y_edges[-1])
        if low < high:
            x_pixels = np.interp([low, high], x_edges, np.arange(x_edges.size) - 0.5)
            y_pixels = np.interp([low, high], y_edges, np.arange(y_edges.size) - 0.5)
            ax.plot(x_pixels, y_pixels, "r--", alpha=0.75, zorder=2, linewidth=3)

    return ax

In [None]:
def run_all_plots(
    df,
    dict,
    run_comb,
):
    """Create the normalized 2D-histogram figure for every (particle, task) pair.

    Parameters
    ----------
    df : pandas.DataFrame
        Event table handed on to plot_normalized_2dhisto.
    dict : dict
        Lookup table handed on to plot_normalized_2dhisto (it resolves the
        particle and task names).
    run_comb : iterable of (particle, task)
        Combinations to plot. ``task`` must be one of "Shower Energy",
        "Track Energy", "Track Direction" or "Shower Direction".

    Raises
    ------
    ValueError
        If a task has no binning defined. Previously such a task silently
        reused the binning of the preceding combination.
    """
    # task -> (bins, range) of the 2D histogram, both given as (x, y)
    binning_by_task = {
        "Shower Energy": ((50, 100), ((1e1, 1e2), (1e1, 400))),
        "Track Energy": ((25, 25), ((1e1, 1e2), (1e1, 150))),
        "Track Direction": ((50, 50), ((-1, 0), (-1, 0))),
        "Shower Direction": ((50, 50), ((-1, 0), (-1, 0))),
    }

    for particle, task in run_comb:
        if task not in binning_by_task:
            raise ValueError(
                f"task type not recognized: {task!r}; expected one of {sorted(binning_by_task)}"
            )
        bins, value_range = binning_by_task[task]
        plot_normalized_2dhisto(df, dict, task, particle, bins, value_range)


def plot_normalized_2dhisto(
    df,
    dict,
    task = "Shower Energy",
    particle = "Shower CC",
    bins = (50, 100),
    range = ((1e1, 1e2), (1e1, 400)),
):
    """Plot true vs. reconstructed: event counts (left) and per-x-bin normalized (right).

    The figure is saved in ``save_path`` as
    ``<event type>_<reco column>_normalized_2dhisto.png``.

    Parameters
    ----------
    df : pandas.DataFrame
        Event table with "Event type", the true quantity and the reco column.
    dict : dict
        Lookup table: ``dict[particle]`` is the row mask, ``dict[task]`` the
        column with the reconstructed quantity.
    task : str
        Must contain "Direction" or "Energy", and "Shower" or "Track" for energy tasks.
    particle : str
        Key of the row mask; also used in the figure title.
    bins, range : tuple
        Binning of the 2D histogram as (x, y), forwarded to hl.hist.

    Raises
    ------
    ValueError
        If the task is not recognized or the mask selects no rows.
    """
    # Retrieve dictionary values
    mask = dict[particle]
    algorithm = dict[task]

    # Filter once instead of re-evaluating df[mask] for every column access.
    selected = df[mask]
    if selected.empty:
        raise ValueError(f"no events selected for {particle!r}; nothing to plot")
    events = selected['Event type'].unique()[0]

    if "Direction" in task:
        true_column = "cos_zenith_true"
        xlabel = r"True zenith $\cos(\theta)$"
        ylabel = r"Reconstructed zenith $\cos(\theta)$"
        plot_type = "direction"
        diagonal = (-1, 0)
    elif "Energy" in task:
        true_column = "energy_true"
        xlabel = "True Energy [GeV]"
        ylabel = "Reconstructed Energy [GeV]"
        diagonal = (0, 100)
        if "Shower" in task:
            plot_type = "energy_shower"
        elif "Track" in task:
            plot_type = "energy_track"
        else:
            raise ValueError("task type not recognized")
    else:
        raise ValueError("task type not recognized")

    hh = hl.hist((selected[true_column], selected[algorithm]), bins=bins, range=range)

    # Plot the 2D histogram
    fig, (ax1, ax2) = plt.subplots(1,2, figsize=(14,6))

    # Event Distribution
    hl.plot2d(ax1, hh, cbar=True, cmap = "cividis", zmin = 5, clabel = "Events")
    ax1.set_ylim(range[1])

    # Normalized plot. plot_2dhisto draws on pyplot's current axes, so select the
    # right-hand panel explicitly instead of relying on it being current by chance.
    plt.sca(ax2)
    ax2 = plot_2dhisto(hh, normalize_along_x_axis(hh), plot_type)

    # Reference diagonal reco = true on the event-count panel (data coordinates);
    # the normalized panel gets its own diagonal from plot_2dhisto.
    ax1.plot(diagonal, diagonal, "r--", alpha=0.75, zorder=2, linewidth=3)
    for ax in (ax1, ax2):
        ax.set(xlabel=xlabel, ylabel=ylabel)

    # Font settings go in as keyword arguments: for figure-level labels the
    # size/weight given through fontdict= can be overridden by rcParams defaults.
    if "Shower" in task:
        fig.suptitle(f"NNFitShower for {particle} events", **title_font)
    elif "Track" in task:
        fig.suptitle(f"NNFitTrack for {particle} events", **title_font)
    fig.tight_layout()
    fig.savefig(os.path.join(save_path, f"{events}_{algorithm}_normalized_2dhisto.png"))

In [None]:
# Energy and direction performance of both NNFit algorithms for the shower CC selection.
run_all_plots(df, label_dict,[("Shower CC", "Shower Energy"), ("Shower CC", "Track Energy"), ("Shower CC", "Shower Direction"), ("Shower CC", "Track Direction")])

In [None]:
# Energy resolution in log10 space: log10(E_reco) - log10(E_true).
# (This is a difference of logarithms, not a relative error.)

recos = ["NNFitTrack_Log10Energy",
         "NNFitShower_Log10Energy"
        ]

# log10 of the true energy is the same for every reconstruction: compute it once.
log10_energy_true = df["energy_true"].apply(np.log10)

for reco in recos:
    df[f"{reco}_resolution"] = df[reco] - log10_energy_true

In [None]:
# One (reconstruction, event type) pair per resolution panel. Rows matched by none
# of the event-type masks hold NaN in "Event type"; NaN is dropped because it has
# no entry in label_dict and would only take up a panel.
comb = list(product(recos, df["Event type"].dropna().unique()))

In [None]:
def plot_resolution(
    df,
    comb,
    nrows = 2,
    ncols = 3,
    bins = (50, 100),
    x_data = "energy_true",
    suffix = "direction_2dhisto.png",
):
    """Plot the log10 energy resolution as 2D histograms, one panel per combination.

    The figure is saved in ``save_path`` as ``error_energy_recos_<suffix>``.

    Parameters
    ----------
    df : pandas.DataFrame
        Event table with "Event type", the ``x_data`` column and one
        ``<reco>_resolution`` column per reconstruction.
    comb : iterable of (reco, event_type)
        Panels to draw, filled row by row. Combinations beyond
        ``nrows * ncols`` are ignored; ``event_type`` must be a key of the
        module-level ``label_dict``.
    nrows, ncols : int
        Layout of the panel grid.
    bins : tuple
        (x, y) binning forwarded to seaborn.histplot.
    x_data : {"energy_true", "cos_zenith_true"}
        Column on the x axis; also selects the x-axis label.
    suffix : str
        End of the output file name, including the extension.
    """
    # Reco column -> algorithm name shown in the panel title
    algorithm_names = {
        "NNFitTrack_Log10Energy": "NNFitTrack",
        "NNFitShower_Log10Energy": "NNFitShower",
    }
    comb = list(comb)

    # squeeze=False always yields a 2D array of axes, also for a 1x1 grid.
    fig, axs = plt.subplots(ncols=ncols, nrows=nrows, figsize=normal_size, squeeze=False)
    panels = axs.flatten()

    for ax, (reco, event_type) in zip(panels, comb):
        sns.histplot(data=df[df["Event type"] == event_type], x=x_data, y=f"{reco}_resolution",
                     bins=bins, ax=ax, cbar=True, cbar_kws=dict(shrink=.65), cmap="cividis")
        ax.set(xlabel=None, ylabel=None, ylim=(-1, 1))
        ax.tick_params(axis='both', labelsize=12)
        label_events = label_dict[event_type]
        if reco in algorithm_names:
            ax.set_title(f"{label_events} with {algorithm_names[reco]}", fontdict=suptitle_font)

    # Panels without a combination would show up as empty frames: hide them.
    for unused in panels[len(comb):]:
        unused.set_visible(False)

    # Font settings go in as keyword arguments: for figure-level labels the
    # size/weight given through fontdict= can be overridden by rcParams defaults.
    fig.suptitle("Resolution for the NNFit reco", **title_font)
    if x_data == "cos_zenith_true":
        fig.supxlabel(r"True zenith $\cos(\theta)$", **axis_font)
    elif x_data == "energy_true":
        fig.supxlabel("True energy (GeV)", **axis_font)
    fig.supylabel(r"$\log{E_{reco}}-\log{E_{true}}$", **axis_font)
    fig.tight_layout()
    # os.path.join inserts the separator that plain concatenation misses whenever
    # save_path has no trailing slash (same as the other plotting functions).
    fig.savefig(os.path.join(save_path, "error_energy_recos_" + suffix))

In [None]:
# Energy resolution as a function of the true zenith angle.
# All warnings raised while plotting are suppressed for this call only.
with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    plot_resolution(df, comb, x_data="cos_zenith_true", suffix="direction_2dhisto_nnfit.png")  

In [None]:
# Energy resolution as a function of the true energy.
# All warnings raised while plotting are suppressed for this call only.
with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    plot_resolution(df, comb, x_data="energy_true", suffix="energy_2dhisto_nnfit.png")