In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
import warnings
from itertools import product
import sys


In [None]:
# Use seaborn's "whitegrid" style for the figures created in this notebook.
sns.set(style="whitegrid")

In [None]:
# Computing cluster this notebook runs on; it selects the directory layout below.
cluster = "woody"

# save_path   : directory the figures are written to
# project_dir : directory added to sys.path so the local `scripts` package can be imported
# path        : root directory of the input data
if cluster == "woody":
    save_path = "/home/saturn/capn/mppi133h/master_thesis/tau_appearance/reconstruction_perfomance/plots"
    project_dir = "/home/saturn/capn/mppi133h/master_thesis/tau_appearance/reconstruction_perfomance"
    path = "/home/wecapstor3/capn/mppi133h/ANTARES/mc"
elif cluster == "lyon":
    save_path = "/sps/km3net/users/mchadoli/master_thesis/tau_appearance/reconstuction_perfomance/plots/"
    project_dir = "/sps/km3net/users/mchadoli/master_thesis/tau_appearance/reconstuction_perfomance"
    path = "/sps/km3net/users/mchadoli/ANTARES/"
else:
    # Fail here with a clear message instead of a NameError in a later cell.
    raise ValueError(f"Unknown cluster {cluster!r}; expected 'woody' or 'lyon'.")

# Re-running this cell must not keep appending the same entry to sys.path.
if project_dir not in sys.path:
    sys.path.append(project_dir)
    

import scripts.file_management as fm
from scripts.lib_masks import *

In [None]:
# Column names passed as `columns` to fm.rootfile_to_df when the nutau ROOT file
# is read. The first three entries are the keys later used to merge that
# dataframe with the NNFit output.
columns_energy = [
    "run_id",
    "frame_index",
    "event_trigger_counter",
    "interaction_type",
    "is_cc",
    "type",
    "energy_true",
    "cos_zenith_true",
    "energy_aafit_dEdX_CEA",
    "energy_aafit_ANN_ECAP",
    "energy_aafit_R_Bologna",
    "showerdusj_energy",
    "showertantra_energy",
]

In [None]:
def rename_h5_df_cols(
    df,
    mapper={
        "RunID": "run_id",
        "EventID": "frame_index",
        "TrigCount": "event_trigger_counter",
    },
):
    """Return a new dataframe with its columns renamed according to ``mapper``.

    Parameters
    ----------
    df : pandas.DataFrame
        Input frame; it is not modified.
    mapper : dict, optional
        Maps old column names to new ones. Columns that are not listed keep
        their name. The default maps the three event-identifier columns
        ``RunID``, ``EventID`` and ``TrigCount`` to ``run_id``, ``frame_index``
        and ``event_trigger_counter``.

    Returns
    -------
    pandas.DataFrame
        The renamed frame.
    """
    renamed = df.rename(mapper, axis="columns")
    return renamed

In [None]:
def check_negative_energy(df, column):
    """Replace negative entries of ``df[column]`` with NaN, in place.

    Despite the name, nothing is only "checked": the column of ``df`` is
    overwritten and the function returns ``None``. Non-negative values and
    existing NaNs are left unchanged.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to modify.
    column : str
        Name of the column to clean.
    """
    values = df[column]
    is_negative = values < 0
    df[column] = np.where(is_negative, np.nan, values)

In [None]:
# Load the data
print("Loading the data...")

# Load the dataframes
print("Importing into dataframes...")

# List the NNFit reconstruction files matching "*taus*".
# The folder is built with os.path.join rather than string concatenation:
# the "woody" data root has no trailing separator, so `path + "nnfit_reco"`
# would point at ".../mcnnfit_reco" instead of ".../mc/nnfit_reco".
nnfit_files = fm.list_files_with_pattern(os.path.join(path, "nnfit_reco"), "*taus*")

In [None]:
# Load the listed NNFit files into `df_nnfit`. The folder is built with
# os.path.join so that it is valid whether or not `path` ends with a separator
# (plain concatenation produced ".../mcnnfit_reco" for the "woody" data root).
df_nnfit = fm.load_dataframes(nnfit_files, folder_path=os.path.join(path, "nnfit_reco"))

In [None]:
# Location of the nutau ROOT file below the data root.
rootfile = os.path.join(path, "nnfit_sample/nutau.root")
# Load it through the project helper, requesting the columns in `columns_energy`.
dfnu = fm.rootfile_to_df(rootfile, columns=columns_energy)

In [None]:
# Keep only the rows selected by the low-energy mask.
# NOTE(review): get_low_energymask is presumably provided by the star import of
# scripts.lib_masks; its exact energy cut is not visible here.
# `dfnu` is rebound to the filtered frame, so the unfiltered frame is lost after this cell.
dfnu_low_ma = get_low_energymask(dfnu)
dfnu = dfnu.loc[dfnu_low_ma]

In [None]:
# Rename the NNFit identifier columns (RunID / EventID / TrigCount) to the
# names used in `dfnu`, so both frames can be merged on the same keys.
print("Renaming the columns...")
df_nnfit = rename_h5_df_cols(df_nnfit)

In [None]:
# Left-join the NNFit reconstruction onto the nutau events using the three
# event identifiers as keys: every row of `dfnu` is kept, and rows without a
# matching NNFit entry get NaN in the NNFit columns.
df = dfnu.merge(df_nnfit, on=["run_id","frame_index","event_trigger_counter"], how="left")

# Sanity check on the row counts: with a left join the merged frame can only
# have more rows than `dfnu` if a key occurs more than once in `df_nnfit`.
print("Number of events in the merged dataframe: ", df.shape[0])
print("Number of NNfit events:", df_nnfit.shape[0])
print("Number of nutau events:", dfnu.shape[0])

In [None]:
# Label every event with its neutrino flavour in the "Flavour type" column.
# Rows selected by none of the masks keep NaN; where masks overlap, the label
# assigned last wins.
# NOTE(review): the get_*mask helpers are presumably provided by the star
# import of scripts.lib_masks; their selection criteria are not visible here.
df_el_ma = get_nuemask(df)
df.loc[df_el_ma, "Flavour type"] = "electron"

df_muon_ma = get_numumask(df)
df.loc[df_muon_ma, "Flavour type"] = "muon"

df_tau_ma = get_nutaumask(df)
df.loc[df_tau_ma, "Flavour type"] = "tau"

In [None]:
# Label every event with its event type ("tracks", "showers_nc", "showers_cc")
# in the "Event type" column. Rows selected by none of the masks keep NaN;
# where masks overlap, the label assigned last wins.
# The three masks are kept as variables because the plotting cells below use
# them to select events.
df_track_ma = get_trackmask(df)
df.loc[df_track_ma, "Event type"] = "tracks"

df_shower_nc_ma = get_showermask_nc(df)
df.loc[df_shower_nc_ma, "Event type"] = "showers_nc"

df_shower_cc_ma = get_showermask_cc(df)
df.loc[df_shower_cc_ma, "Event type"] = "showers_cc"

In [None]:
# Convert the NNFit log10 energies to linear energies. One vectorised power
# per column replaces the element-by-element Python lambda; the base is the
# float 10.0 so the result is floating point whatever the input dtype.
df["NNFitTrack_Energy"] = np.power(10.0, df["NNFitTrack_Log10Energy"])
df["NNFitShower_Energy"] = np.power(10.0, df["NNFitShower_Log10Energy"])

In [None]:
def plot_energy_reco(df,
                     recos,
                     mask,
                     plt_col,
                     plt_row,
                     bins=100,
                     xlim=(1e1, 1e2),
                     ylim=(10, 1e3),
                     xlabel="True Energy (GeV)",
                     ylabel="Reconstructed Energy (GeV)",
                     path="/sps/km3net/users/mchadoli/tau_appearance/expected_events/plots/"
                     ):
    """Draw and save 2D histograms of reconstructed versus true energy.

    One panel is drawn per entry of ``recos`` on a ``plt_row`` x ``plt_col``
    grid, both axes logarithmic, with a dashed red diagonal marking
    ``reco == true``. Only rows selected by ``mask`` with a strictly positive
    reconstructed value enter a panel. The figure is saved as
    ``<event type>_energy_reco_2dhisto.png`` inside ``path``, where the event
    type is the first unique value of the ``"Event type"`` column among the
    selected rows.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``"energy_true"``, ``"Event type"`` and every column
        named in ``recos``.
    recos : sequence of str
        Reconstructed-energy columns, one per panel. Entries beyond the number
        of panels are ignored; surplus panels stay empty.
    mask : boolean mask
        Row selection applied as ``df[mask]``.
    plt_col, plt_row : int
        Number of columns / rows of the panel grid.
    bins : int, optional
        Forwarded to ``sns.histplot``.
    xlim, ylim : tuple or None, optional
        Axis limits applied to every panel; a falsy value leaves autoscaling on.
    xlabel, ylabel : str, optional
        Shared axis labels of the figure.
    path : str, optional
        Existing directory the PNG is written to.

    Raises
    ------
    ValueError
        If ``mask`` selects no rows (there is nothing to plot and no event
        type to name the figure after).
    """
    # Select the events once instead of re-evaluating df[mask] for every panel.
    selected = df[mask]
    if selected.empty:
        raise ValueError("mask selects no events; nothing to plot")
    event_label = selected["Event type"].unique()[0]

    # squeeze=False always returns a 2D array of axes, so flattening also
    # works for a 1x1 grid (where plt.subplots would return a bare Axes).
    fig, axes = plt.subplots(nrows=plt_row, ncols=plt_col, figsize=(10, 10), squeeze=False)
    axes = axes.flatten()

    for panel, reco in zip(axes, recos):
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            sns.histplot(data=selected[selected[reco] > 0], x="energy_true", y=reco,
                         bins=bins, log_scale=(True, True), ax=panel,
                         cbar=True, cmap='viridis', cbar_kws=dict(shrink=.65),
                         )
            # Diagonal reco == true. The style is given by keywords only: the
            # former "k--" format string also set a colour, which conflicted
            # with color="red".
            panel.plot((0, 1e2), (0, 1e2), linestyle="--", color="red",
                       alpha=0.75, zorder=2, linewidth=4)
            panel.tick_params(axis='both', which='minor', rotation=30, labelsize=10)
            panel.set(xlabel=None, ylabel=None)
            panel.set_title(f"{reco}", fontsize=22)
            if xlim:
                panel.set_xlim(xlim)
            if ylim:
                panel.set_ylim(ylim)

    fig.suptitle(f"True vs Reco for {event_label}", fontsize=24)
    fig.supxlabel(xlabel, fontsize=22)
    fig.supylabel(ylabel, fontsize=22)
    fig.tight_layout()
    fig.savefig(os.path.join(path, f"{event_label}_energy_reco_2dhisto.png"))

In [None]:
# Plotting for the track events
reco = ["energy_aafit_dEdX_CEA",
        "energy_aafit_ANN_ECAP",
        "NNFitTrack_Energy",
        "NNFitShower_Energy",
        ]

# Save into the plot directory of the selected cluster; the function's default
# `path` is a hard-coded Lyon directory.
plot_energy_reco(df, reco, df_track_ma, 2, 2, path=save_path)

In [None]:
# Plotting for the charged-current shower events
reco = ["energy_aafit_dEdX_CEA",
        "energy_aafit_ANN_ECAP",
        "NNFitTrack_Energy",
        "NNFitShower_Energy",
        ]

# Save into the plot directory of the selected cluster; the function's default
# `path` is a hard-coded Lyon directory.
plot_energy_reco(df, reco, df_shower_cc_ma, 2, 2, path=save_path)

In [None]:
# Plotting for the neutral-current shower events
recos = ["energy_aafit_dEdX_CEA",
        "energy_aafit_ANN_ECAP",
        "NNFitTrack_Energy",
        "NNFitShower_Energy",
        ]

# Pass the list defined in this cell: the call used to pass `reco`, a variable
# left over from the previous cell, so this cell only worked when that cell
# had been run first.
# Save into the plot directory of the selected cluster; the function's default
# `path` is a hard-coded Lyon directory.
plot_energy_reco(df, recos, df_shower_nc_ma, 2, 2, path=save_path)

In [None]:
# Absolute relative energy error |E_reco - E_true| / E_true, stored for each
# algorithm in a new column named "<algorithm>_relative_error".
track_energy_reco = ["energy_aafit_dEdX_CEA", "energy_aafit_ANN_ECAP", "NNFitTrack_Energy"]
shower_energy_reco = ["showerdusj_energy", "showertantra_energy", "NNFitShower_Energy"]

for algo in track_energy_reco + shower_energy_reco:
    deviation = df[algo].sub(df["energy_true"]).abs()
    df[f"{algo}_relative_error"] = deviation.div(df["energy_true"])

In [None]:
# True-energy binning: 25 equally spaced edges between 10 and 100, i.e. 24 bins.
energy_true_bins = np.linspace(10, 100, 25)
# Assign each event to its bin; values outside the edges become NaN.
# NOTE(review): with pd.cut's defaults the bins are right-closed, so a value of
# exactly 10 would fall outside the first bin — confirm this is acceptable.
df["energy_true_bins"] = pd.cut(df["energy_true"], bins=energy_true_bins)

In [None]:
def calculate_energy_reco_median(df, reco, energy_bins):
    """Return the median of ``reco`` within each value of ``energy_true_bins``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain an ``"energy_true_bins"`` column and the ``reco`` column.
    reco : str
        Name of the column whose per-bin median is computed.
    energy_bins
        Not used by the function; the grouping relies on the existing
        ``"energy_true_bins"`` column. Kept so existing calls stay valid.

    Returns
    -------
    pandas.DataFrame
        One row per group with the columns ``"energy_true_bins"`` and ``reco``.
    """
    per_bin = df.groupby("energy_true_bins")
    medians = per_bin[reco].median()
    return medians.reset_index()

In [None]:
# Median relative error per true-energy bin for tau CC shower events.
# The frame keeps the name `mean_error` because later cells use it, but every
# value in it is a median.

columns_errors = ["median_error_nnfit_track", "median_error_nnfit_shower", "median_error_aafit_dEdX_CEA", "median_error_aafit_ANN_ECAP", "median_error_showerdusj_energy", "median_error_showertantra_energy"]
df_cols = ["NNFitTrack_Energy_relative_error", "NNFitShower_Energy_relative_error", "energy_aafit_dEdX_CEA_relative_error", "energy_aafit_ANN_ECAP_relative_error", "showerdusj_energy_relative_error", "showertantra_energy_relative_error"]

dfs = df[(df["Flavour type"] == "tau") & (df["Event type"] == "showers_cc")]

# One groupby over all error columns instead of one per column.
# observed=False is explicit so that every bin is present (empty bins give
# NaN); the midpoints assigned below need exactly one row per bin.
with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    mean_error = (
        dfs.groupby("energy_true_bins", observed=False)[df_cols]
        .median()
        .rename(columns=dict(zip(df_cols, columns_errors)))
    )

# Not used in this cell; kept in case a later cell reads it.
energy_true_bins_midpoints = dfs["energy_true_bins"].apply(lambda x: x.mid).unique()

# Bin centres computed from the exact bin edges.
bin_edges = np.asarray(energy_true_bins, dtype=float)
midpoints = list((bin_edges[:-1] + bin_edges[1:]) / 2)
mean_error["midpoints"] = midpoints
mean_error = mean_error.reset_index()
mean_error.head()


In [None]:
# Median absolute relative error versus true energy, one line per algorithm.
fig, ax = plt.subplots(figsize=(20, 10))
with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    for col in columns_errors:
        # Label every line so the algorithms can be told apart in the legend.
        sns.lineplot(data=mean_error, x="midpoints", y=col, label=col,
                     marker="o", markersize=10, linestyle="-", ax=ax)
    ax.set_ylabel(r"$\frac{|E_{rec} - E_{true}|}{E_{true}}$", fontsize=24)
    ax.set_xlabel("True energy (GeV)", fontsize=24)
    ax.set_title("Median absolute relative error for energy reconstructions", fontsize=24)
    ax.legend(fontsize=15)
    fig.tight_layout()
    fig.show()
fig.savefig(os.path.join(save_path, "median_absolute_relative_error.png"))

In [None]:
# Log10 energy resolution of the NNFit estimators:
# log10(E_reco) - log10(E_true), stored in "<column>_resolution".
recos = ["NNFitTrack_Log10Energy",
         "NNFitShower_Log10Energy"
        ]

for reco in recos:
    log10_true = df["energy_true"].apply(np.log10)
    df[f"{reco}_resolution"] = df[reco].sub(log10_true)

In [None]:
# Every (reconstruction column, event type) pair.
# NOTE(review): unique() keeps NaN, so if any event matched none of the
# event-type masks, a NaN "event type" is part of these combinations — confirm
# that is intended.
comb = list(product(recos, df["Event type"].unique()))