In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
import warnings
from itertools import product
import sys

In [None]:
# Global plotting defaults: moderate resolution for inline display, high
# resolution for figures written to disk, and seaborn's white-grid style.
plt.rcParams.update({
    'figure.dpi': 100,
    'savefig.dpi': 1000,
})
sns.set(style="whitegrid")


In [None]:
# Neutrino flavour to analyse; it determines which summary ROOT file is read.
flv = "nutau"

# flavour key -> (identifier, summary ROOT file name).
# "numu" is accepted alongside the original "mu" key so that the muon entry
# follows the same naming scheme as "nutau" and "nue"; both map to the same values.
FLAVOUR_CONFIG = {
    "nutau": ("tau", "full_nutau_sample_sample.root"),
    "mu": ("numu", "full_numu_sample_sample.root"),
    "numu": ("numu", "full_numu_sample_sample.root"),
    "nue": ("showers", "full_nue_sample_sample.root"),
}

# Fail right here on a typo instead of with a NameError several cells later.
if flv not in FLAVOUR_CONFIG:
    raise ValueError(
        f"Unknown flavour {flv!r}; expected one of {sorted(FLAVOUR_CONFIG)}"
    )

identifier, summary_file = FLAVOUR_CONFIG[flv]

In [None]:
# Computing cluster the notebook runs on. It fixes three locations:
#   save_path   - directory the figures are written to
#   project_dir - directory added to sys.path so the local `scripts` package imports
#   path        - root directory of the input files
cluster = "woody"

if cluster == "woody":
    project_dir = "/home/saturn/capn/mppi133h/master_thesis/tau_appearance/reconstruction_perfomance"
    save_path = "/home/saturn/capn/mppi133h/master_thesis/tau_appearance/reconstruction_perfomance/plots"
    path = "/home/wecapstor3/capn/mppi133h/ANTARES/mc"
elif cluster == "lyon":
    project_dir = "/sps/km3net/users/mchadoli/master_thesis/tau_appearance/reconstuction_perfomance"
    save_path = "/sps/km3net/users/mchadoli/master_thesis/tau_appearance/reconstuction_perfomance/plots/"
    path = "/sps/km3net/users/mchadoli/ANTARES/"
else:
    # Without this branch an unknown cluster only surfaces later as a NameError.
    raise ValueError(f"Unknown cluster {cluster!r}; expected 'woody' or 'lyon'")

# Re-running the cell must not keep appending the same entry to sys.path.
if project_dir not in sys.path:
    sys.path.append(project_dir)
    

import scripts.file_management as fm
import scripts.lib_masks as lm

In [None]:
# Columns requested from the summary ROOT file: event identifiers, interaction
# labels, the true direction/energy, and one reconstructed cos(zenith) column
# per algorithm listed below.
columns_dir = [
    "RunID", "EventID", "TriggCounter", "Frame",
    "interaction_type", "is_cc", "Type",
    "cos_zenith_true", "energy_true",
    *(
        f"{algo}_cos_zenith"
        for algo in ("aafit", "bbfit", "gridfit", "showertantra", "showerdusj")
    ),
]

In [None]:
def rename_h5_df_cols(df, mapper={"TrigCount": "TriggCounter", "EventID": "Frame"}):
    """Return a copy of ``df`` with its columns renamed according to ``mapper``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose columns are renamed; it is not modified.
    mapper : dict
        Old-name -> new-name mapping handed to ``DataFrame.rename``. Names that
        are absent from ``df`` are ignored by pandas. The default dict is only
        read, never mutated.

    Returns
    -------
    pandas.DataFrame
        New frame carrying the renamed columns.
    """
    renamed = df.rename(columns=mapper)
    return renamed

In [None]:
# Load the data
print("Loading the data...")

## Load the dataframes
print("Importing into dataframes...")

# Load nnfit reco files
# Build the directory once with os.path.join so that listing and loading use the
# same folder. Plain concatenation (path + "nnfit_reco") produced ".../mcnnfit_reco"
# whenever `path` has no trailing slash (the "woody" configuration).
# NOTE(review): the "*taus*" pattern is fixed regardless of `flv` - confirm this
# is intended when another flavour is selected.
nnfit_dir = os.path.join(path, "nnfit_reco")
nnfit_files = fm.list_files_with_pattern(nnfit_dir, "*taus*")

df_nnfit = fm.load_dataframes(nnfit_files, folder_path=nnfit_dir)

In [None]:
# Read the selected flavour's summary ROOT file, keeping only the columns
# listed in `columns_dir`.
summary_relpath = f"extracted_merged/low_energy/{summary_file}"
rootfile = os.path.join(path, summary_relpath)
dfnu = fm.rootfile_to_df(rootfile, columns=columns_dir)

In [None]:
# Bring the NNFit column names in line with the summary-file naming
# (TrigCount -> TriggCounter, EventID -> Frame) so the merge keys used below exist.
print("Renaming the columns...")
df_nnfit = rename_h5_df_cols(df_nnfit)

In [None]:
# Attach the NNFit results to the summary events. A left join keeps every
# summary event, including those without a matching NNFit row.
merge_keys = ["RunID", "Frame", "TriggCounter"]
df = dfnu.merge(df_nnfit, how="left", on=merge_keys)

print("Number of events in the merged dataframe: ", len(df))
print("Number of NNfit events:", len(df_nnfit))
print("Number of nutau events:", len(dfnu))

In [None]:
def cos_zenith_from_theta(df, reco_algo):
    """Convert an angle column given in degrees into ``-cos(angle)``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the angle column; it is not modified.
    reco_algo : str
        Name of the column containing the angle in degrees.

    Returns
    -------
    pandas.Series
        ``-cos(radians(df[reco_algo]))`` with the same index as ``df``.
        NaN inputs stay NaN.
    """
    # NumPy ufuncs act on the whole Series at once and preserve index and name,
    # which avoids one Python-level lambda call per row.
    return -np.cos(np.radians(df[reco_algo]))

In [None]:
# Derive a cos(zenith) column for each NNFit output from its theta column.
for nnfit_reco in ("NNFitTrack", "NNFitShower"):
    theta_col = f"{nnfit_reco}_Theta"
    df[f"{nnfit_reco}_cos_zenith"] = cos_zenith_from_theta(df, theta_col)

In [None]:
# Apply the masks
# NOTE(review): `df` is replaced by the output of apply_all_masks, so re-running
# this cell feeds the already-processed frame back in - confirm the helper is idempotent.
df = lm.apply_all_masks(df)

# Per-class selectors. They are later used as `df[mask]` when plotting, so they are
# presumably boolean masks aligned with `df` - TODO confirm against scripts.lib_masks.
df_track_ma = lm.get_trackmask(df)
df_shower_cc_ma = lm.get_showermask_cc(df)
df_shower_nc_ma = lm.get_showermask_nc(df)

In [None]:
def plot_cos_zenith(df, reco_algo, mask, path = "/sps/km3net/users/mchadoli/tau_appearance/expected_events/plots/"):
    """Plot true vs. reconstructed cos(zenith) as 2D histograms and save the figure.

    One panel is drawn per entry of ``reco_algo`` on a 3x2 grid, using only the
    rows of ``df`` selected by ``mask``. Because panels and columns are paired
    with ``zip``, entries beyond the sixth are not drawn.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``cos_zenith_true``, every column named in ``reco_algo``,
        and the ``Event type`` and ``Flavour type`` columns used for the title
        and the file name.
    reco_algo : iterable of str
        Names of the columns holding the reconstructed cos(zenith).
    mask
        Row selector applied as ``df[mask]``.
    path : str
        Directory the PNG is written to. This argument used to be ignored in
        favour of the notebook-level ``save_path`` variable.

    Raises
    ------
    IndexError
        If ``mask`` selects no rows (there is no event/flavour type to label with).
    """
    xlabel = "True cos(zenith)"
    ylabel = "Reco cos(zenith)"
    bins = (50,50)

    # Select the rows once instead of re-evaluating df[mask] for every panel and label.
    selected = df[mask]
    event_type = selected['Event type'].unique()[0]
    flavour_type = selected['Flavour type'].unique()[0]

    fig, axs = plt.subplots(nrows=3, ncols=2, figsize=(20, 10))
    # Flatten the axs array to make it easier to iterate over
    axs = axs.flatten()

    for ax, reco in zip(axs, reco_algo):
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            sns.histplot(data=selected, x="cos_zenith_true", y=f"{reco}",
                    bins=bins, ax=ax,
                    cbar=True, cmap = "viridis", cbar_kws=dict(shrink=.65),
                    )
            ax.set(xlabel=None, ylabel=None, ylim=(-1,0))
            ax.set_title(reco, fontsize=24)
            # Diagonal marks reco == true. The old "k--" format string asked for
            # black while color="red" overrode it; only the red dashed line is kept.
            ax.plot((-1, 0), (-1, 0), linestyle="--", color="red", alpha=0.75, zorder=2)
    fig.suptitle(f"True vs Reco for {event_type}", fontsize=24)
    fig.supxlabel(xlabel, fontsize=20)
    fig.supylabel(ylabel, fontsize=20)
    fig.tight_layout()
    # Honour the `path` argument rather than the global save_path.
    fig.savefig(os.path.join(path, f"{flavour_type}_{event_type}_dir_reco_2dhisto.png"))

In [None]:
# Plot true vs. reconstructed cos(zenith) for every reco algorithm, once per event selection

def plot_all_cos_zenith(df, algorithms, masks, path):
    """Call ``plot_cos_zenith`` once for every selection in ``masks``.

    Parameters
    ----------
    df : pandas.DataFrame
        Event table forwarded unchanged to ``plot_cos_zenith``.
    algorithms : iterable of str
        Reconstructed cos(zenith) column names, forwarded as ``reco_algo``.
    masks : iterable
        Row selectors; each one yields its own figure.
    path : str
        Forwarded as the ``path`` argument of ``plot_cos_zenith``.
    """
    for selection in masks:
        plot_cos_zenith(df, algorithms, selection, path)

In [None]:
# Produce one figure per selection (tracks, CC showers, NC showers).
# The six columns below fill the 3x2 panel grid of plot_cos_zenith;
# showerdusj_cos_zenith is loaded from the summary file but not plotted here.
plot_all_cos_zenith(
    df, 
    ["NNFitTrack_cos_zenith",
     "NNFitShower_cos_zenith",
     "aafit_cos_zenith",
     "bbfit_cos_zenith",
     "gridfit_cos_zenith",
     "showertantra_cos_zenith"],
    [df_track_ma,
     df_shower_cc_ma,
     df_shower_nc_ma],
    save_path)

In [None]:
# Algorithms for which an angle (degrees) and an absolute angular error are derived.
recos = [
    "aafit",
    "bbfit",
    "gridfit",
    "showertantra",
    "NNFitTrack",
    "NNFitShower",
]

# theta = arccos(-cos_zenith), expressed in degrees, for the true direction ...
df["theta_True"] = np.degrees(np.arccos(-df["cos_zenith_true"]))

# ... and for every reconstruction.
# NOTE(review): for the NNFit entries this overwrites the existing *_Theta columns.
for reco in recos:
    print(f"Calculating the zenith angle of {reco.ljust(15)}")
    theta_reco = np.degrees(np.arccos(-df[f"{reco}_cos_zenith"]))
    df[f"{reco}_Theta"] = theta_reco
    print("Calculating the absolute error column for ", reco, "\n")
    df[f"{reco}_theta_diff"] = (theta_reco - df["theta_True"]).abs()

In [None]:
def calculate_stats(df, reco, classfication="Event type"):
    """Summarise the absolute angular error of one reconstruction per class.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the column ``f"{reco}_theta_diff"`` and the grouping column.
    reco : str
        Reconstruction name used as prefix of the ``*_theta_diff`` column.
    classfication : str
        Column to group by (spelling kept for backward compatibility).

    Returns
    -------
    tuple of pandas.Series
        ``(mean, median, std)``, each indexed by the values of the grouping column.
    """
    # Build the grouped selection once; previously the same groupby was
    # constructed three times, once per statistic.
    grouped_error = df.groupby(classfication)[f"{reco}_theta_diff"]
    return grouped_error.mean(), grouped_error.median(), grouped_error.std()

In [None]:

# Reconstructions to show, one panel each on the 3x2 grid created below.
recos = [
    "aafit",
    "bbfit",
    "gridfit",
    "showertantra",
    "NNFitTrack",
    "NNFitShower",
]

fig, axs = plt.subplots(nrows=3, ncols=2, figsize=(12, 12))
# A flat sequence of axes can be paired directly with the reconstructions.
axs = axs.flatten()

# One fixed colour per event class, used for both the KDE curves and the marker lines.
custom_palette = {"tracks": "blue", "showers_nc": "green", "showers_cc": "red"}

with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    for ax, reco in zip(axs, recos):
        error_col = f"{reco}_theta_diff"
        # Per-class densities, each normalised on its own (common_norm=False).
        sns.kdeplot(
            data=df,
            x=error_col,
            hue="Event type",
            palette=custom_palette,
            common_norm=False,
            fill=True,
            multiple="layer",
            ax=ax,
        )
        mean, median, std = calculate_stats(df, reco)
        for event_type in df["Event type"].unique():
            print(f"Calculating the stats for {reco} and {event_type}")
            key = f"{event_type}"
            # Dashed line = mean, solid line = median of the class.
            ax.axvline(mean[key], color=custom_palette[key], linestyle="--", label=f"{event_type} mean")
            ax.axvline(median[key], color=custom_palette[key], linestyle="-", label=f"{event_type} median")
        ax.set(xlabel=None, ylabel=None, xlim=(0, 100))
        ax.set_title(f"Absolute error distribution for {reco}", fontsize=14)

    # Figure-level labels and export stay inside the warning filter, as before.
    fig.suptitle("Absolute error distribution for reco algorithms", fontsize=16)
    fig.supxlabel(r"$|\theta_{reco}-\theta_{true}| (\degree)$", fontsize=14)
    fig.supylabel("Density", fontsize=14)
    fig.tight_layout()
    flavour_label = df['Flavour type'].unique()[0]
    fig.savefig(os.path.join(save_path,f"{flavour_label}_abs_error_dir_recos_kde_2.png"))
