In [None]:
import sys

sys.path.append("..")

import core.assignment_models as Models
from core.DataLoader import DataPreprocessor, DataConfig, LoadConfig
import core
from importlib import reload
import numpy as np
import keras
import matplotlib.pyplot as plt
import yaml

import os

# Maximum number of jets per event handed to the loader configuration; the
# N_jets accuracy plot further down derives its bin count from it as well.
MAX_JETS = 6

# Directory that receives every figure saved by this notebook. The trailing
# slash matters: later cells build paths as ``PLOTS_DIR + "<name>.pdf"``.
PLOTS_DIR = "plots/baseline/"

# exist_ok=True keeps the cell re-runnable and avoids the check-then-create
# race of a separate exists()/makedirs() pair.
os.makedirs(PLOTS_DIR, exist_ok=True)

# Branch names of the predicted neutrino momenta: three Cartesian components
# for the neutrino followed by three for the antineutrino.
neutrino_momenta_branches = (
    ["nu_flows_neutrino_px", "nu_flows_neutrino_py", "nu_flows_neutrino_pz"]
    + [
        "nu_flows_antineutrino_px",
        "nu_flows_antineutrino_py",
        "nu_flows_antineutrino_pz",
    ]
)

# Truth-level branches in the matching order (top-side neutrino first, then
# the tbar-side one), so the two lists can be compared element by element.
truth_neutrino_momenta_branches = (
    ["truth_top_neutrino_px", "truth_top_neutrino_py", "truth_top_neutrino_pz"]
    + [
        "truth_tbar_neutrino_px",
        "truth_tbar_neutrino_py",
        "truth_tbar_neutrino_pz",
    ]
)

# Loader configuration: which branches are read, how many objects per event
# are kept, and which branches are carried along without being model inputs.
load_config = LoadConfig(
    # Object multiplicities.
    max_jets=MAX_JETS,
    max_leptons=2,
    # Per-object input features.
    jet_features=[
        "ordered_jet_pt",
        "ordered_jet_eta",
        "ordered_jet_phi",
        "ordered_jet_e",
        "ordered_jet_b_tag",
    ],
    lepton_features=["lep_pt", "lep_eta", "lep_phi", "lep_e"],
    met_features=["met_met_NOSYS", "met_phi_NOSYS"],
    # Truth labels.
    jet_truth_label="ordered_event_jet_truth_idx",
    lepton_truth_label="event_lepton_truth_idx",
    # Per-event weight branch.
    event_weight="weight_mc_NOSYS",
    # Branches kept for evaluation only: event-level truth quantities followed
    # by the predicted and the truth neutrino momenta.
    non_training_features=[
        "truth_ttbar_mass",
        "truth_ttbar_pt",
        "N_jets",
        "truth_initial_parton_num_gluons",
        "truth_top_mass",
        "truth_tbar_mass",
        *neutrino_momenta_branches,
        *truth_neutrino_momenta_branches,
    ],
)

# Preprocessor instance driven by ``load_config``.
DataProcessor = DataPreprocessor(load_config)

# Dataset locations live in a YAML file next to the notebook.
data_config_path = "data_config.yaml"
with open(data_config_path) as config_stream:
    data_configs = yaml.safe_load(config_stream)

# Load at most one million events of the "nominal" sample. The second
# positional argument is presumably the tree/key to read -- confirm against
# DataPreprocessor.load_data.
DataProcessor.load_data(
    data_configs["nominal"]["data_path"],
    "reco",
    max_events=1_000_000,
)

def compute_neutrino_prediction_deviation(data_dict, data_config : DataConfig):
    """Per-event distance between predicted and truth neutrino momenta.

    The columns named in the module-level lists ``neutrino_momenta_branches``
    and ``truth_neutrino_momenta_branches`` are gathered from the
    ``"non_training"`` array, and the Euclidean norm of their element-wise
    difference is taken over the six components of each row.

    Args:
        data_dict: Mapping with a ``"non_training"`` entry that supports
            ``[:, column]`` indexing and has a ``shape`` (a 2-D array).
        data_config: Object whose ``feature_indices["non_training"]`` maps a
            branch name to its column index in that array.

    Returns:
        1-D float64 array with one deviation per row, expressed in the same
        units as the momentum branches.
    """
    column_of = data_config.feature_indices["non_training"]
    non_training = data_dict["non_training"]

    def select_columns(branches):
        # Gather all requested columns in one indexing step and force a
        # C-ordered float64 result so the norm is computed in double precision.
        columns = [column_of[branch] for branch in branches]
        return np.ascontiguousarray(non_training[:, columns], dtype=np.float64)

    predicted = select_columns(neutrino_momenta_branches)
    truth = select_columns(truth_neutrino_momenta_branches)
    return np.linalg.norm(predicted - truth, axis=1)
# Register the deviation as a derived feature; the name given here is the one
# later passed as ``feature_name`` when plotting against it.
DataProcessor.add_custom_feature(
    compute_neutrino_prediction_deviation,
    "neutrino_prediction_deviation",
)

# Split with a fixed random_state so reruns yield the same partition.
X_train, y_train, X_val, y_val = DataProcessor.split_data(
    random_state=42,
    test_size=0.1,
)

# Configuration object consumed by the assigners and the evaluator below.
data_config = DataProcessor.get_data_config()

In [None]:
import core.assignment_models.BaselineAssignmentMethods as BaselineMethods
import core.reconstruction as Evaluation

# Re-import the project modules so that source edits made after the first
# import take effect without restarting the kernel. This has to happen before
# any object is constructed from them.
reload(Evaluation)
reload(BaselineMethods)
# Baseline assignment methods compared in this notebook. The ``name`` strings
# are LaTeX labels -- presumably shown in legends/axis labels; confirm.
delta_r_assigner = BaselineMethods.DeltaRAssigner(
    data_config, name=r"$\Delta R(\ell,j)$-Assigner"
)
# NOTE(review): this assigner is constructed but never handed to the evaluator
# below -- confirm whether leaving it out is intentional.
invariant_mass_assigner = BaselineMethods.LeptonJetMassAssigner(
    data_config, name=r"$m(\ell, j)$-Assigner"
)
# top_mass=173.5e3 is presumably in MeV (same scale as the 340e3-800e3 mass
# ranges used for the plots) -- TODO confirm unit and value.
combinatorics_assigner = BaselineMethods.MassCombinatoricsAssigner(
    data_config,
    neutrino_momenta_branches=neutrino_momenta_branches,
    top_mass=173.5e3,
    name=r"$\chi^2$-method",
)
# Reference reconstructor labelled "Ground Truth".
ground_truth_assigner = Evaluation.GroundTruthReconstructor(
    data_config, name="Ground Truth"
)
# Evaluate on the validation split only. Later cells pass ``assigner_index``
# values of 0 and 1, which presumably index into this list in the order given
# here (0: Delta R, 1: chi^2, 2: ground truth) -- confirm before reordering.
evaluator = Evaluation.ReconstructionEvaluator(
    [
        delta_r_assigner,
        combinatorics_assigner,
        ground_truth_assigner,
    ],
    X_val,
    y_val,
    neutrino_momenta_branches=neutrino_momenta_branches,
)

In [None]:
# Assignment success as a function of the custom deviation feature, for the
# assigner at index 1.
# The feature is plotted in the raw momentum units. With an axis range up to
# 0.5e6 (and top_mass given as 173.5e3 elsewhere in this notebook) those are
# taken to be MeV, so the label names MeV rather than GeV.
# NOTE(review): confirm the branch units against the ntuple definition.
evaluator.plot_feature_assignment_success(
    "custom",
    feature_name="neutrino_prediction_deviation",
    assigner_index=1,
    fancy_feature_label="Neutrino Prediction Deviation (MeV)",
    xlims=(0, 0.5e6),
)

In [None]:
# Top-mass-deviation view for the assigner at index 0, x-range limited to [0, 1].
evaluator.plot_top_mass_deviation_assignment_success(
    xlims=(0, 1),
    assigner_index=0,
)

In [None]:
# Top-mass-deviation view for the assigner at index 1, x-range limited to [0, 1].
evaluator.plot_top_mass_deviation_assignment_success(
    xlims=(0, 1),
    assigner_index=1,
)

In [None]:
# Accuracy overview of all registered methods, stored as a PDF in PLOTS_DIR.
fig, ax = evaluator.plot_all_accuracies()
fig.savefig(f"{PLOTS_DIR}all_accuracies.pdf")

In [None]:
# Binned top-mass resolution against the truth ttbar mass.
fig, ax = evaluator.plot_binned_top_mass_resolution(
    feature_data_type="non_training",
    feature_name="truth_ttbar_mass",
    fancy_feature_label=r"$m(t\overline{t})$ [GeV]",
    xlims=(340e3, 800e3),
)
# The axis is drawn in the raw feature units, assumed to be MeV (340e3-800e3
# range) -- confirm. Tick values are therefore divided by 1e3 to agree with the
# "[GeV]" label. No "k" suffix is appended: "340k" beside "[GeV]" would
# overstate the scale by a factor of 1000. ":g" keeps fractional tick values
# instead of truncating them.
ticks = ax.get_xticks()
ax.set_xticks(ticks)  # pin positions before attaching fixed labels
ax.set_xticklabels([f"{tick / 1e3:g}" for tick in ticks])
ax.set_xlim(340e3, 800e3)  # pinning ticks may widen the view; restore it

fig.savefig(PLOTS_DIR + "top_mass_resolution_comparison.pdf")

In [None]:
# Binned ttbar-mass resolution against the truth ttbar mass.
fig, ax = evaluator.plot_binned_ttbar_mass_resolution(
    feature_data_type="non_training",
    feature_name="truth_ttbar_mass",
    fancy_feature_label=r"$m(t\overline{t})$ [GeV]",
    xlims=(340e3, 800e3),
)
# The axis is drawn in the raw feature units, assumed to be MeV (340e3-800e3
# range) -- confirm. Tick values are therefore divided by 1e3 to agree with the
# "[GeV]" label. No "k" suffix is appended: "340k" beside "[GeV]" would
# overstate the scale by a factor of 1000. ":g" keeps fractional tick values
# instead of truncating them.
ticks = ax.get_xticks()
ax.set_xticks(ticks)  # pin positions before attaching fixed labels
ax.set_xticklabels([f"{tick / 1e3:g}" for tick in ticks])
ax.set_xlim(340e3, 800e3)  # pinning ticks may widen the view; restore it

fig.savefig(PLOTS_DIR + "ttbar_mass_resolution_comparison.pdf")

In [None]:
# Binned complementarity of the methods against the truth ttbar mass.
fig, ax = evaluator.plot_binned_complementarity(
    feature_data_type="non_training",
    feature_name="truth_ttbar_mass",
    fancy_feature_label=r"$m(t\overline{t})$ [GeV]",
    xlims=(340e3, 800e3),
)
# The axis is drawn in the raw feature units, assumed to be MeV (340e3-800e3
# range) -- confirm. Tick values are therefore divided by 1e3 to agree with the
# "[GeV]" label. No "k" suffix is appended: "340k" beside "[GeV]" would
# overstate the scale by a factor of 1000. ":g" keeps fractional tick values
# instead of truncating them.
ticks = ax.get_xticks()
ax.set_xticks(ticks)  # pin positions before attaching fixed labels
ax.set_xticklabels([f"{tick / 1e3:g}" for tick in ticks])
ax.set_xlim(340e3, 800e3)  # pinning ticks may widen the view; restore it

fig.savefig(PLOTS_DIR + "binned_complementarity_ttbar_mass.pdf")

In [None]:
# Binned assignment accuracy against the truth ttbar mass.
fig, ax = evaluator.plot_binned_accuracy(
    feature_data_type="non_training",
    feature_name="truth_ttbar_mass",
    fancy_feature_label=r"$m(t\overline{t})$ [GeV]",
    xlims=(340e3, 800e3),
)
# The axis is drawn in the raw feature units, assumed to be MeV (340e3-800e3
# range) -- confirm. Tick values are therefore divided by 1e3 to agree with the
# "[GeV]" label. No "k" suffix is appended: "340k" beside "[GeV]" would
# overstate the scale by a factor of 1000. ":g" keeps fractional tick values
# instead of truncating them.
ticks = ax.get_xticks()
ax.set_xticks(ticks)  # pin positions before attaching fixed labels
ax.set_xticklabels([f"{tick / 1e3:g}" for tick in ticks])
ax.set_xlim(340e3, 800e3)  # pinning ticks may widen the view; restore it

# File name follows the pattern of the sibling plots
# ("binned_complementarity_ttbar_mass.pdf", "binned_accuracy_ttbar_pT.pdf");
# the stray "Hmm" in the previous name was a typo.
fig.savefig(PLOTS_DIR + "binned_accuracy_ttbar_mass.pdf")

In [None]:
# Binned assignment accuracy against the truth ttbar transverse momentum,
# using 10 bins over the plotted range.
fig, ax = evaluator.plot_binned_accuracy(
    feature_data_type="non_training",
    feature_name="truth_ttbar_pt",
    fancy_feature_label=r"$p_T(t\overline{t})$ [GeV]",
    xlims=(0, 400e3),
    bins=10,
)
# The axis is drawn in the raw feature units, assumed to be MeV (0-400e3
# range) -- confirm. Tick values are therefore divided by 1e3 to agree with the
# "[GeV]" label. No "k" suffix is appended: "400k" beside "[GeV]" would
# overstate the scale by a factor of 1000. ":g" keeps fractional tick values
# instead of truncating them.
ticks = ax.get_xticks()
ax.set_xticks(ticks)  # pin positions before attaching fixed labels
ax.set_xticklabels([f"{tick / 1e3:g}" for tick in ticks])
ax.set_xlim(0, 400e3)  # pinning ticks may widen the view; restore it
fig.savefig(PLOTS_DIR + "binned_accuracy_ttbar_pT.pdf")

In [None]:
# Accuracy per initial state: three unit-wide bins over [0, 3) of the
# gluon-count feature.
fig, ax = evaluator.plot_binned_accuracy(
    feature_name="truth_initial_parton_num_gluons",
    feature_data_type="non_training",
    fancy_feature_label=r"Initial State",
    bins=3,
    xlims=(0, 3),
)
# Put one category label at the centre of each bin. The 0/1/2 -> qq/qg/gg
# mapping presumably follows the number of initial-state gluons -- confirm.
ax.set_xticks([bin_start + 0.5 for bin_start in range(3)])
ax.set_xticklabels(
    [
        r"$qq\to tt$",
        r"$qg\to tt$",
        r"$gg\to tt$",
    ]
)
fig.savefig(f"{PLOTS_DIR}binned_accuracy_initial_state.pdf")

In [None]:
# Accuracy per jet multiplicity: one unit-wide bin for each count from 2 up to
# and including MAX_JETS, i.e. MAX_JETS - 1 bins over [2, MAX_JETS + 1).
fig, ax = evaluator.plot_binned_accuracy(
    feature_name="N_jets",
    feature_data_type="non_training",
    fancy_feature_label=r"$\# \text{jets}$",
    bins=MAX_JETS - 1,
    xlims=(2, MAX_JETS + 1),
)
# Centre each integer label inside its bin.
ax.set_xticks([n_jets + 0.5 for n_jets in range(2, MAX_JETS + 1)])
ax.set_xticklabels(list(range(2, MAX_JETS + 1)))
fig.savefig(f"{PLOTS_DIR}binned_accuracy_N_jets.pdf")

In [None]:
# Confusion matrices of the registered methods, stored as a PDF in PLOTS_DIR.
fig, ax = evaluator.plot_confusion_matrices()
fig.savefig(f"{PLOTS_DIR}confusion_matrices.pdf")

In [None]:
# Complementarity matrix on a slightly wider-than-tall canvas, stored as a PDF
# in PLOTS_DIR.
fig, ax = evaluator.plot_complementarity_matrix(
    figsize=(8.5, 8),
)
fig.savefig(f"{PLOTS_DIR}complementarity_matrix.pdf")