In [None]:
import sys

sys.path.append("..")  # Ensure the parent directory is in the path

import core.assignment_models as Models
from core.DataLoader import DataPreprocessor, DataConfig, LoadConfig
import core
import numpy as np
from importlib import reload
import keras
import matplotlib.pyplot as plt
import yaml

import os

# Passed as `max_jets` to LoadConfig further down in this cell.
MAX_JETS = 6

# Output directory for every figure this notebook saves. The trailing slash is
# required: file names are appended to it with plain string concatenation.
PLOTS_DIR = "plots/transformer/"

# exist_ok=True creates the directory (including parents) when it is missing and
# is a no-op when it already exists, so re-running the cell is safe and there is
# no window between "check" and "create".
os.makedirs(PLOTS_DIR, exist_ok=True)

# Branch names are runtime keys into the input data, so the "neutriono" spelling
# is kept exactly as written.
# NOTE(review): presumably this spelling matches the branches in the input files — confirm before "fixing" it.
neutrino_momenta_branches = [
    "nu_flows_neutriono_p_x",
    "nu_flows_neutriono_p_y",
    "nu_flows_neutriono_p_z",
    "nu_flows_anti_neutriono_p_x",
    "nu_flows_anti_neutriono_p_y",
    "nu_flows_anti_neutriono_p_z",
]
truth_neutrino_momenta_branches = [
    "truth_tbar_neutriono_px",
    "truth_tbar_neutriono_py",
    "truth_tbar_neutriono_pz",
    "truth_top_neutriono_px",
    "truth_top_neutriono_py",
    "truth_top_neutriono_pz",
]

# Feature / label selection handed to DataPreprocessor below.
load_config = LoadConfig(
    jet_features=[
        "ordered_jet_pt",
        "ordered_jet_eta",
        "ordered_jet_phi",
        "ordered_jet_e",
        "ordered_jet_b_tag",
    ],
    lepton_features=[
        "lep_pt",
        "lep_eta",
        "lep_phi",
        "lep_e",
    ],
    jet_truth_label="ordered_event_jet_truth_idx",
    lepton_truth_label="event_lepton_truth_idx",
    met_features=[
        "met_met_NOSYS",
        "met_phi_NOSYS",
    ],
    max_leptons=2,
    max_jets=MAX_JETS,
    # Extra columns requested alongside the training inputs; the plotting cells
    # later select some of them via feature_data_type='non_training'.
    non_training_features=[
        "truth_ttbar_mass",
        "truth_ttbar_pt",
        "N_jets",
        "truth_initial_parton_num_gluons",
        "truth_top_mass",
        "truth_tbar_mass",
        *neutrino_momenta_branches,
        *truth_neutrino_momenta_branches,
    ],
    event_weight="weight_mc_NOSYS",
)

DataProcessor = DataPreprocessor(load_config)

# Dataset locations live in a YAML file next to the notebook; safe_load parses
# it without constructing arbitrary Python objects.
data_config_path = "data_config.yaml"
with open(data_config_path) as file:
    data_configs = yaml.safe_load(file)

# Load at most one million events from the "nominal" dataset entry.
DataProcessor.load_data(
    data_configs["nominal"]["data_path"],
    "reco",
    max_events=1_000_000,
)

# test_size=0.1 with a fixed random_state.
# NOTE(review): presumably a reproducible 90/10 train/validation split — confirm in split_data.
X_train, y_train, X_val, y_val = DataProcessor.split_data(test_size=0.1, random_state=42)

data_config = load_config.to_data_config()

In [None]:
# Re-import the local packages so edits made on disk are picked up without a
# kernel restart.
reload(Models)
reload(core)

TransformerMatcher = Models.FeatureConcatTransformer(data_config, name="Transformer")
TransformerMatcher.build_model(
    num_heads=8,
    hidden_dim=64,
    num_layers=6,
    dropout_rate=0.1,
    input_as_four_vector=True,
)

# Normalization layers are adapted on the training split before compiling.
TransformerMatcher.adapt_normalization_layers(X_train)

TransformerMatcher.compile_model(
    loss=core.utils.AssignmentLoss(lambda_excl=0),
    optimizer=keras.optimizers.AdamW(learning_rate=1e-4, weight_decay=1e-4),
    metrics=[core.utils.AssignmentAccuracy()],
)

# NOTE(review): this expects "Transformer_Assignment.keras" to exist already; the
# only save_model call visible in this notebook is in a later cell — confirm
# that a fresh checkout can run this cell.
TransformerMatcher.load_model("Transformer_Assignment.keras")
TransformerMatcher.model.summary()

In [None]:
# NOTE(review): epochs=0 presumably makes this a no-op on the loaded weights —
# confirm against train_model. batch_size=1028 is kept as written (1024 may have
# been intended). A single EarlyStopping instance, not a list, is passed as
# `callbacks`; verify train_model accepts that.
TransformerMatcher.train_model(
    epochs=0,
    X_train=X_train,
    y_train=y_train,
    sample_weights=core.utils.compute_sample_weights(X_train, y_train),
    batch_size=1028,
    callbacks=keras.callbacks.EarlyStopping(
        monitor="val_loss",
        patience=50,
        restore_best_weights=True,
        mode="min",
    ),
)

In [None]:
# Write the model to the same file name that the model-setup cell loads from.
TransformerMatcher.save_model("Transformer_Assignment.keras")

In [None]:
import core.assignment_models.BaselineAssignmentMethods as BaselineMethods
import core.reconstruction.Evaluation as Evaluation

# Reload so that on-disk edits to these modules take effect in a live kernel.
reload(Evaluation)
reload(BaselineMethods)

# Baseline assigners that the transformer is compared against.
delta_r_assigner = BaselineMethods.DeltaRAssigner(
    data_config,
    name=r"$\Delta R(\ell,j)$-Assignment",
)
invariant_mass_assigner = BaselineMethods.LeptonJetMassAssigner(
    data_config,
    name=r"$m(\ell, j)$-Assignment",
)
combinatorics_assigner = BaselineMethods.MassCombinatoricsAssigner(
    data_config,
    neutrino_momenta_branches=neutrino_momenta_branches,
    top_mass=173.5e3,
    name=r"Mass-Combinatorics Assignment",
)
ground_truth_assigner = Evaluation.GroundTruthReconstructor(data_config, name="Ground Truth")

# NOTE(review): the evaluator receives the *truth* neutrino branches under the
# `neutrino_momenta_branches` keyword, whereas the combinatorics assigner above
# receives the nu_flows ones — confirm this asymmetry is intended.
evaluator = Evaluation.ReconstructionEvaluator(
    [
        delta_r_assigner,
        invariant_mass_assigner,
        combinatorics_assigner,
        TransformerMatcher,
        ground_truth_assigner,
    ],
    X_val,
    y_val,
    neutrino_momenta_branches=truth_neutrino_momenta_branches,
)

In [None]:
# Top-mass resolution binned in truth m(ttbar).
# NOTE(review): the axis label says GeV while xlims are given as 340e3-800e3;
# confirm the plotting helper rescales units.
fig, ax = evaluator.plot_binned_top_mass_resolution(
    feature_data_type="non_training",
    feature_name="truth_ttbar_mass",
    fancy_feautre_label=r"$m(t\overline{t})$ [GeV]",
    xlims=(340e3, 800e3),
)
fig.savefig(PLOTS_DIR + "top_mass_resolution_comparison.png")

In [None]:
# Accuracy overview produced by the evaluator; saved as a PDF under PLOTS_DIR.
fig, ax = evaluator.plot_all_accuracies()
fig.savefig(PLOTS_DIR + "all_accuracies.pdf")

In [None]:
# Complementarity binned in truth m(ttbar), over the same x-range as the
# top-mass resolution plot.
fig, ax = evaluator.plot_binned_complementarity(
    feature_data_type="non_training",
    feature_name="truth_ttbar_mass",
    fancy_feautre_label=r"$m(t\overline{t})$ [GeV]",
    xlims=(340e3, 800e3),
)
fig.savefig(PLOTS_DIR + "binned_complementarity_ttbar_mass.pdf")

In [None]:
# Assignment accuracy binned in truth m(ttbar).
fig, ax = evaluator.plot_binned_accuracy(
    feature_data_type="non_training",
    feature_name="truth_ttbar_mass",
    fancy_feautre_label=r"$m(t\overline{t})$ [GeV]",
    xlims=(340e3, 800e3),
)
fig.savefig(PLOTS_DIR + "binned_accuracy_ttbar_mass.pdf")

In [None]:
# Assignment accuracy binned in truth pT(ttbar).
fig, ax = evaluator.plot_binned_accuracy(
    feature_data_type="non_training",
    feature_name="truth_ttbar_pt",
    fancy_feautre_label=r"$p_T(t\overline{t})$ [GeV]",
    xlims=(0, 400e3),
)
fig.savefig(PLOTS_DIR + "binned_accuracy_ttbar_pT.pdf")

In [None]:
# Assignment accuracy binned in truth_initial_parton_num_gluons: three bins
# over (0, 3).
fig, ax = evaluator.plot_binned_accuracy(
    feature_data_type="non_training",
    feature_name="truth_initial_parton_num_gluons",
    fancy_feautre_label=r"Initial State",
    xlims=(0, 3),
    bins=3,
)
# Ticks sit at the centre of each unit-wide bin and are relabelled by process.
ax.set_xticks([0.5, 1.5, 2.5])
ax.set_xticklabels([r"$qq\to tt$", r"$qg\to tt$", r"$gg\to tt$"])
fig.savefig(PLOTS_DIR + "binned_accuracy_initial_state.pdf")

In [None]:
# Assignment accuracy binned in N_jets: four bins over (2, 6).
# NOTE(review): the jet cap configured at the top of the notebook is 6, while
# the last tick label here is 5 — confirm how the helper bins events with
# N_jets == 6.
fig, ax = evaluator.plot_binned_accuracy(
    feature_data_type="non_training",
    feature_name="N_jets",
    fancy_feautre_label=r"$\# \text{jets}$",
    xlims=(2, 6),
    bins=4,
)
# Ticks sit at the centre of each unit-wide bin and are labelled with the bin's
# lower edge.
ax.set_xticks([2.5, 3.5, 4.5, 5.5])
ax.set_xticklabels([2, 3, 4, 5])
fig.savefig(PLOTS_DIR + "binned_accuracy_N_jets.pdf")

In [None]:
# Confusion matrices produced by the evaluator; saved as a PDF under PLOTS_DIR.
fig, ax = evaluator.plot_confusion_matrices()
fig.savefig(PLOTS_DIR + "confusion_matrices.pdf")

In [None]:
# Complementarity matrix produced by the evaluator, drawn at an explicit
# 8.5 x 8 figsize and saved as a PDF under PLOTS_DIR.
fig, ax = evaluator.plot_complementarity_matrix(
    figsize=(8.5, 8),
)
fig.savefig(PLOTS_DIR + "complementarity_matrix.pdf")