# Model Evaluation Notebook

This notebook evaluates the performance of the trained transformer model against baseline methods.

## Setup and Data Loading

In [10]:
import os
import sys
from importlib import reload

# Make the project root importable; this must run before any `core` import below.
sys.path.append("..")

import matplotlib.pyplot as plt
import numpy as np
import yaml

from core.DataLoader import DataPreprocessor, get_load_config_from_yaml
import core.assignment_models as Models
import core
import core.reconstruction.BaselineMethods as BaselineMethods
import core.evaluation as Evaluation
from core.reconstruction import GroundTruthReconstructor

# Configuration: everything a reader might want to tune lives here.
PLOTS_DIR = "plots/evaluate_neutrino_impact_no_ground_truth/"
# Alternative checkpoint directory: "../models/FeatureConcatTransformer_HLF_d256_l6_h8"
MODEL_DIR = "models/mixed_model_HLF"
CONFIG_PATH = "../config/workspace_config_global_features.yaml"
MAX_EVENTS = 1_000_000  # cap passed to load_from_npz as max_events

# Create directories
os.makedirs(PLOTS_DIR, exist_ok=True)
# NOTE(review): creating MODEL_DIR here means a mistyped model path is not
# reported until load_model is called in a later cell -- confirm this is wanted.
os.makedirs(MODEL_DIR, exist_ok=True)

# Set plotting style
plt.rcParams.update({"font.size": 14})

# Load data
load_config = get_load_config_from_yaml(CONFIG_PATH)
preprocessor = DataPreprocessor(load_config)

# The YAML file is read a second time to look up the path of the nominal sample.
with open(CONFIG_PATH, "r") as file:
    data_configs = yaml.safe_load(file)

data_config = preprocessor.load_from_npz(
    data_configs["data_path"]["nominal"],
    max_events=MAX_EVENTS,
)
X_val, y_val = preprocessor.get_data()

# Drop the preprocessor; the cells shown below only use data_config, X_val and y_val.
del preprocessor

## Load Model and Set Up Evaluator

In [11]:
# Re-execute the project modules so edits on disk are picked up by this kernel.
for module in (Models, core):
    reload(module)

# Two FeatureConcatTransformer instances built from the same data_config and
# name; only the use_nu_flows flag differs between them.
TransformerMatcher_nu_flows = Models.FeatureConcatTransformer(
    data_config,
    name=r"ToponiumInfusedTransformer",
    use_nu_flows=True,
)
TransformerMatcher = Models.FeatureConcatTransformer(
    data_config,
    name=r"ToponiumInfusedTransformer",
    use_nu_flows=False,
)

FeatureConcatTransformer is designed for classification tasks; regression targets will be ignored.
FeatureConcatTransformer is designed for classification tasks; regression targets will be ignored.


In [12]:
# Both matchers are restored from the same saved file under MODEL_DIR.
model_path = f"{MODEL_DIR}/model.keras"
for matcher in (TransformerMatcher_nu_flows, TransformerMatcher):
    matcher.load_model(model_path)

Model loaded from models/mixed_model_HLF/model.keras
Training history loaded from models/mixed_model_HLF/model_history.npz
Model loaded from models/mixed_model_HLF/model.keras
Training history loaded from models/mixed_model_HLF/model_history.npz


In [13]:
# BaselineMethods, Evaluation and GroundTruthReconstructor are imported in the
# notebook's top import cell; here the two modules are only re-executed so that
# edits on disk take effect without restarting the kernel.
reload(Evaluation)
reload(BaselineMethods)

# Ground-truth reconstructors, one per use_nu_flows setting. Neither is passed
# to the evaluator below; they are kept so they can be added to the list.
ground_truth_assigner_nu_flows = GroundTruthReconstructor(
    data_config, use_nu_flows=True
)

# Chi-square baselines: both use nu flows for the assignment step and differ
# only in use_nu_flows (presumably the neutrino source used for the
# reconstruction itself -- TODO confirm against ChiSquareAssigner).
chi_square_true_nu = BaselineMethods.ChiSquareAssigner(
    data_config, use_nu_flows=False, use_nu_flows_for_assignment=True
)

chi_square = BaselineMethods.ChiSquareAssigner(
    data_config, use_nu_flows=True, use_nu_flows_for_assignment=True
)

ground_truth_assigner = GroundTruthReconstructor(
    data_config, use_nu_flows=False
)

# Order of the reconstructors handed to the evaluator: the two use_nu_flows=True
# methods first, then the two use_nu_flows=False methods.
# ground_truth_assigner is left out of this comparison (the plots go to the
# "no_ground_truth" directory); append it here to include it.
reconstructors = [
    chi_square,
    TransformerMatcher_nu_flows,
    chi_square_true_nu,
    TransformerMatcher,
]

evaluator = Evaluation.ReconstructionEvaluator(reconstructors, X_val, y_val)

2025-12-03 00:05:51.419533: W tensorflow/compiler/tf2xla/kernels/assert_op.cc:39] Ignoring Assert operator FeatureConcatTransformerModel_1/met_input_transform_1/assert_equal_1/Assert/Assert
2025-12-03 00:06:17.146884: W tensorflow/compiler/tf2xla/kernels/assert_op.cc:39] Ignoring Assert operator FeatureConcatTransformerModel_1/met_input_transform_1/assert_equal_1/Assert/Assert
2025-12-03 00:06:39.717414: W tensorflow/compiler/tf2xla/kernels/assert_op.cc:39] Ignoring Assert operator FeatureConcatTransformerModel_1/met_input_transform_1/assert_equal_1/Assert/Assert
2025-12-03 00:07:05.435237: W tensorflow/compiler/tf2xla/kernels/assert_op.cc:39] Ignoring Assert operator FeatureConcatTransformerModel_1/met_input_transform_1/assert_equal_1/Assert/Assert


## Distribution Plots

In [14]:
# Distribution of the ttbar mass deviation, drawn with a logarithmic y-axis.
fig, ax = evaluator.plot_ttbar_mass_deviation_distribution(bins=20, xlims=(-0.5, 3))
ax.set_yscale("log")

# PLOTS_DIR already ends with a separator, so joining with "/" by hand produced
# a doubled slash; os.path.join yields a clean path either way.
fig.savefig(
    os.path.join(PLOTS_DIR, "ttbar_mass_deviation_distribution.pdf"),
    bbox_inches="tight",
)

<Figure size 1000x1000 with 1 Axes>

## Resolution Plots

In [15]:
# Binned c_hel resolution versus the truth ttbar mass.
# The x-range is defined once and reused for the plot call and the final xlim.
# Axis values are divided by 1e3 for the [GeV] tick labels (presumably the
# stored feature is in MeV -- TODO confirm).
mass_range = (340e3, 800e3)

fig, ax = evaluator.plot_binned_c_hel_resolution(
    feature_data_type="non_training",
    feature_name="truth_ttbar_mass",
    fancy_feature_label=r"$m(t\overline{t})$ [GeV]",
    n_bootstrap=10,
    xlims=mass_range,
)

# Pin the tick positions before relabelling so labels and ticks stay aligned.
ticks = ax.get_xticks()
ax.set_xticks(ticks)
# Round instead of truncating: int() would turn a tick of 399999.99 into "399".
ax.set_xticklabels([f"{tick / 1e3:.0f}" for tick in ticks])
ax.set_xlim(*mass_range)
ax.legend(loc="upper right")

# os.path.join does not depend on PLOTS_DIR carrying a trailing separator.
fig.savefig(os.path.join(PLOTS_DIR, "c_hel_resolution_comparison.pdf"))


Computing binned $\cos(\theta_{hel})$ Resolution for truth_ttbar_mass...


<Figure size 1000x600 with 2 Axes>

In [16]:
# Binned c_han resolution versus the truth ttbar mass.
# The x-range is defined once and reused for the plot call and the final xlim.
# Axis values are divided by 1e3 for the [GeV] tick labels (presumably the
# stored feature is in MeV -- TODO confirm).
mass_range = (340e3, 800e3)

fig, ax = evaluator.plot_binned_c_han_resolution(
    feature_data_type="non_training",
    feature_name="truth_ttbar_mass",
    fancy_feature_label=r"$m(t\overline{t})$ [GeV]",
    n_bootstrap=10,
    xlims=mass_range,
)

# Pin the tick positions before relabelling so labels and ticks stay aligned.
ticks = ax.get_xticks()
ax.set_xticks(ticks)
# Round instead of truncating: int() would turn a tick of 399999.99 into "399".
ax.set_xticklabels([f"{tick / 1e3:.0f}" for tick in ticks])
ax.set_xlim(*mass_range)
ax.legend(loc="upper right")

# os.path.join does not depend on PLOTS_DIR carrying a trailing separator.
fig.savefig(os.path.join(PLOTS_DIR, "c_han_resolution_comparison.pdf"))


Computing binned $\cos(\theta_{han})$ Resolution for truth_ttbar_mass...


<Figure size 1000x600 with 2 Axes>

In [17]:
# Binned ttbar mass resolution versus the truth ttbar mass.
# The x-range is defined once and reused for the plot call and the final xlim.
# Axis values are divided by 1e3 for the [GeV] tick labels (presumably the
# stored feature is in MeV -- TODO confirm).
mass_range = (340e3, 800e3)

fig, ax = evaluator.plot_binned_ttbar_mass_resolution(
    feature_data_type="non_training",
    feature_name="truth_ttbar_mass",
    fancy_feature_label=r"$m(t\overline{t})$ [GeV]",
    n_bootstrap=10,
    xlims=mass_range,
)

# Pin the tick positions before relabelling so labels and ticks stay aligned.
ticks = ax.get_xticks()
ax.set_xticks(ticks)
# Round instead of truncating: int() would turn a tick of 399999.99 into "399".
ax.set_xticklabels([f"{tick / 1e3:.0f}" for tick in ticks])
ax.set_xlim(*mass_range)

# os.path.join does not depend on PLOTS_DIR carrying a trailing separator.
fig.savefig(os.path.join(PLOTS_DIR, "ttbar_mass_resolution_comparison.pdf"))


Computing binned Relative $m(t\overline{t})$ Resolution for truth_ttbar_mass...


<Figure size 1000x600 with 2 Axes>

In [None]:
# Binned top mass resolution versus the truth ttbar mass.
# The x-range is defined once and reused for the plot call and the final xlim.
# Axis values are divided by 1e3 for the [GeV] tick labels (presumably the
# stored feature is in MeV -- TODO confirm).
mass_range = (340e3, 800e3)

fig, ax = evaluator.plot_binned_top_mass_resolution(
    feature_data_type="non_training",
    feature_name="truth_ttbar_mass",
    fancy_feature_label=r"$m(t\overline{t})$ [GeV]",
    xlims=mass_range,
    n_bootstrap=10,
)

# Pin the tick positions before relabelling so labels and ticks stay aligned.
ticks = ax.get_xticks()
ax.set_xticks(ticks)
# Round instead of truncating: int() would turn a tick of 399999.99 into "399".
ax.set_xticklabels([f"{tick / 1e3:.0f}" for tick in ticks])
ax.set_xlim(*mass_range)

# os.path.join does not depend on PLOTS_DIR carrying a trailing separator.
fig.savefig(os.path.join(PLOTS_DIR, "top_mass_resolution_comparison.pdf"))


Computing binned Relative $m(t)$ Resolution for truth_ttbar_mass...
