In [1]:
import os
import sys
from importlib import reload

# Make the repository root importable so the project-local `core` package resolves.
sys.path.append("..")

# NOTE: the relative order of the `core` / `keras` imports below is kept exactly
# as it was, in case importing `core` configures the backend before keras loads.
from core.DataLoader import (
    DataPreprocessor,
    get_load_config_from_yaml,
)
import core
import keras
import core.keras_models.RegressionTransformer as RegressionTransformer
from core.components import reco_W_mass_deviation

import numpy as np
import tensorflow as tf

# Output locations, relative to the notebook's working directory.
# Plain string literals: the previous f-strings had no placeholders.
PLOTS_DIR = "plots/regression_transformer/"
MODEL_DIR = "models/regression_transformer/"
CONFIG_PATH = "../config/workspace_config.yaml"

# exist_ok=True makes directory creation idempotent on re-runs and removes the
# check-then-create race of `if not os.path.exists(...): os.makedirs(...)`.
# It also fails loudly if one of the paths already exists as a regular file.
os.makedirs(PLOTS_DIR, exist_ok=True)
os.makedirs(MODEL_DIR, exist_ok=True)


# Build the loader configuration from the workspace YAML file.
load_config = get_load_config_from_yaml(CONFIG_PATH)

# Temporary preprocessor: it is only needed to read the sample and hand back
# the arrays, so it is dropped again at the end of this cell.
data_processor = DataPreprocessor(load_config)

# Read the "nominal" sample, capped at 4M events and restricted to
# event_numbers="even".
data_config = data_processor.load_from_npz(
    load_config.data_path["nominal"],
    max_events=4_000_000,
    event_numbers="even",
)
X_train, y_train = data_processor.get_data()

del data_processor  # Free memory

2026-02-03 14:00:09.571703: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1770123609.593870 2013908 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1770123609.601206 2013908 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1770123609.618954 2013908 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1770123609.618973 2013908 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1770123609.618975 2013908 computation_placer.cc:177] computation placer alr

In [2]:
# Re-import the model module so edits made on disk are picked up without
# restarting the kernel, then create the reconstructor from the reloaded module.
reload(RegressionTransformer)
Transformer = RegressionTransformer.FeatureConcatTransformerReconstructor(
    data_config,
    name="Transformer",
)

In [3]:
# Architecture hyperparameters for the network built by the wrapper.
Transformer.build_model(hidden_dim=64, num_layers=6, dropout_rate=0.1)


I0000 00:00:1770123727.621292 2013908 gpu_device.cc:2019] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 15511 MB memory:  -> device: 0, name: Tesla P100-PCIE-16GB, pci bus id: 0000:65:00.0, compute capability: 6.0


In [4]:
# Adapt the normalization layers to the training inputs before compiling.
Transformer.adapt_normalization_layers(X_train)

# Losses, metrics and weights are keyed by "assignment" and
# "normalized_regression" (presumably the model's output names; confirm
# against the model definition).
output_losses = {
    "assignment": core.utils.AssignmentLoss(),
    "normalized_regression": core.utils.MagnitudeDirectionLoss(),
}
adamw_optimizer = keras.optimizers.AdamW(
    learning_rate=1e-4,
    weight_decay=1e-4,
    clipnorm=1.0,
)
output_metrics = {
    "assignment": [core.utils.AssignmentAccuracy(name="assignment_accuracy")],
    "normalized_regression": [core.utils.RegressionDeviation()],
}
output_loss_weights = {"assignment": 1.0, "normalized_regression": 1.0}

# The optional `add_physics_informed_loss=True` argument is intentionally left
# disabled here.
Transformer.compile_model(
    loss=output_losses,
    optimizer=adamw_optimizer,
    metrics=output_metrics,
    loss_weights=output_loss_weights,
)

I0000 00:00:1770123730.927787 2016440 service.cc:152] XLA service 0x7fb57c0023c0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1770123730.927814 2016440 service.cc:160]   StreamExecutor device (0): Tesla P100-PCIE-16GB, Compute Capability 6.0
2026-02-03 14:02:10.950039: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:269] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
I0000 00:00:1770123731.002177 2016440 cuda_dnn.cc:529] Loaded cuDNN version 91500
I0000 00:00:1770123731.285031 2016440 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


Adapted normalization layer:  hlf_input_normalization
Adapted normalization layer:  met_input_normalization
Adapted normalization layer:  lep_input_normalization
Adapted normalization layer:  jet_input_normalization
Set regression denormalization layer with computed mean and std.


In [None]:
# Per-event weights computed from the training inputs and targets.
event_weights = core.utils.compute_sample_weights(X_train, y_train)

# Halve the learning rate when the validation loss stops improving.
# NOTE(review): with patience=5 and epochs=10 this can trigger at most once
# during the run — confirm that is intended.
plateau_lr_callback = keras.callbacks.ReduceLROnPlateau(
    monitor="val_loss",
    mode="min",
    factor=0.5,
    patience=5,
    min_lr=1e-6,
    verbose=1,
)

Transformer.train_model(
    epochs=10,
    X_train=X_train,
    sample_weights=event_weights,
    batch_size=1024,
    callbacks=[plateau_lr_callback],
    validation_split=0.1,
)

Epoch 1/10


In [None]:
# MODEL_DIR already ends with "/", so the file name is appended directly.
# NOTE(review): the training sample was loaded with event_numbers="even" while
# the file is called "odd_model" — presumably the model is meant to be applied
# to odd events; confirm the naming convention.
Transformer.save_model(f"{MODEL_DIR}odd_model.keras")

In [None]:
# Look up the layer called "regression" on the underlying Keras model so it can
# be applied on its own in the following cells.
upscale_layer = Transformer.model.get_layer(name="regression")

In [None]:
# Apply the "regression" layer to the normalized regression targets and display
# the result (the last expression is the cell output).
normalized_targets = y_train["normalized_regression"]
upscale_layer(normalized_targets)

In [None]:
# Round-trip check: push the normalized regression targets through the
# "regression" layer and report the mean absolute difference to
# X_train["regression"]. Presumably this layer undoes the target normalization,
# in which case the value should be close to zero — confirm against the model.
#
# numpy / tensorflow are imported once in the notebook's first cell, so later
# cells that use `tf` no longer depend on this cell having been executed.
np.abs(
    upscale_layer(y_train["normalized_regression"]) - X_train["regression"]
).mean()

In [None]:
# Gather the lepton inputs and the three neutrino candidates that the following
# cells compare with `reco_W_mass_deviation` (imported in the notebook's first
# cell together with all other imports):
#   - neutrino_pred:     output of Transformer.reconstruct_neutrinos on X_train
#   - neutrino_true:     the "regression" entry of X_train
#   - neutrino_nu_flows: the "nu_flows_neutrino_truth" entry of X_train
leptons = X_train["lep_inputs"]
neutrino_pred = Transformer.reconstruct_neutrinos(X_train)
neutrino_true = X_train["regression"]
neutrino_nu_flows = X_train["nu_flows_neutrino_truth"]

In [None]:
# Sample mean of reco_W_mass_deviation for the transformer's predicted neutrinos.
# Inputs are converted to float32 tensors inline so no extra tensors stay alive
# as notebook globals.
reco_W_mass_deviation(
    tf.convert_to_tensor(neutrino_pred, dtype=tf.float32),
    tf.convert_to_tensor(leptons, dtype=tf.float32),
).numpy().mean()

In [None]:
# Sample mean of reco_W_mass_deviation for the X_train["regression"] neutrinos
# (stored in neutrino_true); serves as the reference for the prediction.
reco_W_mass_deviation(
    tf.convert_to_tensor(neutrino_true, dtype=tf.float32),
    tf.convert_to_tensor(leptons, dtype=tf.float32),
).numpy().mean()

In [None]:
# Sample mean of reco_W_mass_deviation for the X_train["nu_flows_neutrino_truth"]
# neutrinos (stored in neutrino_nu_flows), for comparison with the cells above.
reco_W_mass_deviation(
    tf.convert_to_tensor(neutrino_nu_flows, dtype=tf.float32),
    tf.convert_to_tensor(leptons, dtype=tf.float32),
).numpy().mean()