In [1]:
import sys

sys.path.append("..")
from core.DataLoader import (
    DataPreprocessor,
    get_load_config_from_yaml,
)
from importlib import reload
import core
import keras
import core.keras_models.RegressionTransformer as RegressionTransformer

# Output locations for this notebook; both are relative to the current working
# directory. The trailing slash is kept because later cells append file names
# to these strings directly.
PLOTS_DIR = "plots/regression_transformer/"
MODEL_DIR = "models/regression_transformer/"
# YAML file handed to get_load_config_from_yaml below.
CONFIG_PATH = "../config/workspace_config.yaml"

import os

# exist_ok=True makes the cell idempotent: re-running it (or running it while
# another process creates the same directory) is a no-op instead of an error,
# and it removes the check-then-create race of an explicit os.path.exists test.
os.makedirs(PLOTS_DIR, exist_ok=True)
os.makedirs(MODEL_DIR, exist_ok=True)


# Read the workspace configuration and load the "nominal" sample through a
# short-lived preprocessor instance.
load_config = get_load_config_from_yaml(CONFIG_PATH)
preprocessor = DataPreprocessor(load_config)

# load_from_npz returns the data configuration that the model wrapper is built
# from in the next cell.
data_config = preprocessor.load_from_npz(
    load_config.data_path["nominal"],
    event_numbers="even",
    max_events=4_000_000,
)
X_train, y_train = preprocessor.get_data()

# Drop the preprocessor reference once the arrays are extracted (free memory).
del preprocessor

2026-02-02 11:05:33.458047: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1770026733.479852  271833 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1770026733.487261  271833 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1770026733.506252  271833 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1770026733.506276  271833 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1770026733.506278  271833 computation_placer.cc:177] computation placer alr

In [2]:
# Re-execute the RegressionTransformer module so that edits made to it since it
# was first imported are picked up, then build the model wrapper from the data
# configuration returned by load_from_npz.
# NOTE(review): the reload must stay ahead of the instantiation; objects created
# before a reload keep using the old class definitions.
reload(RegressionTransformer)
Transformer = RegressionTransformer.FullRecoTransformer(data_config)

In [3]:
# Architecture hyperparameters for this run: width, depth and dropout.
Transformer.build_model(hidden_dim=64, num_layers=6, dropout_rate=0.1)


I0000 00:00:1770026865.726469  271833 gpu_device.cc:2019] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 12461 MB memory:  -> device: 0, name: Tesla P100-PCIE-16GB, pci bus id: 0000:65:00.0, compute capability: 6.0


In [4]:
# Normalization layers are adapted on the training inputs before compiling.
Transformer.adapt_normalization_layers(X_train)

# Losses, loss weights and metrics are all keyed by output name:
# "assignment" and "normalized_regression".
Transformer.compile_model(
    loss={
        "assignment": core.utils.AssignmentLoss(),
        "normalized_regression": core.utils.MagnitudeDirectionLoss(),
    },
    # The regression term is weighted five times as strongly as the assignment term.
    loss_weights={"assignment": 1.0, "normalized_regression": 5.0},
    optimizer=keras.optimizers.AdamW(learning_rate=1e-4, weight_decay=1e-4),
    metrics={
        "assignment": [core.utils.AssignmentAccuracy(name="assignment_accuracy")],
        "normalized_regression": [core.utils.RegressionDeviation()],
    },
    # add_physics_informed_loss=True,  (option left switched off for this run)
)

Transformer.model.summary()

I0000 00:00:1770026868.928920  274009 service.cc:152] XLA service 0x7fb3e0002700 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1770026868.928947  274009 service.cc:160]   StreamExecutor device (0): Tesla P100-PCIE-16GB, Compute Capability 6.0
2026-02-02 11:07:48.941668: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:269] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
I0000 00:00:1770026868.957129  274009 cuda_dnn.cc:529] Loaded cuDNN version 91500
I0000 00:00:1770026869.094890  274009 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


Adapted normalization layer:  lep_input_normalization
Adapted normalization layer:  jet_input_normalization
Adapted normalization layer:  met_input_normalization
Set regression denormalization layer with computed mean and std.


In [5]:
# Fit for 10 epochs, holding out 10% of the training data for validation.
# The learning rate is halved (never below 1e-6) whenever val_loss has not
# improved for 5 epochs.
Transformer.train_model(
    X_train=X_train,
    y_train=y_train,
    sample_weights=core.utils.compute_sample_weights(X_train, y_train),
    epochs=10,
    batch_size=1024,
    validation_split=0.1,
    callbacks=[
        keras.callbacks.ReduceLROnPlateau(
            monitor="val_loss",
            mode="min",
            patience=5,
            factor=0.5,
            min_lr=1e-6,
            verbose=1,
        ),
    ],
)

Epoch 1/10
[1m3512/3512[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m194s[0m 41ms/step - assignment_assignment_accuracy: 0.1621 - assignment_loss: 0.1634 - loss: 16.1786 - normalized_regression_deviation: 0.6684 - normalized_regression_loss: 3.2030 - val_assignment_assignment_accuracy: 0.3918 - val_assignment_loss: 0.1254 - val_loss: 11.8634 - val_normalized_regression_deviation: 0.5242 - val_normalized_regression_loss: 2.3473 - learning_rate: 1.0000e-04
Epoch 2/10
[1m3512/3512[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m112s[0m 32ms/step - assignment_assignment_accuracy: 0.3143 - assignment_loss: 0.1375 - loss: 12.8868 - normalized_regression_deviation: 0.6160 - normalized_regression_loss: 2.5499 - val_assignment_assignment_accuracy: 0.4764 - val_assignment_loss: 0.1112 - val_loss: 11.1965 - val_normalized_regression_deviation: 0.5278 - val_normalized_regression_loss: 2.2168 - learning_rate: 1.0000e-04
Epoch 3/10
[1m3512/3512[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11

<keras.src.callbacks.history.History at 0x7fb4e47b8d30>

In [6]:
# Write the trained model into MODEL_DIR (which already ends with a slash).
# NOTE(review): the training data was loaded with event_numbers="even" while
# the file is called "odd_model" - confirm this naming is intentional.
Transformer.save_model(f"{MODEL_DIR}odd_model.keras")

Model saved to models/regression_transformer/odd_model.keras


In [7]:
# Fetch the model layer registered under the name "regression".
# NOTE(review): presumably this is the layer that maps normalized regression
# targets back to their original scale - confirm against the model definition.
upscale_layer = Transformer.model.get_layer("regression")

In [8]:
# Apply the layer to the normalized regression targets and display the result.
# NOTE(review): this evaluates and displays the tensor for every training event;
# a small slice would presumably be enough for a visual sanity check.
upscale_layer(y_train["normalized_regression"])

<tf.Tensor: shape=(3994940, 2, 3), dtype=float32, numpy=
array([[[ -11224.536  ,    1938.9197 ,  157064.17   ],
        [  16521.02   ,    8698.058  ,  107414.55   ]],

       [[  54834.15   ,  -50104.402  , -146743.88   ],
        [ -55453.574  ,   51458.14   , -190315.77   ]],

       [[ -68376.96   ,   40673.684  ,    8499.06   ],
        [  11564.977  ,   31079.336  ,   59003.395  ]],

       ...,

       [[ -24958.994  ,   40550.17   ,  -46625.633  ],
        [ -47380.742  ,    4647.9795 , -175055.86   ]],

       [[  64607.34   ,   25096.54   ,  -79213.53   ],
        [    832.96747,   10537.842  ,   67501.21   ]],

       [[ -32088.006  ,  104554.64   , -752071.9    ],
        [  80692.63   ,  -19914.682  , -101689.86   ]]], dtype=float32)>

In [9]:
import numpy as np
import tensorflow as tf
# Mean absolute difference between the layer's output on the normalized targets
# and the stored truth neutrinos; a value near zero means the two agree.
np.mean(
    np.abs(
        upscale_layer(y_train["normalized_regression"]) - X_train["neutrino_truth"]
    )
)

np.float32(0.00036604964)

In [10]:
from core.components import reco_W_mass_deviation
# Collect the three inputs used by the W-mass checks in the following cells:
# the lepton inputs, the neutrinos reconstructed by the trained model, and the
# truth neutrinos.
# NOTE(review): everything here comes from X_train, so the comparison is made on
# training data rather than on a held-out sample - confirm that is intended.
leptons = X_train["lep_inputs"]
neutrino_pred = Transformer.reconstruct_neutrinos(X_train)
neutrino_true = X_train["neutrino_truth"]

In [11]:
# Average W-mass deviation when the model's reconstructed neutrinos are paired
# with the leptons; both inputs are converted to float32 tensors first.
reco_W_mass_deviation(
    tf.convert_to_tensor(neutrino_pred, dtype=tf.float32),
    tf.convert_to_tensor(leptons, dtype=tf.float32),
).numpy().mean()

np.float32(0.17343946)

In [12]:
# Same quantity computed with the truth neutrinos, as a reference value for the
# prediction-based number.
reco_W_mass_deviation(
    tf.convert_to_tensor(neutrino_true, dtype=tf.float32),
    tf.convert_to_tensor(leptons, dtype=tf.float32),
).numpy().mean()

np.float32(0.0065785283)