In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input tree and print the full path of every file found.
for root_dir, _subdirs, file_names in os.walk('/kaggle/input'):
    for file_name in file_names:
        print(os.path.join(root_dir, file_name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import os, json, joblib, numpy as np, pandas as pd
from pathlib import Path
import warnings 
warnings.filterwarnings("ignore")


from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.utils.class_weight import compute_class_weight

from tensorflow.keras.utils import Sequence, to_categorical, pad_sequences
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.layers import (
    Input, Conv1D, BatchNormalization, Activation, add, MaxPooling1D, Dropout,
    Bidirectional, LSTM, GlobalAveragePooling1D, Dense, Multiply, Reshape,
    Lambda, Concatenate, GRU, GaussianNoise
)
from tensorflow.keras.regularizers import l2
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras import backend as K
import tensorflow as tf
import polars as pl
from sklearn.model_selection import StratifiedGroupKFold
from scipy.spatial.transform import Rotation as R

In [None]:
import random
def seed_everything(seed):
    """Seed the random number generators used by this notebook.

    Sets the ``PYTHONHASHSEED``, ``TF_CUDNN_DETERMINISTIC`` and
    ``TF_DETERMINISTIC_OPS`` environment variables and seeds Python's
    ``random``, NumPy, TensorFlow and TensorFlow's NumPy API.

    Args:
        seed: Integer seed applied to every generator.
    """
    # Environment switches: hash seed plus the two TensorFlow determinism flags.
    for env_key, env_val in (
        ('PYTHONHASHSEED', str(seed)),
        ('TF_CUDNN_DETERMINISTIC', '1'),
        ('TF_DETERMINISTIC_OPS', '1'),
    ):
        os.environ[env_key] = env_val

    # Seed each library-level generator with the same value.
    for seeder in (
        random.seed,
        np.random.seed,
        tf.random.set_seed,
        tf.experimental.numpy.random.seed,
    ):
        seeder(seed)


seed_everything(seed=42)

In [None]:
# (Competition metric will only be imported when TRAINing)
# --- Run configuration ---
# NOTE(review): no cell visible in this notebook branches on TRAIN; the
# training cells below run unconditionally.
TRAIN = True                  # ← set to True when you want to train
# Directory holding train.csv and train_demographics.csv.
RAW_DIR = Path("/kaggle/input/cmi-detect-behavior-with-sensor-data")
PRETRAINED_DIR = Path("/kaggle/input/quit-diff2")  # used when TRAIN=False
EXPORT_DIR = Path("./")                                    # artefacts will be saved here
# Mini-batch size passed to MixupGenerator and used to derive LR-schedule steps.
BATCH_SIZE = 64
# Sequences are padded/truncated to this percentile of the sequence lengths.
PAD_PERCENTILE = 95
# NOTE(review): the training cells pass the literal 5e-4 to the LR schedule
# rather than reading LR_INIT — confirm whether this constant should be used.
LR_INIT = 5e-4
# L2 regularisation strength handed to build_tof_model.
WD = 3e-3
# Beta(alpha, alpha) parameter for MixUp.
MIXUP_ALPHA = 0.4
# Maximum number of training epochs.
EPOCHS = 160
# EarlyStopping patience (epochs without val_accuracy improvement).
PATIENCE = 40


print("▶ imports ready · tensorflow", tf.__version__)

In [None]:
# Normalizes and cleans the time series sequence. 

def preprocess_sequence(df_seq: pd.DataFrame, feature_cols: list[str], scaler: StandardScaler):
    """Fill gaps in one sequence and standardise it.

    Args:
        df_seq: Rows of a single sequence.
        feature_cols: Columns to extract, in model input order.
        scaler: Fitted scaler whose ``transform`` is applied to the matrix.

    Returns:
        ``float32`` array with one row per time step and one column per feature.
    """
    # Forward-fill, then back-fill leading gaps, then zero any column that is
    # entirely missing.
    filled = df_seq[feature_cols].ffill().bfill().fillna(0)
    scaled = scaler.transform(filled.values)
    return scaled.astype('float32')

# MixUp data augmentation, used to regularize the neural network.

class MixupGenerator(Sequence):
    """Keras ``Sequence`` that yields MixUp-blended mini-batches.

    Each batch is mixed with a random permutation of *itself* using a single
    mixing coefficient ``lam ~ Beta(alpha, alpha)`` drawn per batch. Sample
    order is reshuffled at the end of every epoch; the first epoch uses the
    order in which the data was supplied.

    Args:
        X: Indexable array of inputs (first axis = samples).
        y: Indexable array of targets aligned with ``X``.
        batch_size: Number of samples per batch (the last batch may be smaller).
        alpha: Beta-distribution parameter. ``alpha <= 0`` disables mixing
            (batches are returned unchanged) instead of raising inside
            ``np.random.beta``.
        **kwargs: Forwarded to the ``Sequence`` base class.
    """

    def __init__(self, X, y, batch_size, alpha=0.2, **kwargs):
        # Keras 3 expects subclasses to initialise the base class; with no
        # extra kwargs this is also valid for older base classes.
        super().__init__(**kwargs)
        self.X, self.y = X, y
        self.batch = batch_size
        self.alpha = alpha
        self.indices = np.arange(len(X))

    def __len__(self):
        """Number of batches per epoch (ceil so the final partial batch is kept)."""
        return int(np.ceil(len(self.X) / self.batch))

    def __getitem__(self, i):
        """Return the ``i``-th mixed batch as ``(X_mix, y_mix)``."""
        idx = self.indices[i*self.batch:(i+1)*self.batch]
        Xb, yb = self.X[idx], self.y[idx]
        # Beta is undefined for non-positive parameters; fall back to "no mixing".
        lam = np.random.beta(self.alpha, self.alpha) if self.alpha > 0 else 1.0
        perm = np.random.permutation(len(Xb))
        X_mix = lam * Xb + (1-lam) * Xb[perm]
        y_mix = lam * yb + (1-lam) * yb[perm]
        return X_mix, y_mix

    def on_epoch_end(self):
        """Reshuffle the sample order in place for the next epoch."""
        np.random.shuffle(self.indices)

In [None]:
def remove_gravity_from_acc(acc_data, rot_data):
    """Subtract the gravity vector from accelerometer readings.

    For every sample the world-frame gravity vector ``[0, 0, 9.81]`` is rotated
    into the sensor frame with the inverse of the sample's orientation
    quaternion and subtracted from the measured acceleration.

    Args:
        acc_data: ``DataFrame`` with ``acc_x/acc_y/acc_z`` columns, or an
            ``(n, 3)`` array.
        rot_data: ``DataFrame`` with ``rot_x/rot_y/rot_z/rot_w`` columns, or an
            ``(n, 4)`` array of scalar-last quaternions.

    Returns:
        ``(n, 3)`` array of linear acceleration. Floating-point inputs keep
        their dtype; other dtypes are promoted to ``float64`` so the result is
        not truncated. Rows whose quaternion contains a non-finite value or is
        (numerically) all zeros are returned unchanged.
    """
    if isinstance(acc_data, pd.DataFrame):
        acc_values = acc_data[['acc_x', 'acc_y', 'acc_z']].values
    else:
        acc_values = acc_data

    if isinstance(rot_data, pd.DataFrame):
        quat_values = rot_data[['rot_x', 'rot_y', 'rot_z', 'rot_w']].values
    else:
        quat_values = rot_data

    acc_values = np.asarray(acc_values)
    if not np.issubdtype(acc_values.dtype, np.floating):
        # Integer (or object) input would otherwise truncate the subtraction.
        acc_values = acc_values.astype(np.float64)
    quat_values = np.asarray(quat_values, dtype=np.float64)

    # Start from the raw readings so unusable rows pass through untouched.
    linear_accel = acc_values.copy()
    gravity_world = np.array([0.0, 0.0, 9.81])

    # A quaternion is usable when every component is finite and it is not the
    # zero quaternion; everything else cannot be normalised into a rotation.
    usable = np.isfinite(quat_values).all(axis=1) & ~np.isclose(quat_values, 0).all(axis=1)
    if usable.any():
        # One batched SciPy call instead of one Rotation object per sample.
        rotations = R.from_quat(quat_values[usable])
        gravity_sensor_frame = rotations.apply(gravity_world, inverse=True)
        linear_accel[usable] = acc_values[usable] - gravity_sensor_frame

    return linear_accel

def calculate_angular_velocity_from_quat(rot_data, time_delta=1/200):
    """Estimate angular velocity from consecutive orientation quaternions.

    For each pair of neighbouring samples the relative rotation
    ``q_t^-1 * q_{t+1}`` is converted to a rotation vector and divided by
    ``time_delta``. This approximates angular velocity for small rotations.

    Args:
        rot_data: ``DataFrame`` with ``rot_x/rot_y/rot_z/rot_w`` columns, or an
            ``(n, 4)`` array of scalar-last quaternions.
        time_delta: Time between consecutive samples, in seconds. The default
            assumes a 200 Hz sampling rate — TODO confirm against the sensor.

    Returns:
        ``(n, 3)`` ``float64`` array. Row ``i`` describes the rotation from
        sample ``i`` to ``i + 1``; the last row is always zero. Rows are left
        at zero when either quaternion of the pair contains a non-finite value
        or is (numerically) all zeros.
    """
    if isinstance(rot_data, pd.DataFrame):
        quat_values = rot_data[['rot_x', 'rot_y', 'rot_z', 'rot_w']].values
    else:
        quat_values = rot_data
    quat_values = np.asarray(quat_values, dtype=np.float64)

    num_samples = quat_values.shape[0]
    angular_vel = np.zeros((num_samples, 3))
    if num_samples < 2:
        # No consecutive pair exists, so nothing can be differentiated.
        return angular_vel

    # A quaternion is usable when it is finite and not the zero quaternion.
    usable = np.isfinite(quat_values).all(axis=1) & ~np.isclose(quat_values, 0).all(axis=1)
    pair_rows = np.flatnonzero(usable[:-1] & usable[1:])
    if pair_rows.size:
        # Batched SciPy calls replace one Rotation object pair per sample.
        rot_t = R.from_quat(quat_values[pair_rows])
        rot_t_plus_dt = R.from_quat(quat_values[pair_rows + 1])
        delta_rot = rot_t.inv() * rot_t_plus_dt
        angular_vel[pair_rows] = delta_rot.as_rotvec() / time_delta

    return angular_vel

In [None]:
import tensorflow as tf
from tensorflow.keras.layers import *
from tensorflow.keras.models import Model

class PositionalEncoding(Layer):
    """Adds a fixed sinusoidal positional encoding to its input.

    Even feature indices receive ``sin(pos / 10000^(2k/d_model))`` and odd
    indices the matching ``cos`` term, interleaved along the feature axis.

    Args:
        seq_len: Number of positions to precompute.
        d_model: Feature dimension of the input.
        **kwargs: Forwarded to the Keras ``Layer`` base class.
    """

    def __init__(self, seq_len, d_model, **kwargs):
        super().__init__(**kwargs)
        self.seq_len = seq_len
        self.d_model = d_model
        self.pos_encoding = self._positional_encoding(seq_len, d_model)

    def _get_angles(self, position, i, d_model):
        """Return ``position * 1 / 10000^(2*(i//2)/d_model)``."""
        angle_rates = 1 / tf.pow(10000.0, (2 * (i // 2)) / tf.cast(d_model, tf.float32))
        return position * angle_rates

    def _positional_encoding(self, seq_len, d_model):
        """Build the ``(1, seq_len, d_model)`` encoding tensor."""
        position = tf.cast(tf.range(seq_len)[:, tf.newaxis], dtype=tf.float32)
        i = tf.cast(tf.range(d_model)[tf.newaxis, :], dtype=tf.float32)
        angle_rads = self._get_angles(position, i, d_model)

        # Select sin for even feature indices and cos for odd ones. The previous
        # implementation concatenated a sine block and a cosine block, so the
        # features were not interleaved as the formula requires.
        is_even_col = tf.equal(tf.range(d_model) % 2, 0)[tf.newaxis, :]
        pos_encoding = tf.where(is_even_col, tf.math.sin(angle_rads), tf.math.cos(angle_rads))
        return tf.cast(pos_encoding[tf.newaxis, ...], tf.float32)

    def call(self, x):
        # Slice to the actual input length so shorter sequences are accepted.
        return x + self.pos_encoding[:, :tf.shape(x)[1], :]

    def get_config(self):
        """Return constructor arguments so the layer can be re-created on load."""
        config = super().get_config()
        config.update({"seq_len": self.seq_len, "d_model": self.d_model})
        return config
        
class LearnedPositionalEmbedding(Layer):
    """Adds a trainable ``(seq_len, d_model)`` position table to its input.

    Args:
        seq_len: Number of positions; the input's time axis must have this length.
        d_model: Feature dimension of the input.
        **kwargs: Forwarded to the Keras ``Layer`` base class.
    """

    def __init__(self, seq_len, d_model, **kwargs):
        super().__init__(**kwargs)
        # Kept so get_config can report them when the model is saved.
        self.seq_len = seq_len
        self.d_model = d_model
        self.pos_emb = self.add_weight(
            name="pos_emb",  # explicit name keeps the weight identifiable in checkpoints
            shape=[seq_len, d_model],
            initializer="random_normal"
        )

    def call(self, x):
        # Broadcast the table over the batch axis.
        return x + self.pos_emb[tf.newaxis, :, :]

    def get_config(self):
        """Return constructor arguments so the layer can be re-created on load."""
        config = super().get_config()
        config.update({"seq_len": self.seq_len, "d_model": self.d_model})
        return config

def transformer_encoder(inputs, head_size, num_heads, ff_dim, dropout=0.3):
    """Apply one Transformer encoder block to ``inputs``.

    Layer normalisation is applied before each sub-layer, and each sub-layer's
    output is added back to its input.

    Args:
        inputs: Keras tensor whose last axis is the model width.
        head_size: ``key_dim`` of the multi-head attention.
        num_heads: Number of attention heads.
        ff_dim: Hidden width of the feed-forward sub-layer.
        dropout: Dropout rate used inside attention and after each sub-layer.

    Returns:
        Keras tensor with the same last-axis size as ``inputs``.
    """
    # Self-attention sub-layer with residual connection.
    normed = LayerNormalization(epsilon=1e-6)(inputs)
    self_attention = MultiHeadAttention(key_dim=head_size, num_heads=num_heads, dropout=dropout)
    attended = self_attention(normed, normed)
    attended = Dropout(dropout)(attended)
    attn_out = Add()([attended, inputs])

    # Position-wise feed-forward sub-layer, projected back to the input width.
    hidden = LayerNormalization(epsilon=1e-6)(attn_out)
    hidden = Dense(ff_dim, activation="relu")(hidden)
    hidden = Dropout(dropout)(hidden)
    projected = Dense(inputs.shape[-1])(hidden)
    return Add()([projected, attn_out])

def build_transformer_model(input_shape, n_classes, num_layers=4, head_size=64, num_heads=2,
                            ff_dim=128, dropout=0.1, embed_dim=128):
    """Build a Transformer-encoder sequence classifier.

    The input is linearly projected to ``embed_dim``, perturbed with Gaussian
    noise, given a learned positional embedding, passed through ``num_layers``
    encoder blocks, average-pooled over time and classified by a small dense head.

    Args:
        input_shape: ``(sequence_length, n_features)`` of one sample.
        n_classes: Number of output classes (softmax units).
        num_layers: Number of stacked encoder blocks.
        head_size: ``key_dim`` of each attention head.
        num_heads: Number of attention heads per block.
        ff_dim: Hidden width of each block's feed-forward sub-layer.
        dropout: Dropout rate inside the encoder blocks.
        embed_dim: Width of the input projection and positional embedding
            (previously hard-coded to 128 in two places that must agree).

    Returns:
        An uncompiled Keras ``Model``.
    """
    inputs = Input(shape=input_shape)

    x = Dense(embed_dim)(inputs)  # Linear projection to embed dimension
    x = GaussianNoise(0.09)(x)
    x = LearnedPositionalEmbedding(input_shape[0], embed_dim)(x)

    for _ in range(num_layers):
        x = transformer_encoder(x, head_size, num_heads, ff_dim, dropout)

    # Classification head: pool over time, then one regularised hidden layer.
    x = GlobalAveragePooling1D()(x)
    x = Dropout(0.3)(x)
    x = Dense(128, activation='relu', kernel_regularizer=tf.keras.regularizers.l2(1e-4))(x)
    x = Dropout(0.3)(x)
    outputs = Dense(n_classes, activation='softmax')(x)

    return Model(inputs, outputs)


In [None]:
def build_tof_model(pad_len, tof_dim, n_classes, wd=1e-4):
    """Build the CNN + recurrent classifier used for the TOF/THM features.

    Two Conv1D/BatchNorm/ReLU/MaxPool/Dropout stages feed three parallel
    branches (bidirectional LSTM, bidirectional GRU, noisy dense), whose
    outputs are concatenated, pooled by ``attention_layer`` and classified by
    a two-layer dense head.

    Args:
        pad_len: Padded sequence length.
        tof_dim: Number of input features per time step.
        n_classes: Number of output classes (softmax units).
        wd: L2 regularisation factor for conv, recurrent and dense kernels.

    Returns:
        An uncompiled Keras ``Model``.
    """
    inp = Input(shape=(pad_len, tof_dim))

    # Convolutional stem: two identical stages that differ only in filter count.
    x = inp
    for n_filters in (64, 128):
        x = Conv1D(n_filters, 3, padding='same', use_bias=False, kernel_regularizer=l2(wd))(x)
        x = BatchNormalization()(x)
        x = Activation('relu')(x)
        x = MaxPooling1D(2)(x)
        x = Dropout(0.2)(x)

    # Three parallel views of the convolutional features.
    lstm_branch = Bidirectional(LSTM(128, return_sequences=True, kernel_regularizer=l2(wd)))(x)
    gru_branch = Bidirectional(GRU(128, return_sequences=True, kernel_regularizer=l2(wd)))(x)
    noisy = GaussianNoise(0.09)(x)
    dense_branch = Dense(16, activation='elu')(noisy)

    merged = Concatenate()([lstm_branch, gru_branch, dense_branch])
    merged = Dropout(0.4)(merged)
    # NOTE(review): attention_layer is not defined in the visible part of this
    # notebook — confirm it is defined before this cell runs on a fresh kernel.
    features = attention_layer(merged)

    # Dense classification head.
    for units, drop in ((256, 0.5), (128, 0.3)):
        features = Dense(units, use_bias=False, kernel_regularizer=l2(wd))(features)
        features = BatchNormalization()(features)
        features = Activation('relu')(features)
        features = Dropout(drop)(features)

    out = Dense(n_classes, activation='softmax', kernel_regularizer=l2(wd))(features)
    return Model(inp, out)

In [None]:
print("▶ TRAIN MODE – loading dataset …")
df = pd.read_csv(RAW_DIR / "train.csv")

# Demographics: keep only the subject key and the two arm-length columns.
needed_dem_cols = ['subject', 'shoulder_to_wrist_cm', 'elbow_to_wrist_cm']
train_dem_df = pd.read_csv(RAW_DIR / "train_demographics.csv")[needed_dem_cols]

# One row per sequence, joined to its subject's measurements and keyed by sequence_id.
df_for_groups = (
    df[['sequence_id', 'subject']]
    .drop_duplicates()
    .merge(train_dem_df, on='subject', how='left')
    .set_index('sequence_id')
)

# sequence_id -> arm length lookups (not referenced by the cells visible here).
shoulder_map = df_for_groups['shoulder_to_wrist_cm'].to_dict()
elbow_map = df_for_groups['elbow_to_wrist_cm'].to_dict()

# Free the intermediate frames.
del train_dem_df, df_for_groups

# Encode gesture names as integers and persist the class order next to the model.
le = LabelEncoder()
df['gesture_int'] = le.fit_transform(df['gesture'])
gesture_classes = le.classes_
np.save(EXPORT_DIR / "gesture_classes.npy", gesture_classes)


print("  Calculating base engineered IMU features (magnitude, angle)...")
# Euclidean norm of the raw acceleration and rotation angle from the scalar part.
df['acc_mag'] = np.sqrt(np.square(df['acc_x']) + np.square(df['acc_y']) + np.square(df['acc_z']))
df['rot_angle'] = np.arccos(df['rot_w'].clip(lower=-1, upper=1)) * 2

print("  Calculating engineered IMU derivatives (jerk, angular velocity) for original acc_mag...")
# First differences within each sequence; the first step of a sequence gets 0.
for source_col, deriv_col in (('acc_mag', 'acc_mag_jerk'), ('rot_angle', 'rot_angle_vel')):
    df[deriv_col] = df.groupby('sequence_id')[source_col].diff().fillna(0)

print("  Removing gravity and calculating linear acceleration features...")

# One frame per sequence, indexed like its source rows so the concat realigns them.
linear_accel_list = [
    pd.DataFrame(
        remove_gravity_from_acc(
            group[['acc_x', 'acc_y', 'acc_z']],
            group[['rot_x', 'rot_y', 'rot_z', 'rot_w']],
        ),
        columns=['linear_acc_x', 'linear_acc_y', 'linear_acc_z'],
        index=group.index,
    )
    for _, group in df.groupby('sequence_id')
]

df_linear_accel = pd.concat(linear_accel_list)
df = pd.concat([df, df_linear_accel], axis=1)

df['linear_acc_mag'] = np.sqrt(
    np.square(df['linear_acc_x']) + np.square(df['linear_acc_y']) + np.square(df['linear_acc_z'])
)
df['linear_acc_mag_jerk'] = df.groupby('sequence_id')['linear_acc_mag'].diff().fillna(0)


print("  Calculating angular velocity from quaternion derivatives...")
angular_vel_list = [
    pd.DataFrame(
        calculate_angular_velocity_from_quat(group[['rot_x', 'rot_y', 'rot_z', 'rot_w']]),
        columns=['angular_vel_x', 'angular_vel_y', 'angular_vel_z'],
        index=group.index,
    )
    for _, group in df.groupby('sequence_id')
]

df_angular_vel = pd.concat(angular_vel_list, axis=0)

df = pd.concat([df, df_angular_vel], axis=1)


print("  Calculating angular jerk from angular velocity...")
for axis in ('x', 'y', 'z'):
    df[f'angular_jerk_{axis}'] = df.groupby('sequence_id')[f'angular_vel_{axis}'].diff().fillna(0)

print("  Calculating angular snap from angular jerk...")
for axis in ('x', 'y', 'z'):
    df[f'angular_snap_{axis}'] = df.groupby('sequence_id')[f'angular_jerk_{axis}'].diff().fillna(0)

meta_cols = {}  # Intentionally empty; not referenced by the cells visible here.

# IMU inputs: gravity-free acceleration plus every raw rot_* column
# (the two derived rot_* features are listed with the engineered ones instead).
imu_cols_base = ['linear_acc_x', 'linear_acc_y', 'linear_acc_z'] + [
    c for c in df.columns
    if c.startswith('rot_') and c not in ('rot_angle', 'rot_angle_vel')
]

imu_engineered_features = [
    'acc_mag', 'rot_angle',
    'acc_mag_jerk', 'rot_angle_vel',
    'linear_acc_mag', 'linear_acc_mag_jerk',
    'angular_vel_x', 'angular_vel_y', 'angular_vel_z',
    'angular_jerk_x', 'angular_jerk_y', 'angular_jerk_z',
    'angular_snap_x', 'angular_snap_y', 'angular_snap_z',
]

# Order-preserving de-duplication of the combined IMU list.
imu_cols = list(dict.fromkeys(imu_cols_base + imu_engineered_features))

thm_cols_original = [c for c in df.columns if c.startswith('thm_')]

# Four per-row statistics for each of the five TOF sensors.
tof_aggregated_cols_template = [
    f'tof_{i}_{stat}'
    for i in range(1, 6)
    for stat in ('mean', 'std', 'min', 'max')
]

final_feature_cols = imu_cols + thm_cols_original + tof_aggregated_cols_template
imu_dim_final = len(imu_cols)
tof_thm_aggregated_dim_final = len(thm_cols_original) + len(tof_aggregated_cols_template)

print(f"  IMU (incl. engineered & derivatives) {imu_dim_final} | THM + Aggregated TOF {tof_thm_aggregated_dim_final} | total {len(final_feature_cols)} features")
np.save(EXPORT_DIR / "feature_cols.npy", np.array(final_feature_cols))

print("  Building sequences with aggregated TOF and preparing data for scaler...")
seq_gp = df.groupby('sequence_id')

# The TOF statistics are row-wise, so they do not depend on sequence
# boundaries: compute them once for the whole frame instead of once per
# sequence on a full copy of every group. -1 marks "no reading" and is
# excluded from the statistics.
tof_stats = {}
for i in range(1, 6):
    pixel_cols_tof = [f"tof_{i}_v{p}" for p in range(64)]
    tof_sensor_data = df[pixel_cols_tof].replace(-1, np.nan)
    tof_stats[f'tof_{i}_mean'] = tof_sensor_data.mean(axis=1)
    tof_stats[f'tof_{i}_std']  = tof_sensor_data.std(axis=1)
    tof_stats[f'tof_{i}_min']  = tof_sensor_data.min(axis=1)
    tof_stats[f'tof_{i}_max']  = tof_sensor_data.max(axis=1)
del tof_sensor_data

# Narrow frame holding exactly the model features, in model order. `df` itself
# is left untouched by this cell.
raw_feature_cols = [c for c in final_feature_cols if c not in tof_stats]
feature_frame = pd.concat(
    [df[raw_feature_cols], pd.DataFrame(tof_stats, index=df.index)], axis=1
)[final_feature_cols]
gesture_by_seq = seq_gp['gesture_int'].first()

X_list_unscaled, y_list_int_for_stratify, lens = [], [], []

# Gaps are filled within each sequence only, so values never leak across
# sequence boundaries.
for seq_id, seq_features in feature_frame.groupby(df['sequence_id']):
    mat_unscaled = seq_features.ffill().bfill().fillna(0).values.astype('float32')

    X_list_unscaled.append(mat_unscaled)
    y_list_int_for_stratify.append(gesture_by_seq.loc[seq_id])
    lens.append(len(mat_unscaled))

del feature_frame, tof_stats, gesture_by_seq

print("  Fitting StandardScaler...")
# NOTE(review): the scaler is fitted on every sequence, including those that
# the next cell holds out for validation — consider fitting on the training
# split only.
all_steps_concatenated = np.concatenate(X_list_unscaled, axis=0)
scaler = StandardScaler().fit(all_steps_concatenated)
joblib.dump(scaler, EXPORT_DIR / "scaler.pkl")
del all_steps_concatenated

print("  Scaling and padding sequences...")
X_scaled_list = [scaler.transform(x_seq) for x_seq in X_list_unscaled]
del X_list_unscaled

# Pad/truncate at the configured percentile of sequence lengths.
pad_len = int(np.percentile(lens, PAD_PERCENTILE))

X = pad_sequences(X_scaled_list, maxlen=pad_len, padding='post', truncating='post', dtype='float32')
del X_scaled_list

y_int_for_stratify = np.array(y_list_int_for_stratify)
y = to_categorical(y_int_for_stratify, num_classes=len(le.classes_))

In [None]:
print("  Splitting data and preparing for training...")
X_tr, X_val, y_tr, y_val = train_test_split(X, y, test_size=0.2, random_state=82, stratify=y_int_for_stratify)

# Balanced class weights (computed over all labels, as before).
cw_vals = compute_class_weight('balanced', classes=np.arange(len(le.classes_)), y=y_int_for_stratify)
class_weight = dict(enumerate(cw_vals))

# Derive the model dimensions from the data instead of hard-coding 47 features
# and 18 classes, so a change to the feature list or label set cannot silently
# desynchronise the model from its inputs.
model = build_transformer_model(input_shape=(pad_len, X.shape[-1]), n_classes=len(le.classes_))

# Cosine schedule whose first restart period is about 15 epochs.
# NOTE(review): the generator yields ceil(len(X_tr) / BATCH_SIZE) batches per
# epoch, so `steps` can be one short of the true steps per epoch.
steps = len(X_tr) // BATCH_SIZE
lr_sched = tf.keras.optimizers.schedules.CosineDecayRestarts(5e-4, first_decay_steps=15 * steps) 

model.compile(optimizer=Adam(lr_sched),
              loss=tf.keras.losses.CategoricalCrossentropy(label_smoothing=0.1),
              metrics=['accuracy'])

train_gen = MixupGenerator(X_tr, y_tr, batch_size=BATCH_SIZE, alpha=MIXUP_ALPHA)
cb = EarlyStopping(patience=PATIENCE, restore_best_weights=True, verbose=1, monitor='val_accuracy', mode='max')

print("  Starting model training...")
model.fit(train_gen, epochs=EPOCHS, validation_data=(X_val, y_val),
          class_weight=class_weight, callbacks=[cb], verbose=1)

model.save(EXPORT_DIR / "gesture_two_branch_mixup.h5")
print("✔ Training done – artefacts saved in", EXPORT_DIR)

# Score the hold-out split with the competition's hierarchical F1.
from cmi_2025_metric_copy_for_import import CompetitionMetric
preds_val = model.predict(X_val).argmax(1)
true_val_int  = y_val.argmax(1)

h_f1 = CompetitionMetric().calculate_hierarchical_f1(
    pd.DataFrame({'gesture': le.classes_[true_val_int]}),
    pd.DataFrame({'gesture': le.classes_[preds_val]}))
print("Hold‑out H‑F1 =", round(h_f1, 4))

In [None]:
# === IMU MODEL TRAINING ===
print("▶ TRAINING IMU MODEL")

imu_feature_cols = imu_cols  # Already defined by the feature engineering cell

# Prepare IMU-only sequences (gaps filled within each sequence)
X_list_imu_unscaled = []
for seq_id, seq_df in df.groupby('sequence_id'):
    imu_data = seq_df[imu_feature_cols].ffill().bfill().fillna(0).values.astype('float32')
    X_list_imu_unscaled.append(imu_data)

print("  Scaling IMU data...")
all_imu = np.concatenate(X_list_imu_unscaled, axis=0)
scaler_imu = StandardScaler().fit(all_imu)
joblib.dump(scaler_imu, EXPORT_DIR / "scaler_imu.pkl")

X_imu_scaled = [scaler_imu.transform(x) for x in X_list_imu_unscaled]
X_imu = pad_sequences(X_imu_scaled, maxlen=pad_len, padding='post', truncating='post', dtype='float32')

# Same random_state and stratification labels as the other training cells.
X_tr_imu, X_val_imu, y_tr, y_val = train_test_split(X_imu, y, test_size=0.2, random_state=82, stratify=y_int_for_stratify)

cw_vals = compute_class_weight('balanced', classes=np.arange(len(le.classes_)), y=y_int_for_stratify)
class_weight = dict(enumerate(cw_vals))

print("  Building and training IMU model...")
# Input shape and class count come from the data rather than the literals
# (127, 22) and 18, which only matched one particular pad_len / feature list.
imu_model = build_transformer_model(
    input_shape=(pad_len, len(imu_feature_cols)),
    n_classes=len(le.classes_),
)

steps = len(X_tr_imu) // BATCH_SIZE
lr_sched = tf.keras.optimizers.schedules.CosineDecayRestarts(5e-4, first_decay_steps=15 * steps) 

imu_model.compile(
    optimizer=Adam(lr_sched),
    loss=tf.keras.losses.CategoricalCrossentropy(label_smoothing=0.1),
    metrics=['accuracy']
)

train_gen_imu = MixupGenerator(X_tr_imu, y_tr, batch_size=BATCH_SIZE, alpha=MIXUP_ALPHA)
cb = EarlyStopping(patience=PATIENCE, restore_best_weights=True, verbose=1, monitor='val_accuracy', mode='max')
imu_model.fit(train_gen_imu, epochs=EPOCHS, validation_data=(X_val_imu, y_val),
              class_weight=class_weight, callbacks=[cb], verbose=1)

imu_model.save(EXPORT_DIR / "imu_model.h5")

# Score the hold-out split with the competition's hierarchical F1.
from cmi_2025_metric_copy_for_import import CompetitionMetric
preds_val = imu_model.predict(X_val_imu).argmax(1)
true_val_int  = y_val.argmax(1)

h_f1 = CompetitionMetric().calculate_hierarchical_f1(
    pd.DataFrame({'gesture': le.classes_[true_val_int]}),
    pd.DataFrame({'gesture': le.classes_[preds_val]}))
print("Hold‑out H‑F1 =", round(h_f1, 4))

In [None]:
# Inspect the shape of the padded IMU training split.
X_tr_imu.shape

In [None]:
# Show the padded sequence length passed as maxlen to every pad_sequences call.
pad_len

In [None]:
# === TOF MODEL TRAINING ===
print("▶ TRAINING TOF+THM MODEL")

tof_thm_feature_cols = thm_cols_original + tof_aggregated_cols_template

# The TOF statistics are row-wise, so a single pass per sensor over the whole
# frame yields the same columns as computing them sequence by sequence, without
# thousands of df.loc writes. -1 marks "no reading" and is excluded.
for i in range(1, 6):
    pixel_cols_tof = [f"tof_{i}_v{p}" for p in range(64)]
    tof_sensor_data = df[pixel_cols_tof].replace(-1, np.nan)
    df[f'tof_{i}_mean'] = tof_sensor_data.mean(axis=1)
    df[f'tof_{i}_std']  = tof_sensor_data.std(axis=1)
    df[f'tof_{i}_min']  = tof_sensor_data.min(axis=1)
    df[f'tof_{i}_max']  = tof_sensor_data.max(axis=1)
del tof_sensor_data

# Gaps are filled within each sequence only.
X_list_tof_unscaled = []
for seq_id, seq_df in df.groupby('sequence_id'):
    tof_data = seq_df[tof_thm_feature_cols].ffill().bfill().fillna(0).values.astype('float32')
    X_list_tof_unscaled.append(tof_data)



print("  Scaling TOF+THM data...")
all_tof = np.concatenate(X_list_tof_unscaled, axis=0)
scaler_tof = StandardScaler().fit(all_tof)
joblib.dump(scaler_tof, EXPORT_DIR / "scaler_tof.pkl")

X_tof_scaled = [scaler_tof.transform(x) for x in X_list_tof_unscaled]
X_tof = pad_sequences(X_tof_scaled, maxlen=pad_len, padding='post', truncating='post', dtype='float32')

# Same random_state and stratification labels as the other training cells.
X_tr_tof, X_val_tof, y_tr, y_val = train_test_split(X_tof, y, test_size=0.2, random_state=82, stratify=y_int_for_stratify)

cw_vals = compute_class_weight('balanced', classes=np.arange(len(le.classes_)), y=y_int_for_stratify)
class_weight = dict(enumerate(cw_vals))

print("  Building and training TOF model...")
tof_model = build_tof_model(pad_len, tof_thm_aggregated_dim_final, len(le.classes_), wd=WD)
steps = len(X_tr_tof) // BATCH_SIZE
lr_sched = tf.keras.optimizers.schedules.CosineDecayRestarts(5e-4, first_decay_steps=15 * steps) 

tof_model.compile(
    optimizer=Adam(lr_sched),
    loss=tf.keras.losses.CategoricalCrossentropy(label_smoothing=0.1),
    metrics=['accuracy']
)

train_gen_tof = MixupGenerator(X_tr_tof, y_tr, batch_size=BATCH_SIZE, alpha=MIXUP_ALPHA)
cb = EarlyStopping(patience=PATIENCE, restore_best_weights=True, verbose=1, monitor='val_accuracy', mode='max')
tof_model.fit(train_gen_tof, epochs=EPOCHS, validation_data=(X_val_tof, y_val),
              class_weight=class_weight, callbacks=[cb], verbose=1)

tof_model.save(EXPORT_DIR / "tof_model.h5")

# Score the hold-out split with the competition's hierarchical F1.
from cmi_2025_metric_copy_for_import import CompetitionMetric
preds_val = tof_model.predict(X_val_tof).argmax(1)
true_val_int  = y_val.argmax(1)

h_f1 = CompetitionMetric().calculate_hierarchical_f1(
    pd.DataFrame({'gesture': le.classes_[true_val_int]}),
    pd.DataFrame({'gesture': le.classes_[preds_val]}))
print("Hold‑out H‑F1 =", round(h_f1, 4))