In [1]:
import os, random, math
import pandas as pd, numpy as np, polars as pl
from pathlib import Path
from scipy.spatial.transform import Rotation as R
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import StratifiedGroupKFold
from sklearn.utils.class_weight import compute_class_weight
from sklearn.metrics import f1_score
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'  # Suppress ALL TensorFlow logs
import tensorflow as tf
tf.get_logger().setLevel('ERROR')  # Only show errors 
from tensorflow.keras.utils import to_categorical, pad_sequences
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.layers import (
    Input, Conv1D, BatchNormalization, Activation, add, MaxPooling1D, 
    Dropout, Bidirectional, LSTM, GlobalAveragePooling1D, Dense, Multiply,
    Reshape, Lambda, Concatenate, GRU, GaussianNoise
)
from tensorflow.keras.regularizers import l2
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
import joblib
import warnings
warnings.filterwarnings('ignore')

# Run-mode switch: True executes the training branch of the `if TRAIN:` block,
# False executes the inference branch that loads saved artifacts.
TRAIN = False
# Directory the competition CSV files (train.csv, train_demographics.csv) are read from.
BASE_DIR = Path("/kaggle/input/cmi-detect-behavior-with-sensor-data")
# Directory the inference branch loads feature columns, scaler, classes and fold models from.
PRETRAINED_DIR = Path("/kaggle/input/artifact0")
# Directory the training branch writes its artifacts (and the confusion-matrix PNG) to.
EXPORT_DIR = Path("/kaggle/working")
# Seed passed to seed_everything() and to StratifiedGroupKFold.
SEED = 7
# Batch size handed to GatedMixupGenerator.
BATCH_SIZE = 64
# Percentile of the sequence lengths used as the common padded length.
PAD_PERCENTILE = 95
# Learning rate given to the Adam optimizer.
LR_INIT = 5e-4
# L2 regularisation factor passed as `wd` to the model builder.
WD = 3e-3
# Beta-distribution parameter for mixup in the training generator.
MIXUP_ALPHA = 0.4
# Maximum number of epochs per fold.
EPOCHS = 160
# Epochs without improvement tolerated by EnhancedCMIMetricCallback before stopping.
PATIENCE = 40
# Number of cross-validation folds (and of saved / loaded fold models).
N_SPLITS = 10
# Per-sample probability that the training generator zeroes the non-IMU channels.
MASKING_PROB = 0.35
# Loss weight of the auxiliary 'tof_gate' output relative to 'main_output' (weight 1.0).
GATE_LOSS_WEIGHT = 0.2

def seed_everything(seed):
    """Seed the random number generators used by this notebook.

    Sets the ``PYTHONHASHSEED``, ``TF_CUDNN_DETERMINISTIC`` and
    ``TF_DETERMINISTIC_OPS`` environment variables and seeds Python's
    ``random`` module, NumPy's global generator, TensorFlow's global seed
    and ``tf.experimental.numpy``.

    Args:
        seed: Integer seed applied to every generator.
    """
    env_flags = {
        'PYTHONHASHSEED': str(seed),
        'TF_CUDNN_DETERMINISTIC': '1',
        'TF_DETERMINISTIC_OPS': '1',
    }
    for flag_name, flag_value in env_flags.items():
        os.environ[flag_name] = flag_value

    seeders = (
        random.seed,
        np.random.seed,
        tf.random.set_seed,
        tf.experimental.numpy.random.seed,
    )
    for apply_seed in seeders:
        apply_seed(seed)


seed_everything(SEED)

def remove_gravity_from_acc(acc_data, rot_data):
    """Subtract the gravity vector from raw accelerometer readings.

    For every row the world-frame gravity vector ``[0, 0, 9.81]`` is rotated
    into the sensor frame with the inverse of that row's orientation
    quaternion and subtracted from the measured acceleration.

    Rows whose quaternion is unusable (any component NaN/inf, or all
    components numerically zero) keep the raw acceleration unchanged.
    Previously only *fully* NaN quaternions were skipped, so a partially
    missing quaternion produced NaN output rows.

    Args:
        acc_data: DataFrame with columns ``acc_x``, ``acc_y``, ``acc_z``.
        rot_data: DataFrame with columns ``rot_x``, ``rot_y``, ``rot_z``,
            ``rot_w`` (scalar-last quaternion order, as expected by
            ``Rotation.from_quat``), row-aligned with ``acc_data``.

    Returns:
        Float array of shape ``(n_rows, 3)`` with the linear acceleration.
    """
    acc_values = np.asarray(acc_data[['acc_x', 'acc_y', 'acc_z']].values, dtype=float)
    quat_values = np.asarray(
        rot_data[['rot_x', 'rot_y', 'rot_z', 'rot_w']].values, dtype=float
    )
    gravity_world = np.array([0, 0, 9.81])

    # Start from the raw acceleration so unusable rows fall back to it.
    linear_accel = acc_values.copy()

    usable = (
        np.isfinite(quat_values).all(axis=1)
        & ~np.isclose(quat_values, 0).all(axis=1)
    )
    if usable.any():
        # One batched rotation instead of building a Rotation per row.
        rotation = R.from_quat(quat_values[usable])
        gravity_sensor_frame = rotation.apply(gravity_world, inverse=True)
        linear_accel[usable] = acc_values[usable] - gravity_sensor_frame
    return linear_accel

def calculate_angular_velocity_from_quat(rot_data, time_delta=1/200):
    """Estimate angular velocity from consecutive orientation quaternions.

    For each row ``i`` (except the last) the relative rotation
    ``q_i^-1 * q_{i+1}`` is converted to a rotation vector and divided by
    ``time_delta``. The last row, and every row where either quaternion of
    the pair is unusable (any NaN/inf component or zero norm), is left at
    zero. Previously a partially-NaN quaternion slipped through the
    all-NaN check and produced NaN velocities.

    Args:
        rot_data: DataFrame with columns ``rot_x``, ``rot_y``, ``rot_z``,
            ``rot_w`` (scalar-last quaternion order).
        time_delta: Time between consecutive rows, in the unit the
            resulting velocity should be expressed per.

    Returns:
        Float array of shape ``(n_rows, 3)``.
    """
    quat_values = np.asarray(
        rot_data[['rot_x', 'rot_y', 'rot_z', 'rot_w']].values, dtype=float
    )
    angular_vel = np.zeros((len(quat_values), 3))
    if len(quat_values) < 2:
        return angular_vel

    # A NaN/inf component makes the norm non-finite; an all-zero quaternion
    # has zero norm and would make Rotation.from_quat raise.
    norms = np.linalg.norm(quat_values, axis=1)
    usable = np.isfinite(norms) & (norms > 0)

    # Indices i for which both q_i and q_{i+1} are usable.
    starts = np.flatnonzero(usable[:-1] & usable[1:])
    if starts.size:
        rot_t = R.from_quat(quat_values[starts])
        rot_t_plus_dt = R.from_quat(quat_values[starts + 1])
        delta_rot = rot_t.inv() * rot_t_plus_dt
        angular_vel[starts] = delta_rot.as_rotvec() / time_delta
    return angular_vel

def calculate_angular_distance(rot_data):
    """Compute the rotation angle between consecutive orientation quaternions.

    Entry ``i`` holds the norm of the rotation vector of ``q_i^-1 * q_{i+1}``
    (an angle in radians). The last entry, and every entry where either
    quaternion of the pair is unusable (any NaN/inf component or zero norm),
    is left at zero. Previously a partially-NaN quaternion passed the
    all-NaN check and produced NaN distances.

    Args:
        rot_data: DataFrame with columns ``rot_x``, ``rot_y``, ``rot_z``,
            ``rot_w`` (scalar-last quaternion order).

    Returns:
        Float array of shape ``(n_rows,)``.
    """
    quat_values = np.asarray(
        rot_data[['rot_x', 'rot_y', 'rot_z', 'rot_w']].values, dtype=float
    )
    angular_dist = np.zeros(len(quat_values))
    if len(quat_values) < 2:
        return angular_dist

    # A NaN/inf component makes the norm non-finite; an all-zero quaternion
    # has zero norm and would make Rotation.from_quat raise.
    norms = np.linalg.norm(quat_values, axis=1)
    usable = np.isfinite(norms) & (norms > 0)

    # Indices i for which both q_i and q_{i+1} are usable.
    starts = np.flatnonzero(usable[:-1] & usable[1:])
    if starts.size:
        r1 = R.from_quat(quat_values[starts])
        r2 = R.from_quat(quat_values[starts + 1])
        relative_rotation = r1.inv() * r2
        angular_dist[starts] = np.linalg.norm(relative_rotation.as_rotvec(), axis=1)
    return angular_dist

def cmi_metric(y_true_gestures, y_pred_gestures, bfrb_gestures=None):
    """Compute the binary / macro / composite F1 scores for gesture predictions.

    * ``binary_f1``: F1 of the 'target' class after mapping every gesture to
      'target' (member of ``bfrb_gestures``) or 'non_target'.
    * ``macro_f1``: macro-averaged F1 after collapsing every gesture that is
      not in ``bfrb_gestures`` into the single label 'non_target'.
    * ``composite_score``: mean of the two.

    Args:
        y_true_gestures: Sequence of true gesture names.
        y_pred_gestures: Sequence of predicted gesture names.
        bfrb_gestures: Iterable of gesture names treated as targets. Despite
            the ``None`` default it is required.

    Returns:
        Dict with keys ``'binary_f1'``, ``'macro_f1'`` and
        ``'composite_score'``.

    Raises:
        TypeError: If ``bfrb_gestures`` is ``None``.
    """
    if bfrb_gestures is None:
        # The original failed here too, but with an opaque
        # "argument of type 'NoneType' is not iterable".
        raise TypeError(
            "bfrb_gestures must be an iterable of target gesture names, not None"
        )
    target_names = set(bfrb_gestures)

    def _binary_and_collapsed(gestures):
        """Return (target/non_target labels, labels with non-targets collapsed)."""
        is_target = [gesture in target_names for gesture in gestures]
        binary = np.array(['target' if flag else 'non_target' for flag in is_target])
        collapsed = np.array([
            gesture if flag else 'non_target'
            for gesture, flag in zip(gestures, is_target)
        ])
        return binary, collapsed

    y_true_binary, y_true_collapsed = _binary_and_collapsed(np.array(y_true_gestures))
    y_pred_binary, y_pred_collapsed = _binary_and_collapsed(np.array(y_pred_gestures))

    binary_f1 = f1_score(y_true_binary, y_pred_binary, pos_label='target')
    macro_f1 = f1_score(y_true_collapsed, y_pred_collapsed, average='macro')
    composite_score = (binary_f1 + macro_f1) / 2.0

    return {
        'binary_f1': binary_f1,
        'macro_f1': macro_f1,
        'composite_score': composite_score
    }

def evaluate_with_cmi_metric(model, X_val, y_val_gestures, gesture_classes, bfrb_gestures):
    """Score a model's predictions on ``X_val`` with ``cmi_metric``.

    The first element of ``model.predict`` is taken as the per-class
    scores; its row-wise argmax is used to index ``gesture_classes``.

    Returns:
        The dict produced by ``cmi_metric``.
    """
    class_scores = model.predict(X_val, verbose=0)[0]
    predicted_names = gesture_classes[class_scores.argmax(axis=1)]
    return cmi_metric(y_val_gestures, predicted_names, bfrb_gestures)


def get_individual_gesture_scores(model, X_val, y_val_gestures, gesture_classes, bfrb_gestures):
    """Compute a one-vs-rest F1 score for every gesture present in the labels.

    Args:
        model: Model whose first ``predict`` output holds per-class scores.
        X_val: Validation inputs.
        y_val_gestures: Array of true gesture names.
        gesture_classes: Array mapping class index to gesture name.
        bfrb_gestures: Accepted but not used by this function.

    Returns:
        Dict mapping each gesture name found in ``y_val_gestures`` to its F1.
    """
    class_scores = model.predict(X_val, verbose=0)[0]
    predicted_names = gesture_classes[class_scores.argmax(axis=1)]

    per_gesture_f1 = {}
    for name in np.unique(y_val_gestures):
        truth_flags = (y_val_gestures == name).astype(int)
        if truth_flags.sum() <= 0:
            continue
        predicted_flags = (predicted_names == name).astype(int)
        per_gesture_f1[name] = f1_score(truth_flags, predicted_flags, zero_division=0)

    return per_gesture_f1

def evaluate_dual_cmi_metric(model, X_val, y_val_gestures, gesture_classes, bfrb_gestures, imu_dim):
    """Evaluate a model with all channels and with only the first ``imu_dim`` channels.

    The model is scored once on ``X_val`` as given and once on a copy whose
    feature channels from index ``imu_dim`` onward are set to zero. The
    headline scores are the averages of the two runs.

    Returns:
        Dict with the averaged ``composite_score`` / ``binary_f1`` /
        ``macro_f1``, the per-run ``full_sensor_*`` and ``imu_only_*``
        scores, ``sensor_dependency`` (full minus IMU-only composite) and
        ``performance_stability``.
    """
    full = evaluate_with_cmi_metric(model, X_val, y_val_gestures, gesture_classes, bfrb_gestures)

    # Blank out everything past the leading imu_dim channels on a copy.
    imu_only_inputs = X_val.copy()
    imu_only_inputs[:, :, imu_dim:] = 0.0
    imu = evaluate_with_cmi_metric(
        model, imu_only_inputs, y_val_gestures, gesture_classes, bfrb_gestures
    )

    def _average(key):
        return (full[key] + imu[key]) / 2.0

    gap = full['composite_score'] - imu['composite_score']
    stability = 1.0 - (gap / max(full['composite_score'], 0.01))

    return {
        'composite_score': _average('composite_score'),
        'binary_f1': _average('binary_f1'),
        'macro_f1': _average('macro_f1'),
        'full_sensor_composite': full['composite_score'],
        'full_sensor_binary': full['binary_f1'],
        'full_sensor_macro': full['macro_f1'],
        'imu_only_composite': imu['composite_score'],
        'imu_only_binary': imu['binary_f1'],
        'imu_only_macro': imu['macro_f1'],
        'sensor_dependency': gap,
        'performance_stability': stability,
    }

class IMUSpecificScaler:
    """Standardise the leading ``imu_dim`` columns and the remaining columns separately.

    Two independent ``StandardScaler`` instances are kept: ``imu_scaler``
    for columns ``[:imu_dim]`` and ``tof_scaler`` for columns ``[imu_dim:]``.
    """

    def __init__(self):
        self.imu_dim = None
        self.imu_scaler = StandardScaler()
        self.tof_scaler = StandardScaler()

    def fit(self, X, imu_dim):
        """Fit both scalers on the 2-D array ``X``, split at column ``imu_dim``; return ``self``."""
        self.imu_dim = imu_dim
        imu_part, other_part = X[:, :imu_dim], X[:, imu_dim:]
        self.imu_scaler.fit(imu_part)
        self.tof_scaler.fit(other_part)
        return self

    def transform(self, X):
        """Scale both column groups of ``X`` and return them re-joined column-wise."""
        split = self.imu_dim
        scaled_imu = self.imu_scaler.transform(X[:, :split])
        scaled_other = self.tof_scaler.transform(X[:, split:])
        return np.hstack((scaled_imu, scaled_other))

E0000 00:00:1754137852.773308      19 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1754137852.852710      19 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [2]:
class EnhancedCMIMetricCallback(tf.keras.callbacks.Callback):
    """Early stopping on the averaged full-sensor / IMU-only composite score.

    After every epoch the model is scored with ``evaluate_dual_cmi_metric``.
    The weights of the best-scoring epoch are remembered and restored when
    training ends; training is stopped once ``patience`` consecutive epochs
    fail to improve the score.
    """

    def __init__(self, X_val, y_val_gestures, gesture_classes, bfrb_gestures, imu_dim, patience=40, verbose=1):
        super().__init__()
        # Validation data and label metadata forwarded to the metric.
        self.X_val, self.y_val_gestures = X_val, y_val_gestures
        self.gesture_classes, self.bfrb_gestures = gesture_classes, bfrb_gestures
        self.imu_dim = imu_dim
        # Early-stopping configuration and state.
        self.patience, self.verbose = patience, verbose
        self.best_score, self.wait = -np.inf, 0
        self.best_weights = None

    def on_epoch_end(self, epoch, logs=None):
        """Score the model, record the metrics in ``logs`` and update the stopping state."""
        scores = evaluate_dual_cmi_metric(
            self.model,
            self.X_val,
            self.y_val_gestures,
            self.gesture_classes,
            self.bfrb_gestures,
            self.imu_dim,
        )
        current = scores['composite_score']

        if not logs:
            logs = {}
        logs['val_realistic_composite'] = current
        for suffix in ('full_sensor_composite', 'imu_only_composite', 'sensor_dependency'):
            logs['val_' + suffix] = scores[suffix]

        # One dot per epoch as a minimal progress indicator.
        if self.verbose > 0:
            print('.', end='', flush=True)

        improved = current > self.best_score
        if improved:
            self.best_score, self.wait = current, 0
            self.best_weights = self.model.get_weights()
        else:
            self.wait += 1

        if self.wait >= self.patience:
            self.model.stop_training = True

    def on_train_end(self, logs=None):
        """Restore the best weights seen during training, if any were recorded."""
        if self.best_weights is None:
            return
        self.model.set_weights(self.best_weights)
        

In [3]:
def create_detailed_confusion_analysis(models, X_val_all, y_val_all, le_classes, bfrb_gestures):
    """Create detailed confusion matrix and misclassification analysis.

    Averages the first ``predict`` output of every model in ``models``,
    builds a confusion matrix against ``y_val_all``, saves and shows it as a
    heatmap (``EXPORT_DIR / 'confusion_matrix.png'``) and prints a textual
    report of the most frequent confusions and per-gesture accuracy.

    Args:
        models: Iterable of models whose first ``predict`` output holds
            per-class scores.
        X_val_all: Inputs passed to every model.
        y_val_all: One-hot encoded true labels (argmax over axis 1 is used).
        le_classes: Sequence of gesture names; index == class id.
        bfrb_gestures: Collection of gesture names treated as BFRB.

    Returns:
        Tuple ``(cm, misclassifications, gesture_accuracy)`` where ``cm`` is
        the confusion matrix, ``misclassifications`` is a list of dicts
        sorted by descending count, and ``gesture_accuracy`` maps gesture
        name to the fraction of its true samples predicted correctly.
    """
    import matplotlib.pyplot as plt
    import seaborn as sns
    from sklearn.metrics import confusion_matrix

    # Get ensemble predictions
    all_predictions = [model.predict(X_val_all, verbose=0)[0] for model in models]
    ensemble_pred = np.mean(all_predictions, axis=0)
    pred_classes = ensemble_pred.argmax(axis=1)
    true_classes = y_val_all.argmax(axis=1)

    # Create confusion matrix. Passing `labels` pins the matrix to
    # len(le_classes) x len(le_classes); without it a class missing from both
    # truth and predictions shrinks the matrix, misaligning the tick labels
    # and breaking the cm[i][j] indexing below.
    cm = confusion_matrix(true_classes, pred_classes, labels=np.arange(len(le_classes)))

    print("\n" + "="*80)
    print("DETAILED CONFUSION MATRIX ANALYSIS")
    print("="*80)

    # 1. Overall confusion matrix visualization
    short_names = [cls[:20] for cls in le_classes]
    plt.figure(figsize=(15, 12))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                xticklabels=short_names,
                yticklabels=short_names)
    plt.title('Gesture Confusion Matrix')
    plt.ylabel('True Gesture')
    plt.xlabel('Predicted Gesture')
    plt.xticks(rotation=45, ha='right')
    plt.yticks(rotation=0)
    plt.tight_layout()
    plt.savefig(EXPORT_DIR / 'confusion_matrix.png', dpi=150, bbox_inches='tight')
    plt.show()

    # 2. Detailed misclassification analysis
    print("\n1. TOP MISCLASSIFICATIONS:")
    print("-" * 50)

    misclassifications = []
    for i, true_class in enumerate(le_classes):
        for j, pred_class in enumerate(le_classes):
            if i != j and cm[i][j] > 0:
                misclassifications.append({
                    'true': true_class,
                    'predicted': pred_class,
                    'count': cm[i][j],
                    'true_is_bfrb': true_class in bfrb_gestures,
                    'pred_is_bfrb': pred_class in bfrb_gestures
                })

    # Sort by count (most frequent first); later sections rely on this order.
    misclassifications.sort(key=lambda x: x['count'], reverse=True)

    print("Most frequent misclassifications:")
    for i, mc in enumerate(misclassifications[:15]):
        true_marker = " " if mc['true_is_bfrb'] else "  "
        pred_marker = " " if mc['pred_is_bfrb'] else "  "
        print(f"{i+1:2d}. {true_marker} {mc['true'][:25]:25} → {pred_marker} {mc['predicted'][:25]:25} ({mc['count']} times)")

    # 3. BFRB-specific confusion analysis
    print("\n2. BFRB GESTURE CONFUSION PATTERNS:")
    print("-" * 50)

    bfrb_confusions = [mc for mc in misclassifications if mc['true_is_bfrb']]

    print("BFRB gestures confused with other BFRB gestures:")
    bfrb_to_bfrb = [mc for mc in bfrb_confusions if mc['pred_is_bfrb']]
    for mc in bfrb_to_bfrb[:10]:
        print(f"   {mc['true'][:30]:30} →  {mc['predicted'][:30]:30} ({mc['count']}x)")

    print(f"\nBFRB gestures confused with Non-BFRB gestures:")
    bfrb_to_non = [mc for mc in bfrb_confusions if not mc['pred_is_bfrb']]
    for mc in bfrb_to_non[:10]:
        print(f"   {mc['true'][:30]:30} →    {mc['predicted'][:30]:30} ({mc['count']}x)")

    # 4. Gesture-specific accuracy (diagonal / row total, 0 for empty rows)
    print("\n3. INDIVIDUAL GESTURE ACCURACY:")
    print("-" * 50)

    gesture_accuracy = {}
    for i, gesture in enumerate(le_classes):
        total_true = cm[i].sum()
        correct = cm[i][i]
        gesture_accuracy[gesture] = correct / total_true if total_true > 0 else 0

    print("BFRB Gesture Accuracy:")
    bfrb_acc = {g: a for g, a in gesture_accuracy.items() if g in bfrb_gestures}
    for gesture, acc in sorted(bfrb_acc.items(), key=lambda x: x[1]):
        print(f"  {gesture[:35]:35} {acc:.3f}")

    print(f"\nNon-BFRB Gesture Accuracy (top 10):")
    non_bfrb_acc = {g: a for g, a in gesture_accuracy.items() if g not in bfrb_gestures}
    for gesture, acc in sorted(non_bfrb_acc.items(), key=lambda x: x[1], reverse=True)[:10]:
        print(f"  {gesture[:35]:35} {acc:.3f}")

    # 5. Actionable insights
    print("\n4. ACTIONABLE INSIGHTS:")
    print("-" * 50)

    worst_bfrb = sorted(bfrb_acc.items(), key=lambda x: x[1])[:3]
    print("PRIORITY: Worst performing BFRB gestures:")
    for gesture, acc in worst_bfrb:
        # misclassifications is already sorted, so the first three matches
        # are this gesture's most frequent confusions.
        main_confusions = [mc for mc in misclassifications if mc['true'] == gesture][:3]
        print(f"\n  • {gesture} (accuracy: {acc:.3f})")
        print("    Most confused with:")
        for mc in main_confusions:
            conf_type = "BFRB" if mc['pred_is_bfrb'] else "Non-BFRB"
            print(f"      - {mc['predicted']} ({conf_type}, {mc['count']}x)")

    return cm, misclassifications, gesture_accuracy

In [4]:
def time_sum(x):
    """Sum the tensor ``x`` over axis 1."""
    summed = tf.reduce_sum(input_tensor=x, axis=1)
    return summed

def squeeze_last_axis(x):
    """Remove the trailing size-1 axis of the tensor ``x``."""
    squeezed = tf.squeeze(input=x, axis=-1)
    return squeezed

def expand_last_axis(x):
    """Append a new size-1 axis at the end of the tensor ``x``."""
    expanded = tf.expand_dims(input=x, axis=-1)
    return expanded

def se_block(x, reduction=8):
    """Rescale the channels of ``x`` with a squeeze-and-excitation style gate.

    The input is average-pooled by ``GlobalAveragePooling1D``, passed through
    a ``channels // reduction`` ReLU layer and a ``channels``-wide sigmoid
    layer, reshaped to ``(1, channels)`` and multiplied back onto ``x``.
    """
    channels = x.shape[-1]
    pooled = GlobalAveragePooling1D()(x)
    bottleneck = Dense(channels // reduction, activation='relu')(pooled)
    channel_gates = Dense(channels, activation='sigmoid')(bottleneck)
    channel_gates = Reshape((1, channels))(channel_gates)
    return Multiply()([x, channel_gates])

def residual_se_cnn_block(x, filters, kernel_size, pool_size=2, drop=0.3, wd=1e-4):
    """Two Conv-BN-ReLU layers plus ``se_block``, with a residual connection.

    When the input's channel count differs from ``filters`` the shortcut is
    projected with a 1x1 convolution and batch normalisation. The summed
    result goes through ReLU, max pooling (``pool_size``) and dropout
    (``drop``). ``wd`` is the L2 factor applied to the convolution kernels.
    """
    def _conv(width, n_filters=filters):
        # Bias-free, same-padded convolution with L2 kernel regularisation.
        return Conv1D(n_filters, width, padding='same', use_bias=False,
                      kernel_regularizer=l2(wd))

    skip = x
    features = x
    # Layers are created in the same order as before: main path first.
    for _stage in (0, 1):
        features = _conv(kernel_size)(features)
        features = BatchNormalization()(features)
        features = Activation('relu')(features)
    features = se_block(features)

    needs_projection = skip.shape[-1] != filters
    if needs_projection:
        skip = _conv(1)(skip)
        skip = BatchNormalization()(skip)

    merged = add([features, skip])
    merged = Activation('relu')(merged)
    merged = MaxPooling1D(pool_size)(merged)
    return Dropout(drop)(merged)

def attention_layer(inputs):
    """Pool ``inputs`` over axis 1 with learned softmax weights.

    A one-unit tanh ``Dense`` layer scores every step, the scores are
    softmax-normalised, and the weighted steps are summed by ``time_sum``.
    """
    step_scores = Dense(1, activation='tanh')(inputs)
    flat_scores = Lambda(squeeze_last_axis)(step_scores)
    step_weights = Activation('softmax')(flat_scores)
    # Restore the trailing axis so the weights broadcast over the features.
    step_weights = Lambda(expand_last_axis)(step_weights)
    weighted_steps = Multiply()([inputs, step_weights])
    return Lambda(time_sum)(weighted_steps)

class GatedMixupGenerator(tf.keras.utils.Sequence):
    """Batch generator with sensor masking, gate targets, class weights and mixup.

    Every batch is a tuple ``(X, {'main_output': y, 'tof_gate': gate}, w)``:

    * With probability ``masking_prob`` a sample has its channels from
      ``imu_dim`` onward zeroed and its gate target set to 0 (otherwise 1).
    * If ``class_weight`` is given, each sample weight is the weight of the
      sample's argmax class; otherwise all weights are 1.
    * If ``alpha > 0`` the batch is mixed with a permutation of itself using
      a single ``Beta(alpha, alpha)`` coefficient; inputs, labels, gate
      targets and sample weights are all blended with that coefficient.

    Args:
        X: Array of input sequences, indexed along axis 0.
        y: One-hot label array aligned with ``X``.
        batch_size: Number of samples per batch.
        imu_dim: Index of the first channel that masking zeroes.
        class_weight: Optional mapping from class index to weight.
        alpha: Mixup Beta parameter; ``0`` disables mixup.
        masking_prob: Per-sample masking probability; ``0`` disables masking.
    """

    def __init__(self, X, y, batch_size, imu_dim, class_weight=None, alpha=0.2, masking_prob=0.0):
        # Newer Keras versions expect Sequence/PyDataset subclasses to call
        # the base initialiser; it is harmless on older versions.
        super().__init__()
        self.X, self.y = X, y
        self.batch = batch_size
        self.imu_dim = imu_dim
        self.class_weight = class_weight
        self.alpha = alpha
        self.masking_prob = masking_prob
        self.indices = np.arange(len(X))
        self.on_epoch_end()

    def __len__(self):
        """Number of batches per epoch (the last one may be smaller)."""
        return int(np.ceil(len(self.X) / self.batch))

    def __getitem__(self, i):
        """Build batch number ``i`` of the current epoch."""
        idx = self.indices[i * self.batch:(i + 1) * self.batch]
        # Indexing with an index array already returns fresh copies, so the
        # in-place masking below never touches self.X / self.y.
        Xb, yb = self.X[idx], self.y[idx]
        n_samples = len(Xb)

        sample_weights = np.ones(n_samples, dtype='float32')
        if self.class_weight:
            class_ids = yb.argmax(axis=1)
            sample_weights = np.array(
                [self.class_weight[class_id] for class_id in class_ids],
                dtype='float32',
            )

        gate_target = np.ones(n_samples, dtype='float32')
        if self.masking_prob > 0:
            masked = np.random.rand(n_samples) < self.masking_prob
            Xb[masked, :, self.imu_dim:] = 0
            gate_target[masked] = 0.0

        if self.alpha > 0:
            lam = np.random.beta(self.alpha, self.alpha)
            perm = np.random.permutation(n_samples)
            X_mix = lam * Xb + (1 - lam) * Xb[perm]
            y_mix = lam * yb + (1 - lam) * yb[perm]
            gate_target_mix = lam * gate_target + (1 - lam) * gate_target[perm]
            sample_weights_mix = lam * sample_weights + (1 - lam) * sample_weights[perm]
            return X_mix, {'main_output': y_mix, 'tof_gate': gate_target_mix}, sample_weights_mix

        return Xb, {'main_output': yb, 'tof_gate': gate_target}, sample_weights

    def on_epoch_end(self):
        """Reshuffle the sample order in place."""
        np.random.shuffle(self.indices)

def build_gated_two_branch_model(pad_len, imu_dim, tof_dim, n_classes, wd=1e-4):
    """Build the two-branch sequence classifier with a gated second branch.

    Args:
        pad_len: Length of the (padded) time axis of the input.
        imu_dim: Number of leading input channels routed to the first branch.
        tof_dim: Number of remaining input channels routed to the second branch.
        n_classes: Width of the softmax classification output.
        wd: L2 regularisation factor applied to conv / recurrent / dense kernels.

    Returns:
        A Keras ``Model`` with one input of shape ``(pad_len, imu_dim + tof_dim)``
        and two outputs: the softmax layer named ``'main_output'`` and the
        sigmoid gate named ``'tof_gate'``.
    """
    inp = Input(shape=(pad_len, imu_dim+tof_dim))
    # Split the channel axis: first imu_dim channels vs. everything after.
    imu = Lambda(lambda t: t[:, :, :imu_dim])(inp)
    tof = Lambda(lambda t: t[:, :, imu_dim:])(inp)
    
    # Branch 1: two residual SE blocks (each pools the time axis by 2).
    x1 = residual_se_cnn_block(imu, 64, 3, drop=0.1, wd=wd)
    x1 = residual_se_cnn_block(x1, 128, 5, drop=0.1, wd=wd)
    
    # Branch 2: two plain Conv-BN-ReLU-Pool-Dropout stages; the two MaxPooling1D(2)
    # layers give it the same time reduction as branch 1.
    x2_base = Conv1D(64, 3, padding='same', use_bias=False, kernel_regularizer=l2(wd))(tof)
    x2_base = BatchNormalization()(x2_base)
    x2_base = Activation('relu')(x2_base)
    x2_base = MaxPooling1D(2)(x2_base)
    x2_base = Dropout(0.2)(x2_base)
    
    x2_base = Conv1D(128, 3, padding='same', use_bias=False, kernel_regularizer=l2(wd))(x2_base)
    x2_base = BatchNormalization()(x2_base)
    x2_base = Activation('relu')(x2_base)
    x2_base = MaxPooling1D(2)(x2_base)
    x2_base = Dropout(0.2)(x2_base)
    
    # Gate: a single sigmoid value per sample, computed from the time-averaged
    # raw second-branch channels, that scales the second branch's features.
    # It is also returned as the auxiliary 'tof_gate' output.
    gate_input = GlobalAveragePooling1D()(tof)
    gate_input = Dense(16, activation='relu')(gate_input)
    gate = Dense(1, activation='sigmoid', name='tof_gate')(gate_input)
    x2 = Multiply()([x2_base, gate])
    
    # Fuse both branches and run three parallel paths over the merged sequence:
    # a bidirectional LSTM, a bidirectional GRU, and a noise + Dense path.
    merged = Concatenate()([x1, x2])
    xa = Bidirectional(LSTM(128, return_sequences=True, kernel_regularizer=l2(wd)))(merged)
    xb = Bidirectional(GRU(128, return_sequences=True, kernel_regularizer=l2(wd)))(merged)
    xc = GaussianNoise(0.09)(merged)
    xc = Dense(16, activation='elu')(xc)
    x = Concatenate()([xa, xb, xc])
    x = Dropout(0.4)(x)
    # Collapse the time axis with the learned attention pooling.
    x = attention_layer(x)
    
    # Classification head: two Dense-BN-ReLU-Dropout stages.
    for units, drop in [(256, 0.5), (128, 0.3)]:
        x = Dense(units, use_bias=False, kernel_regularizer=l2(wd))(x)
        x = BatchNormalization()(x)
        x = Activation('relu')(x)
        x = Dropout(drop)(x)
    
    out = Dense(n_classes, activation='softmax', name='main_output', kernel_regularizer=l2(wd))(x)
    return Model(inputs=inp, outputs=[out, gate])

In [5]:
if TRAIN:
    print("----------TRAINING MODE---------")
    
    train = pd.read_csv(BASE_DIR / "train.csv")
    train_dem = pd.read_csv(BASE_DIR / "train_demographics.csv")
    df = pd.merge(train, train_dem, on='subject', how='left')
    
    le = LabelEncoder()
    df['gesture_int'] = le.fit_transform(df['gesture'])
    np.save(EXPORT_DIR / "gesture_classes.npy", le.classes_)
    print("Data loaded | Unique gestures:", len(le.classes_))
    
    bfrb_gestures = [
        'Above ear - pull hair',
        'Forehead - pull hairline', 
        'Forehead - scratch',
        'Eyebrow - pull hair',
        'Eyelash - pull hair',
        'Neck - pinch skin',
        'Neck - scratch',
        'Cheek - pinch skin'
    ]
    
    print("Calculating physics-based features with sequence grouping...")
    
    linear_accel_list = []
    for _, group in df.groupby('sequence_id'):
        linear_accel = remove_gravity_from_acc(
            group[['acc_x', 'acc_y', 'acc_z']], 
            group[['rot_x', 'rot_y', 'rot_z', 'rot_w']]
        )
        linear_accel_df = pd.DataFrame(
            linear_accel,
            columns=['linear_acc_x', 'linear_acc_y', 'linear_acc_z'],
            index=group.index
        )
        linear_accel_list.append(linear_accel_df)
    
    df = pd.concat([df, pd.concat(linear_accel_list)], axis=1)
    df['linear_acc_mag'] = np.sqrt(df['linear_acc_x']**2 + df['linear_acc_y']**2 + df['linear_acc_z']**2)
    df['linear_acc_mag_jerk'] = df.groupby('sequence_id')['linear_acc_mag'].diff().fillna(0)
    
    angular_vel_list = []
    for _, group in df.groupby('sequence_id'):
        angular_vel = calculate_angular_velocity_from_quat(
            group[['rot_x', 'rot_y', 'rot_z', 'rot_w']]
        )
        angular_vel_df = pd.DataFrame(
            angular_vel,
            columns=['angular_vel_x', 'angular_vel_y', 'angular_vel_z'],
            index=group.index
        )
        angular_vel_list.append(angular_vel_df)
    
    df = pd.concat([df, pd.concat(angular_vel_list)], axis=1)
    df['angular_vel_mag'] = np.sqrt(df['angular_vel_x']**2 + df['angular_vel_y']**2 + df['angular_vel_z']**2)
    df['angular_vel_mag_jerk'] = df.groupby('sequence_id')['angular_vel_mag'].diff().fillna(0)
    
    angular_dist_list = []
    for _, group in df.groupby('sequence_id'):
        angular_dist = calculate_angular_distance(
            group[['rot_x', 'rot_y', 'rot_z', 'rot_w']]
        )
        angular_dist_df = pd.DataFrame(
            angular_dist,
            columns=['angular_distance'],
            index=group.index
        )
        angular_dist_list.append(angular_dist_df)
    
    df = pd.concat([df, pd.concat(angular_dist_list)], axis=1)
    df['gesture_rhythm_signature'] = df.groupby('sequence_id')['linear_acc_mag'].transform(
        lambda x: x.rolling(5, min_periods=1).std() / (x.rolling(5, min_periods=1).mean() + 1e-6)
    )
    imu_cols_base = ['linear_acc_x', 'linear_acc_y', 'linear_acc_z'] + [c for c in df.columns if c.startswith('rot_')]
    imu_engineered = ['linear_acc_mag', 'linear_acc_mag_jerk', 'angular_vel_x', 'angular_vel_y', 'angular_vel_z', 'angular_distance','angular_vel_mag','angular_vel_mag_jerk','gesture_rhythm_signature']
    imu_cols = list(dict.fromkeys(imu_cols_base + imu_engineered))
    
    thm_cols = [c for c in df.columns if c.startswith('thm_')]
    tof_agg_cols = []
    for i in range(1, 6):
        tof_agg_cols.extend([f'tof_{i}_mean', f'tof_{i}_std', f'tof_{i}_min', f'tof_{i}_max'])
    
    final_feature_cols = imu_cols + thm_cols + tof_agg_cols
    imu_dim = len(imu_cols)
    tof_thm_dim = len(thm_cols) + len(tof_agg_cols)
    
    print(f"Feature dimensions: IMU={imu_dim} | TOF/THM={tof_thm_dim} | Total={len(final_feature_cols)}")
    np.save(EXPORT_DIR / "feature_cols.npy", np.array(final_feature_cols))
    
    print("Building sequences...")
    seq_gp = df.groupby('sequence_id')
    X_list_unscaled, y_list, groups_list, lens = [], [], [], []
    
    for seq_id, seq_df in seq_gp:
        seq_df_copy = seq_df.copy()
        for i in range(1, 6):
            pixel_cols = [f"tof_{i}_v{p}" for p in range(64)]
            tof_sensor_data = seq_df_copy[pixel_cols].replace(-1, np.nan)
            seq_df_copy[f'tof_{i}_mean'] = tof_sensor_data.mean(axis=1)
            seq_df_copy[f'tof_{i}_std'] = tof_sensor_data.std(axis=1)
            seq_df_copy[f'tof_{i}_min'] = tof_sensor_data.min(axis=1)
            seq_df_copy[f'tof_{i}_max'] = tof_sensor_data.max(axis=1)
        
        mat_unscaled = seq_df_copy[final_feature_cols].ffill().bfill().fillna(0).values.astype('float32')
        X_list_unscaled.append(mat_unscaled)
        y_list.append(seq_df_copy['gesture_int'].iloc[0])
        groups_list.append(seq_df_copy['subject'].iloc[0])
        lens.append(len(mat_unscaled))
    
    print("Fitting IMU-Specific StandardScalers...")
    all_steps_concatenated = np.concatenate(X_list_unscaled, axis=0)
    scaler = IMUSpecificScaler().fit(all_steps_concatenated, imu_dim)
    joblib.dump(scaler, EXPORT_DIR / "imu_specific_scaler.pkl")
    
    print("Scaling and padding sequences with IMU-specific normalization...")
    X_scaled_list = [scaler.transform(x_seq) for x_seq in X_list_unscaled]
    del X_list_unscaled
    
    pad_len = int(np.percentile(lens, PAD_PERCENTILE))
    np.save(EXPORT_DIR / "sequence_maxlen.npy", pad_len)
    
    X = pad_sequences(X_scaled_list, maxlen=pad_len, padding='post', truncating='post', dtype='float32')
    del X_scaled_list
    
    y_stratify = np.array(y_list)
    y = to_categorical(y_list, num_classes=len(le.classes_))
    groups = np.array(groups_list)
    
    print(f"Starting realistic {N_SPLITS}-fold training with dual evaluation...")
    sgkf = StratifiedGroupKFold(n_splits=N_SPLITS, shuffle=True, random_state=SEED)
    fold_realistic_scores = []
    fold_gesture_scores = []

    for fold, (train_idx, val_idx) in enumerate(sgkf.split(X, y_stratify, groups)):
        print(f"\n{'='*50}")
        print(f"FOLD {fold+1}/{N_SPLITS} - Training in progress", end='')
        
        X_train, X_val = X[train_idx], X[val_idx]
        y_train, y_val = y[train_idx], y[val_idx]
        y_val_gestures = le.classes_[y_stratify[val_idx]]

        model = build_gated_two_branch_model(pad_len, imu_dim, tof_thm_dim, len(le.classes_), wd=WD)
        model.compile(
            optimizer=Adam(LR_INIT),
            loss={
                'main_output': tf.keras.losses.CategoricalCrossentropy(label_smoothing=0.1),  
                'tof_gate': 'binary_crossentropy'
            },
            loss_weights={'main_output': 1.0, 'tof_gate': GATE_LOSS_WEIGHT},
            metrics={'main_output': 'accuracy'}
        )
        
        class_weights = compute_class_weight(
            'balanced', 
            classes=np.arange(len(le.classes_)), 
            y=y_train.argmax(1)
        )
        class_weight_dict = dict(enumerate(class_weights))
        
        train_gen = GatedMixupGenerator(
            X_train, y_train, BATCH_SIZE, imu_dim,
            class_weight=class_weight_dict, alpha=MIXUP_ALPHA, masking_prob=MASKING_PROB
        )
        val_gen = GatedMixupGenerator(X_val, y_val, BATCH_SIZE, imu_dim)
        enhanced_callback = EnhancedCMIMetricCallback(
            X_val, y_val_gestures, le.classes_, bfrb_gestures, imu_dim,
            patience=PATIENCE, verbose=1
        )
        
        model.fit(
            train_gen, validation_data=val_gen, epochs=EPOCHS,
            callbacks=[enhanced_callback], verbose=0
        )
        
        final_scores = evaluate_dual_cmi_metric(
            model, X_val, y_val_gestures, le.classes_, bfrb_gestures, imu_dim
        )
        
        gesture_scores = get_individual_gesture_scores(
            model, X_val, y_val_gestures, le.classes_, bfrb_gestures
        )
        
        print(f"\nFOLD {fold+1} COMPLETED ✓")
        print(f"Composite: {final_scores['composite_score']:.4f} | "
              f"Binary: {final_scores['binary_f1']:.4f} | "
              f"Macro: {final_scores['macro_f1']:.4f}")
        print(f"IMU-only: {final_scores['imu_only_composite']:.4f} | "
              f"Full: {final_scores['full_sensor_composite']:.4f} | "
              f"Gap: {final_scores['sensor_dependency']:.4f}")
        
        print("\nIndividual Gesture F1 Scores:")
        for gesture, score in sorted(gesture_scores.items(), key=lambda x: x[1], reverse=True):
            print(f"  {gesture[:30]:30} {score:.3f}")
        
        fold_realistic_scores.append(final_scores)
        fold_gesture_scores.append(gesture_scores)
        
        model.save(EXPORT_DIR / f"gesture_model_fold_{fold}.h5")
        print(f"Model saved: fold_{fold}.h5")

    print("\n----Training Complete----")
    print("\nAverage Results Across All Folds:")
    avg_scores = {
        'composite_score': np.mean([s['composite_score'] for s in fold_realistic_scores]),
        'macro_f1': np.mean([s['macro_f1'] for s in fold_realistic_scores]),
        'binary_f1': np.mean([s['binary_f1'] for s in fold_realistic_scores]),
        'imu_only_composite': np.mean([s['imu_only_composite'] for s in fold_realistic_scores]),
        'full_sensor_composite': np.mean([s['full_sensor_composite'] for s in fold_realistic_scores]),
        'sensor_dependency': np.mean([s['sensor_dependency'] for s in fold_realistic_scores])
    }
    print(f"Actual Composite: {avg_scores['composite_score']:.4f}")
    print(f"Macro: {avg_scores['macro_f1']:.4f}")
    print(f"Binary: {avg_scores['binary_f1']:.4f}")
    print(f"Imu: {avg_scores['imu_only_composite']:.4f}")
    print(f"Full: {avg_scores['full_sensor_composite']:.4f}")
    print(f"Sensor Gap: {avg_scores['sensor_dependency']:.4f}")
    print("\nGenerating detailed confusion analysis...")
    X_val_all = []
    y_val_all = []
    for fold, (train_idx, val_idx) in enumerate(sgkf.split(X, y_stratify, groups)):
        X_val_all.append(X[val_idx])
        y_val_all.append(y[val_idx])
    X_val_combined = np.concatenate(X_val_all, axis=0)
    y_val_combined = np.concatenate(y_val_all, axis=0)
    
    models = []
    for fold in range(N_SPLITS):
        model = load_model(EXPORT_DIR / f"gesture_model_fold_{fold}.h5", 
                           custom_objects={
                               'time_sum': time_sum,
                               'squeeze_last_axis': squeeze_last_axis,
                               'expand_last_axis': expand_last_axis
                           })
        models.append(model)
    cm, misclassifications, gesture_accuracy = create_detailed_confusion_analysis(
        models, X_val_combined, y_val_combined, le.classes_, bfrb_gestures
    )

else:
    print("▶ INFERENCE MODE – loading artifacts from", PRETRAINED_DIR)

    # Preprocessing artifacts stored alongside the models: the ordered feature
    # column list, the padding length, the fitted scaler and the label names.
    # NOTE(review): allow_pickle=True and joblib.load both unpickle data, so
    # PRETRAINED_DIR must only ever contain trusted, self-produced files.
    final_feature_cols = np.load(
        PRETRAINED_DIR / "feature_cols.npy", allow_pickle=True
    ).tolist()
    pad_len = int(np.load(PRETRAINED_DIR / "sequence_maxlen.npy"))
    scaler = joblib.load(PRETRAINED_DIR / "imu_specific_scaler.pkl")
    gesture_classes = np.load(
        PRETRAINED_DIR / "gesture_classes.npy", allow_pickle=True
    )

    print(f"  Loaded feature columns: {len(final_feature_cols)}")
    print(f"  Sequence padding length: {pad_len}")
    print(f"  Gesture classes: {len(gesture_classes)}")

    # Callables that load_model needs in order to rebuild the saved graphs.
    custom_objects = dict(
        time_sum=time_sum,
        squeeze_last_axis=squeeze_last_axis,
        expand_last_axis=expand_last_axis,
        se_block=se_block,
        residual_se_cnn_block=residual_se_cnn_block,
        attention_layer=attention_layer,
    )

    # One model per fold; a missing file is reported and skipped, so the
    # ensemble may end up with fewer than N_SPLITS members.
    models = []
    print(f"  Loading {N_SPLITS} models for ensemble inference...")
    for fold in range(N_SPLITS):
        model_path = PRETRAINED_DIR / f"gesture_model_fold_{fold}.h5"
        if not model_path.exists():
            print(f"    ✗ Model fold {fold} not found at {model_path}")
            continue
        # compile=False: the model is loaded without restoring its training
        # configuration.
        model = load_model(model_path, compile=False, custom_objects=custom_objects)
        models.append(model)
        print(f"    ✓ Loaded fold {fold} model")

    print(f"  Successfully loaded {len(models)} models")
    print("  Models, scaler, feature_cols, pad_len loaded – ready for evaluation")
    
    def predict(sequence: pl.DataFrame, demographics: pl.DataFrame) -> str:
        """Classify one sensor sequence with the loaded fold ensemble.

        The sequence is converted to pandas, the engineered columns are
        rebuilt (gravity-removed acceleration, quaternion-derived angular
        velocity and distance, a rolling rhythm ratio and per-sensor ToF
        row statistics), ``final_feature_cols`` are selected, scaled with
        ``scaler``, padded/truncated to ``pad_len`` and passed to every
        model in ``models``. Element 0 of each model's prediction is
        averaged across models and the arg-max index is looked up in
        ``gesture_classes``.

        Args:
            sequence: Rows of a single sequence as a polars DataFrame.
            demographics: Not used by this function.

        Returns:
            The predicted gesture label as a string. If any step raises,
            the error is printed and ``gesture_classes[0]`` is returned.
        """
        acc_axes = ['acc_x', 'acc_y', 'acc_z']
        quat_axes = ['rot_x', 'rot_y', 'rot_z', 'rot_w']

        try:
            frame = sequence.to_pandas()

            # --- Linear acceleration (gravity removed) and its magnitude ---
            lin = remove_gravity_from_acc(frame[acc_axes], frame[quat_axes])
            for col_idx, axis in enumerate('xyz'):
                frame[f'linear_acc_{axis}'] = lin[:, col_idx]
            lin_sq = (
                frame['linear_acc_x']**2
                + frame['linear_acc_y']**2
                + frame['linear_acc_z']**2
            )
            frame['linear_acc_mag'] = np.sqrt(lin_sq)
            # First difference of the magnitude; the undefined first row becomes 0.
            frame['linear_acc_mag_jerk'] = frame['linear_acc_mag'].diff().fillna(0)

            # --- Angular velocity derived from the quaternions ---
            ang = calculate_angular_velocity_from_quat(frame[quat_axes])
            for col_idx, axis in enumerate('xyz'):
                frame[f'angular_vel_{axis}'] = ang[:, col_idx]
            ang_sq = (
                frame['angular_vel_x']**2
                + frame['angular_vel_y']**2
                + frame['angular_vel_z']**2
            )
            frame['angular_vel_mag'] = np.sqrt(ang_sq)
            frame['angular_vel_mag_jerk'] = frame['angular_vel_mag'].diff().fillna(0)

            # --- Angular distance and rolling std/mean ratio of the magnitude ---
            frame['angular_distance'] = calculate_angular_distance(frame[quat_axes])
            window = frame['linear_acc_mag'].rolling(5, min_periods=1)
            # 1e-6 keeps the ratio finite when the rolling mean is zero.
            frame['gesture_rhythm_signature'] = window.std() / (window.mean() + 1e-6)

            # --- Per-row statistics over the 64 pixels of each of 5 ToF sensors ---
            for sensor in range(1, 6):
                pixel_names = [f"tof_{sensor}_v{p}" for p in range(64)]
                # -1 readings are turned into NaN so the reductions skip them.
                pixels = frame[pixel_names].replace(-1, np.nan)
                for stat in ('mean', 'std', 'min', 'max'):
                    frame[f'tof_{sensor}_{stat}'] = getattr(pixels, stat)(axis=1)

            # Select the stored feature columns in their stored order, fill gaps
            # forward, then backward, then with zeros, and cast to float32.
            filled = frame[final_feature_cols].ffill().bfill().fillna(0)
            features = filled.values.astype('float32')
            features = scaler.transform(features)

            # Batch of one, padded or truncated at the end to pad_len steps.
            batch = pad_sequences(
                [features],
                maxlen=pad_len,
                padding='post',
                truncating='post',
                dtype='float32',
            )

            # Keep element 0 of each model's prediction (noted by the original
            # author as the main output, with the gate output ignored).
            fold_outputs = [member.predict(batch, verbose=0)[0] for member in models]
            averaged = np.mean(fold_outputs, axis=0)

            best_idx = averaged.argmax()
            return str(gesture_classes[best_idx])

        except Exception as err:
            print(f"Error in prediction: {err}")
            # Best-effort fallback so the caller always receives a label: the
            # first entry of gesture_classes.
            return str(gesture_classes[0])
    
    # Wrap `predict` in the competition's inference server.
    import kaggle_evaluation.cmi_inference_server
    inference_server = kaggle_evaluation.cmi_inference_server.CMIInferenceServer(predict)

    # KAGGLE_IS_COMPETITION_RERUN selects the launch mode: without it the
    # server is exercised locally against the test CSVs, otherwise it serves.
    if not os.getenv('KAGGLE_IS_COMPETITION_RERUN'):
        print("  Starting local inference gateway...")
        inference_server.run_local_gateway(
            data_paths=(
                '/kaggle/input/cmi-detect-behavior-with-sensor-data/test.csv',
                '/kaggle/input/cmi-detect-behavior-with-sensor-data/test_demographics.csv',
            )
        )
    else:
        print("  Starting competition inference server...")
        inference_server.serve()

▶ INFERENCE MODE – loading artifacts from /kaggle/input/artifact0
  Loaded feature columns: 40
  Sequence padding length: 127
  Gesture classes: 18
  Loading 10 models for ensemble inference...


I0000 00:00:1754137865.883306      19 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 15513 MB memory:  -> device: 0, name: Tesla P100-PCIE-16GB, pci bus id: 0000:00:04.0, compute capability: 6.0


    ✓ Loaded fold 0 model
    ✓ Loaded fold 1 model
    ✓ Loaded fold 2 model
    ✓ Loaded fold 3 model
    ✓ Loaded fold 4 model
    ✓ Loaded fold 5 model
    ✓ Loaded fold 6 model
    ✓ Loaded fold 7 model
    ✓ Loaded fold 8 model
    ✓ Loaded fold 9 model
  Successfully loaded 10 models
  Models, scaler, feature_cols, pad_len loaded – ready for evaluation
  Starting local inference gateway...


I0000 00:00:1754137876.139724      61 cuda_dnn.cc:529] Loaded cuDNN version 90300
