<a href="https://www.kaggle.com/code/samithsachidanandan/lb-0-72-cmi-2025-tensor-flow?scriptVersionId=244536652" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

## CMI - Detect Behavior with Sensor Data

The goal of this competition is to develop a predictive model that distinguishes BFRB-like and non-BFRB-like activity using data from a variety of sensors collected via a wrist-worn device. Successfully disentangling these behaviours will improve the design and accuracy of wearable BFRB-detection devices, which are relevant to a wide range of mental illnesses, ultimately strengthening the tools available to support their treatment.

## Importing the necessary Libraries

In [9]:
# Two‑Branch Human‑Activity‑Recognition Pipeline (IMU + Thermopile/TOF  + SE‑CNN + BiLSTM + Attention)
import os, json, joblib, numpy as np, pandas as pd
from pathlib import Path
import warnings 
warnings.filterwarnings("ignore")


from sklearn.model_selection import GroupKFold
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.utils.class_weight import compute_class_weight

from tensorflow.keras.utils import Sequence, to_categorical, pad_sequences
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.layers import (
    Input, Conv1D, BatchNormalization, Activation, add, MaxPooling1D, Dropout,
    Bidirectional, LSTM, GlobalAveragePooling1D, Dense, Multiply, Reshape,
    Lambda, Concatenate
)
from tensorflow.keras.regularizers import l2
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras import backend as K
import tensorflow as tf
import polars as pl


### Configuration

This sets the hyperparameters and paths. Can play around the these values to get higher LB score. 

Training hyperparameters:

BATCH_SIZE: Minibatch size.

PAD_PERCENTILE: Used for sequence padding.

LR_INIT: Learning rate.

WD: Weight decay (L2).

MIXUP_ALPHA: If using mixup augmentation.

EPOCHS: Max training epochs.

PATIENCE: Early stopping patience.

In [10]:
# (Competition metric will only be imported when TRAINing)
TRAIN = True                     # ← set to True when you want to train
RAW_DIR = Path("/kaggle/input/cmi-detect-behavior-with-sensor-data")
PRETRAINED_DIR = Path("/kaggle/input/pretrained-model")  # used when TRAIN=False
EXPORT_DIR = Path("./")                                    # artefacts will be saved here
BATCH_SIZE = 64
PAD_PERCENTILE = 95
LR_INIT = 1e-3
WD = 3e-4
MIXUP_ALPHA = 0.4
EPOCHS = 160
PATIENCE = 40


print("▶ imports ready · tensorflow", tf.__version__)

▶ imports ready · tensorflow 2.18.0


## Utility Functions

Utility Fucntions are having modular helper fucntions which simplify the architecture design in TensorFlow/Keras models.

In [11]:
#Tensor Manipulations
def time_sum(x):
    return K.sum(x, axis=1)

def squeeze_last_axis(x):
    return tf.squeeze(x, axis=-1)

def expand_last_axis(x):
    return tf.expand_dims(x, axis=-1)

def se_block(x, reduction=8):
    ch = x.shape[-1]
    se = GlobalAveragePooling1D()(x)
    se = Dense(ch // reduction, activation='relu')(se)
    se = Dense(ch, activation='sigmoid')(se)
    se = Reshape((1, ch))(se)
    return Multiply()([x, se])


# Residual CNN Block with SE
def residual_se_cnn_block(x, filters, kernel_size, pool_size=2, drop=0.3, wd=1e-4):
    shortcut = x
    for _ in range(2):
        x = Conv1D(filters, kernel_size, padding='same', use_bias=False,
                   kernel_regularizer=l2(wd))(x)
        x = BatchNormalization()(x)
        x = Activation('relu')(x)
    x = se_block(x)
    if shortcut.shape[-1] != filters:
        shortcut = Conv1D(filters, 1, padding='same', use_bias=False,
                          kernel_regularizer=l2(wd))(shortcut)
        shortcut = BatchNormalization()(shortcut)
    x = add([x, shortcut])
    x = Activation('relu')(x)
    x = MaxPooling1D(pool_size)(x)
    x = Dropout(drop)(x)
    return x

def attention_layer(inputs):
    score = Dense(1, activation='tanh')(inputs)
    score = Lambda(squeeze_last_axis)(score)
    weights = Activation('softmax')(score)
    weights = Lambda(expand_last_axis)(weights)
    context = Multiply()([inputs, weights])
    context = Lambda(time_sum)(context)
    return context


## Data Helpers

In [12]:
# Normalizes and cleans the time series sequence. 

def preprocess_sequence(df_seq: pd.DataFrame, feature_cols: list[str], scaler: StandardScaler):
    mat = df_seq[feature_cols].ffill().bfill().fillna(0).values
    return scaler.transform(mat).astype('float32')

# MixUp the data argumentation in order to regularize the neural network. 

class MixupGenerator(Sequence):
    def __init__(self, X, y, batch_size, alpha=0.2):
        self.X, self.y = X, y
        self.batch = batch_size
        self.alpha = alpha
        self.indices = np.arange(len(X))
    def __len__(self):
        return int(np.ceil(len(self.X) / self.batch))
    def __getitem__(self, i):
        idx = self.indices[i*self.batch:(i+1)*self.batch]
        Xb, yb = self.X[idx], self.y[idx]
        lam = np.random.beta(self.alpha, self.alpha)
        perm = np.random.permutation(len(Xb))
        X_mix = lam * Xb + (1-lam) * Xb[perm]
        y_mix = lam * yb + (1-lam) * yb[perm]
        return X_mix, y_mix
    def on_epoch_end(self):
        np.random.shuffle(self.indices)

## Model Definition - Two Branch Architecture

In [13]:


def build_two_branch_model(pad_len, imu_dim, tof_dim, n_classes, wd=1e-4):
    inp = Input(shape=(pad_len, imu_dim+tof_dim))
    imu = Lambda(lambda t: t[:, :, :imu_dim])(inp)
    tof = Lambda(lambda t: t[:, :, imu_dim:])(inp)

    # IMU deep branch
    x1 = residual_se_cnn_block(imu, 64, 3, drop=0.3, wd=wd)
    x1 = residual_se_cnn_block(x1, 128, 5, drop=0.3, wd=wd)

    # TOF/Thermal lighter branch
    x2 = Conv1D(64, 3, padding='same', use_bias=False, kernel_regularizer=l2(wd))(tof)
    x2 = BatchNormalization()(x2); x2 = Activation('relu')(x2)
    x2 = MaxPooling1D(2)(x2); x2 = Dropout(0.3)(x2)
    x2 = Conv1D(128, 3, padding='same', use_bias=False, kernel_regularizer=l2(wd))(x2)
    x2 = BatchNormalization()(x2); x2 = Activation('relu')(x2)
    x2 = MaxPooling1D(2)(x2); x2 = Dropout(0.3)(x2)

    merged = Concatenate()([x1, x2])

    x = Bidirectional(LSTM(128, return_sequences=True, kernel_regularizer=l2(wd)))(merged)
    x = Dropout(0.4)(x)
    x = attention_layer(x)

    for units, drop in [(256, 0.5), (128, 0.3)]:
        x = Dense(units, use_bias=False, kernel_regularizer=l2(wd))(x)
        x = BatchNormalization()(x); x = Activation('relu')(x)
        x = Dropout(drop)(x)

    out = Dense(n_classes, activation='softmax', kernel_regularizer=l2(wd))(x)
    return Model(inp, out)

## Training / Inference Pipeline

In [14]:
if TRAIN:
    print("▶ TRAIN MODE – loading dataset …")
    df = pd.read_csv(RAW_DIR / "train.csv")

    # label encoding
    le = LabelEncoder(); df['gesture_int'] = le.fit_transform(df['gesture'])
    np.save(EXPORT_DIR / "gesture_classes.npy", le.classes_)

    # feature list
    meta_cols = {'gesture', 'gesture_int', 'sequence_type', 'behavior', 'orientation',
                 'row_id', 'subject', 'phase', 'sequence_id', 'sequence_counter'}
    feature_cols = [c for c in df.columns if c not in meta_cols]

    imu_cols  = [c for c in feature_cols if not (c.startswith('thm_') or c.startswith('tof_'))]
    tof_cols  = [c for c in feature_cols if c.startswith('thm_') or c.startswith('tof_')]
    print(f"  IMU {len(imu_cols)} | TOF/THM {len(tof_cols)} | total {len(feature_cols)} features")

    # global scaler
    scaler = StandardScaler().fit(df[feature_cols].ffill().bfill().fillna(0).values)
    joblib.dump(scaler, EXPORT_DIR / "scaler.pkl")

    # build sequences
    seq_gp = df.groupby('sequence_id')
    X_list, y_list, lens = [], [], []
    for seq_id, seq in seq_gp:
        mat = preprocess_sequence(seq, feature_cols, scaler)
        X_list.append(mat)
        y_list.append(seq['gesture_int'].iloc[0])
        lens.append(len(mat))
    pad_len = int(np.percentile(lens, PAD_PERCENTILE))
    np.save(EXPORT_DIR / "sequence_maxlen.npy", pad_len)
    np.save(EXPORT_DIR / "feature_cols.npy", np.array(feature_cols))

    # Get gesture labels and groups
    y_labels = np.array(y_list)
    groups = np.array([seq_id for seq_id, _ in seq_gp])  # sequence_id per sample
    
    # Convert X and y to numpy arrays
    X = pad_sequences(X_list, maxlen=pad_len, padding='post', truncating='post', dtype='float32')
    y = to_categorical(y_labels, num_classes=len(le.classes_))
    
    # Group-aware split
    gkf = GroupKFold(n_splits=5)
    for fold, (train_idx, val_idx) in enumerate(gkf.split(X, y_labels, groups=groups)):
        X_tr, X_val = X[train_idx], X[val_idx]
        y_tr, y_val = y[train_idx], y[val_idx]
        print(f"✅ Fold {fold}: Train size = {len(train_idx)}, Val size = {len(val_idx)}")
        break  # Use only the first fold for now

    

    # class weights
    cw_vals = compute_class_weight('balanced', classes=np.arange(len(le.classes_)), y=y_list)
    class_weight = dict(enumerate(cw_vals))

    # model
    model = build_two_branch_model(pad_len, len(imu_cols), len(tof_cols), len(le.classes_), wd=WD)
    steps = len(X_tr)//BATCH_SIZE
    lr_sched = tf.keras.optimizers.schedules.CosineDecayRestarts(LR_INIT, first_decay_steps=5*steps)
    model.compile(optimizer=Adam(lr_sched),
                  loss=tf.keras.losses.CategoricalCrossentropy(label_smoothing=0.1),
                  metrics=['accuracy'])

    train_gen = MixupGenerator(X_tr, y_tr, batch_size=BATCH_SIZE, alpha=MIXUP_ALPHA)
    cb = EarlyStopping(patience=PATIENCE, restore_best_weights=True, verbose=1)
    model.fit(train_gen, epochs=EPOCHS, validation_data=(X_val, y_val),
              class_weight=class_weight, callbacks=[cb], verbose=1)

    model.save(EXPORT_DIR / "gesture_two_branch_mixup.h5")
    print("✔ Training done – artefacts saved in", EXPORT_DIR)

    # quick metric
    from cmi_2025_metric_copy_for_import import CompetitionMetric
    preds = model.predict(X_val).argmax(1)
    true  = y_val.argmax(1)
    h_f1 = CompetitionMetric().calculate_hierarchical_f1(
        pd.DataFrame({'gesture': le.classes_[true]}),
        pd.DataFrame({'gesture': le.classes_[preds]}))
    print("Hold‑out H‑F1 =", round(h_f1, 4))

else:
    print("▶ INFERENCE MODE – loading artefacts from", PRETRAINED_DIR)
    feature_cols   = np.load(PRETRAINED_DIR / "feature_cols.npy", allow_pickle=True).tolist()
    pad_len        = int(np.load(PRETRAINED_DIR / "sequence_maxlen.npy"))
    scaler         = joblib.load(PRETRAINED_DIR / "scaler.pkl")
    gesture_classes = np.load(PRETRAINED_DIR / "gesture_classes.npy", allow_pickle=True)

    imu_cols = [c for c in feature_cols if not (c.startswith('thm_') or c.startswith('tof_'))]
    tof_cols = [c for c in feature_cols if c.startswith('thm_') or c.startswith('tof_')]

    custom_objs = {
        'time_sum': time_sum,
        'squeeze_last_axis': squeeze_last_axis,
        'expand_last_axis': expand_last_axis,
        'se_block': se_block,
        'residual_se_cnn_block': residual_se_cnn_block,
        'attention_layer': attention_layer,
    }
    model = load_model(PRETRAINED_DIR / "gesture_two_branch_mixup.h5",
                       compile=False, custom_objects=custom_objs)
    print("  model, scaler, pads loaded – ready for evaluation")

▶ TRAIN MODE – loading dataset …
  IMU 7 | TOF/THM 325 | total 332 features
✅ Fold 0: Train size = 6520, Val size = 1631
Epoch 1/160
[1m102/102[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 54ms/step - accuracy: 0.1302 - loss: 3.4831 - val_accuracy: 0.1704 - val_loss: 3.0436
Epoch 2/160
[1m102/102[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 35ms/step - accuracy: 0.2505 - loss: 2.9292 - val_accuracy: 0.3335 - val_loss: 2.6248
Epoch 3/160
[1m102/102[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 35ms/step - accuracy: 0.3064 - loss: 2.7519 - val_accuracy: 0.4372 - val_loss: 2.3840
Epoch 4/160
[1m102/102[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 36ms/step - accuracy: 0.3576 - loss: 2.5537 - val_accuracy: 0.4592 - val_loss: 2.2970
Epoch 5/160
[1m102/102[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 35ms/step - accuracy: 0.3683 - loss: 2.6024 - val_accuracy: 0.4384 - val_loss: 2.3012
Epoch 6/160
[1m102/102[0m [32m━━━━━━━━━━━━━━━━━━━━[0m

## Predicting

In [17]:
# make sure gesture_classes exists in both modes
if TRAIN:
    gesture_classes = le.classes_


def predict(sequence: pl.DataFrame, demographics: pl.DataFrame) -> str:
    global gesture_classes
    if gesture_classes is None:
        gesture_classes = np.load(PRETRAINED_DIR / "gesture_classes.npy", allow_pickle=True)

    df_seq = sequence.to_pandas()
    mat = preprocess_sequence(df_seq, feature_cols, scaler)
    pad = pad_sequences([mat], maxlen=pad_len, padding='post', truncating='post', dtype='float32')
    idx = int(model.predict(pad, verbose=0).argmax(1)[0])
    return str(gesture_classes[idx])


## Integration with Kaggle Evaluation

In [18]:
import kaggle_evaluation.cmi_inference_server
inference_server = kaggle_evaluation.cmi_inference_server.CMIInferenceServer(predict)

if os.getenv('KAGGLE_IS_COMPETITION_RERUN'):
    inference_server.serve()
else:
    inference_server.run_local_gateway(
        data_paths=(
            '/kaggle/input/cmi-detect-behavior-with-sensor-data/test.csv',
            '/kaggle/input/cmi-detect-behavior-with-sensor-data/test_demographics.csv',
        )
    )

## Acknowledgement:
  [https://www.kaggle.com/code/vonmainstein/imu-tof](https://www.kaggle.com/code/vonmainstein/imu-tof)

  All credits go to this notebook. Have tried some played with some parameters to get better score. Hope this is useful.