In [1]:
import os
import random
import numpy as np
import pandas as pd
import os, json, joblib, re
import tqdm

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torch.optim.lr_scheduler import OneCycleLR, ReduceLROnPlateau

import polars as pl
from pathlib import Path
import warnings 
warnings.filterwarnings("ignore")

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.utils.class_weight import compute_class_weight

import gc  # garbage collection
import psutil
from scipy.spatial.transform import Rotation as R
from scipy.interpolate import CubicSpline, interp1d, PchipInterpolator


In [2]:
# (Competition metric will only be imported when TRAINing)
TRAIN = True                     # ← set to True when you want to train

class config:
    AMP = False
    BATCH_SIZE_TRAIN = 128 #32
    BATCH_SIZE_VALID = 128 #32
    DEBUG = False
    EPOCHS = 150  #30
    FOLDS = 5
    GRADIENT_ACCUMULATION_STEPS = 1
    LEARNING_RATE = 2e-3    #vTypical values: 1e-4 to 1e-2.  best 2e-3: .77  1e-3: .77  9e-4:.76  5e-4:.74
    MAX_GRAD_NORM = 1e7
    WD = 1e-2   #1e-4 (or try higher values like 1e-3 or 5e-4  5e-3:.77  1e-2: .7958
    NUM_WORKERS = 0 # multiprocessing.cpu_count()
    PRINT_FREQ = 20
    SEED = 20
    WEIGHT_DECAY = 0.01
    PAD_PERCENTILE = 95
    SEQUENCE_LENGTH = 150

class paths:
    BASE_DIR = Path("C:/Users/konno/SynologyDrive/datasciense/projects_foler/1_kaggle/CMI/cmi-detect-behavior-with-sensor-data")
    
    OUTPUT_DIR = BASE_DIR / "output-02-wavenet"
    TEST_CSV = BASE_DIR / "test.csv"
    TEST_DEMOGRAPHICS = BASE_DIR / "test_demographics.csv"
    TRAIN_CSV = BASE_DIR / "train.csv"
    TRAIN_DEMOGRAPHICS = BASE_DIR / "train_demographics.csv"

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

print("▶ imports ready · torch", torch.__version__, "device :", device)

▶ imports ready · torch 2.7.1+cu128 device : cuda


In [3]:
def seed_everything(seed=42):
    os.environ["PYTHONHASHSEED"] = str(seed)
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)  # if using multi-GPU
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

In [4]:

class MotionDataset(torch.utils.data.Dataset):
    def __init__(self, X, y, alpha=0.2):
        """
        X: np.array or torch.Tensor of shape (N, )
        y: np.array or torch.Tensor if shaoe (N, )
        alpha: Beta distribution parameter for mixup
        """
        self.X = torch.tensor(X, dtype=torch.float32) if isinstance(X, np.ndarray) else X
        self.y = torch.tensor(y, dtype=torch.float32) if isinstance(y, np.ndarray) else y
        self.alpha = alpha

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        x1, y1 = self.X[idx], self.y[idx]
        
        # Create shuffle tensor
        shuffle_index = np.random.randint(0, len(self.X))
        x2, y2 = self.X[shuffle_index], self.y[shuffle_index]       

        # Mix
        weight = np.random.beta(self.alpha, self.alpha)
        x_mix = x1 * weight + x2 * (1 - weight)
        y_mix = y1 * weight + y2 * (1 - weight)

        return x_mix, y_mix
    
# train_dataset = MixupDataset(config, df_train, X_tr, y_tr, y_soft_tr)
# train_loader = DataLoader(train_dataset, batch_size=config.BATCH_SIZE_TRAIN, shuffle=True)
# val_dataset = CustomDataset(config, df_train, X_val, y_val, y_soft_val)
# val_loader = DataLoader(val_dataset, batch_size=config.BATCH_SIZE_VALID, shuffle=True)

def pad_or_truncate(seq, max_len, mode=TRAIN, pad_value=0.0, dtype=np.float32) -> np.ndarray:
    """
    Pads or truncates a sequence to a fixed length.

    Parameters:
    - seq: np.ndarray of shape (L, D)
    - max_len: int, desired sequence length
    - mode: bool, True = random pad, False = regular pad
    - pad_value: float or int, value to use for padding
    - dtype: np.dtype, dtype for the output array

    Returns:
    - np.ndarray of shape (max_len, D)
    """
    # print("sequence shape", seq.shape)
    L, D = seq.shape
    # print("mode = ", mode)

    if L > max_len:
        return seq[:max_len] # truncate if too long

    elif L < max_len:
        total_padding = max_len - L
        
        if mode:
            pad_start = np.random.randint(0, total_padding + 1)
            pad_end = total_padding - pad_start
            
        else:
            pad_start = 0
            pad_end = total_padding

        start_padding = np.full((pad_start, D), pad_value, dtype=dtype)
        end_padding = np.full((pad_end, D), pad_value, dtype=dtype)
        padded = np.vstack((start_padding, seq, end_padding))
        # print("padded shape", padded.shape)
        return padded

    else:
        return seq.astype(dtype)

In [5]:
def remove_gravity_from_acc(acc_data, rot_data):

    if isinstance(acc_data, pd.DataFrame):
        acc_values = acc_data[['acc_x', 'acc_y', 'acc_z']].values
    else:
        acc_values = acc_data

    if isinstance(rot_data, pd.DataFrame):
        quat_values = rot_data[['rot_x', 'rot_y', 'rot_z', 'rot_w']].values
    else:
        quat_values = rot_data

    num_samples = acc_values.shape[0]
    linear_accel = np.zeros_like(acc_values)
    
    gravity_world = np.array([0, 0, 9.81])

    for i in range(num_samples):
        if np.all(np.isnan(quat_values[i])) or np.all(np.isclose(quat_values[i], 0)):
            linear_accel[i, :] = acc_values[i, :] 
            continue

        try:
            rotation = R.from_quat(quat_values[i])
            gravity_sensor_frame = rotation.apply(gravity_world, inverse=True)
            linear_accel[i, :] = acc_values[i, :] - gravity_sensor_frame
        except ValueError:
             linear_accel[i, :] = acc_values[i, :]
             
    return linear_accel

def calculate_angular_velocity_from_quat(rot_data, time_delta=1/200): # Assuming 200Hz sampling rate
    if isinstance(rot_data, pd.DataFrame):
        quat_values = rot_data[['rot_x', 'rot_y', 'rot_z', 'rot_w']].values
    else:
        quat_values = rot_data

    num_samples = quat_values.shape[0]
    angular_vel = np.zeros((num_samples, 3))

    for i in range(num_samples - 1):
        q_t = quat_values[i]
        q_t_plus_dt = quat_values[i+1]

        if np.all(np.isnan(q_t)) or np.all(np.isclose(q_t, 0)) or \
           np.all(np.isnan(q_t_plus_dt)) or np.all(np.isclose(q_t_plus_dt, 0)):
            continue

        try:
            rot_t = R.from_quat(q_t)
            rot_t_plus_dt = R.from_quat(q_t_plus_dt)

            # Calculate the relative rotation
            delta_rot = rot_t.inv() * rot_t_plus_dt
            
            # Convert delta rotation to angular velocity vector
            # The rotation vector (Euler axis * angle) scaled by 1/dt
            # is a good approximation for small delta_rot
            angular_vel[i, :] = delta_rot.as_rotvec() / time_delta
        except ValueError:
            # If quaternion is invalid, angular velocity remains zero
            pass
            
    return angular_vel

def calculate_angular_distance(rot_data):
    if isinstance(rot_data, pd.DataFrame):
        quat_values = rot_data[['rot_x', 'rot_y', 'rot_z', 'rot_w']].values
    else:
        quat_values = rot_data

    num_samples = quat_values.shape[0]
    angular_dist = np.zeros(num_samples)

    for i in range(num_samples - 1):
        q1 = quat_values[i]
        q2 = quat_values[i+1]

        if np.all(np.isnan(q1)) or np.all(np.isclose(q1, 0)) or \
           np.all(np.isnan(q2)) or np.all(np.isclose(q2, 0)):
            angular_dist[i] = 0 # Или np.nan, в зависимости от желаемого поведения
            continue
        try:
            # Преобразование кватернионов в объекты Rotation
            r1 = R.from_quat(q1)
            r2 = R.from_quat(q2)

            # Вычисление углового расстояния: 2 * arccos(|real(p * q*)|)
            # где p* - сопряженный кватернион q
            # В scipy.spatial.transform.Rotation, r1.inv() * r2 дает относительное вращение.
            # Угол этого относительного вращения - это и есть угловое расстояние.
            relative_rotation = r1.inv() * r2
            
            # Угол rotation vector соответствует угловому расстоянию
            # Норма rotation vector - это угол в радианах
            angle = np.linalg.norm(relative_rotation.as_rotvec())
            angular_dist[i] = angle
        except ValueError:
            angular_dist[i] = 0 # В случае недействительных кватернионов
            pass
            
    return angular_dist

def print_memory():
    process = psutil.Process()
    print(f"Memory Usage: {process.memory_info().rss / 1024**2:.2f} MB")

def parse_tof_column(col):
    # Match patterns like 'tof_1_v42' or 'tof_1_v42_norm'
    match = re.match(r"tof_(\d+)_v(\d+)", col)
    if match:
        sensor_num = int(match.group(1))
        pixel_num = int(match.group(2))
        return (sensor_num, pixel_num)
    else:
        return (float('inf'), float('inf'))  # put unmatchable columns at the end

In [None]:

class SEBlock1D(nn.Module):
    def __init__(self, channels, reduction=8):
        super().__init__()
        self.global_avg_pool = nn.AdaptiveAvgPool1d(1)
        self.fc1 = nn.Linear(channels, channels // reduction)
        self.fc2 = nn.Linear(channels // reduction, channels)

    def forward(self, x):
        # x shape: (batch, channels, time)
        b, c, _ = x.size()
        y = self.global_avg_pool(x).view(b, c)
        y = F.relu(self.fc1(y))
        y = torch.sigmoid(self.fc2(y)).view(b, c, 1)
        return x * y.expand_as(x)

class ResidualSEBlock1D(nn.Module):
    def __init__(self, in_channels, out_channels, kernel_size=3, pool_size=2, dropout=0.3):
        super().__init__()
        self.conv1 = nn.Conv1d(in_channels, out_channels, kernel_size, padding=kernel_size // 2, bias=False)
        self.bn1 = nn.GroupNorm(num_groups=8, num_channels=out_channels), #BatchNorm1d(out_channels)
        self.conv2 = nn.Conv1d(out_channels, out_channels, kernel_size, padding=kernel_size // 2, bias=False)
        self.bn2 = nn.GroupNorm(num_groups=8, num_channels=out_channels), #BatchNorm1d(out_channels)
        self.se = SEBlock1D(out_channels)
        
        self.match_channels = None
        if in_channels != out_channels:
            self.match_channels = nn.Sequential(
                nn.Conv1d(in_channels, out_channels, kernel_size=1, padding=0, bias=False),
                nn.GroupNorm(num_groups=8, num_channels=out_channels), #BatchNorm1d(out_channels)
            )
        
        self.pool = nn.MaxPool1d(pool_size)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        identity = x
        out = F.gelu(self.bn1(self.conv1(x)))  #relu
        out = F.gelu(self.bn2(self.conv2(out)))  #relu
        out = self.se(out)

        if self.match_channels is not None:
            identity = self.match_channels(identity)
        
        out = F.gelu(out + identity)  #relu
        out = self.pool(out)
        out = self.dropout(out)
        return out

class AttentionLayer(nn.Module):
    def __init__(self, input_dim):
        super().__init__()
        self.attn_fc = nn.Linear(input_dim, 1)

    def forward(self, x):
        # x shape: (batch, time, features)
        # Compute scores with tanh activation
        scores = torch.tanh(self.attn_fc(x))  # (batch, time, 1)
        scores = scores.squeeze(-1)           # (batch, time)

        # Softmax over time dimension to get weights
        weights = F.softmax(scores, dim=1)    # (batch, time)
        weights = weights.unsqueeze(-1)       # (batch, time, 1)

        # Weighted sum of input features over time
        context = (x * weights).sum(dim=1)    # (batch, features)
        return context
    

class TwoBranchGestureModel(nn.Module):
    def __init__(self, imu_dim, tof_dim, n_classes, wd=1e-4):
        super().__init__()
        self.imu_dim = imu_dim
        self.tof_dim = tof_dim
        
        # IMU deep branch
        self.imu_branch = nn.Sequential(
            ResidualSEBlock1D(imu_dim, 64, kernel_size=3, dropout=0.1),
            ResidualSEBlock1D(64, 128, kernel_size=5, dropout=0.1)
        )

        # TOF Lighter branch
        self.tof_branch = nn.Sequential(
            nn.Conv1d(tof_dim, 64, kernel_size=3, padding=1, bias=False),
            nn.GroupNorm(num_groups=8, num_channels=64),  #BatchNorm1d(64),
            nn.GELU(), #ReLU(),
            nn.MaxPool1d(2),
            nn.Dropout(0.2),
            nn.Conv1d(64, 128, kernel_size=3, padding=1, bias=False),
            nn.GroupNorm(num_groups=8, num_channels=64),  #BatchNorm1d(128),
            nn.GELU(), #ReLU(),
            nn.MaxPool1d(2),
            nn.Dropout(0.2),
        )

        self.lstm = nn.LSTM(256, 128, batch_first=True, bidirectional=True)
        self.gru = nn.GRU(256, 128, batch_first=True, bidirectional=True)

        # Gaussian noise (manual) and projection
        self.projection = nn.Sequential(
            nn.Dropout(0.09),
            nn.Linear(256, 16),
            nn.ELU()
        )

        self.pre_attn_dropout = nn.Dropout(0.2)
        self.attn = AttentionLayer(512)

        # Dense layer
        self.mlp = nn.Sequential(
            nn.Linear(512, 128, bias=False),
            nn.BatchNorm1d(128),
            nn.GELU(), #ReLU(),
            nn.Dropout(0.5),
            nn.Linear(128, 64, bias=False),
            nn.GELU(), #ReLU(),
            nn.Dropout(0.3),
            nn.Linear(64, n_classes),  # Softmax handled by loss (e.g., CrossEntropyLoss)
        )
    
    def forward(self, x):
        imu = x[:, :, :self.imu_dim].permute(0, 2, 1) #(B, imu_dim, T)
        tof = x[: ,:, self.imu_dim:].permute(0, 2, 1) #(B, tof_dim, T)

        imu_feat = self.imu_branch(imu)
        tof_feat = self.tof_branch(tof)

        imu_feat = imu_feat.permute(0, 2, 1)
        tof_feat = tof_feat.permute(0, 2, 1)

        merged = torch.cat([imu_feat, tof_feat], dim=-1)  #(B, T', 256)

        if self.training:
            noise = torch.randn_like(merged) * 0.015
            merged = merged + noise

        xa, _ = self.lstm(merged)
        xb, _ = self.gru(merged)
        # xc = self.projection(merged)

        x_cat = torch.cat([xa, xb], dim=-1)  #(B, T', 512)
        x_cat = self.pre_attn_dropout(x_cat)
        context = self.attn(x_cat)

        return self.mlp(context)

In [7]:
# ####  DATA SAMPLE to delete
# df_data = pd.read_csv(paths.TRAIN_CSV, nrows=5000)
# df = df_data.fillna(0)

# le = LabelEncoder()
# df['gesture_int'] = le.fit_transform(df['gesture'])
# np.save(paths.OUTPUT_DIR / "gesture_classes.npy", le.classes_)

# # print(df[['gesture_int', 'gesture', 'acc_x']].groupby('gesture').first())

# seq_gp = df.groupby('sequence_id') 
# seq_id, group_df = next(iter(seq_gp))
# # print("seq id", seq_id)
# # print("group df:", group_df[['gesture_int', 'gesture', 'acc_x']][:3])
# # print("df", df[['gesture_int', 'gesture', 'acc_x']][:3])

# all_steps_for_scaler_list = []
# X_list_unscaled, y_list_int_for_stratify, lens = [], [], [] 

# for seq_id, seq_df_orig in seq_gp:
#     seq_df = seq_df_orig.copy()

#     y_list_int_for_stratify.append(seq_df['gesture_int'].iloc[0])

# # print(y_list_int_for_stratify[:10])

# labels_tensor = torch.tensor(df['gesture_int'].values, dtype=torch.long)
# one_hot_tensor = F.one_hot(labels_tensor, num_classes=len(le.classes_))
# df['gesture_int_oh'] = one_hot_tensor.numpy().tolist()  # now each cell is a list

# subset_df = df[['gesture_int', 'gesture_int_oh']].head(500)
# subset_df.to_csv('gesture_with_onehot.csv', index=False)




# class TwoBranchGestureModel(nn.Module):
#     def __init__(self, imu_dim, tof_dim, n_classes, wd=config.WD):
#         super().__init__()
#         self.imu_dim = imu_dim
#         self.tof_dim = tof_dim
        
#         # IMU deep branch
#         self.imu_branch = nn.Sequential(
#             ResidualSEBlock1D(imu_dim, 32, kernel_size=3, dropout=0.4),  #64 0.1
#             ResidualSEBlock1D(32, 64, kernel_size=5, dropout=0.3)   #64, 128        
#             )

#         # TOF Lighter branch
#         self.tof_branch = nn.Sequential(
#             nn.Conv1d(tof_dim, 32, kernel_size=3, padding=1, bias=False),  #64
#             nn.BatchNorm1d(32),
#             nn.ReLU(),
#             nn.MaxPool1d(2),
#             nn.Dropout(0.4), #0.2
#             nn.Conv1d(32, 64, kernel_size=3, padding=1, bias=False),
#             nn.BatchNorm1d(64),
#             nn.MaxPool1d(2),
#             nn.Dropout(0.3),  #0.3
#         )

#         # self.lstm = nn.LSTM(128, 64, batch_first=True, bidirectional=True)  #256, 128
#         self.gru = nn.GRU(128, 64, batch_first=True, bidirectional=True)

#         # Gaussian noise (manual) and projection
#         # self.projection = nn.Sequential(
#         #     nn.Dropout(0.1),  #0.09
#         #     nn.Linear(128, 16),  #256
#         #     nn.ELU()
#         # )
#         self.projection = nn.AdaptiveAvgPool1d(1)

#         self.pre_attn_dropout = nn.Dropout(0.4)
#         self.attn = AttentionLayer(128)  #128*2 + 128*2 + 16

#         # Dense layer
#         self.mlp = nn.Sequential(
#             nn.Linear(128, 128, bias=False),   #528 , 256
#             nn.BatchNorm1d(128),
#             nn.ReLU(),
#             nn.Dropout(0.5),
#             nn.Linear(128, 64, bias=False),
#             nn.ReLU(),
#             nn.Dropout(0.4),
#             nn.Linear(64, n_classes),  # Softmax handled by loss (e.g., CrossEntropyLoss)
#         )
    
#     def forward(self, x):
#         imu = x[:, :, :self.imu_dim].permute(0, 2, 1) #(B, imu_dim, T)
#         tof = x[: ,:, self.imu_dim:].permute(0, 2, 1) #(B, tof_dim, T)

#         imu_feat = self.imu_branch(imu)
#         tof_feat = self.tof_branch(tof)

#         imu_feat = imu_feat.permute(0, 2, 1)
#         tof_feat = tof_feat.permute(0, 2, 1)

#         merged = torch.cat([imu_feat, tof_feat], dim=-1)  #(B, T', 256)

#         # xa, _ = self.lstm(merged)
#         xb, _ = self.gru(merged)
#         # xb = xb.permute(0, 2, 1) 
#         # xb_pooled = F.adaptive_avg_pool1d(xb, 1).squeeze(-1)  # [B, F]

#         xb = self.pre_attn_dropout(xb) # Add dropout before attention
#         # xc = self.projection(merged)
#         # xc = self.projection(merged.permute(0, 2, 1)).squeeze(-1)  # [B, F]
#         # print(f"xa shape {xa.shape} / xb shape {xb.shape} / xc shape {xc.shape}")

#         attended_output = self.attn(xb) # Assuming attn takes (B, T, F) and outputs (B, F)
#         # x_cat = torch.cat([xb, xc], dim=-1)  #(B, T', 512)
#         # x_cat = self.pre_attn_dropout(x_cat)
#         # x_cat = torch.cat([xb_pooled, xc], dim=-1)  # (B, features, T)
#         # context = F.adaptive_avg_pool1d(x_cat, 1).squeeze(-1)  # (B, features)
#         # context = self.attn(x_cat)

#         return self.mlp(attended_output)


In [8]:
### DATA CREATION and PRE PROCESSING

print("▶ TRAIN MODE – loading dataset …")

df_data = pd.read_csv(paths.TRAIN_CSV)
df_data = df_data.fillna(0)

train_dem_df = pd.read_csv(paths.TRAIN_DEMOGRAPHICS)
df = pd.merge(df_data.copy(), train_dem_df, on='subject', how='left')
print("merged df shape :", df.shape)

le = LabelEncoder()
df['gesture_int'] = le.fit_transform(df['gesture'])
np.save(paths.OUTPUT_DIR / "gesture_classes.npy", le.classes_)
gesture_classes = le.classes_

print_memory()

print(" 0/6 Calculating elbow_to_wrist_cm shoulder_to_wrist_cm adjustment ...")

df["acc_x_norm_ew"] = df["acc_x"] / df["elbow_to_wrist_cm"]
df["acc_y_norm_ew"] = df["acc_y"] / df["elbow_to_wrist_cm"]
df["acc_z_norm_ew"] = df["acc_z"] / df["elbow_to_wrist_cm"]

df["acc_x_norm_sw"] = df["acc_x"] / df["shoulder_to_wrist_cm"]
df["acc_y_norm_sw"] = df["acc_y"] / df["shoulder_to_wrist_cm"]
df["acc_z_norm_sw"] = df["acc_z"] / df["shoulder_to_wrist_cm"]

print(" 1/6 Calculating base engineered IMU features (magnitude, angle) ...")

df['acc_mag'] = np.sqrt(df['acc_x']**2 + df['acc_y']**2 + df['acc_z']**2)
df['rot_angle'] = 2* np.arccos(df['rot_w'].clip(-1, 1))

print(" 2/6 Calculating engineered IMU derivatives (jerk, angular velocity) for original acc_mag ...")

df['acc_mag_jerk'] = df.groupby('sequence_id')['acc_mag'].diff().fillna(0)
df['rot_angle_vel'] = df.groupby('sequence_id')['rot_angle'].diff().fillna(0)

print(" 3/6 Removing gravity and calculating linear acceleration features...")

linear_accel_list = []
for _, group in df.groupby('sequence_id'):
    acc_data_group = group[['acc_x', 'acc_y', 'acc_z']]
    rot_data_group = group[['rot_x', 'rot_y', 'rot_z', 'rot_w']]
    linear_accel_group = remove_gravity_from_acc(acc_data_group, rot_data_group)
    linear_accel_list.append(pd.DataFrame(linear_accel_group, columns=['linear_acc_x', 'linear_acc_y', 'linear_acc_z'], index=group.index))

df_linear_accel = pd.concat(linear_accel_list)
df = pd.concat([df, df_linear_accel], axis=1)
del df_linear_accel, linear_accel_list  # Memory Management
gc.collect()  # Memory Management

df['linear_acc_mag'] = np.sqrt(df['linear_acc_x']**2 + df['linear_acc_y']**2 + df['linear_acc_z']**2)
df['linear_acc_mag_jerk'] = df.groupby('sequence_id')['linear_acc_mag'].diff().fillna(0)

print(" 4/6 Calculating angular velocity from quaternion derivatives...")
angular_vel_list = []
for _, group in df.groupby('sequence_id'):
    rot_data_group = group[['rot_x', 'rot_y', 'rot_z', 'rot_w']]
    angular_vel_group = calculate_angular_velocity_from_quat(rot_data_group)
    angular_vel_list.append(pd.DataFrame(angular_vel_group, columns=['angular_vel_x', 'angular_vel_y', 'angular_vel_z'], index=group.index))

df_angular_vel = pd.concat(angular_vel_list)
df = pd.concat([df, df_angular_vel], axis=1)
del angular_vel_list, df_angular_vel # Memory Management
gc.collect() # Memory Management

print(" 5/6 Calculating angular distance between successive quaternions...")
angular_distance_list = []
for _, group in df.groupby('sequence_id'):
    rot_data_group = group[['rot_x', 'rot_y', 'rot_z', 'rot_w']]
    angular_dist_group = calculate_angular_distance(rot_data_group)
    angular_distance_list.append(pd.DataFrame(angular_dist_group, columns=['angular_distance'], index=group.index))

df_angular_distance = pd.concat(angular_distance_list)
df = pd.concat([df, df_angular_distance], axis=1)
del angular_distance_list, df_angular_distance # Memory Management
gc.collect() # Memory Management

print_memory()

meta_cols = { } # This was an empty dict in your provided code, keeping it as is.

print(" 6/6 Calculating imu_cols_base ...")
imu_cols_orig = ['acc_x', 'acc_y', 'acc_z',
            'rot_w', 'rot_x', 'rot_y', 'rot_z',
            'thm_1', 'thm_2', 'thm_3', 'thm_4', 'thm_5']

imu_cols_base = ['linear_acc_x', 'linear_acc_y', 'linear_acc_z']
imu_cols_base.extend([c for c in df.columns if c.startswith('rot_') and c not in ['rot_angle', 'rot_angle_vel']])

imu_engineered_features = [
    'acc_x_norm_ew', 'acc_y_norm_ew', 'acc_z_norm_ew',  # new from demographics
    'acc_x_norm_sw', 'acc_y_norm_sw', 'acc_z_norm_sw',  # new from demographics
    'acc_mag', 'rot_angle',
    'acc_mag_jerk', 'rot_angle_vel',
    'linear_acc_mag', 'linear_acc_mag_jerk',
    'angular_vel_x', 'angular_vel_y', 'angular_vel_z', # Existing new features
    'angular_distance' # Added new feature
]

dem_features = [
    'adult_child', 'age',
    'sex', 'handedness',
]

imu_cols = list(dict.fromkeys(imu_cols_orig + imu_cols_base + imu_engineered_features + dem_features))  # Remove dups

print("length of imu_cols :", len(imu_cols), "Obtaining tof columns ......")

tof_columns = [col for col in df.columns if col.startswith("tof_")]
tof_columns = sorted(tof_columns, key=parse_tof_column)

sequence_ids = df["sequence_id"].unique()

print("tof_columns length :", len(tof_columns))

del imu_cols_orig, imu_cols_base, imu_engineered_features, dem_features # Memory Management
gc.collect() # Memory Management

print("✅ Preprocessing done.")
print_memory()

# thm_cols_original = [c for c in df.columns if c.startswith('thm_')

▶ TRAIN MODE – loading dataset …
merged df shape : (574945, 348)
Memory Usage: 3649.21 MB
 0/6 Calculating elbow_to_wrist_cm shoulder_to_wrist_cm adjustment ...
 1/6 Calculating base engineered IMU features (magnitude, angle) ...
 2/6 Calculating engineered IMU derivatives (jerk, angular velocity) for original acc_mag ...
 3/6 Removing gravity and calculating linear acceleration features...
 4/6 Calculating angular velocity from quaternion derivatives...
 5/6 Calculating angular distance between successive quaternions...
Memory Usage: 5366.58 MB
 6/6 Calculating imu_cols_base ...
length of imu_cols : 35 Obtaining tof columns ......
tof_columns length : 320
✅ Preprocessing done.
Memory Usage: 5366.59 MB


In [9]:
### DATA CONFIGURATION

# Estimate the max length
sequence_lengths = df.groupby('sequence_id').size().values  # length of each sequence
SEQUENCE_LENGTH = int(np.percentile(sequence_lengths, 95))
print("SEQUENCE_LENGTH :", SEQUENCE_LENGTH)

X_2dim = df[imu_cols + tof_columns]
X_list = []
y_list = []

for seq_id, group in df.groupby('sequence_id', sort=False):
    X_seq = group[imu_cols + tof_columns].values.astype(np.float32)
    X_list.append(X_seq)
    y_list.append(group['gesture_int'].iloc[0])

X_padded = np.stack([pad_or_truncate(seq, SEQUENCE_LENGTH) for seq in X_list])
X = torch.tensor(X_padded, dtype=torch.float32)
y = F.one_hot(torch.tensor(np.array(y_list)), num_classes=len(le.classes_)).float()
print(f"X shape {X.shape} | y shape {y.shape}")

X_tr, X_val, y_tr, y_val = train_test_split(
    X, y,
    test_size=0.2,  # 20% validation
    random_state=42,
    stratify=df.groupby("sequence_id")["gesture"].first()  # keeps gesture label distribution balanced
)

## Sanity Check
for i, (seq_id, group) in enumerate(df.groupby('sequence_id', sort=False)):
    assert y_list[i] == group['gesture_int'].iloc[0]
print("length of imu", len(imu_cols))
print("length of tof", len(tof_columns))

train_dataset = MotionDataset(X_tr, y_tr, alpha=0.2)
val_dataset   = MotionDataset(X_val, y_val, alpha=0.2)

train_loader = DataLoader(train_dataset, batch_size=config.BATCH_SIZE_TRAIN, shuffle=True, num_workers=0)
val_loader   = DataLoader(val_dataset, batch_size=config.BATCH_SIZE_VALID, shuffle=False, num_workers=0)

del X_list, y_list, X_padded

print_memory()

SEQUENCE_LENGTH : 127
X shape torch.Size([8151, 127, 355]) | y shape torch.Size([8151, 18])
length of imu 35
length of tof 320
Memory Usage: 9749.54 MB


In [10]:

model = TwoBranchGestureModel(
    imu_dim=len(imu_cols),         # channels per node (ToF + IMU)
    tof_dim=len(tof_columns),         # channels per node (ToF + IMU)
    n_classes=len(df["gesture"].unique()),  # e.g., 20
).to(device)

In [11]:
print("⏩ training started .....")

sequence_labels = df.groupby('sequence_id').first()['gesture_int'].values
print(np.unique(sequence_labels)) 
cw_vals = compute_class_weight(
    class_weight='balanced',
    classes=np.unique(sequence_labels),
    y=torch.argmax(y, dim=1).numpy()
    )
print(cw_vals.shape)  # should be (num_classes,)
optimizer = torch.optim.AdamW(model.parameters(), lr=config.LEARNING_RATE, weight_decay=config.WD)
weights_tensor = torch.tensor(cw_vals, dtype=torch.float32).to(device)
loss_fn = nn.CrossEntropyLoss(weight=weights_tensor, label_smoothing=0.1)
# loss_fn = nn.CrossEntropyLoss(label_smoothing=0.1)

print("▶️ Setting scheduler  .....")
steps = []
lrs = []
best_val_acc = 0
patience, patience_counter = 10, 0
EPOCHS = config.EPOCHS
scheduler = ReduceLROnPlateau(
    optimizer,
    mode='max',
    factor=0.5,
    patience=3,
    # verbose=True, 
    threshold=0.0001,
    threshold_mode='rel',
)

print("✅ Epoch starts .....")
import itertools

max_batches = 5


for epoch in range(EPOCHS):
    model.train()
    total_loss = 0
    correct = 0         # <-- reset here
    total = 0           # <-- reset here
    # for batch_idx, batch in tqdm.tqdm(enumerate(itertools.islice(train_loader, max_batches))):        
    for batch_idx, batch in tqdm.tqdm(enumerate(train_loader)):
        xb, yb = batch[0].to(device), batch[1].to(device)
        # if batch_idx == 0:
        #     print(f"Batch {batch_idx}: x_imu shape {x_imu.shape}, x_tof shape {x_tof.shape}")

        # if batch_idx == 0:
        #     print(f"Batch {batch_idx}: x_imu shape {xb.shape}, x_tof shape {yb.shape}")
        optimizer.zero_grad()        
        logits = model(xb)
        # print("logits.shape:", logits.shape)
        # print("yb_indices.shape:", yb.shape)
        yb_indices = yb.argmax(dim=1)
        loss = loss_fn(logits, yb_indices)
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)  # optional
        optimizer.step()        

        total_loss += loss.item()
        lrs.append(scheduler.get_last_lr()[0])
        steps.append(epoch * config.BATCH_SIZE_TRAIN + batch_idx)
        
        logits_arg = logits.argmax(dim=1)
        correct += (logits_arg == yb_indices).sum().item()
        total += yb_indices.size(0)

    train_acc = correct / total
    current_lr = optimizer.param_groups[0]['lr']
    print(f"Epoch {epoch} | Train Loss: {total_loss / len(train_loader):.4f} | Train Acc: {train_acc:.4f} | lr = {current_lr:.6f}")
    
    # Validation
    model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for batch in val_loader:
            xb, yb = batch[0].to(device), batch[1].to(device)

            x_preds = model(xb)
            logits = x_preds.argmax(dim=1)
            true_labels = yb.argmax(1) if yb.ndim > 1 else yb  #.argmax(1)  val_loader comes from a standard dataset with "y" as class index (long), you don’t need argmax.
            correct += (logits == true_labels).sum().item()
            total += true_labels.size(0)
    val_acc = correct / total
    print(f"Epoch {epoch} | Val Acc: {val_acc:.4f}")

    scheduler.step(val_acc)

    if val_acc > best_val_acc:
        best_val_acc = val_acc
        patience_counter = 0
        torch.save(model.state_dict(), paths.OUTPUT_DIR / "best_model.pt")
    else:
        patience_counter += 1
        print("patience_counter :", patience_counter)
        if patience_counter >= patience:
            print("Early stopping triggered.")
            break
print(f"Best Model : {best_val_acc:.4f}")

⏩ training started .....
[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17]
(18,)
▶️ Setting scheduler  .....
✅ Epoch starts .....


51it [00:04, 12.17it/s]


Epoch 0 | Train Loss: 2.5386 | Train Acc: 0.2259 | lr = 0.002000
Epoch 0 | Val Acc: 0.3495


51it [00:03, 16.66it/s]


Epoch 1 | Train Loss: 2.0398 | Train Acc: 0.3956 | lr = 0.002000
Epoch 1 | Val Acc: 0.3998


51it [00:03, 16.24it/s]


Epoch 2 | Train Loss: 1.8911 | Train Acc: 0.4656 | lr = 0.002000
Epoch 2 | Val Acc: 0.4838


51it [00:03, 16.91it/s]


Epoch 3 | Train Loss: 1.8404 | Train Acc: 0.5018 | lr = 0.002000
Epoch 3 | Val Acc: 0.5285


51it [00:03, 16.93it/s]


Epoch 4 | Train Loss: 1.8073 | Train Acc: 0.5491 | lr = 0.002000
Epoch 4 | Val Acc: 0.5782


51it [00:02, 17.50it/s]


Epoch 5 | Train Loss: 1.6903 | Train Acc: 0.5753 | lr = 0.002000
Epoch 5 | Val Acc: 0.5947


51it [00:03, 16.66it/s]


Epoch 6 | Train Loss: 1.6465 | Train Acc: 0.5974 | lr = 0.002000
Epoch 6 | Val Acc: 0.5788
patience_counter : 1


51it [00:02, 17.90it/s]


Epoch 7 | Train Loss: 1.6271 | Train Acc: 0.6126 | lr = 0.002000
Epoch 7 | Val Acc: 0.5849
patience_counter : 2


51it [00:02, 18.12it/s]


Epoch 8 | Train Loss: 1.5964 | Train Acc: 0.6235 | lr = 0.002000
Epoch 8 | Val Acc: 0.6119


51it [00:02, 17.12it/s]


Epoch 9 | Train Loss: 1.5795 | Train Acc: 0.6399 | lr = 0.002000
Epoch 9 | Val Acc: 0.6186


51it [00:03, 16.80it/s]


Epoch 10 | Train Loss: 1.5579 | Train Acc: 0.6500 | lr = 0.002000
Epoch 10 | Val Acc: 0.6358


51it [00:02, 17.07it/s]


Epoch 11 | Train Loss: 1.5276 | Train Acc: 0.6563 | lr = 0.002000
Epoch 11 | Val Acc: 0.6156
patience_counter : 1


51it [00:02, 17.97it/s]


Epoch 12 | Train Loss: 1.5244 | Train Acc: 0.6675 | lr = 0.002000
Epoch 12 | Val Acc: 0.6303
patience_counter : 2


51it [00:02, 17.76it/s]


Epoch 13 | Train Loss: 1.5303 | Train Acc: 0.6706 | lr = 0.002000
Epoch 13 | Val Acc: 0.5181
patience_counter : 3


51it [00:02, 17.63it/s]


Epoch 14 | Train Loss: 1.5073 | Train Acc: 0.6845 | lr = 0.002000
Epoch 14 | Val Acc: 0.6291
patience_counter : 4


51it [00:03, 15.63it/s]


Epoch 15 | Train Loss: 1.4318 | Train Acc: 0.7112 | lr = 0.001000
Epoch 15 | Val Acc: 0.6910


51it [00:03, 15.31it/s]


Epoch 16 | Train Loss: 1.4012 | Train Acc: 0.7316 | lr = 0.001000
Epoch 16 | Val Acc: 0.6511
patience_counter : 1


51it [00:03, 14.94it/s]


Epoch 17 | Train Loss: 1.3944 | Train Acc: 0.7456 | lr = 0.001000
Epoch 17 | Val Acc: 0.6732
patience_counter : 2


51it [00:03, 15.96it/s]


Epoch 18 | Train Loss: 1.3758 | Train Acc: 0.7439 | lr = 0.001000
Epoch 18 | Val Acc: 0.6934


51it [00:03, 14.54it/s]


Epoch 19 | Train Loss: 1.3680 | Train Acc: 0.7495 | lr = 0.001000
Epoch 19 | Val Acc: 0.6836
patience_counter : 1


51it [00:03, 15.45it/s]


Epoch 20 | Train Loss: 1.3471 | Train Acc: 0.7569 | lr = 0.001000
Epoch 20 | Val Acc: 0.7247


51it [00:03, 14.68it/s]


Epoch 21 | Train Loss: 1.3263 | Train Acc: 0.7660 | lr = 0.001000
Epoch 21 | Val Acc: 0.7143
patience_counter : 1


51it [00:03, 14.26it/s]


Epoch 22 | Train Loss: 1.3401 | Train Acc: 0.7729 | lr = 0.001000
Epoch 22 | Val Acc: 0.6830
patience_counter : 2


51it [00:03, 15.67it/s]


Epoch 23 | Train Loss: 1.3305 | Train Acc: 0.7801 | lr = 0.001000
Epoch 23 | Val Acc: 0.6990
patience_counter : 3


51it [00:03, 14.04it/s]


Epoch 24 | Train Loss: 1.3259 | Train Acc: 0.7775 | lr = 0.001000
Epoch 24 | Val Acc: 0.6996
patience_counter : 4


51it [00:04, 12.49it/s]


Epoch 25 | Train Loss: 1.3034 | Train Acc: 0.7923 | lr = 0.000500
Epoch 25 | Val Acc: 0.7376


51it [00:04, 12.13it/s]


Epoch 26 | Train Loss: 1.2713 | Train Acc: 0.8071 | lr = 0.000500
Epoch 26 | Val Acc: 0.7069
patience_counter : 1


51it [00:03, 14.82it/s]


Epoch 27 | Train Loss: 1.2592 | Train Acc: 0.8137 | lr = 0.000500
Epoch 27 | Val Acc: 0.7167
patience_counter : 2


51it [00:03, 15.69it/s]


Epoch 28 | Train Loss: 1.2577 | Train Acc: 0.8163 | lr = 0.000500
Epoch 28 | Val Acc: 0.7278
patience_counter : 3


51it [00:03, 16.36it/s]


Epoch 29 | Train Loss: 1.2398 | Train Acc: 0.8173 | lr = 0.000500
Epoch 29 | Val Acc: 0.7051
patience_counter : 4


51it [00:03, 16.95it/s]


Epoch 30 | Train Loss: 1.2252 | Train Acc: 0.8347 | lr = 0.000250
Epoch 30 | Val Acc: 0.7370
patience_counter : 5


51it [00:03, 14.67it/s]


Epoch 31 | Train Loss: 1.1966 | Train Acc: 0.8339 | lr = 0.000250
Epoch 31 | Val Acc: 0.7357
patience_counter : 6


51it [00:03, 15.79it/s]


Epoch 32 | Train Loss: 1.2008 | Train Acc: 0.8344 | lr = 0.000250
Epoch 32 | Val Acc: 0.7198
patience_counter : 7


51it [00:03, 14.71it/s]


Epoch 33 | Train Loss: 1.2007 | Train Acc: 0.8433 | lr = 0.000250
Epoch 33 | Val Acc: 0.7357
patience_counter : 8


51it [00:03, 15.25it/s]


Epoch 34 | Train Loss: 1.1873 | Train Acc: 0.8489 | lr = 0.000125
Epoch 34 | Val Acc: 0.7253
patience_counter : 9


51it [00:03, 15.17it/s]


Epoch 35 | Train Loss: 1.1800 | Train Acc: 0.8491 | lr = 0.000125
Epoch 35 | Val Acc: 0.7357
patience_counter : 10
Early stopping triggered.
Best Model : 0.7376
