Testing the inference function

***

In [1]:
import numpy as np
import pandas as pd
import joblib
from scipy.spatial.transform import Rotation as R
from sklearn.impute import SimpleImputer
# Note: RobustScaler is not needed here as we LOAD the fitted one
from tensorflow.keras.preprocessing.sequence import pad_sequences

# 1. LOAD YOUR FITTED SCALER (Do this once, outside the function)
# Ensure you saved this after training using joblib.dump(scaler, 'scaler.pkl')
# scaler = joblib.load('/kaggle/working/scaler.pkl') 

def process_single_sequence(
    df_seq: pd.DataFrame,
    fitted_scaler=None,
    maxlen: int = 700,
) -> np.ndarray:
    """Convert one raw sensor sequence into a scaled, padded model input batch.

    Processing order: de-duplicate rows, clean and impute sensor columns, drop
    metadata columns, append the engineered features returned by
    ``calculate_sequence_features``, order the columns (engineered IMU
    features first, then the remaining columns in their existing order),
    scale with a pre-fitted scaler, and pad to ``maxlen`` timesteps.

    Args:
        df_seq: Rows belonging to a single sequence. The caller's frame is
            not modified.
        fitted_scaler: Already-fitted object exposing ``transform``. When
            ``None``, the module-level ``scaler`` is used, which must have
            been loaded beforehand (e.g. with ``joblib.load``). Never fitted
            here.
        maxlen: Number of timesteps in the output. Shorter sequences are
            zero-padded at the end; longer ones are truncated according to
            ``pad_sequences``' default ``truncating`` setting.

    Returns:
        A ``float32`` array of shape ``(1, maxlen, n_features)``, i.e. a
        batch containing this single sequence.

    Raises:
        NameError: If ``fitted_scaler`` is ``None`` and no module-level
            ``scaler`` has been defined.
    """
    # Resolve the scaler up front so a missing scaler fails fast with a clear
    # message instead of after all the preprocessing work.
    if fitted_scaler is None:
        try:
            fitted_scaler = scaler  # module-level, loaded outside this function
        except NameError:
            raise NameError(
                "No scaler available: pass `fitted_scaler` or define a "
                "module-level `scaler` (e.g. scaler = joblib.load(...))."
            ) from None

    # drop_duplicates() returns a new frame, so the caller's data is untouched.
    # Resetting the index keeps the later column-wise concat aligned row by
    # row whether calculate_sequence_features preserves the input index or
    # returns a fresh 0..n-1 index.
    df_seq = df_seq.drop_duplicates().reset_index(drop=True)

    # ToF columns: the sentinel values -1 and 0, as well as missing readings,
    # all end up as 400.
    tof_cols = [c for c in df_seq.columns if c.startswith('tof_')]
    if tof_cols:
        df_seq[tof_cols] = (
            df_seq[tof_cols].replace([-1, 0], np.nan).fillna(400)
        )

    # Rotation and thermal columns: missing values are filled with the
    # constant 0 (not a per-column average).
    zero_filled = [
        "rot_w", "rot_x", "rot_y", "rot_z",
        "thm_1", "thm_2", "thm_3", "thm_4", "thm_5",
    ]
    present_zero_filled = [c for c in zero_filled if c in df_seq.columns]
    if present_zero_filled:
        df_seq[present_zero_filled] = df_seq[present_zero_filled].fillna(0)

    # Acceleration: linear interpolation in both directions, applied only
    # when all three axes are present.
    acc_cols = ["acc_x", "acc_y", "acc_z"]
    if all(c in df_seq.columns for c in acc_cols):
        df_seq[acc_cols] = df_seq[acc_cols].interpolate(
            method='linear', limit_direction='both'
        )

    # Metadata columns are not model inputs; ignore any that are absent.
    metadata_cols = [
        'row_id', 'sequence_type', 'sequence_counter', 'subject',
        'orientation', 'behavior', 'phase',
    ]
    df_seq = df_seq.drop(columns=metadata_cols, errors='ignore')

    engineered = calculate_sequence_features(df_seq)
    df_processed = pd.concat([df_seq, engineered], axis=1)

    # Column order must match training: engineered IMU features first, then
    # every other column in its existing order. Identifier/target columns
    # are excluded because they are neither scaled nor fed to the model.
    new_imu_features = [
        'lin_acc_x', 'lin_acc_y', 'lin_acc_z',
        'acc_mag', 'lin_acc_mag',
        'acc_mag_jerk', 'lin_acc_mag_jerk',
        'rot_angle', 'angular_distance', 'rot_angle_vel',
        'angular_vel_x', 'angular_vel_y', 'angular_vel_z',
    ]
    id_cols = ('sequence_id', 'gesture')
    engineered_present = [
        c for c in new_imu_features if c in df_processed.columns
    ]
    remaining = [
        c for c in df_processed.columns
        if c not in id_cols and c not in new_imu_features
    ]
    features = df_processed[engineered_present + remaining]

    # IMPORTANT: only transform() with the pre-fitted scaler; never fit() a
    # new one at inference time.
    scaled = np.asarray(fitted_scaler.transform(features))

    # Wrap in a list to form a batch of size 1 before padding.
    return pad_sequences(
        [scaled],
        padding='post',
        dtype='float32',
        maxlen=maxlen,
    )