In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found beneath the Kaggle input directory.
for folder, _subdirs, names in os.walk('/kaggle/input'):
    for name in names:
        print(os.path.join(folder, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
#score - 0.68

In [None]:
import pandas as pd
import numpy as np

In [None]:
# Load the raw training rows; the feature-extraction cells below group this
# frame by 'sequence_id'.
df1_demo=pd.read_csv("/kaggle/input/cmi-detect-behavior-with-sensor-data/train.csv")
df1_demo.head()

In [None]:
import numpy as np
import pandas as pd
from scipy.stats import skew, kurtosis, iqr, entropy
from scipy.signal import welch
from scipy.fft import fft, fftfreq
import pywt

def zero_crossing_rate(signal):
    """Return the fraction of consecutive sample pairs whose sign differs.

    A sample equal to exactly zero has sign 0, so a move to or from zero
    also counts as a change.
    """
    sign_steps = np.diff(np.sign(signal))
    return np.mean(sign_steps != 0)

def mean_crossing_rate(signal):
    """Return the fraction of consecutive sample pairs that lie on different
    sides of the signal's own mean (sign change after mean removal)."""
    centered = signal - np.mean(signal)
    sign_steps = np.diff(np.sign(centered))
    return np.mean(sign_steps != 0)

def signal_energy(signal):
    """Return the sum of squared samples of ``signal``."""
    squared = np.square(signal)
    return squared.sum()

def get_fft_features(signal, fs=50):
    """Summarise the first ``N // 2`` FFT bins of a mean-removed signal.

    Parameters
    ----------
    signal : array-like
        Samples of one channel.
    fs : float, default 50
        Sampling rate used to map FFT bins to frequencies.

    Returns
    -------
    tuple
        ``(dominant_freq, spectral_centroid, spectral_entropy, band_power)``.
        All four entries are NaN when the signal has at most one sample and
        all four are 0 when the spectrum magnitudes sum to zero (for example
        a constant signal).
    """
    centered = signal - np.mean(signal)
    n_samples = len(centered)
    if n_samples <= 1:
        return np.nan, np.nan, np.nan, np.nan

    half = n_samples // 2
    magnitudes = np.abs(fft(centered))[:half]
    bin_freqs = fftfreq(n_samples, d=1 / fs)[:half]

    magnitude_total = magnitudes.sum()
    if magnitude_total == 0:
        return 0, 0, 0, 0

    peak_freq = bin_freqs[np.argmax(magnitudes)]
    centroid = np.sum(bin_freqs * magnitudes) / magnitude_total

    # Power spectrum; its normalised form is treated as a probability
    # distribution for the entropy.
    power = magnitudes ** 2
    total_power = np.sum(power)
    spec_entropy = entropy(power / total_power)
    return peak_freq, centroid, spec_entropy, total_power

def get_wavelet_features(signal, wavelet='db4'):
    """Return the mean squared value of each detail band of a 2-level DWT.

    The approximation band (first element returned by ``pywt.wavedec``) is
    skipped. Signals shorter than 8 samples are not decomposed and yield a
    list of NaN placeholders instead.
    """
    if len(signal) < 8:
        # NOTE(review): this placeholder has 4 entries, whereas the normal path
        # returns one value per detail band (presumably 2 for level=2); the
        # visible caller only reads indices 0 and 1.
        return [np.nan] * 4
    _approx, *detail_bands = pywt.wavedec(signal, wavelet, level=2)
    return [np.mean(np.square(band)) for band in detail_bands]

def extract_imu_features(segment):
    """Build a flat dict of IMU features for one sequence.

    Parameters
    ----------
    segment : pandas.DataFrame
        Rows of a single sequence; must contain the columns ``acc_x/y/z`` and
        ``rot_x/y/z/w``.

    Returns
    -------
    dict
        For each of the seven channels: 18 statistics (time-domain stats,
        crossing rates, FFT summary, two wavelet energies). Additionally
        ``jerk_mean``/``jerk_std`` (norm of the first difference of the
        acceleration vectors) and ``acc_mag_*``/``rot_mag_*`` (norm of the
        acc vector and of ``rot_x, rot_y, rot_z``). Any group with no
        non-NaN data is filled with NaN.
    """
    acc_cols = ['acc_x', 'acc_y', 'acc_z']
    rot_cols = ['rot_x', 'rot_y', 'rot_z', 'rot_w']
    # Key order matters: it determines the column order of the feature table.
    stat_names = ('mean', 'std', 'min', 'max', 'range', 'median', 'iqr', 'skew', 'kurt',
                  'energy', 'zcr', 'mcr', 'fft_dom', 'fft_centroid', 'fft_entropy', 'fft_bandpow',
                  'wav_energy_1', 'wav_energy_2')
    features = {}

    for col in acc_cols + rot_cols:
        keys = [f'{col}_{name}' for name in stat_names]
        values = segment[col].dropna().values
        if values.size == 0:
            features.update((key, np.nan) for key in keys)
            continue

        stats = [
            np.mean(values),
            np.std(values),
            np.min(values),
            np.max(values),
            np.ptp(values),
            np.median(values),
            iqr(values),
            skew(values),
            kurtosis(values),
            signal_energy(values),
            zero_crossing_rate(values),
            mean_crossing_rate(values),
        ]

        # Spectral summary: dominant frequency, centroid, entropy, band power.
        dom_freq, centroid, spec_ent, bandpow = get_fft_features(values)
        stats += [dom_freq, centroid, spec_ent, bandpow]

        # Only the first two wavelet energies are kept.
        wavelet_energies = get_wavelet_features(values)
        stats += [wavelet_energies[0], wavelet_energies[1]]

        features.update(zip(keys, stats))

    # Jerk: magnitude of the sample-to-sample change of the acceleration vector.
    acc_rows = segment[acc_cols].dropna()
    if acc_rows.empty:
        jerk_mean = jerk_std = np.nan
    else:
        jerk_mag = np.linalg.norm(np.diff(acc_rows.values, axis=0), axis=1)
        jerk_mean, jerk_std = np.mean(jerk_mag), np.std(jerk_mag)
    features['jerk_mean'] = jerk_mean
    features['jerk_std'] = jerk_std

    # Vector magnitudes; the rotation magnitude uses x, y, z only (w excluded).
    for prefix, cols in (('acc', acc_cols), ('rot', rot_cols[:3])):
        rows = segment[cols].dropna()
        if rows.empty:
            mag_mean = mag_std = np.nan
        else:
            magnitude = np.linalg.norm(rows.values, axis=1)
            mag_mean, mag_std = np.mean(magnitude), np.std(magnitude)
        features[f'{prefix}_mag_mean'] = mag_mean
        features[f'{prefix}_mag_std'] = mag_std

    return features


# One IMU feature dict per training sequence; 'sequence_id' is added last and
# serves as the merge key for the other feature tables.
all_imu_features = [
    {**extract_imu_features(seq_rows), 'sequence_id': seq_key}
    for seq_key, seq_rows in df1_demo.groupby('sequence_id')
]

df_imu_features = pd.DataFrame(all_imu_features)
df_imu_features.head()

In [None]:
import numpy as np
import pandas as pd
from scipy.stats import skew, kurtosis

def extract_thermopile_features(segment):
    """Build a flat dict of thermopile features for one sequence.

    Parameters
    ----------
    segment : pandas.DataFrame
        Rows of a single sequence; must contain the columns ``thm_1`` ..
        ``thm_5``.

    Returns
    -------
    dict
        * per sensor: mean, std, range, skew, kurtosis, slope
          (``(last - first) / n``) and the mean of the 5- and 10-sample
          rolling means;
        * per sensor pair: mean and std of the row-wise difference;
        * global: centre of mass over sensor positions 0..4 plus mean, std
          and range of the per-sensor means.

        Groups without any non-NaN data are filled with NaN. The centre of
        mass is computed over the sensors that have data, so one missing
        sensor no longer turns it into NaN; it is NaN when the per-sensor
        means sum to zero.
    """
    thm_cols = [f'thm_{i}' for i in range(1, 6)]
    per_sensor_names = ('mean', 'std', 'range', 'skew', 'kurtosis', 'slope',
                        'rolling_mean_5', 'rolling_mean_10')
    features = {}

    thm_data = segment[thm_cols].copy()

    # --- Per-sensor features ---
    for col in thm_cols:
        signal = thm_data[col].dropna().values

        if signal.size == 0:
            for name in per_sensor_names:
                features[f'{col}_{name}'] = np.nan
            continue

        features[f'{col}_mean'] = np.mean(signal)
        features[f'{col}_std'] = np.std(signal)
        features[f'{col}_range'] = np.ptp(signal)
        features[f'{col}_skew'] = skew(signal)
        features[f'{col}_kurtosis'] = kurtosis(signal)

        # Temporal slope = (last - first) / n
        features[f'{col}_slope'] = (signal[-1] - signal[0]) / max(1, len(signal))

        # Rolling means; min_periods=1 keeps the leading samples instead of NaN.
        as_series = pd.Series(signal)
        features[f'{col}_rolling_mean_5'] = as_series.rolling(window=5, min_periods=1).mean().mean()
        features[f'{col}_rolling_mean_10'] = as_series.rolling(window=10, min_periods=1).mean().mean()

    # --- Pairwise differences (asymmetry detection) ---
    for i in range(5):
        for j in range(i + 1, 5):
            key = f'diff_{thm_cols[i]}_{thm_cols[j]}'
            diff_clean = (thm_data[thm_cols[i]] - thm_data[thm_cols[j]]).dropna().values
            if diff_clean.size > 0:
                features[f'{key}_mean'] = np.mean(diff_clean)
                features[f'{key}_std'] = np.std(diff_clean)
            else:
                features[f'{key}_mean'] = np.nan
                features[f'{key}_std'] = np.nan

    # --- Global features ---
    thm_means = thm_data.mean(axis=0).values
    sensor_positions = np.array([0, 1, 2, 3, 4])  # assume uniform sensor layout
    has_data = ~np.isnan(thm_means)

    if has_data.any():
        # Restrict to sensors with data so the result agrees with the
        # NaN-aware statistics below.
        valid_means = thm_means[has_data]
        total = np.sum(valid_means)
        if total != 0:
            features['thm_center_of_mass'] = np.sum(sensor_positions[has_data] * valid_means) / total
        else:
            features['thm_center_of_mass'] = np.nan
        features['thm_global_mean'] = np.nanmean(thm_means)
        features['thm_global_std'] = np.nanstd(thm_means)
        features['thm_global_range'] = np.nanmax(thm_means) - np.nanmin(thm_means)
    else:
        features['thm_center_of_mass'] = np.nan
        features['thm_global_mean'] = np.nan
        features['thm_global_std'] = np.nan
        features['thm_global_range'] = np.nan

    return features


# One thermopile feature dict per training sequence, keyed by 'sequence_id'.
all_features = [
    {**extract_thermopile_features(seq_rows), 'sequence_id': seq_key}
    for seq_key, seq_rows in df1_demo.groupby('sequence_id')
]

df_therm = pd.DataFrame(all_features)
df_therm

In [None]:
import numpy as np
import pandas as pd
from scipy.stats import entropy

def extract_tof_features_grouped(segment):
    """Build a flat dict of per-sensor time-of-flight features for one sequence.

    Parameters
    ----------
    segment : pandas.DataFrame
        Rows of a single sequence. Pixel columns are named
        ``tof_{sensor}_v{pixel}`` for sensors 1..5 and pixels 0..63; a sensor
        with no matching column is skipped entirely. The value ``-1`` is
        treated as missing.

    Returns
    -------
    dict
        Per present sensor: mean, std, min, max over all frames and pixels,
        ``missing_rate``, ``nonzero_ratio`` (as implemented: the fraction of
        non-missing readings, i.e. ``1 - missing_rate``), a 10-bin histogram
        entropy, and the centre of mass (``com_x``/``com_y``) on the 8x8 grid
        averaged over frames. The centre of mass is NaN when the sensor does
        not have exactly 64 pixel columns or no frame has a positive sum.
    """
    features = {}
    sensor_ids = range(1, 6)
    pixel_ids = range(64)

    # Column/row index grids are identical for every frame, so build them once.
    xx, yy = np.meshgrid(np.arange(8), np.arange(8))

    for sensor in sensor_ids:
        pixel_cols = [f"tof_{sensor}_v{i}" for i in pixel_ids if f"tof_{sensor}_v{i}" in segment.columns]

        if not pixel_cols:
            continue

        pixel_data = segment[pixel_cols].replace(-1, np.nan).values.astype(float)
        missing_mask = np.isnan(pixel_data)

        # Per-sensor stats
        features[f'tof_{sensor}_mean'] = np.nanmean(pixel_data)
        features[f'tof_{sensor}_std'] = np.nanstd(pixel_data)
        features[f'tof_{sensor}_min'] = np.nanmin(pixel_data)
        features[f'tof_{sensor}_max'] = np.nanmax(pixel_data)
        features[f'tof_{sensor}_missing_rate'] = missing_mask.sum() / pixel_data.size
        features[f'tof_{sensor}_nonzero_ratio'] = np.count_nonzero(~missing_mask) / pixel_data.size

        # Entropy of the value histogram over all frames and pixels
        flat_vals = pixel_data[~missing_mask].flatten()
        if flat_vals.size > 0:
            hist, _ = np.histogram(flat_vals, bins=10, density=True)
            hist = hist[hist > 0]
            features[f'tof_{sensor}_spatial_entropy'] = entropy(hist)
        else:
            features[f'tof_{sensor}_spatial_entropy'] = np.nan

        # Center of mass (average across time steps). The 8x8 reshape is only
        # valid with all 64 pixels present; this explicit check replaces the
        # previous bare `except:` around the reshape.
        com_x_list, com_y_list = [], []
        if pixel_data.shape[1] == 64:
            for row in pixel_data:
                if np.isnan(row).all():
                    continue
                grid = np.nan_to_num(row.reshape(8, 8), nan=0)
                total = np.sum(grid)
                if total > 0:
                    com_x_list.append(np.sum(xx * grid) / total)
                    com_y_list.append(np.sum(yy * grid) / total)

        if com_x_list:
            features[f'tof_{sensor}_com_x'] = np.mean(com_x_list)
            features[f'tof_{sensor}_com_y'] = np.mean(com_y_list)
        else:
            features[f'tof_{sensor}_com_x'] = np.nan
            features[f'tof_{sensor}_com_y'] = np.nan

    return features


# df1_demo is expected to hold the full training set.
# One ToF feature dict per training sequence, keyed by 'sequence_id'.
all_tof_features = [
    {**extract_tof_features_grouped(seq_rows), 'sequence_id': seq_key}
    for seq_key, seq_rows in df1_demo.groupby('sequence_id')
]

df_tof = pd.DataFrame(all_tof_features)
df_tof.head()

In [None]:
# Preview of the per-sequence ToF feature table (same table as built in the previous cell).
df_tof.head()

In [None]:
def extract_tof_temporal_features(df):
    """Summarise frame-to-frame changes of all ToF pixels, per sequence.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``sequence_id``, ``sequence_counter`` and the ToF pixel
        columns (names starting with ``tof_`` and containing ``_v``).

    Returns
    -------
    pandas.DataFrame
        One row per sequence with ``sequence_id`` and the mean, std, max and
        min of the first differences along time. Missing readings (``-1`` or
        NaN) are replaced by 0 before differencing. Sequences that provide no
        difference at all (a single row, or no ToF columns) get NaN for all
        four statistics instead of raising.
    """
    stat_names = ['tof_diff_mean', 'tof_diff_std', 'tof_diff_max', 'tof_diff_min']
    temporal_feats = []
    tof_cols = [col for col in df.columns if col.startswith("tof_") and '_v' in col]

    for seq_id, segment in df.groupby("sequence_id"):
        segment_sorted = segment.sort_values("sequence_counter")[tof_cols].replace(-1, np.nan).fillna(0).values
        diffs = np.diff(segment_sorted, axis=0)  # Shape: (n_rows - 1, n_tof_cols)

        feats = {'sequence_id': seq_id}
        if diffs.size == 0:
            # np.max / np.min raise ValueError on an empty array.
            feats.update(dict.fromkeys(stat_names, np.nan))
        else:
            feats['tof_diff_mean'] = np.mean(diffs)
            feats['tof_diff_std'] = np.std(diffs)
            feats['tof_diff_max'] = np.max(diffs)
            feats['tof_diff_min'] = np.min(diffs)
        temporal_feats.append(feats)

    # Explicit columns keep the merge key present even when df has no rows.
    return pd.DataFrame(temporal_feats, columns=['sequence_id'] + stat_names)


# Per-sequence statistics of frame-to-frame ToF changes for the training data.
df_tof_temp=extract_tof_temporal_features(df1_demo)
df_tof_temp.head()

In [None]:
# Unique (sequence_id, gesture) pairs provide the label for each sequence.
dup_gest=df1_demo[['sequence_id','gesture']].drop_duplicates()
# Join all per-sequence feature tables and the label on 'sequence_id'
# (default merge, so only sequences present in every table are kept).
all_data=df_imu_features.merge(df_therm,on=['sequence_id']).merge(df_tof,on=['sequence_id']).merge(df_tof_temp,on=['sequence_id']).merge(dup_gest,on=['sequence_id'])
all_data.head()

In [None]:
# df2_demo=pd.read_csv("/kaggle/input/cmi-detect-behavior-with-sensor-data/train_demographics.csv")
# df2_demo.head()

In [None]:
# df3_demo=df1_demo.merge(df2_demo,on=['subject'],how='left')
# df3_demo.head()

In [None]:
# test_df=pd.read_csv("/kaggle/input/cmi-detect-behavior-with-sensor-data/test.csv")
# test_demo=pd.read_csv("/kaggle/input/cmi-detect-behavior-with-sensor-data/test_demographics.csv")
# test_df=test_df.merge(test_demo,on=['subject'],how='left')
# test_df_1=test_df.drop(['row_id', 'sequence_id','sequence_counter','subject'],axis=1)
# test_df_1.head()

In [None]:
# df3_demo=df3_demo.fillna(0)

In [None]:
# test_df_1=test_df_1.fillna(0)

In [None]:
# X1=df3_demo.drop(['row_id', 'sequence_id','sequence_counter','subject'],axis=1)
# X=df3_demo.drop(['row_id', 'sequence_id','sequence_counter','subject','gesture'],axis=1)
# y=df3_demo['gesture']

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical

In [None]:
from sklearn.preprocessing import LabelEncoder
# Shared encoder: fitted on the gesture labels further down and reused there
# to turn predicted class indices back into gesture names.
le = LabelEncoder()

# def encode_categorical_columns(df):
#     df_encoded = X1.copy()
#     encoders = {}
    
#     for col in df_encoded.columns:
#         if df_encoded[col].dtype == 'object' or str(df_encoded[col].dtype) == 'category':
#             #le = LabelEncoder()
#             df_encoded[col] = le.fit_transform(df_encoded[col])
#             encoders[col] = le  # Store encoder for inverse transform later
#             print(f"Encoded '{col}' → {list(le.classes_)}")
    
#     return df_encoded, encoders

# df_encoded, encoders = encode_categorical_columns(X1)

# print("\nEncoded DataFrame:\n", df_encoded)


In [None]:
#dep_var=X1['gesture']
#dep_var_1=le.fit(dep_var)

In [None]:
from sklearn.preprocessing import LabelEncoder

# def encode_categorical_columns(df):
#     test_encoded = test_df_1.copy()
#     test_encoders = {}
    
#     for col in test_encoded.columns:
#         if test_encoded[col].dtype == 'object' or str(test_encoded[col].dtype) == 'category':
#             #le = LabelEncoder()
#             test_encoded[col] = le.fit_transform(test_encoded[col])
#             test_encoders[col] = le  # Store encoder for inverse transform later
#             print(f"Encoded '{col}' → {list(le.classes_)}")
    
#     return test_encoded, test_encoders

# test_encoded, test_encoders = encode_categorical_columns(test_df_1)

# print("\nEncoded DataFrame:\n", test_encoded)


In [None]:
# from sklearn.preprocessing import MinMaxScaler
# # Initialize the scaler
# scaler = MinMaxScaler()

# # Fit and transform the data
# scaled_data = scaler.fit_transform(X2)

# # Convert back to DataFrame (optional)
# scaled_df = pd.DataFrame(scaled_data, columns=X2.columns)

In [None]:
import pandas as pd
import numpy as np
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.decomposition import PCA, TruncatedSVD
from sklearn.pipeline import Pipeline
from sklearn.datasets import fetch_openml

In [None]:
# # Split
# # Step 2: Split into Train (60%), Validation (20%), Test (20%)
# y=df_encoded['gesture']
# X3=df3_demo[test_df_1.columns]

# # Split numeric and categorical
# num_cols = X3.select_dtypes(include=['int64', 'float64']).columns.tolist()
# cat_cols = X3.select_dtypes(include=['object', 'category']).columns.tolist()

# # Pipelines for each type
# numeric_transformer = Pipeline([
#     ('scaler', StandardScaler()),
#     ('pca', PCA(n_components=50))  # Choose n_components based on variance or elbow
# ])

# categorical_transformer = Pipeline([
#     ('onehot', OneHotEncoder(handle_unknown='ignore', sparse=False)),
#     ('svd', TruncatedSVD(n_components=1))  # Works on high-dimensional one-hot data
# ])

# # Combine transformations
# preprocessor = ColumnTransformer(
#     transformers=[
#         ('num', numeric_transformer, num_cols),
#         ('cat', categorical_transformer, cat_cols)
#     ]
# )

# # Apply transformation
# X_reduced = preprocessor.fit_transform(X3)

In [None]:
# Feature matrix: every column except the label and the sequence key.
X=all_data.drop(['gesture','sequence_id'],axis=1)
# Raw string labels, then integer-encoded with the shared LabelEncoder `le`.
y_b4=all_data['gesture']
y=pd.DataFrame(le.fit_transform(y_b4))

In [None]:
# Hold out 20% as the test split, stratified by encoded gesture.
X_train_full, X_test, y_train_full, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y)

# Split the remaining 80% into train and validation, again stratified.
X_train, X_val, y_train, y_val = train_test_split(
    X_train_full, y_train_full, test_size=0.25, random_state=42, stratify=y_train_full)
# (0.25 of 0.8 = 0.2 --> 60/20/20 split)

In [None]:
from xgboost import XGBClassifier
from sklearn.datasets import load_iris  # you can use your dataset instead
from sklearn.model_selection import cross_val_score, StratifiedKFold
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score
import lightgbm as lgb

In [None]:
# Step 3: Define XGBoost multi-class classifier
# Multi-class XGBoost model that outputs per-class probabilities.
# - The deprecated `use_label_encoder` flag is not passed: the labels in `y`
#   are already integer-encoded by the LabelEncoder `le`.
# - `enable_categorical` is a boolean; the previous string 'True' only worked
#   because non-empty strings are truthy.
model = XGBClassifier(
    objective='multi:softprob',
    num_class=len(y.drop_duplicates()),
    eval_metric='mlogloss',
    random_state=42,
    enable_categorical=True
)

In [None]:
# Train the model on selected features
# The validation split is used as the evaluation set for early stopping.
# NOTE(review): recent XGBoost releases appear to expect `early_stopping_rounds`
# on the estimator constructor rather than in fit() - confirm against the
# installed version before upgrading.
model.fit(X_train, y_train, 
          eval_set=[(X_val, y_val)],
          early_stopping_rounds=10,
          verbose=False)

In [None]:
from xgboost import XGBClassifier
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report

In [None]:
# Step 5: Evaluate on validation set
# Accuracy and per-class report are computed on the integer-encoded labels.
val_preds = model.predict(X_val)
val_accuracy = accuracy_score(y_val, val_preds)
print("Validation Accuracy:", val_accuracy)
print("\nValidation Classification Report:\n", classification_report(y_val, val_preds))

In [None]:
# Step 6: Final test evaluation
# Same metrics as for validation, on the held-out test split.
test_preds = model.predict(X_test)
test_accuracy = accuracy_score(y_test, test_preds)
print("Test Accuracy:", test_accuracy)
print("\nTest Classification Report:\n", classification_report(y_test, test_preds))

In [None]:
# Second copy of the 60/20/20 split (same seed and sizes as the first one),
# stored under *_1 names so the scoring cells can modify it freely.
X_train_full_1, X_test_1, y_train_full_1, y_test_1 = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y)

# Stratify on this split's own labels (previously the other split's
# `y_train_full` was referenced here).
X_train_1, X_val_1, y_train_1, y_val_1 = train_test_split(
    X_train_full_1, y_train_full_1, test_size=0.25, random_state=42, stratify=y_train_full_1)
# (0.25 of 0.8 = 0.2 --> 60/20/20 split)

In [None]:

# Encoded class predictions for the test split as a one-column frame.
# NOTE(review): the scoring cell below rebuilds `test_preds` from scratch, so
# this cell looks redundant - confirm before removing.
test_preds=pd.DataFrame(model.predict(X_test))
test_preds.columns=['gesture_pred']


In [None]:
# Binary F1 (Target vs Non-Target) and macro F1 (non-target gestures collapsed)
# for the held-out TEST split.
from sklearn.metrics import f1_score

# gesture -> sequence_type lookup, once with prediction-side column names and
# once with the original names.
dup_test=df1_demo[['sequence_type','gesture']].drop_duplicates()
dup_test.columns=['seq_type_pred','gesture_pred']
dup_org=df1_demo[['sequence_type','gesture']].drop_duplicates()

# Predicted gesture names for the test split.
test_preds=pd.DataFrame({'gesture_pred': le.inverse_transform(model.predict(X_test))})

# Attach predictions by position: X_test_1 keeps the original row labels from
# the split while the predictions are numbered 0..n-1, so a label-aligned
# assignment would scatter them or produce NaN.
X_test_1=X_test_1.assign(gesture_pred=test_preds['gesture_pred'].to_numpy())

test_preds=test_preds.merge(dup_test,on=['gesture_pred'],how='left')

# True gesture names for the test split plus their sequence type.
y_test_1=pd.DataFrame(le.inverse_transform(y_test.to_numpy().ravel()))
y_test_1.columns=['gesture']
y_test_1=y_test_1.merge(dup_org,on=['gesture'],how='left')

# Binary target flag: 1 for 'Target' sequences, 0 otherwise.
y_test_1['sequence_type']=np.where(y_test_1['sequence_type']=='Target',1,0)
test_preds['seq_type_pred']=np.where(test_preds['seq_type_pred']=='Target',1,0)

# Collapse every non-target gesture into the single class "Non-Target".
test_preds['gesture_pred']=np.where(test_preds['seq_type_pred']==1,test_preds['gesture_pred'],"Non-Target")
y_test_1['gesture']=np.where(y_test_1['sequence_type']==1,y_test_1['gesture'],"Non-Target")

# Compute binary F1 score
f1 = f1_score(y_test_1['sequence_type'], test_preds['seq_type_pred'])
print(f"Binary F1 Score (Target vs Non-Target): {f1:.4f}")

# Compute macro F1 score
f1_macro = f1_score(y_test_1['gesture'], test_preds['gesture_pred'], average='macro')
print(f"Macro F1 Score (with non-target collapsed): {f1_macro:.4f}")

# Final score is the plain average of the two F1 values.
final_score=(f1+f1_macro)/2
print(f"final overall score:{final_score:.4f}")

In [None]:
# Binary F1 (Target vs Non-Target) and macro F1 (non-target gestures collapsed)
# for the validation split.
# gesture -> sequence_type lookup with prediction-side column names.
dup_test=df1_demo[['sequence_type','gesture']].drop_duplicates()
dup_test.columns=['seq_type_pred','gesture_pred']
# Rename one or more columns

# Same lookup with the original column names, used for the true labels.
dup_org=df1_demo[['sequence_type','gesture']].drop_duplicates()


# Predicted gesture names for the validation split, joined with their sequence type.
# NOTE(review): predictions and true labels are compared row by row, which
# presumes each gesture maps to exactly one sequence_type so the left merges
# keep one row per sample - confirm.
val_preds=pd.DataFrame(model.predict(X_val))
val_preds.columns=['gesture_pred']
val_preds['gesture_pred']=le.inverse_transform(val_preds['gesture_pred'])
val_preds=val_preds.merge(dup_test,on=['gesture_pred'],how='left')
val_preds.head()

# True gesture names for the validation split, joined with their sequence type.
y_val_1=pd.DataFrame(le.inverse_transform(y_val))
y_val_1.columns=['gesture']
y_val_1=y_val_1.merge(dup_org,on=['gesture'],how='left')
y_val_1.head()

# Binary target flag: 1 for 'Target' sequences, 0 otherwise.
y_val_1['sequence_type']=np.where(y_val_1['sequence_type']=='Target',1,0)
val_preds['seq_type_pred']=np.where(val_preds['seq_type_pred']=='Target',1,0)


# Collapse every non-target gesture into the single class "Non-Target".
val_preds['gesture_pred']=np.where(val_preds['seq_type_pred']==1,val_preds['gesture_pred'],"Non-Target")
y_val_1['gesture']=np.where(y_val_1['sequence_type']==1,y_val_1['gesture'],"Non-Target")


from sklearn.metrics import f1_score
# Compute binary F1 score
f1 = f1_score(y_val_1['sequence_type'], val_preds['seq_type_pred'])
print(f"Binary F1 Score (Target vs Non-Target): {f1:.4f}")

# Compute macro F1 score
f1_macro = f1_score(y_val_1['gesture'], val_preds['gesture_pred'], average='macro')
print(f"Macro F1 Score (with non-target collapsed): {f1_macro:.4f}")

# Final score is the plain average of the two F1 values.
final_score=(f1+f1_macro)/2
print(f"final overall score:{final_score:.4f}")

In [None]:
# Build the same four per-sequence feature tables for the test file.
test_df=pd.read_csv("/kaggle/input/cmi-detect-behavior-with-sensor-data/test.csv")

# IMU features. Start from an empty list: appending to the list that already
# holds the training rows would mix training sequences into the test table and
# add duplicates every time this cell is re-run.
all_imu_features = []
for seq_id, segment in test_df.groupby('sequence_id'):
    feats = extract_imu_features(segment)
    feats['sequence_id'] = seq_id
    all_imu_features.append(feats)

test_imu_features = pd.DataFrame(all_imu_features)

# Thermopile features.
all_features = []
for seq_id, segment in test_df.groupby('sequence_id'):
    feats = extract_thermopile_features(segment)
    feats['sequence_id'] = seq_id
    all_features.append(feats)

test_therm = pd.DataFrame(all_features)

# Time-of-flight features.
all_tof_features = []
for seq_id, segment in test_df.groupby('sequence_id'):
    feats = extract_tof_features_grouped(segment)
    feats['sequence_id'] = seq_id
    all_tof_features.append(feats)

test_tof = pd.DataFrame(all_tof_features)

# Frame-to-frame ToF change statistics.
test_tof_temp=extract_tof_temporal_features(test_df)
test_tof_temp.head()

In [None]:
# Join the four test feature tables on 'sequence_id', mirroring the training merge.
test_data=test_imu_features.merge(test_therm,on=['sequence_id']).merge(test_tof,on=['sequence_id']).merge(test_tof_temp,on=['sequence_id'])
test_data.head()
# Model input: all feature columns without the sequence key.
test_data_v1=test_data.drop(['sequence_id'],axis=1)


In [None]:


# Predict one encoded class per test sequence and decode it to a gesture name.
test_data['gesture_pred']=pd.DataFrame(model.predict(test_data_v1))
test_data['gesture_pred']=le.inverse_transform(test_data['gesture_pred'])
# NOTE(review): merging back onto the raw rows yields one output row per sensor
# row, not per sequence, and overwrites `test_df`, so this cell cannot be
# re-run without reloading the test data - confirm this is intended.
test_df=test_df.merge(test_data[['sequence_id','gesture_pred']],on=['sequence_id'])
test_df_final=test_df[['sequence_id','gesture_pred']]
test_df_final.columns=['sequence_id','gesture']
#dup_org=df1_demo[['sequence_type','gesture']].drop_duplicates()
#test_df_final=test_df_final.merge(dup_org,on=['gesture'],how='left')
#test_df_final['gesture']=np.where(test_df_final['sequence_type']=="Target",test_df_final['gesture'],"Non-Target")
#test_df_final=test_df_final[['sequence_id','gesture']]
test_df_final.head()

In [None]:
# Write the (sequence_id, gesture) frame to the working directory without the index.
test_df_final.to_parquet("submission.parquet", index=False)

In [None]:
import os

import pandas as pd
import polars as pl

import kaggle_evaluation.cmi_inference_server



In [None]:
def predict(sequence: pl.DataFrame, demographics: pl.DataFrame) -> str:
    """Predict the gesture name for the sequence data handed over by the inference server.

    Parameters
    ----------
    sequence : pl.DataFrame
        Sensor rows; converted to pandas and grouped by ``sequence_id``.
    demographics : pl.DataFrame
        Accepted to match the callback signature; not used by the model.

    Returns
    -------
    str
        Decoded gesture predicted for the sequence that the first input row
        belongs to.

    Notes
    -----
    Feature rows are collected in local lists. Appending to the notebook-level
    ``all_imu_features`` list would grow it on every call and rebuild an ever
    larger frame per prediction.
    """
    test_df = sequence.to_pandas()

    def _per_sequence_table(extractor):
        """Apply ``extractor`` to every sequence and stack the results, keyed by sequence_id."""
        rows = []
        for seq_id, segment in test_df.groupby('sequence_id'):
            feats = extractor(segment)
            feats['sequence_id'] = seq_id
            rows.append(feats)
        return pd.DataFrame(rows)

    # Same feature tables and merge order as used to build the training matrix.
    test_data = (
        _per_sequence_table(extract_imu_features)
        .merge(_per_sequence_table(extract_thermopile_features), on=['sequence_id'])
        .merge(_per_sequence_table(extract_tof_features_grouped), on=['sequence_id'])
        .merge(extract_tof_temporal_features(test_df), on=['sequence_id'])
    )
    test_data_v1 = test_data.drop(['sequence_id'], axis=1)

    gestures = le.inverse_transform(model.predict(test_data_v1))
    gesture_by_sequence = dict(zip(test_data['sequence_id'], gestures))

    # As before, report the prediction of the sequence owning the first input row.
    first_sequence_id = test_df['sequence_id'].iloc[0]
    return str(gesture_by_sequence[first_sequence_id])

In [None]:
# Wrap `predict` in the competition's inference server.
inference_server = kaggle_evaluation.cmi_inference_server.CMIInferenceServer(predict)

# Serve when the KAGGLE_IS_COMPETITION_RERUN environment variable is set;
# otherwise run the local gateway against the public test files.
if os.getenv('KAGGLE_IS_COMPETITION_RERUN'):
    inference_server.serve()
else:
    inference_server.run_local_gateway(
        data_paths=(
            '/kaggle/input/cmi-detect-behavior-with-sensor-data/test.csv',
            '/kaggle/input/cmi-detect-behavior-with-sensor-data/test_demographics.csv',
        )
    )

In [None]:
# Sample synthetic data


# # --- 1. Generate Synthetic Sensor Data (Replace with your actual data loading) ---
# # For demonstration purposes, let's create some synthetic time-series sensor data.
# # Imagine 1000 samples, each with 50 time steps and 3 sensor features (e.g., X, Y, Z acceleration)

# num_samples = 1000
# time_steps = 50
# num_features = 3

# # Simulate different patterns for two classes
# # Class 0: More sinusoidal
# t = np.linspace(0, 10 * np.pi, time_steps)
# data_class_0 = np.array([np.sin(t + np.random.rand() * 2 * np.pi) * 0.5 + np.random.rand(time_steps) * 0.1 for _ in range(num_samples // 2)])
# data_class_0 = np.stack([data_class_0, data_class_0 * 0.8, data_class_0 * 1.2], axis=-1) # Add 3 features

# # Class 1: More spikey/noisy
# data_class_1 = np.array([np.random.rand(time_steps) * 0.8 + np.sin(t / 2 + np.random.rand() * 2 * np.pi) * 0.2 for _ in range(num_samples // 2)])
# data_class_1 = np.stack([data_class_1 * 1.5, data_class_1, data_class_1 * 0.7], axis=-1) # Add 3 features

# X = np.vstack((data_class_0, data_class_1))