# 🧠 Baseline Model Training: PADS Dataset
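This notebook loads preprocessed sensor data, splits it into fixed-length windows, and trains a baseline XGBoost model for task classification. Note that the labels are currently randomly generated placeholders, so the reported metrics only exercise the pipeline.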

In [None]:
# Imports
import json
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from xgboost import XGBClassifier

In [None]:
# Read the preprocessed recording (e.g., '001_ml.bin') as a flat float32
# stream, then fold it into rows of 6 values each.
# NOTE(review): presumably one column per sensor channel — confirm against
# the preprocessing step that wrote the file.
path = '001_ml.bin'
data = np.fromfile(path, dtype=np.float32)
data = data.reshape(-1, 6)
data.shape

In [None]:
# Create windowed segments for modeling
def window_data(data, size=256, stride=128):
    """Cut an array into fixed-length windows along its first axis.

    Windows start at rows 0, stride, 2 * stride, ... and only full windows
    are kept; trailing rows that do not fill a window are dropped.

    Parameters
    ----------
    data : array-like
        Input whose first axis is the one being windowed.
    size : int
        Number of rows in each window. Must be at least 1.
    stride : int
        Step between the starts of consecutive windows. Must be at least 1.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(n_windows, size, *data.shape[1:])``. When the
        input has fewer than ``size`` rows, ``n_windows`` is 0 and an empty
        array with that shape and the input's dtype is returned.

    Raises
    ------
    ValueError
        If ``size`` or ``stride`` is smaller than 1.
    """
    if size < 1 or stride < 1:
        raise ValueError(
            f"size and stride must be >= 1 (got size={size}, stride={stride})"
        )

    data = np.asarray(data)
    starts = range(0, data.shape[0] - size + 1, stride)
    if not starts:
        # np.stack refuses an empty list, so build the empty result directly
        # with the shape and dtype a non-empty result would have had.
        return np.empty((0, size) + data.shape[1:], dtype=data.dtype)
    return np.stack([data[start:start + size] for start in starts])

# Segment the whole recording using the window length and hop that
# window_data uses by default, spelled out here for readability.
X = window_data(data, size=256, stride=128)
X.shape

In [None]:
# Load label info from JSON file.
# JSON is read with an explicit UTF-8 encoding so decoding does not depend on
# the platform's default locale.
# NOTE(review): `obs` is loaded but not used yet — the labels below are random
# placeholders; presumably the real task labels should be derived from it.
with open('observation_001.json', encoding='utf-8') as f:
    obs = json.load(f)

# Create dummy task labels for demonstration: one task name per window.
num_windows = X.shape[0]
tasks = ['Relaxed', 'RelaxedTask', 'StretchHold', 'LiftHold', 'HoldWeight',
         'DrinkGlas', 'CrossArms', 'TouchNose', 'Entrainment']
# A seeded generator keeps the placeholder labels identical from run to run,
# matching the fixed random_state used for the train/test split.
y = np.random.default_rng(42).choice(tasks, num_windows)  # Fake labels for testing

In [None]:
# XGBoost takes a 2-D feature matrix, so each window's time x features grid
# is collapsed into a single row (one row per window).
X_flat = X.reshape(len(X), -1)
print('X shape for XGBoost:', X_flat.shape)

In [None]:
# Hold out 20% of the windows for evaluation; the fixed seed makes the
# shuffle, and therefore the split, repeatable.
# NOTE(review): windows produced with stride < size overlap, so a shuffled
# split can put near-duplicate samples on both sides — confirm whether a
# contiguous or per-recording split is wanted once real labels are used.
X_train, X_test, y_train, y_test = train_test_split(
    X_flat,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Train baseline XGBoost model
model = XGBClassifier(use_label_encoder=False, eval_metric='mlogloss')
model.fit(X_train, y_train)

In [None]:
# Evaluate
preds = model.predict(X_test)
print(classification_report(y_test, preds))

cm = confusion_matrix(y_test, preds, labels=tasks)
sns.heatmap(cm, annot=True, fmt='d', xticklabels=tasks, yticklabels=tasks, cmap='Blues')
plt.title('Confusion Matrix')
plt.xlabel('Predicted')
plt.ylabel('True')
plt.show()