# 🧠 Multi-Patient Baseline Model Training (PADS)
Train an XGBoost classifier to predict the task label from real smartwatch sensor data recorded from multiple people with Parkinson’s disease and healthy participants.

In [None]:
# 📦 Imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
from xgboost import XGBClassifier
import seaborn as sns
import warnings
# Suppress every Python warning category from this point on.
# NOTE(review): this blanket filter also hides deprecation warnings raised by
# the libraries imported above — consider narrowing it to specific categories.
warnings.filterwarnings('ignore')

In [None]:
# 📁 Load Data
# Read the feature array and the two label arrays from .npy files in the
# current working directory, in the same order as the file names below.
X_all, y_task_all, y_cond_all = (
    np.load(npy_path)
    for npy_path in ('X_all.npy', 'y_task_all.npy', 'y_cond_all.npy')
)

# Sanity report: feature array dimensions, then the distinct values found in
# each label array.
print('X shape:', X_all.shape)
for caption, label_array in (
    ('Unique Task Labels:', y_task_all),
    ('Unique Condition Labels:', y_cond_all),
):
    print(caption, np.unique(label_array))

In [None]:
# 🧹 Preprocessing: Flatten Time-Series for XGBoost
# Keep the first axis (one row per sample) and collapse every remaining axis
# into a single feature axis, producing a 2-D matrix.
n_samples = X_all.shape[0]
X_flat = np.reshape(X_all, (n_samples, -1))
print('Flattened shape:', X_flat.shape)

In [None]:
# 📊 Train/Test Split
# Hold out 25% of the rows for evaluation, with a fixed seed so the split is
# reproducible. The split is stratified on the task label so each task keeps
# roughly the same proportion in both partitions; a purely random split can
# leave rare tasks under-represented (or absent) in the test set, which
# distorts the per-class report and the confusion matrix.
# NOTE(review): rows are split individually. If several rows come from the
# same patient, that patient can end up in both partitions — consider a
# group-aware split if patient IDs are available. TODO confirm.
try:
    X_train, X_test, y_train, y_test = train_test_split(
        X_flat, y_task_all, test_size=0.25, random_state=42, stratify=y_task_all
    )
except ValueError as split_error:
    # scikit-learn rejects stratification when a class is too small to be
    # shared between both partitions; fall back to the plain random split
    # instead of aborting the notebook.
    print(f'Stratified split unavailable ({split_error}); using a random split instead.')
    X_train, X_test, y_train, y_test = train_test_split(
        X_flat, y_task_all, test_size=0.25, random_state=42
    )
print(f'Train samples: {len(y_train)}, Test samples: {len(y_test)}')

In [None]:
# ⚙️ Train XGBoost Classifier
# Collect the constructor options in one place, then build and fit the model
# on the training partition.
xgb_params = {
    # NOTE(review): `use_label_encoder` is deprecated in newer xgboost
    # releases — confirm against the installed version before removing it.
    'use_label_encoder': False,
    # Multi-class log loss is the metric XGBoost tracks during training.
    'eval_metric': 'mlogloss',
}
model = XGBClassifier(**xgb_params)
model.fit(X_train, y_train)

In [None]:
# 📈 Evaluate Performance
# Predict a label for each held-out row, then print scikit-learn's per-class
# text report comparing those predictions with the true labels.
preds = model.predict(X_test)
report_text = classification_report(y_true=y_test, y_pred=preds)
print(report_text)

In [None]:
# 📉 Confusion Matrix
# Use the sorted distinct task labels from the full label array for both the
# matrix rows/columns and the tick labels, so the axes line up with the counts.
task_labels = np.unique(y_task_all)
cm = confusion_matrix(y_test, preds, labels=task_labels)

# Draw the matrix as an annotated heatmap on an explicit figure/axes pair.
fig, ax = plt.subplots(figsize=(10, 6))
sns.heatmap(
    cm,
    annot=True,  # write the count inside each cell
    fmt='d',     # counts are integers
    cmap='Blues',
    xticklabels=task_labels,
    yticklabels=task_labels,
    ax=ax,
)
ax.set_title('Confusion Matrix – Task Prediction')
ax.set_xlabel('Predicted')
ax.set_ylabel('True')
fig.tight_layout()
plt.show()