# 📓 Human Activity Classification from Sensor Data

### Instructions for Participants

- All necessary code is already provided.  
- You should **not need to write new functions** unless you want to experiment.  
- Your tasks: tweak parameters, run experiments, interpret results, and answer the reflection prompts in markdown.  
- Use the dataset “movement_dataset_windows.csv” (or raw logs) to proceed.

### Misc

This exercise is based on: https://github.com/mattiasahle/DT374B_Machine_Learning_and_Data_Acquisition/tree/master

Jupyter Notebook in VSCode: https://code.visualstudio.com/docs/datascience/jupyter-notebooks 

# TODO

- Regenerate this file to add:
    - More different ML algos
    - More explanations (the why) surrounding the code
- Add mock dataset

## Pre-reqs

`pip install numpy pandas matplotlib seaborn scikit-learn`

## 🏁 Step 0: Imports and Settings

In [None]:
# Human Activity Classification from Sensor Data
# Description: Classify human activities (jump, run, walk, pushup) from accelerometer, gyroscope and magnetometer recordings.

# ---
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report, confusion_matrix

# Plot appearance shared by every figure in the notebook.
# The rcParams tweak must come after sns.set(), which resets matplotlib defaults.
sns.set(style="whitegrid")
plt.rcParams.update({"figure.figsize": (12, 6)})

# Dataset layout: one CSV per (activity, sensor, recording) inside DATA_PATH.
DATA_PATH = "./data"  # Change this if your data folder is elsewhere
ACTIVITIES = ["jump", "run", "walk", "pushup"]
SENSORS = ["acc", "gyro", "mag"]
RECORDINGS = list(range(1, 7))  # recordings 1..6 for each activity

## 📥 Step 1: Load and Visualize Example Sensor Data

In [None]:
def load_sensor_file(activity, sensor, recording_num):
    """Read one sensor recording from disk and tag it with its origin.

    The file is looked up at ``DATA_PATH/{activity}_{sensor}{recording_num}.csv``.

    Args:
        activity: Activity name as used in the file name (e.g. 'jump').
        sensor: Sensor name as used in the file name (e.g. 'acc').
        recording_num: Recording number as used in the file name.

    Returns:
        A DataFrame with the CSV contents plus the columns 'Activity',
        'Sensor' and 'Recording', each filled with the matching argument.
    """
    csv_path = os.path.join(DATA_PATH, f"{activity}_{sensor}{recording_num}.csv")
    return pd.read_csv(csv_path).assign(
        Activity=activity,
        Sensor=sensor,
        Recording=recording_num,
    )

# Example: accelerometer trace of the first jumping recording
example_df = load_sensor_file('jump', 'acc', 1)

# Draw one line per axis against the timestamp column
for axis_name in ('X', 'Y', 'Z'):
    plt.plot(example_df['Milliseconds'], example_df[axis_name], label=axis_name)
plt.title("Accelerometer Signal - Jumping (Recording 1)")
plt.xlabel("Milliseconds")
plt.ylabel("Acceleration")
plt.legend()
plt.show()

# Last expression of the cell: the notebook renders the first rows as a table
example_df.head()

## 🧪 Step 2: Feature Extraction

In [None]:
# Define basic statistical feature extraction
def extract_features(df):
    """Summarise the X, Y and Z columns of ``df`` with basic statistics.

    Args:
        df: Mapping-like object (typically a DataFrame) that yields one
            sequence of numbers for each of the keys 'X', 'Y' and 'Z'.

    Returns:
        A dict keyed ``'{axis}_{stat}'`` with stat in mean, std, max, min,
        median, grouped by axis in X, Y, Z order. The std is whatever
        ``np.std`` returns with its default arguments.
    """
    # (feature suffix, NumPy reducer) pairs, in the order the keys are emitted
    reducers = (
        ('mean', np.mean),
        ('std', np.std),
        ('max', np.max),
        ('min', np.min),
        ('median', np.median),
    )
    return {
        f'{axis}_{suffix}': reduce_fn(df[axis])
        for axis in ('X', 'Y', 'Z')
        for suffix, reduce_fn in reducers
    }

# Build one feature row per (activity, recording): the statistics of all
# sensors are placed side by side, each key prefixed with its sensor name.
feature_rows = []

for activity in ACTIVITIES:
    for rec in RECORDINGS:
        row = {'Activity': activity}
        for sensor in SENSORS:
            sensor_stats = extract_features(load_sensor_file(activity, sensor, rec))
            row.update({f"{sensor}_{name}": value for name, value in sensor_stats.items()})
        feature_rows.append(row)

features_df = pd.DataFrame(feature_rows)
# Last expression of the cell: the notebook renders the first rows as a table
features_df.head()

## 📊 Step 3: Visualize Feature Space

In [None]:
# Pairplot to judge how well the activities separate.
# Only the X-axis statistics of the accelerometer and gyroscope are shown.
pairplot_cols = [
    col for col in features_df.columns
    if any(tag in col for tag in ('acc_X', 'gyro_X'))
]
sns.pairplot(features_df, hue='Activity', vars=pairplot_cols)
plt.suptitle("Feature Space (Subset of Features)", y=1.02)
plt.show()

## 🤖 Step 4: Train a Classifier

In [None]:
# Prepare data: feature matrix (all columns except the label) and labels
X = features_df.drop(columns=['Activity'])
y = features_df['Activity']

# Split into train/test BEFORE scaling, so the scaler never sees test-set
# statistics (fitting it on the full dataset leaks information into the
# evaluation). stratify=y keeps every activity represented in both parts,
# which matters with only a handful of recordings per class.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.3, stratify=y, random_state=42
)

# Fit the scaler on the training portion only, then apply it everywhere
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)
X_scaled = scaler.transform(X)  # full scaled matrix, kept available for experiments

# Train classifier
knn = KNeighborsClassifier(n_neighbors=3)
knn.fit(X_train, y_train)

# Evaluate on the held-out recordings
y_pred = knn.predict(X_test)
print(classification_report(y_test, y_pred))

## 🔍 Step 5: Confusion Matrix

In [None]:
# Confusion matrix: rows are true activities, columns are predictions,
# both ordered as in ACTIVITIES.
cm = confusion_matrix(y_test, y_pred, labels=ACTIVITIES)
heatmap_style = {
    'annot': True,
    'fmt': 'd',
    'cmap': 'Blues',
    'xticklabels': ACTIVITIES,
    'yticklabels': ACTIVITIES,
}
sns.heatmap(cm, **heatmap_style)
plt.xlabel("Predicted")
plt.ylabel("True")
plt.title("Confusion Matrix")
plt.show()

## 🧪 Step 6: Test on Continuous Movement Recording

In [None]:
# Load continuous test file
def load_continuous(sensor, participant=1):
    """Read the continuous multi-activity recording of one sensor.

    The file is looked up at
    ``DATA_PATH/run_walk_jump_pushup_{sensor}{participant}.csv``.

    Args:
        sensor: Sensor name as used in the file name (e.g. 'acc').
        participant: Number appended to the sensor name in the file name.

    Returns:
        A DataFrame with the CSV contents plus a 'Sensor' column holding
        ``sensor``.
    """
    csv_path = os.path.join(DATA_PATH, f"run_walk_jump_pushup_{sensor}{participant}.csv")
    return pd.read_csv(csv_path).assign(Sensor=sensor)

# Segment continuous data into windows (e.g., every 2s ~ 200 samples)
def sliding_window_features(sensor_data, window_size=200, step=100):
    """Slide a fixed-size window over a recording and featurise each window.

    Args:
        sensor_data: DataFrame with 'X', 'Y' and 'Z' columns, one row per sample.
        window_size: Number of consecutive rows per window.
        step: Number of rows between the starts of consecutive windows.

    Returns:
        A tuple ``(features, positions)``. ``features`` is a DataFrame with
        one row per window holding the ``extract_features`` statistics plus a
        'Position' column; ``positions`` is the list of window start row
        indices (the same values as the 'Position' column).

    Raises:
        ValueError: If ``window_size`` or ``step`` is not positive.
    """
    if window_size <= 0 or step <= 0:
        raise ValueError("window_size and step must be positive")

    features = []
    positions = []
    # "+ 1" keeps the last full window: a window starting at
    # len - window_size still fits entirely inside the recording.
    for start in range(0, len(sensor_data) - window_size + 1, step):
        window = sensor_data.iloc[start:start + window_size]
        feats = extract_features(window)
        feats['Position'] = start
        features.append(feats)
        positions.append(start)
    return pd.DataFrame(features), positions

# Load all 3 sensors
acc_df = load_continuous('acc')
gyro_df = load_continuous('gyro')
mag_df = load_continuous('mag')

# Extract features from all three
acc_feats, pos = sliding_window_features(acc_df)
gyro_feats, _ = sliding_window_features(gyro_df)
mag_feats, _ = sliding_window_features(mag_df)

# The sensors may not have logged exactly the same number of samples;
# keep only the windows that exist for all three so rows stay aligned.
n_windows = min(len(acc_feats), len(gyro_feats), len(mag_feats))

# Merge side by side, in the same sensor order used for training.
# 'Position' is dropped BEFORE prefixing: otherwise it would turn into
# acc_Position / gyro_Position / mag_Position, survive the later
# drop(columns='Position') and reach the scaler as three unknown features.
per_sensor = {'acc': acc_feats, 'gyro': gyro_feats, 'mag': mag_feats}
combined = pd.concat(
    [
        feats.drop(columns='Position', errors='ignore')
             .iloc[:n_windows]
             .add_prefix(f"{sensor}_")
        for sensor, feats in per_sensor.items()
    ],
    axis=1,
)
combined['Position'] = pos[:n_windows]

# Scale using previous scaler
X_comb = scaler.transform(combined.drop(columns='Position'))

# Predict activities
combined['Predicted'] = knn.predict(X_comb)

## 📈 Step 7: Visualize Classification Over Time

In [None]:
# Plot predictions over time.
# 'Position' holds the row index at which each window starts (it comes from
# the range() over the recording's rows), so the axis is labelled in
# samples rather than milliseconds.
plt.figure(figsize=(15, 4))
plt.plot(combined['Position'], combined['Predicted'], marker='o', linestyle='-', alpha=0.7)
plt.title("Predicted Activity Over Time (Continuous Recording)")
plt.xlabel("Window Start Position (Sample Index)")
plt.ylabel("Predicted Activity")
plt.grid(True)
plt.show()

## 🧪 Optional: Try Your Own Classifier

In [None]:
# Optional: Uncomment to try a different model
# from sklearn.ensemble import RandomForestClassifier
# clf = RandomForestClassifier(n_estimators=100)
# clf.fit(X_train, y_train)
# print(classification_report(y_test, clf.predict(X_test)))

In [None]:
# Prepare Time-Series Inputs for RNN
#
# An RNN consumes raw signal windows shaped (num_windows, time_steps, channels).
# The feature matrix X holds per-recording summary statistics (and is a
# DataFrame, which has no .reshape), so it cannot be turned back into windows.
# The windows are therefore re-derived from the individual recordings.
from tensorflow.keras.utils import to_categorical  # needs: pip install tensorflow

RNN_WINDOW_SIZE = 200          # time steps per window
# NOTE(review): the 6 channels are assumed to be acc + gyro (3 axes each);
# add 'mag' here for 9 channels - confirm against the intended model input.
RNN_SENSORS = ['acc', 'gyro']
RNN_AXES = ['X', 'Y', 'Z']

window_list = []
window_labels = []
for activity in ACTIVITIES:
    for rec in RECORDINGS:
        signals = [
            load_sensor_file(activity, sensor, rec)[RNN_AXES].to_numpy()
            for sensor in RNN_SENSORS
        ]
        # NOTE(review): assumes the sensors of one recording are sampled in
        # lockstep; the streams are truncated to the shortest one.
        n_samples = min(len(sig) for sig in signals)
        stacked = np.hstack([sig[:n_samples] for sig in signals])  # (n_samples, channels)
        # Non-overlapping windows, so no sample appears in two windows
        for start in range(0, n_samples - RNN_WINDOW_SIZE + 1, RNN_WINDOW_SIZE):
            window_list.append(stacked[start:start + RNN_WINDOW_SIZE])
            window_labels.append(activity)

if not window_list:
    raise ValueError("No recording is long enough to yield a full window")

X_windows = np.stack(window_list)      # (num_windows, RNN_WINDOW_SIZE, channels)
y_windows = np.array(window_labels)    # one activity label per window
num_windows = X_windows.shape[0]

# Re-split into 60% train / 20% validation / 20% test, stratified by activity.
# Windows cut from the same recording can land in different splits.
Xw_train, Xw_temp, yw_train, yw_temp = train_test_split(
    X_windows, y_windows, test_size=0.4, stratify=y_windows, random_state=42
)
Xw_val, Xw_test, yw_val, yw_test = train_test_split(
    Xw_temp, yw_temp, test_size=0.5, stratify=yw_temp, random_state=42
)

# One-hot encode labels
classes = sorted(set(window_labels))
cls_to_idx = {c: i for i, c in enumerate(classes)}
yw_train_idx = np.array([cls_to_idx[c] for c in yw_train])
yw_val_idx = np.array([cls_to_idx[c] for c in yw_val])
yw_test_idx = np.array([cls_to_idx[c] for c in yw_test])
yw_train_cat = to_categorical(yw_train_idx, num_classes=len(classes))
yw_val_cat = to_categorical(yw_val_idx, num_classes=len(classes))
yw_test_cat = to_categorical(yw_test_idx, num_classes=len(classes))

## ✅ Done!

You’ve now:
- Loaded and visualized raw sensor data
- Extracted statistical features from 3-axis sensors
- Trained a classifier to detect activity type
- Evaluated it using a confusion matrix
- Applied it to a continuous movement stream for testing
- Visualized activity predictions over time