# 📊 PADS Dataset Preprocessing Notebook

This notebook loads, visualizes, and windows smartwatch sensor data from the PADS dataset as a preprocessing step for personalized Parkinson’s disease ML models.

In [None]:
# Imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import json
from tqdm import tqdm
import os

In [None]:
# Read the recording as a flat float32 stream and fold it into rows of six
# values each: [time_steps, 6 channels: accel + gyro].
# NOTE(review): the (-1, 6) layout assumes the file stores samples with the
# six channel values interleaved — confirm against the dataset's format docs.
bin_path = '001_ml.bin'
data = np.fromfile(bin_path, dtype=np.float32).reshape(-1, 6)
data.shape

In [None]:
# Overlay the first 1000 samples of each of the six channels in one figure
plt.figure(figsize=(15, 6))
# Transposing the slice lets us walk the channels (columns) directly.
for i, trace in enumerate(data[:1000, :6].T):
    plt.plot(trace, label=f'Channel {i+1}')
plt.xlabel('Time Step')
plt.ylabel('Sensor Value')
plt.title('First 1000 Samples from 001_ml.bin')
plt.grid(True)
plt.legend()
plt.show()

In [None]:
# Load metadata
# JSON is UTF-8 by specification, so decode it explicitly rather than relying
# on the platform's locale encoding (which is not UTF-8 on every system).
with open('observation_001.json', encoding='utf-8') as f:
    obs = json.load(f)

with open('patient_001.json', encoding='utf-8') as f:
    patient = json.load(f)

# Summarize the participant record
print('Participant ID:', patient['id'])
print('Condition:', patient['condition'])
print('Age:', patient['age'])
print('Gender:', patient['gender'])

In [None]:
# Windowing function (defaults: 256-sample windows, 128-sample stride = 50% overlap)
# NOTE(review): this header previously described 10.24 s / 1024-sample windows
# at 100 Hz, which does not match the size=256 default — confirm which is intended.
def window_data(data, size=256, stride=128):
    """Split a recording into fixed-length, possibly overlapping windows.

    Windows start at indices 0, stride, 2*stride, ... along the first axis
    and each spans ``size`` consecutive rows. A trailing remainder that
    cannot fill a complete window is dropped.

    Args:
        data: Array-like whose first axis is the one being windowed.
        size: Number of rows per window; must be positive.
        stride: Offset between consecutive window starts; must be positive.

    Returns:
        Array of shape ``(n_windows, size, *data.shape[1:])``. When the input
        has fewer than ``size`` rows, ``n_windows`` is 0 and the result is an
        empty array with the same dtype as the input.

    Raises:
        ValueError: If ``size`` or ``stride`` is not positive.
    """
    if size <= 0 or stride <= 0:
        raise ValueError(
            f'size and stride must be positive, got size={size}, stride={stride}'
        )

    data = np.asarray(data)
    starts = range(0, data.shape[0] - size + 1, stride)
    if len(starts) == 0:
        # np.stack rejects an empty list; return a correctly shaped empty
        # result so short recordings yield "no windows" instead of crashing.
        return np.empty((0, size) + data.shape[1:], dtype=data.dtype)

    return np.stack([data[start:start + size] for start in starts])

# Segment the full recording using the default window settings and report
# the resulting array dimensions.
windowed = window_data(data)
print('Windowed shape:', np.shape(windowed))

In [None]:
# Plot all six channels of the first window on a shared axis
plt.figure(figsize=(12, 6))
# Transposing the window lets us walk its channels (columns) directly.
for i, trace in enumerate(windowed[0][:, :6].T):
    plt.plot(trace, label=f'Channel {i+1}')
plt.title('First Windowed Segment')
plt.legend()
plt.show()