# Data Preprocessing for DREAMT Dataset
This notebook handles biosignal cleaning, normalization, and windowing for downstream modeling.
Dataset source: PhysioNet DREAMT (wearable data sampled at 64 Hz)


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.signal import butter, filtfilt

# Read the example recording into a DataFrame (adjust the path for your setup)
df = pd.read_csv(filepath_or_buffer='../data/S002_whole_df.csv')
# Preview the first few rows
df.head()

In [None]:
# Normalize biosignals
def normalize_signal(signal):
    """Standardize a signal to zero mean and unit standard deviation.

    Parameters
    ----------
    signal : numpy.ndarray or pandas.Series
        Numeric samples to normalize.

    Returns
    -------
    Same container type as ``signal - np.mean(signal)``
        ``(signal - mean) / std``, with ``std`` computed by ``np.std``
        (``ddof=0``). A signal whose standard deviation is exactly zero is
        returned mean-centred (all zeros) instead of being divided by zero.
    """
    centered = signal - np.mean(signal)
    spread = np.std(signal)
    # A flat signal has zero spread; dividing would yield NaN/inf, so return
    # the centred (all-zero) signal as is. Only the scalar case is guarded;
    # non-scalar spreads fall through to the plain division below.
    if np.ndim(spread) == 0 and spread == 0:
        return centered
    return centered / spread

# Example: store the normalized BVP channel next to the raw column
df['BVP_norm'] = normalize_signal(df['BVP'])

# Overlay the first 1000 samples of the raw and normalized traces
plt.figure(figsize=(12, 4))
for series_name, legend_label in (('BVP', 'Raw'), ('BVP_norm', 'Normalized')):
    plt.plot(df[series_name][:1000], label=legend_label)
plt.title('BVP Signal (Raw vs Normalized)')
plt.legend()
plt.show()

In [None]:
# Define function to create sliding windows
def create_windows(data, window_size, step_size):
    """Split a sequence into fixed-length, possibly overlapping windows.

    Parameters
    ----------
    data : array-like
        Samples to window; slicing is done along the first axis.
    window_size : int
        Number of samples per window. Must be positive.
    step_size : int
        Offset between the starts of consecutive windows. Must be positive.

    Returns
    -------
    numpy.ndarray
        Array whose first axis indexes windows and whose second axis has
        length ``window_size``. When ``data`` is shorter than one window the
        result has shape ``(0, window_size, ...)`` so ``.shape[1]`` is still
        valid.

    Raises
    ------
    ValueError
        If ``window_size`` or ``step_size`` is not positive.
    """
    if window_size <= 0 or step_size <= 0:
        raise ValueError('window_size and step_size must be positive')

    data = np.asarray(data)
    # The "+ 1" keeps the final full window, whose start index is exactly
    # len(data) - window_size; without it that window is silently dropped.
    starts = range(0, len(data) - window_size + 1, step_size)
    if not starts:
        # No full window fits: return an empty array that is still 2-D (or
        # higher) so callers can read the window length from the shape.
        return np.empty((0, window_size) + data.shape[1:], dtype=data.dtype)
    return np.array([data[start:start + window_size] for start in starts])

# Example on BVP: 512-sample windows advanced 128 samples at a time
# (8 s windows with a 2 s hop if BVP is sampled at the 64 Hz stated in the
# notebook header -- TODO confirm for this column)
bvp_windows = create_windows(
    df['BVP_norm'].values,
    window_size=512,
    step_size=128,
)
print(f'Generated {bvp_windows.shape[0]} windows of length {bvp_windows.shape[1]}')