# In [1]:
import numpy as np
import pandas as pd
from scipy.stats import norm, gamma, beta

# Fix the seed of NumPy's global random generator so repeated runs match
np.random.seed(42)

# Dataset dimensions: number of simulated users and days per user
n_users, n_days = 50, 70
total_records = n_users * n_days  # one record per (user, day) pair

# Column-oriented accumulator: every output column starts as its own empty
# list (a comprehension is used so that no two columns share one list object)
data = {
    column: []
    for column in (
        'user_id',
        'day_index',
        'sleep_duration',
        'step_count',
        'resting_heart_rate',
        'stress_level',
        'sleep_onset_time',
        'HR_day_avg',
        'HR_sleep_min',
    )
}

# Per-user baselines: for every signal, draw one value per user from the
# listed distribution. The entries are sampled in the order written here,
# which keeps the sequence of random draws fixed.
user_baselines = {
    signal: distribution.rvs(size=n_users, **params)
    for signal, distribution, params in (
        ('sleep_duration', norm, {'loc': 7.2, 'scale': 0.5}),
        ('step_count', gamma, {'a': 2, 'scale': 3500}),
        ('resting_heart_rate', norm, {'loc': 68, 'scale': 4}),
        ('stress_level', beta, {'a': 2, 'b': 5}),
        ('sleep_onset_time', norm, {'loc': 1380, 'scale': 45}),
        ('HR_day_avg', norm, {'loc': 85, 'scale': 3.5}),
        ('HR_sleep_min', norm, {'loc': 55, 'scale': 3}),
    )
}

# Enforce HR_sleep_min < HR_day_avg on the baselines: any user whose sleeping
# minimum is not strictly below the daytime average gets that minimum redrawn
# (in place) until the ordering holds. Users already in order consume no
# random numbers.
sleep_min_baseline = user_baselines['HR_sleep_min']
for i, day_avg_baseline in enumerate(user_baselines['HR_day_avg']):
    while sleep_min_baseline[i] >= day_avg_baseline:
        sleep_min_baseline[i] = norm.rvs(loc=55, scale=3)

def _simulate_user_days(user, baselines, num_days):
    """Yield one record (a dict keyed by output column) per simulated day.

    Every signal has a slowly drifting level that starts at the user's
    baseline and is blended with each day's sampled value. The random draws
    happen in a fixed order within a day: stress, spike test, stress noise,
    sleep, steps, resting HR, sleep onset, daytime HR, sleeping HR.

    Args:
        user: Index of the user inside each array of ``baselines``.
        baselines: Mapping from signal name to a per-user array of baselines.
        num_days: Number of consecutive days to simulate.

    Yields:
        A dict holding the rounded/truncated values for one day.
    """
    sleep_level = baselines['sleep_duration'][user]
    steps_level = baselines['step_count'][user]
    rhr_level = baselines['resting_heart_rate'][user]
    stress_level = baselines['stress_level'][user]
    onset_level = baselines['sleep_onset_time'][user]
    hr_day_level = baselines['HR_day_avg'][user]
    hr_sleep_level = baselines['HR_sleep_min'][user]

    for day in range(num_days):
        # Stress: Beta(2, 5) draw, a 20%-probability spike of +0.3, then
        # small Gaussian noise; each step is kept inside [0, 1].
        daily_stress = beta.rvs(a=2, b=5)
        if np.random.random() < 0.2:
            daily_stress = np.clip(daily_stress + 0.3, 0, 1)
        stress_noise = norm.rvs(loc=0, scale=0.05)
        daily_stress = np.clip(daily_stress + stress_noise, 0, 1)
        # Smoothed level: 80% previous level, 20% today's value
        stress_level = 0.8 * stress_level + 0.2 * daily_stress
        # NOTE(review): every stress-driven adjustment below keys off the
        # smoothed level exceeding 0.7; with Beta(2, 5) draws and a 0.2
        # blending weight this looks rarely reachable -- confirm intent.
        high_stress = stress_level > 0.7

        # Sleep duration: shortened by 0.5 under high stress, kept in [3.5, 9]
        sleep_shift = -0.5 if high_stress else 0
        sleep = np.clip(
            norm.rvs(loc=sleep_level + sleep_shift, scale=1.1), 3.5, 9
        )
        sleep_level = 0.9 * sleep_level + 0.1 * sleep

        # High stress or a short night (< 6) penalises steps and raises both
        # the resting and the daytime heart rate.
        strained = high_stress or sleep < 6
        if strained:
            step_shift, rhr_shift, hr_day_shift = -2000, 3, 5
        else:
            step_shift, rhr_shift, hr_day_shift = 0, 0, 0

        # Step count: gamma draw whose mean equals the current level
        steps = np.clip(
            gamma.rvs(a=2, scale=steps_level / 2) + step_shift, 0, 25000
        )
        steps_level = 0.9 * steps_level + 0.1 * steps

        # Resting heart rate, kept in [45, 95]
        rhr = np.clip(norm.rvs(loc=rhr_level + rhr_shift, scale=8), 45, 95)
        rhr_level = 0.9 * rhr_level + 0.1 * rhr

        # Sleep onset time, kept in [1320, 1800].
        # NOTE(review): the earlier comment on this bound read "10:00 PM to
        # 3:00 AM"; if the unit is minutes after midnight, 1800 is 6:00 AM
        # (3:00 AM would be 1620) -- confirm the intended upper bound.
        onset = np.clip(norm.rvs(loc=onset_level, scale=90), 1320, 1800)
        onset_level = 0.9 * onset_level + 0.1 * onset

        # Daytime average heart rate, kept in [70, 110]
        hr_day = np.clip(
            norm.rvs(loc=hr_day_level + hr_day_shift, scale=7), 70, 110
        )
        hr_day_level = 0.9 * hr_day_level + 0.1 * hr_day

        # Sleeping minimum heart rate: +3 under high stress, otherwise -2
        # after a long night (> 7.5), otherwise unchanged.
        if high_stress:
            hr_sleep_shift = 3
        elif sleep > 7.5:
            hr_sleep_shift = -2
        else:
            hr_sleep_shift = 0
        # The upper bound keeps the value at least 5 below today's daytime
        # average (and never above 75), so HR_sleep_min < HR_day_avg.
        hr_sleep_cap = min(75, hr_day - 5)
        hr_sleep = np.clip(
            norm.rvs(loc=hr_sleep_level + hr_sleep_shift, scale=6),
            40,
            hr_sleep_cap,
        )
        hr_sleep_level = 0.9 * hr_sleep_level + 0.1 * hr_sleep

        # The recorded stress value is the smoothed level, not the raw draw
        yield {
            'user_id': user,
            'day_index': day,
            'sleep_duration': round(sleep, 2),
            'step_count': int(steps),
            'resting_heart_rate': round(rhr, 1),
            'stress_level': round(stress_level, 2),
            'sleep_onset_time': int(onset),
            'HR_day_avg': round(hr_day, 1),
            'HR_sleep_min': round(hr_sleep, 1),
        }


# Generate the time series user by user, appending every daily record to the
# matching column lists of the accumulator
for user in range(n_users):
    for record in _simulate_user_days(user, user_baselines, n_days):
        for column, value in record.items():
            data[column].append(value)

# Turn the accumulated column lists into a table
df = pd.DataFrame(data=data)

# Write the table to disk as CSV, leaving out the row index
df.to_csv(path_or_buf='wearable_sensor_data.csv', index=False)