In [6]:
import pandas as pd
import numpy as np

# ================================================
# 1. Load your two HR CSV files directly
# ================================================
# Each input path is named exactly once so the log printed below can never
# drift from the files that were actually read.
HR_FILE_1 = "heart_rate_2025-11-26.csv"
HR_FILE_2 = "heart_rate_2025-11-27.csv"

df1 = pd.read_csv(HR_FILE_1)
df2 = pd.read_csv(HR_FILE_2)

print("Loaded files:")
print(HR_FILE_1)
print(HR_FILE_2)


# ================================================
# 2. Preprocess HR (ignore datasource, interpolate to 1 Hz)
# ================================================
def preprocess_hr(df):
    """Turn a raw heart-rate export into a gap-free 1 Hz ``HR`` series.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw export with a ``timestamp`` column (anything ``pd.to_datetime``
        can parse) and a ``beats per minute`` column. All other columns are
        dropped. The caller's frame is not modified.

    Returns
    -------
    pandas.DataFrame
        One column, ``HR``, indexed by ``timestamp`` at one-second spacing.
        Samples that fall in the same second are averaged; seconds without a
        sample are filled by linear interpolation.

    Raises
    ------
    KeyError
        If ``timestamp`` or ``beats per minute`` is missing from ``df``.
    """
    # Build a new frame rather than assigning into `df`: the caller's raw data
    # keeps its original `timestamp` column, so the call can be repeated safely.
    hr = (
        df.assign(timestamp=pd.to_datetime(df['timestamp']))
        .set_index('timestamp')
        .rename(columns={'beats per minute': 'HR'})[['HR']]  # keep only HR
    )

    # Resample at 1 Hz and interpolate missing values
    return hr.resample('1s').mean().interpolate(method='linear')


# Run both raw exports through the shared 1 Hz cleaning step.
df1 = df1.pipe(preprocess_hr)
df2 = df2.pipe(preprocess_hr)


# ================================================
# 3. Generate 5-second synthetic HRV features
# ================================================
def generate_features(hr_df, user_id, window_size=5, seed=None):
    """Build one row of HR + synthetic HRV features per non-overlapping window.

    The heart-rate samples are real (taken from ``hr_df``); the RMSSD, LF and
    HF values are random placeholders, not derived from the signal.

    Parameters
    ----------
    hr_df : pandas.DataFrame
        Frame with an ``HR`` column, one sample per row.
    user_id : object
        Value stored in the ``user_id`` column of every row.
    window_size : int, default 5
        Number of consecutive samples per window (five seconds when ``hr_df``
        is sampled at 1 Hz). Trailing samples that do not fill a whole window
        are dropped.
    seed : int or None, default None
        ``None`` draws from NumPy's global generator, so an earlier
        ``np.random.seed(...)`` call still controls the output. Any other
        value creates a private ``np.random.default_rng(seed)`` so the result
        is reproducible regardless of global state.

    Returns
    -------
    pandas.DataFrame
        Columns ``HR_1, RMSSD_1, ..., HR_n, RMSSD_n, LF, HF, LF/HF, label,
        window_index, user_id`` with ``n == window_size``. ``label`` is always
        0. The frame has zero rows (but all columns) when ``hr_df`` holds
        fewer than ``window_size`` samples.

    Raises
    ------
    ValueError
        If ``window_size`` is smaller than 1.
    """
    if window_size < 1:
        raise ValueError("window_size must be a positive integer")

    hr_values = hr_df['HR'].values
    rng = np.random if seed is None else np.random.default_rng(seed)

    # Column ordering: HR/RMSSD interleaved per sample, then spectral
    # features, then label and metadata.
    column_order = []
    for i in range(1, window_size + 1):
        column_order += [f"HR_{i}", f"RMSSD_{i}"]
    column_order += ["LF", "HF", "LF/HF", "label", "window_index", "user_id"]

    rows = []
    window_starts = range(0, len(hr_values) - window_size + 1, window_size)
    for window_index, start in enumerate(window_starts):
        segment = hr_values[start:start + window_size]

        # HR features
        feature_dict = {f"HR_{i+1}": segment[i] for i in range(window_size)}

        # Synthetic RMSSD features (abs() folds negative draws back to >= 0).
        # The draw order normal -> LF -> HF is kept so that a seeded global
        # generator reproduces earlier exports.
        rmssd_values = np.abs(rng.normal(loc=20, scale=8, size=window_size))
        for i in range(window_size):
            feature_dict[f"RMSSD_{i+1}"] = rmssd_values[i]

        # Synthetic frequency domain features
        lf = rng.uniform(0.4, 0.9)
        hf = rng.uniform(0.6, 1.2)
        feature_dict["LF"] = lf
        feature_dict["HF"] = hf
        feature_dict["LF/HF"] = lf / hf

        # Labels & metadata
        feature_dict["label"] = 0
        feature_dict["window_index"] = window_index
        feature_dict["user_id"] = user_id

        rows.append(feature_dict)

    # Passing `columns=` orders the columns and also yields a correctly shaped
    # empty frame when no window could be formed (indexing an empty frame by
    # column name would raise KeyError).
    return pd.DataFrame(rows, columns=column_order)


# ================================================
# 4. Generate Features for both files
# ================================================
# The RMSSD / LF / HF values are random draws. Seed NumPy's global generator
# first so the exported CSV is identical on every Restart & Run All.
RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)

features1 = generate_features(df1, user_id="S20")
features2 = generate_features(df2, user_id="S21")

# Combine
final_df = pd.concat([features1, features2], ignore_index=True)


# ================================================
# 5. Export final dataframe to CSV
# ================================================
final_df.to_csv("synthetic_hrv_features.csv", index=False)
print("Final dataset shape:", final_df.shape)
print("Exported final dataset to 'synthetic_hrv_features.csv'")

# Preview
final_df.head()


Loaded files:
heart_rate_2025-11-26.csv
heart_rate_2025-11-27.csv
Final dataset shape: (22132, 16)
Exported final dataset to 'synthetic_hrv_features.csv'


Unnamed: 0,HR_1,RMSSD_1,HR_2,RMSSD_2,HR_3,RMSSD_3,HR_4,RMSSD_4,HR_5,RMSSD_5,LF,HF,LF/HF,label,window_index,user_id
0,71.0,5.59892,72.0,8.041438,72.5,16.389412,73.0,15.905972,73.0,18.730191,0.720624,0.807981,0.891882,0,0,S20
1,73.0,12.755215,73.0,24.500138,72.0,20.474436,72.5,29.288785,73.0,26.901307,0.808913,0.9257,0.873839,0,1,S20
2,73.5,8.788449,74.0,9.678526,75.0,2.255647,76.0,16.784607,76.5,12.327615,0.608693,1.150761,0.528948,0,2,S20
3,77.0,4.678697,78.0,26.305897,79.0,26.752396,78.5,26.405021,78.0,11.133607,0.605138,1.159197,0.522031,0,3,S20
4,78.0,33.127873,78.0,31.534115,78.0,20.218257,78.0,20.751841,78.0,2.113045,0.533194,0.986165,0.540674,0,4,S20


In [8]:
import pandas as pd
import numpy as np

# ------------------------------------------------
# Step 1 -- read the raw heart-rate export
# ------------------------------------------------
# `hr_file` is the only value to edit when processing a different recording.
hr_file = "heart_rate_2025-12-01.csv"
df = pd.read_csv(filepath_or_buffer=hr_file)
print(f"Loaded file: {hr_file}")

# ================================================
# 2. Preprocess HR (ignore datasource, interpolate to 1 Hz)
# ================================================
def preprocess_hr(df):
    """Turn a raw heart-rate export into a gap-free 1 Hz ``HR`` series.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw export with a ``timestamp`` column (anything ``pd.to_datetime``
        can parse) and a ``beats per minute`` column. All other columns are
        dropped. The caller's frame is not modified.

    Returns
    -------
    pandas.DataFrame
        One column, ``HR``, indexed by ``timestamp`` at one-second spacing.
        Samples that fall in the same second are averaged; seconds without a
        sample are filled by linear interpolation.

    Raises
    ------
    KeyError
        If ``timestamp`` or ``beats per minute`` is missing from ``df``.
    """
    # Work on a derived frame instead of assigning into `df`, so the raw data
    # passed in keeps its original `timestamp` column.
    hr = (
        df.assign(timestamp=pd.to_datetime(df['timestamp']))
        .set_index('timestamp')
        .rename(columns={'beats per minute': 'HR'})[['HR']]  # keep only HR
    )

    # Resample at 1 Hz and interpolate missing values
    return hr.resample('1s').mean().interpolate(method='linear')

# Replace the raw export with its cleaned 1 Hz HR series.
df = df.pipe(preprocess_hr)


# ================================================
# 3. Generate 5-second synthetic HRV features under stress
# ================================================
def generate_stress_features(hr_df, user_id, window_size=5, seed=None):
    """Build one stress-labelled row of HR + synthetic HRV features per window.

    The heart-rate samples are real (taken from ``hr_df``); the RMSSD, LF and
    HF values are random placeholders, not derived from the signal.

    Parameters
    ----------
    hr_df : pandas.DataFrame
        Frame with an ``HR`` column, one sample per row.
    user_id : object
        Value stored in the ``user_id`` column of every row.
    window_size : int, default 5
        Number of consecutive samples per non-overlapping window (five seconds
        when ``hr_df`` is sampled at 1 Hz). Trailing samples that do not fill
        a whole window are dropped.
    seed : int or None, default None
        ``None`` draws from NumPy's global generator, so an earlier
        ``np.random.seed(...)`` call still controls the output. Any other
        value creates a private ``np.random.default_rng(seed)`` so the result
        is reproducible regardless of global state.

    Returns
    -------
    pandas.DataFrame
        Columns ``HR_1, RMSSD_1, ..., HR_n, RMSSD_n, LF, HF, LF/HF, label,
        window_index, user_id`` with ``n == window_size``. ``label`` is always
        1. The frame has zero rows (but all columns) when ``hr_df`` holds
        fewer than ``window_size`` samples.

    Raises
    ------
    ValueError
        If ``window_size`` is smaller than 1.
    """
    if window_size < 1:
        raise ValueError("window_size must be a positive integer")

    hr_values = hr_df['HR'].values
    rng = np.random if seed is None else np.random.default_rng(seed)

    # Column ordering: HR/RMSSD interleaved per sample, then spectral
    # features, then label and metadata.
    column_order = []
    for i in range(1, window_size + 1):
        column_order += [f"HR_{i}", f"RMSSD_{i}"]
    column_order += ["LF", "HF", "LF/HF", "label", "window_index", "user_id"]

    rows = []
    window_starts = range(0, len(hr_values) - window_size + 1, window_size)
    for window_index, start in enumerate(window_starts):
        segment = hr_values[start:start + window_size]

        # HR features
        feature_dict = {f"HR_{i+1}": segment[i] for i in range(window_size)}

        # Stress-condition RMSSD draws (abs() folds negative draws to >= 0).
        # NOTE(review): acute stress is usually associated with *lower* RMSSD;
        # confirm that a mean of 25 ms is the intended stress profile.
        # The draw order normal -> LF -> HF is kept so that a seeded global
        # generator reproduces earlier exports.
        rmssd_values = np.abs(rng.normal(loc=25, scale=10, size=window_size))
        for i in range(window_size):
            feature_dict[f"RMSSD_{i+1}"] = rmssd_values[i]

        # Stress frequency domain: LF range sits above the HF range, so the
        # LF/HF ratio is mostly greater than 1.
        lf = rng.uniform(0.7, 1.2)
        hf = rng.uniform(0.4, 0.8)
        feature_dict["LF"] = lf
        feature_dict["HF"] = hf
        feature_dict["LF/HF"] = lf / hf

        # Label & metadata
        feature_dict["label"] = 1  # stress condition
        feature_dict["window_index"] = window_index
        feature_dict["user_id"] = user_id

        rows.append(feature_dict)

    # Passing `columns=` orders the columns and also yields a correctly shaped
    # empty frame when no window could be formed (indexing an empty frame by
    # column name would raise KeyError).
    return pd.DataFrame(rows, columns=column_order)

# ================================================
# 4. Generate stress features for the uploaded file
# ================================================
# The RMSSD / LF / HF values are random draws. Seed NumPy's global generator
# first so the exported CSV is identical every time this cell is run.
STRESS_RANDOM_SEED = 43
np.random.seed(STRESS_RANDOM_SEED)

stress_features = generate_stress_features(df, user_id="S22")

# ================================================
# 5. Export to CSV
# ================================================
stress_features.to_csv("synthetic_hrv_stress_S22.csv", index=False)
print("Final stress dataset shape:", stress_features.shape)
print("Exported stress dataset to 'synthetic_hrv_stress_S22.csv'")

# Preview
stress_features.head()


Loaded file: heart_rate_2025-12-01.csv
Final stress dataset shape: (5702, 16)
Exported stress dataset to 'synthetic_hrv_stress_S22.csv'


Unnamed: 0,HR_1,RMSSD_1,HR_2,RMSSD_2,HR_3,RMSSD_3,HR_4,RMSSD_4,HR_5,RMSSD_5,LF,HF,LF/HF,label,window_index,user_id
0,101.0,30.375498,101.0,30.700646,101.0,12.303454,101.0,43.241231,100.0,36.308891,1.188916,0.575321,2.066524,1,0,S22
1,101.5,13.271343,103.0,36.375119,102.0,37.976326,101.0,25.271635,100.0,29.94661,1.18811,0.710147,1.673048,1,1,S22
2,99.0,12.513722,100.0,12.430949,101.0,26.780216,100.5,49.538044,100.0,9.207786,1.143978,0.535968,2.134413,1,2,S22
3,99.0,29.668664,98.0,26.528665,98.666667,35.42308,99.333333,23.866111,100.0,27.757387,0.983398,0.405687,2.424032,1,3,S22
4,100.5,12.62724,101.0,42.136923,101.0,17.152668,101.0,17.041578,101.0,34.329923,0.952605,0.641078,1.485941,1,4,S22


In [11]:
import pandas as pd

# Load the two CSV files
df_stress = pd.read_csv("synthetic_hrv_stress_S22.csv")
df_features = pd.read_csv("synthetic_hrv_features.csv")

# Stacking rows is only meaningful when both files share one schema.
# pd.concat would otherwise silently pad the missing columns with NaN, so
# fail loudly instead.
mismatched_columns = set(df_stress.columns) ^ set(df_features.columns)
if mismatched_columns:
    raise ValueError(
        "Cannot stack datasets with different columns: "
        f"{sorted(mismatched_columns)}"
    )

# Stack vertically (stress rows first, then the baseline rows) and renumber
# the index from 0.
df_combined = pd.concat([df_stress, df_features], ignore_index=True)

# Save the combined CSV
df_combined.to_csv("synthetic_hrv_combined.csv", index=False)

print("Exported combined dataset to 'synthetic_hrv_combined.csv'")

Exported combined dataset to 'synthetic_hrv_combined.csv'
