In [None]:
# Import required libraries
import os
import pandas as pd
import plotly.express as px
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

In [None]:
# 1. Merge Data: attach each subject's HR and HRV (from master_dataset.csv) to that
#    subject's PPG recording and save the result as a CSV under merged_folder.
#    Subject files that are empty, or whose Subject_ID is absent from the master
#    table, are reported and skipped instead of aborting the cell with an IndexError.
ppg_folder = '.'
master_path = '../master_dataset.csv'
merged_folder = './merged_subjects'
os.makedirs(merged_folder, exist_ok=True)
master_df = pd.read_csv(master_path)
# Build the Subject_ID -> row lookup once instead of re-scanning master_df twice per file.
# drop_duplicates keeps the first occurrence, i.e. the same row `.values[0]` would pick.
master_lookup = master_df.drop_duplicates(subset='Subject_ID').set_index('Subject_ID')
# sorted() only makes the processing (and log) order reproducible; os.listdir order is arbitrary.
subject_files = sorted(f for f in os.listdir(ppg_folder) if f.startswith('subject_') and f.endswith('.csv'))
merged_count = 0
for file in subject_files:
    sub_df = pd.read_csv(os.path.join(ppg_folder, file))
    if sub_df.empty:
        print(f'Skipping {file}: file contains no rows')
        continue
    # The subject is identified by the first row's Subject_ID.
    sid = sub_df['Subject_ID'].iloc[0]
    if sid not in master_lookup.index:
        print(f'Skipping {file}: Subject_ID {sid!r} not found in {master_path}')
        continue
    # .at returns a scalar in the column's own dtype; the scalar is broadcast to every row.
    sub_df['HR'] = master_lookup.at[sid, 'HR']
    sub_df['HRV'] = master_lookup.at[sid, 'HRV']
    sub_df.to_csv(os.path.join(merged_folder, file), index=False)
    merged_count += 1
print(f'Merged {merged_count} subject files and saved to {merged_folder}')

In [None]:
# 2. Visualize Merged PPG Data: one interactive Time-vs-PPG line chart per merged subject CSV
for file in os.listdir(merged_folder):
    # Ignore anything in the folder that is not a CSV.
    if not file.endswith('.csv'):
        continue
    csv_path = os.path.join(merged_folder, file)
    df = pd.read_csv(csv_path)
    # The chart title carries the Subject_ID taken from the file's first row.
    subject_id = df["Subject_ID"].iloc[0]
    chart_title = f'Subject_ID: {subject_id} - Time vs PPG'
    fig = px.line(df, x='Time', y='PPG', title=chart_title)
    fig.show()

In [None]:
# 3. Combine and Split Data: concatenate all merged subject datasets and split them
#    into train/test sets *by subject*.
#    The merge step writes a single HR and a single HRV value onto every row of a
#    subject, so a row-wise random split would place rows of the same subject (same
#    target, same HRV feature) in both sets and leak the answer into the test data.
#    Holding out whole subjects avoids that.
# sorted() fixes the concatenation order so random_state gives the same split on every run.
merged_files = sorted(os.path.join(merged_folder, f) for f in os.listdir(merged_folder) if f.endswith('.csv'))
if not merged_files:
    # pd.concat would raise a less helpful "No objects to concatenate" ValueError here.
    raise ValueError(f'No merged subject CSV files found in {merged_folder}; run the merge step first')
combined_df = pd.concat([pd.read_csv(f) for f in merged_files], ignore_index=True)
subject_ids = combined_df['Subject_ID'].unique()
if len(subject_ids) >= 2:
    # Split the subject IDs, then select each subject's rows into exactly one set.
    train_ids, test_ids = train_test_split(subject_ids, test_size=0.2, random_state=42)
    train_df = combined_df[combined_df['Subject_ID'].isin(train_ids)]
    test_df = combined_df[combined_df['Subject_ID'].isin(test_ids)]
    print(f'Subjects - train: {len(train_ids)}, test: {len(test_ids)}')
else:
    # A subject-wise split needs at least two subjects; fall back to the row-wise split.
    print('Warning: fewer than 2 subjects found; using a row-wise split (train and test share a subject)')
    train_df, test_df = train_test_split(combined_df, test_size=0.2, random_state=42)
print('Combined shape:', combined_df.shape)
print('Train shape:', train_df.shape)
print('Test shape:', test_df.shape)

In [None]:
# 4. Build and Evaluate Models: fit 4 regression models to predict HR, record their
#    error metrics in `results`, and draw two diagnostic plots per model.
features = ['PPG', 'HRV', 'Time']
target = 'HR'
X_train, y_train = train_df[features], train_df[target]
X_test, y_test = test_df[features], test_df[target]


def _plot_actual_vs_predicted(model_name, actual, predicted):
    """Scatter actual against predicted HR with a dashed red identity line."""
    lowest, highest = actual.min(), actual.max()
    plt.figure(figsize=(5,3))
    plt.scatter(actual, predicted, alpha=0.5)
    plt.xlabel('Actual HR')
    plt.ylabel('Predicted HR')
    plt.title(f'Actual vs Predicted HR - {model_name}')
    # Identity line spanning the range of the actual values.
    plt.plot([lowest, highest], [lowest, highest], 'r--')
    plt.show()


def _plot_residuals(model_name, predicted, errors):
    """Scatter residuals against predicted HR with a dashed red zero line."""
    plt.figure(figsize=(5,3))
    plt.scatter(predicted, errors, alpha=0.5)
    plt.axhline(0, color='r', linestyle='--')
    plt.xlabel('Predicted HR')
    plt.ylabel('Residuals')
    plt.title(f'Residuals Plot - {model_name}')
    plt.show()


models = {
    'Linear Regression': LinearRegression(),
    'Decision Tree': DecisionTreeRegressor(random_state=42),
    'Random Forest': RandomForestRegressor(random_state=42),
    'KNN': KNeighborsRegressor(),
}

# results maps model name -> metrics plus the raw test-set predictions.
results = {}
for name, model in models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    results[name] = {
        'MAE': mean_absolute_error(y_test, y_pred),
        'MSE': mse,
        'RMSE': np.sqrt(mse),
        'R2': r2_score(y_test, y_pred),
        'y_pred': y_pred,
    }
    # Residual = actual minus predicted.
    residuals = y_test - y_pred
    _plot_actual_vs_predicted(name, y_test, y_pred)
    _plot_residuals(name, y_pred, residuals)
# Model comparison bar charts: one chart for the error metrics, one for R2.
metrics = ['MAE', 'MSE', 'RMSE', 'R2']
# One row per model (in `models` order), one column per metric.
metrics_df = pd.DataFrame(
    [[results[model_name][metric] for metric in metrics] for model_name in models],
    index=list(models),
    columns=metrics,
)

# Error metrics are every column except the trailing R2.
error_columns = metrics[:-1]
ax = metrics_df[error_columns].plot.bar(figsize=(8,5))
ax.set_title('Model Comparison - Error Metrics')
ax.set_ylabel('Error')
plt.show()

# R2 gets its own chart.
ax = metrics_df['R2'].plot.bar(figsize=(8,5), color='orange')
ax.set_title('Model Comparison - R2 Score')
ax.set_ylabel('R2 Score')
plt.show()