In [36]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import r2_score, mean_squared_error
import joblib


In [37]:
df = pd.read_csv("engineered_health_dataset.csv")


In [38]:
TARGET = "sleep_quality"

FEATURES = [
    "sleep_duration",
    "deep_sleep_ratio",
    "rem_sleep_ratio",
    "hrv_rmssd",
    "resting_heart_rate",
    "pm2_5",
    "noise_level",
    "temperature",
    "sleep_pressure",
    "steps"
]


In [39]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=42
)

In [40]:
dt_model = DecisionTreeRegressor(
    max_depth=6,              # controls overfitting
    min_samples_split=20,
    min_samples_leaf=10,
    random_state=42
)

dt_model.fit(X_train, y_train)


In [41]:
y_train_pred = dt_model.predict(X_train)
y_test_pred = dt_model.predict(X_test)

print("Train R² :", round(r2_score(y_train, y_train_pred), 3))
print("Test  R² :", round(r2_score(y_test, y_test_pred), 3))
print("RMSE     :", round(np.sqrt(mean_squared_error(y_test, y_test_pred)), 3))


Train R² : 0.585
Test  R² : 0.586
RMSE     : 4.041


In [42]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score, mean_squared_error
import joblib

In [43]:
df = pd.read_csv("engineered_health_dataset.csv")

In [44]:
df.head()

Unnamed: 0,hour_of_day,day_of_week,temperature,humidity,air_quality_index,pm2_5,noise_level,activity_type,steps,calories_burned,...,oxygen_stress,immune_stress,illness_risk,deep_sleep_ratio,rem_sleep_ratio,sleep_architecture,env_sleep_disruption,sleep_quality,high_stress_hour,hourly_stress
0,6.0,0.0,42.032235,34.509013,169.008992,89.493557,57.523692,idle,195.0,9.0,...,0.0,0,62.385072,0.23685,0.288002,0.925148,0.844738,31.650891,0,41.11019
1,19.0,6.0,26.521176,36.690251,120.147978,76.359016,73.853983,idle,2835.0,103.0,...,0.0,0,36.96875,0.184112,0.27586,0.958252,0.929964,24.8221,1,41.236688
2,14.0,6.0,26.334625,54.602926,25.251015,9.971804,58.135266,walk,4115.0,183.0,...,0.0,0,26.496376,0.193332,0.289944,0.953388,0.417845,42.038868,0,41.393029
3,10.0,6.0,37.329564,56.491538,53.214365,28.168089,56.909952,idle,1937.0,55.0,...,0.0,0,10.717068,0.225071,0.217208,0.942137,0.485077,42.22649,0,41.253118
4,7.0,1.0,27.272382,84.905974,20.0,8.596788,50.342253,idle,2517.0,91.0,...,0.0,0,35.35885,0.166471,0.297987,0.918484,0.290604,50.429197,0,41.224667


In [45]:
TARGET = "sleep_quality"

FEATURES = [
    "sleep_duration",
    "deep_sleep_ratio",
    "rem_sleep_ratio",
    "hrv_rmssd",
    "resting_heart_rate",
    "pm2_5",
    "noise_level",
    "temperature",
    "sleep_pressure",
    "steps"
]


In [46]:
X = df[FEATURES]
y = df[TARGET]


In [47]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)


In [48]:
y_train_pred = dt_model.predict(X_train)
y_test_pred = dt_model.predict(X_test)

print("Train R² :", round(r2_score(y_train, y_train_pred), 3))
print("Test  R² :", round(r2_score(y_test, y_test_pred), 3))
print("RMSE     :", round(np.sqrt(mean_squared_error(y_test, y_test_pred)), 3))


Train R² : 0.586
Test  R² : 0.584
RMSE     : 4.032


In [49]:
rf_model = RandomForestRegressor(
    n_estimators=150,
    max_depth=10,
    min_samples_split=8,
    min_samples_leaf=4,
    random_state=42,
    n_jobs=-1
)

rf_model.fit(X_train, y_train)


In [51]:
y_train_pred = rf_model.predict(X_train)
y_test_pred = rf_model.predict(X_test)

print("Train R² :", round(r2_score(y_train, y_train_pred), 3))
print("Test  R² :", round(r2_score(y_test, y_test_pred), 3))
print("RMSE     :", round(np.sqrt(mean_squared_error(y_test, y_test_pred)), 3))


Train R² : 0.647
Test  R² : 0.624
RMSE     : 3.835


In [52]:
import joblib
joblib.dump(rf_model, "sleep_quality_model.pkl")

['sleep_quality_model.pkl']