In [1]:
import pandas as pd

df = pd.read_csv("wearable_ml_ready.csv")

df["burnout_score"] = (
    0.35 * (1 - df["hrv_7d_avg"] / df["baseline_hrv"]) +
    0.30 * df["sleep_pressure"] +
    0.20 * df["stress_score"] +
    0.15 * (df["activity_load"] / df["activity_load"].max())
)

df.to_csv("wearable_burnout.csv", index=False)

print("✅ burnout_score created")


✅ burnout_score created


In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, r2_score

# ===============================
# LOAD DATA
# ===============================
df = pd.read_csv("wearable_burnout.csv")

# ===============================
# FEATURES & TARGET
# ===============================
features = [
    "hrv_7d_avg",
    "sleep_7d_avg",
    "sleep_pressure",
    "stress_score",
    "activity_load",
    "baseline_hrv",
    "hrv_deviation"
]

X = df[features]
y = df["burnout_score"]

# ===============================
# SPLIT
# ===============================
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# ===============================
# MODEL
# ===============================
rf = RandomForestRegressor(
    n_estimators=50,
    max_depth=8,
    max_features="sqrt",
    n_jobs=-1,
    random_state=42
)

# ===============================
# TRAIN
# ===============================
rf.fit(X_train, y_train)

# ===============================
# EVALUATE
# ===============================
y_pred = rf.predict(X_test)

print("Random Forest – Invisible Burnout Score")
print("MAE:", mean_absolute_error(y_test, y_pred))
print("R2 :", r2_score(y_test, y_pred))

# ===============================
# FEATURE IMPORTANCE
# ===============================
importances = pd.Series(
    rf.feature_importances_,
    index=features
).sort_values(ascending=False)

print("\nTop Burnout Drivers:")
print(importances)


Random Forest – Invisible Burnout Score
MAE: 0.2952287374103231
R2 : 0.9645227736716395

Top Burnout Drivers:
stress_score      0.610638
sleep_pressure    0.189388
hrv_deviation     0.072843
sleep_7d_avg      0.053032
baseline_hrv      0.026378
activity_load     0.024560
hrv_7d_avg        0.023161
dtype: float64


In [3]:
import numpy as np
import pandas as pd

df = pd.read_csv("wearable_burnout.csv")
df = df.sort_values(["user_id", "date"])

seq_features = [
    "hrv_rmssd_ms",
    "sleep_duration_hours",
    "stress_score",
    "activity_load"
]

SEQ_LEN = 7

X_seq, y_seq = [], []

for user_id, g in df.groupby("user_id"):
    g = g.reset_index(drop=True)
    for i in range(len(g) - SEQ_LEN):
        X_seq.append(g.loc[i:i+SEQ_LEN-1, seq_features].values)
        y_seq.append(g.loc[i+SEQ_LEN, "burnout_score"])

X_seq = np.array(X_seq)
y_seq = np.array(y_seq)

print("Sequence shape:", X_seq.shape)


Sequence shape: (53100, 7, 4)


In [4]:
import joblib

# rf = trained Random Forest Burnout model
joblib.dump(rf, "invisible_burnout_random_forest.pkl")

print("✅ Invisible Burnout Random Forest model saved successfully")


✅ Invisible Burnout Random Forest model saved successfully
