In [1]:
import pandas as pd

daily_sleep = pd.read_csv("daily_sleep_analytics.csv", parse_dates=["sleep_date"])
daily_steps = pd.read_csv("daily_steps_analytics.csv", parse_dates=["date"])
daily_hr = pd.read_csv("daily_heart_rate_analytics.csv", parse_dates=["date"])

In [5]:
daily_sleep.shape, daily_steps.shape, daily_hr.shape

((413, 16), (1383, 7), (577, 9))

In [6]:
daily_sleep.head()

Unnamed: 0,sleep_date,total_sleep_min,in_bed_min,sleep_efficiency,deep_pct,rem_pct,core_pct,unspecified_pct,stage_available_flag,poor_sleep_flag,excellent_sleep_flag,low_deep_sleep_flag,low_rem_sleep_flag,fragmented_sleep_flag,sleep_extreme_flag,sleep_eff_invalid_flag
0,2021-10-06,215.883333,512.966667,0.420853,0.0,0.0,0.0,1.0,0,1,0,1,1,1,0,0
1,2021-10-07,248.95,288.983333,0.861468,0.0,0.0,0.0,1.0,0,1,0,1,1,0,0,0
2,2021-10-09,525.85,533.983333,0.984769,0.0,0.0,0.0,1.0,0,0,1,1,1,0,0,0
3,2021-10-10,621.65,625.983333,0.993078,0.0,0.0,0.0,1.0,0,0,1,1,1,0,0,0
4,2021-10-13,293.85,704.983333,0.416818,0.0,0.0,0.0,1.0,0,1,0,1,1,1,0,0


In [7]:
daily_steps.head()

Unnamed: 0,date,total_steps,intervals,max_spm,low_activity_flag,false_spm_flag,high_activity_flag
0,2021-10-06,6884,41,91.652755,0,0,0
1,2021-10-07,8421,66,42.170284,0,0,0
2,2021-10-08,12032,82,97.662771,0,0,1
3,2021-10-09,8443,65,82.838063,0,0,0
4,2021-10-10,4885,50,90.550918,0,0,0


In [8]:
daily_hr.head()

Unnamed: 0,date,avg_hr,min_hr,max_hr,hr_std,high_hr_minutes,low_hr_minutes,elevated_hr_flag,high_hr_variability_flag
0,2022-04-12,71.0,71.0,71.0,,0,0,0,0
1,2022-04-13,69.0,69.0,69.0,0.0,0,0,0,0
2,2022-04-14,71.0,71.0,71.0,0.0,0,0,0,0
3,2022-04-15,69.0,69.0,69.0,0.0,0,0,0,0
4,2022-04-16,68.0,68.0,68.0,0.0,0,0,0,0


In [9]:
#creating union of all tables

In [10]:
sleep_dates = daily_sleep[["sleep_date"]].rename(columns={"sleep_date": "date"})
steps_dates = daily_steps[["date"]]
hr_dates = daily_hr[["date"]]

health_timeline = (
    pd.concat([sleep_dates, steps_dates, hr_dates], ignore_index=True)
    .drop_duplicates()
    .sort_values("date")
    .reset_index(drop=True)
)

In [11]:
health_timeline

Unnamed: 0,date
0,2021-10-06
1,2021-10-07
2,2021-10-08
3,2021-10-09
4,2021-10-10
...,...
1385,2025-12-28
1386,2025-12-29
1387,2025-12-30
1388,2025-12-31


In [12]:
health_timeline = health_timeline.merge(
    daily_sleep,
    left_on="date",
    right_on="sleep_date",
    how="left"
).drop(columns=["sleep_date"])

In [13]:
health_timeline[["date", "total_sleep_min", "sleep_efficiency"]].head(10)

Unnamed: 0,date,total_sleep_min,sleep_efficiency
0,2021-10-06,215.883333,0.420853
1,2021-10-07,248.95,0.861468
2,2021-10-08,,
3,2021-10-09,525.85,0.984769
4,2021-10-10,621.65,0.993078
5,2021-10-11,,
6,2021-10-13,293.85,0.416818
7,2021-10-14,440.883333,0.881796
8,2021-10-15,648.65,0.996416
9,2021-10-16,677.733333,0.958627


In [14]:
health_timeline = health_timeline.merge(
    daily_steps,
    on="date",
    how="left"
)

In [15]:
health_timeline[["date", "total_steps", "low_activity_flag"]].head(10)

Unnamed: 0,date,total_steps,low_activity_flag
0,2021-10-06,6884.0,0.0
1,2021-10-07,8421.0,0.0
2,2021-10-08,12032.0,0.0
3,2021-10-09,8443.0,0.0
4,2021-10-10,4885.0,0.0
5,2021-10-11,8505.0,0.0
6,2021-10-13,1177.0,1.0
7,2021-10-14,5115.0,0.0
8,2021-10-15,6225.0,0.0
9,2021-10-16,5405.0,0.0


In [16]:
health_timeline

Unnamed: 0,date,total_sleep_min,in_bed_min,sleep_efficiency,deep_pct,rem_pct,core_pct,unspecified_pct,stage_available_flag,poor_sleep_flag,...,low_rem_sleep_flag,fragmented_sleep_flag,sleep_extreme_flag,sleep_eff_invalid_flag,total_steps,intervals,max_spm,low_activity_flag,false_spm_flag,high_activity_flag
0,2021-10-06,215.883333,512.966667,0.420853,0.0,0.0,0.0,1.0,0.0,1.0,...,1.0,1.0,0.0,0.0,6884.0,41.0,91.652755,0.0,0.0,0.0
1,2021-10-07,248.950000,288.983333,0.861468,0.0,0.0,0.0,1.0,0.0,1.0,...,1.0,0.0,0.0,0.0,8421.0,66.0,42.170284,0.0,0.0,0.0
2,2021-10-08,,,,,,,,,,...,,,,,12032.0,82.0,97.662771,0.0,0.0,1.0
3,2021-10-09,525.850000,533.983333,0.984769,0.0,0.0,0.0,1.0,0.0,0.0,...,1.0,0.0,0.0,0.0,8443.0,65.0,82.838063,0.0,0.0,0.0
4,2021-10-10,621.650000,625.983333,0.993078,0.0,0.0,0.0,1.0,0.0,0.0,...,1.0,0.0,0.0,0.0,4885.0,50.0,90.550918,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1385,2025-12-28,,,,,,,,,,...,,,,,7705.0,77.0,600.000000,0.0,1.0,0.0
1386,2025-12-29,,,,,,,,,,...,,,,,2279.0,29.0,480.000000,1.0,1.0,0.0
1387,2025-12-30,,,,,,,,,,...,,,,,2112.0,34.0,540.000000,1.0,1.0,0.0
1388,2025-12-31,,,,,,,,,,...,,,,,2309.0,25.0,540.000000,1.0,1.0,0.0


In [17]:
health_timeline = health_timeline.merge(
    daily_hr,
    on="date",
    how="left"
)

In [18]:
health_timeline[
    ["date", "avg_hr", "hr_std", "elevated_hr_flag"]
].head(10)

Unnamed: 0,date,avg_hr,hr_std,elevated_hr_flag
0,2021-10-06,,,
1,2021-10-07,,,
2,2021-10-08,,,
3,2021-10-09,,,
4,2021-10-10,,,
5,2021-10-11,,,
6,2021-10-13,,,
7,2021-10-14,,,
8,2021-10-15,,,
9,2021-10-16,,,


In [19]:
health_timeline

Unnamed: 0,date,total_sleep_min,in_bed_min,sleep_efficiency,deep_pct,rem_pct,core_pct,unspecified_pct,stage_available_flag,poor_sleep_flag,...,false_spm_flag,high_activity_flag,avg_hr,min_hr,max_hr,hr_std,high_hr_minutes,low_hr_minutes,elevated_hr_flag,high_hr_variability_flag
0,2021-10-06,215.883333,512.966667,0.420853,0.0,0.0,0.0,1.0,0.0,1.0,...,0.0,0.0,,,,,,,,
1,2021-10-07,248.950000,288.983333,0.861468,0.0,0.0,0.0,1.0,0.0,1.0,...,0.0,0.0,,,,,,,,
2,2021-10-08,,,,,,,,,,...,0.0,1.0,,,,,,,,
3,2021-10-09,525.850000,533.983333,0.984769,0.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,,,,,,,,
4,2021-10-10,621.650000,625.983333,0.993078,0.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1385,2025-12-28,,,,,,,,,,...,1.0,0.0,,,,,,,,
1386,2025-12-29,,,,,,,,,,...,1.0,0.0,,,,,,,,
1387,2025-12-30,,,,,,,,,,...,1.0,0.0,,,,,,,,
1388,2025-12-31,,,,,,,,,,...,1.0,0.0,,,,,,,,


In [20]:
health_timeline["sleep_available"] = health_timeline["total_sleep_min"].notna().astype(int)
health_timeline["steps_available"] = health_timeline["total_steps"].notna().astype(int)
health_timeline["hr_available"] = health_timeline["avg_hr"].notna().astype(int)

In [21]:
health_timeline[["sleep_available", "steps_available", "hr_available"]].sum()

sleep_available     413
steps_available    1383
hr_available        577
dtype: int64

In [22]:
health_timeline

Unnamed: 0,date,total_sleep_min,in_bed_min,sleep_efficiency,deep_pct,rem_pct,core_pct,unspecified_pct,stage_available_flag,poor_sleep_flag,...,min_hr,max_hr,hr_std,high_hr_minutes,low_hr_minutes,elevated_hr_flag,high_hr_variability_flag,sleep_available,steps_available,hr_available
0,2021-10-06,215.883333,512.966667,0.420853,0.0,0.0,0.0,1.0,0.0,1.0,...,,,,,,,,1,1,0
1,2021-10-07,248.950000,288.983333,0.861468,0.0,0.0,0.0,1.0,0.0,1.0,...,,,,,,,,1,1,0
2,2021-10-08,,,,,,,,,,...,,,,,,,,0,1,0
3,2021-10-09,525.850000,533.983333,0.984769,0.0,0.0,0.0,1.0,0.0,0.0,...,,,,,,,,1,1,0
4,2021-10-10,621.650000,625.983333,0.993078,0.0,0.0,0.0,1.0,0.0,0.0,...,,,,,,,,1,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1385,2025-12-28,,,,,,,,,,...,,,,,,,,0,1,0
1386,2025-12-29,,,,,,,,,,...,,,,,,,,0,1,0
1387,2025-12-30,,,,,,,,,,...,,,,,,,,0,1,0
1388,2025-12-31,,,,,,,,,,...,,,,,,,,0,1,0


In [23]:
health_timeline["sleep_risk_evidence"] = (
    (health_timeline["poor_sleep_flag"] == 1) |
    (health_timeline["fragmented_sleep_flag"] == 1) |
    (health_timeline["sleep_extreme_flag"] == 1)
).astype(int)

In [24]:
health_timeline["sleep_risk_evidence"].value_counts(dropna=False)

sleep_risk_evidence
0    1078
1     312
Name: count, dtype: int64

In [25]:
health_timeline["activity_risk_evidence"] = (
    health_timeline["low_activity_flag"] == 1
).astype(int)

In [26]:
health_timeline["activity_risk_evidence"].value_counts(dropna=False)

activity_risk_evidence
0    1200
1     190
Name: count, dtype: int64

In [27]:
health_timeline["hr_risk_evidence"] = (
    (health_timeline["elevated_hr_flag"] == 1) |
    (health_timeline["high_hr_variability_flag"] == 1)
).astype(int)

In [28]:
health_timeline["hr_risk_evidence"].value_counts(dropna=False)

hr_risk_evidence
0    1103
1     287
Name: count, dtype: int64

In [29]:
health_timeline["risk_evidence_count"] = (
    health_timeline[[
        "sleep_risk_evidence",
        "activity_risk_evidence",
        "hr_risk_evidence"
    ]]
    .fillna(0)
    .sum(axis=1)
)

In [30]:
health_timeline["health_risk_state"] = "Stable"

health_timeline.loc[
    health_timeline["risk_evidence_count"] == 1,
    "health_risk_state"
] = "Mild Risk"

health_timeline.loc[
    health_timeline["risk_evidence_count"] >= 2,
    "health_risk_state"
] = "Elevated Risk"

In [31]:
health_timeline["health_risk_state"].value_counts()

health_risk_state
Stable           747
Mild Risk        497
Elevated Risk    146
Name: count, dtype: int64

In [33]:
health_timeline["elevated_risk_flag"] = (
    health_timeline["health_risk_state"] == "Elevated Risk"
).astype(int)

In [34]:
feature_cols = [
    # Sleep
    "total_sleep_min",
    "sleep_efficiency",
    "deep_pct",
    "rem_pct",
    "fragmented_sleep_flag",
    "sleep_extreme_flag",
    "stage_available_flag",

    # Steps
    "total_steps",
    "intervals",
    "low_activity_flag",
    "high_activity_flag",

    # Heart Rate
    "avg_hr",
    "hr_std",
    "high_hr_minutes",
    "low_hr_minutes",
    "elevated_hr_flag",
    "high_hr_variability_flag",

    # Availability
    "sleep_available",
    "steps_available",
    "hr_available"
]

X = health_timeline[feature_cols]
y = health_timeline["elevated_risk_flag"]

In [37]:
X.shape, y.shape

((1390, 20), (1390,))

In [38]:
y.value_counts(normalize=True)

elevated_risk_flag
0    0.894964
1    0.105036
Name: proportion, dtype: float64

In [39]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    shuffle=False
)

X_train.shape, X_test.shape

((1042, 20), (348, 20))

In [40]:
X_train.isna().sum().sort_values(ascending=False)

sleep_efficiency            797
total_sleep_min             753
deep_pct                    753
rem_pct                     753
fragmented_sleep_flag       753
sleep_extreme_flag          753
stage_available_flag        753
hr_std                      656
high_hr_minutes             652
high_hr_variability_flag    652
elevated_hr_flag            652
low_hr_minutes              652
avg_hr                      652
high_activity_flag            7
low_activity_flag             7
intervals                     7
total_steps                   7
sleep_available               0
steps_available               0
hr_available                  0
dtype: int64

In [41]:
from sklearn.impute import SimpleImputer

num_imputer = SimpleImputer(strategy="median")

X_train_imputed = num_imputer.fit_transform(X_train)
X_test_imputed = num_imputer.transform(X_test)

In [42]:
X_train_imputed.shape, X_test_imputed.shape

((1042, 20), (348, 20))

In [44]:
import numpy as np
np.isnan(X_train_imputed).sum(), np.isnan(X_test_imputed).sum()

(np.int64(0), np.int64(0))

In [45]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()

X_train_scaled = scaler.fit_transform(X_train_imputed)
X_test_scaled = scaler.transform(X_test_imputed)

In [46]:
X_train_scaled.mean(axis=0).round(2), X_train_scaled.std(axis=0).round(2)

(array([-0., -0.,  0., -0., -0.,  0.,  0.,  0.,  0.,  0.,  0.,  0., -0.,
         0.,  0., -0.,  0., -0., -0.,  0.]),
 array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
        1., 1., 1.]))

In [47]:
from sklearn.linear_model import LogisticRegression

model = LogisticRegression(
    class_weight="balanced",  # critical due to 10% positive class
    max_iter=1000,
    random_state=42
)

model.fit(X_train_scaled, y_train)

In [48]:
model.classes_

array([0, 1])

In [49]:
y_pred = model.predict(X_test_scaled)
y_proba = model.predict_proba(X_test_scaled)[:, 1]

In [51]:
from sklearn.metrics import classification_report

print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.97      0.98      0.97       306
           1       0.82      0.79      0.80        42

    accuracy                           0.95       348
   macro avg       0.90      0.88      0.89       348
weighted avg       0.95      0.95      0.95       348



In [52]:
from sklearn.metrics import roc_auc_score

roc_auc = roc_auc_score(y_test, y_proba)
roc_auc

np.float64(0.9583722377840025)

In [53]:
import pandas as pd

pd.Series(y_proba).describe()

count    3.480000e+02
mean     1.195323e-01
std      2.849692e-01
min      8.242395e-07
25%      1.407006e-06
50%      1.929636e-04
75%      5.238847e-03
max      1.000000e+00
dtype: float64

In [54]:
threshold = 0.05

y_pred_tuned = (y_proba >= threshold).astype(int)

In [55]:
from sklearn.metrics import classification_report

print(classification_report(y_test, y_pred_tuned))

              precision    recall  f1-score   support

           0       0.97      0.91      0.94       306
           1       0.55      0.81      0.65        42

    accuracy                           0.90       348
   macro avg       0.76      0.86      0.80       348
weighted avg       0.92      0.90      0.90       348



In [56]:
health_timeline.loc[X_test.index, "risk_probability"] = y_proba
health_timeline.loc[X_test.index, "ml_alert_flag"] = y_pred_tuned
health_timeline.loc[X_test.index, "ml_alert_threshold"] = 0.05

In [57]:
import pandas as pd

feature_importance = pd.DataFrame({
    "feature": X.columns,
    "coefficient": model.coef_[0]
}).sort_values("coefficient", ascending=False)

feature_importance

Unnamed: 0,feature,coefficient
17,sleep_available,3.791323
16,high_hr_variability_flag,2.775633
9,low_activity_flag,2.630672
4,fragmented_sleep_flag,1.613386
1,sleep_efficiency,1.173997
19,hr_available,0.85146
11,avg_hr,0.69675
14,low_hr_minutes,0.538597
2,deep_pct,0.416263
3,rem_pct,0.314684


In [58]:
feature_importance["abs_importance"] = feature_importance["coefficient"].abs()
feature_importance.sort_values("abs_importance", ascending=False)

Unnamed: 0,feature,coefficient,abs_importance
17,sleep_available,3.791323,3.791323
16,high_hr_variability_flag,2.775633,2.775633
9,low_activity_flag,2.630672,2.630672
4,fragmented_sleep_flag,1.613386,1.613386
1,sleep_efficiency,1.173997,1.173997
0,total_sleep_min,-0.878634,0.878634
19,hr_available,0.85146,0.85146
6,stage_available_flag,-0.731644,0.731644
11,avg_hr,0.69675,0.69675
14,low_hr_minutes,0.538597,0.538597


In [59]:
risk_score_map = {
    "Stable": 0,
    "Mild Risk": 1,
    "Elevated Risk": 2
}

health_timeline["daily_risk_score"] = (
    health_timeline["health_risk_state"]
    .map(risk_score_map)
)

In [60]:
health_timeline["risk_7d_avg"] = (
    health_timeline["daily_risk_score"]
    .rolling(window=7, min_periods=3)
    .mean()
)

health_timeline["risk_14d_avg"] = (
    health_timeline["daily_risk_score"]
    .rolling(window=14, min_periods=5)
    .mean()
)

In [61]:
health_timeline["risk_trend"] = (
    health_timeline["risk_7d_avg"]
    - health_timeline["risk_14d_avg"]
)

In [62]:
health_timeline["early_warning_flag"] = (
    (health_timeline["risk_trend"] > 0.3) &
    (health_timeline["daily_risk_score"] < 2)
).astype(int)

In [63]:
health_timeline["risk_volatility_14d"] = (
    health_timeline["daily_risk_score"]
    .rolling(14, min_periods=5)
    .std()
)

In [64]:
def trajectory_label(row):
    if row["early_warning_flag"] == 1:
        return "Early Warning"
    if row["daily_risk_score"] == 2:
        return "Acute Risk"
    if row["risk_trend"] < -0.2:
        return "Recovering"
    return "Stable"

health_timeline["risk_trajectory_state"] = (
    health_timeline.apply(trajectory_label, axis=1)
)

In [65]:
health_timeline

Unnamed: 0,date,total_sleep_min,in_bed_min,sleep_efficiency,deep_pct,rem_pct,core_pct,unspecified_pct,stage_available_flag,poor_sleep_flag,...,risk_probability,ml_alert_flag,ml_alert_threshold,daily_risk_score,risk_7d_avg,risk_14d_avg,risk_trend,early_warning_flag,risk_volatility_14d,risk_trajectory_state
0,2021-10-06,215.883333,512.966667,0.420853,0.0,0.0,0.0,1.0,0.0,1.0,...,,,,1,,,,0,,Stable
1,2021-10-07,248.950000,288.983333,0.861468,0.0,0.0,0.0,1.0,0.0,1.0,...,,,,1,,,,0,,Stable
2,2021-10-08,,,,,,,,,,...,,,,0,0.666667,,,0,,Stable
3,2021-10-09,525.850000,533.983333,0.984769,0.0,0.0,0.0,1.0,0.0,0.0,...,,,,0,0.500000,,,0,,Stable
4,2021-10-10,621.650000,625.983333,0.993078,0.0,0.0,0.0,1.0,0.0,0.0,...,,,,0,0.400000,0.400000,0.000000,0,0.547723,Stable
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1385,2025-12-28,,,,,,,,,,...,8.357086e-07,0.0,0.05,0,0.285714,0.500000,-0.214286,0,0.518875,Recovering
1386,2025-12-29,,,,,,,,,,...,4.616283e-03,0.0,0.05,1,0.428571,0.500000,-0.071429,0,0.518875,Stable
1387,2025-12-30,,,,,,,,,,...,4.499366e-03,0.0,0.05,1,0.571429,0.500000,0.071429,0,0.518875,Stable
1388,2025-12-31,,,,,,,,,,...,4.714869e-03,0.0,0.05,1,0.571429,0.571429,0.000000,0,0.513553,Stable


In [66]:
def build_risk_reasons(row):
    reasons = []

    # Sleep reasons
    if row.get("poor_sleep_flag", 0) == 1:
        reasons.append("Poor sleep duration or efficiency")
    if row.get("fragmented_sleep_flag", 0) == 1:
        reasons.append("Fragmented sleep pattern")
    if row.get("low_deep_sleep_flag", 0) == 1:
        reasons.append("Low deep sleep recovery")

    # Activity reasons
    if row.get("low_activity_flag", 0) == 1:
        reasons.append("Low physical activity")
    if row.get("high_activity_flag", 0) == 1:
        reasons.append("Excessive physical strain")

    # Heart rate reasons
    if row.get("high_hr_variability_flag", 0) == 1:
        reasons.append("High heart rate variability")
    if row.get("high_hr_minutes", 0) > 120:
        reasons.append("Prolonged elevated heart rate")

    # Temporal warning
    if row.get("early_warning_flag", 0) == 1:
        reasons.append("Risk trend increasing over recent days")

    return "; ".join(reasons)


In [67]:
health_timeline["risk_explanation"] = (
    health_timeline.apply(build_risk_reasons, axis=1)
)

In [68]:
def clinical_narrative(row):
    if row["risk_trajectory_state"] == "Early Warning":
        return (
            "Early signs of health strain detected. "
            "Recent patterns show worsening recovery and activity balance. "
            "Monitoring and lifestyle adjustment recommended."
        )

    if row["risk_trajectory_state"] == "Acute Risk":
        return (
            "Elevated health risk detected. "
            "Multiple physiological signals indicate possible stress or fatigue. "
            "Clinical review is advised."
        )

    if row["risk_trajectory_state"] == "Recovering":
        return (
            "Recovery trend observed. "
            "Physiological signals show improvement compared to previous days."
        )

    return "Health parameters appear stable."

In [69]:
health_timeline["clinical_summary"] = (
    health_timeline.apply(clinical_narrative, axis=1)
)

In [70]:
health_timeline

Unnamed: 0,date,total_sleep_min,in_bed_min,sleep_efficiency,deep_pct,rem_pct,core_pct,unspecified_pct,stage_available_flag,poor_sleep_flag,...,ml_alert_threshold,daily_risk_score,risk_7d_avg,risk_14d_avg,risk_trend,early_warning_flag,risk_volatility_14d,risk_trajectory_state,risk_explanation,clinical_summary
0,2021-10-06,215.883333,512.966667,0.420853,0.0,0.0,0.0,1.0,0.0,1.0,...,,1,,,,0,,Stable,Poor sleep duration or efficiency; Fragmented ...,Health parameters appear stable.
1,2021-10-07,248.950000,288.983333,0.861468,0.0,0.0,0.0,1.0,0.0,1.0,...,,1,,,,0,,Stable,Poor sleep duration or efficiency; Low deep sl...,Health parameters appear stable.
2,2021-10-08,,,,,,,,,,...,,0,0.666667,,,0,,Stable,Excessive physical strain,Health parameters appear stable.
3,2021-10-09,525.850000,533.983333,0.984769,0.0,0.0,0.0,1.0,0.0,0.0,...,,0,0.500000,,,0,,Stable,Low deep sleep recovery,Health parameters appear stable.
4,2021-10-10,621.650000,625.983333,0.993078,0.0,0.0,0.0,1.0,0.0,0.0,...,,0,0.400000,0.400000,0.000000,0,0.547723,Stable,Low deep sleep recovery,Health parameters appear stable.
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1385,2025-12-28,,,,,,,,,,...,0.05,0,0.285714,0.500000,-0.214286,0,0.518875,Recovering,,Recovery trend observed. Physiological signals...
1386,2025-12-29,,,,,,,,,,...,0.05,1,0.428571,0.500000,-0.071429,0,0.518875,Stable,Low physical activity,Health parameters appear stable.
1387,2025-12-30,,,,,,,,,,...,0.05,1,0.571429,0.500000,0.071429,0,0.518875,Stable,Low physical activity,Health parameters appear stable.
1388,2025-12-31,,,,,,,,,,...,0.05,1,0.571429,0.571429,0.000000,0,0.513553,Stable,Low physical activity,Health parameters appear stable.


In [71]:
health_timeline["risk_score"] = (
    0.30 * health_timeline["sleep_risk_evidence"] +
    0.20 * health_timeline["activity_risk_evidence"] +
    0.30 * health_timeline["hr_risk_evidence"] +
    0.20 * health_timeline["early_warning_flag"]
)

health_timeline["risk_score"] = (
    health_timeline["risk_score"] * 100
).round(1)

In [72]:
def intervention_tier(score):
    if score >= 70:
        return "Immediate Attention"
    elif score >= 40:
        return "Monitor Closely"
    elif score >= 20:
        return "Lifestyle Adjustment"
    else:
        return "Stable"

In [73]:
health_timeline["intervention_tier"] = (
    health_timeline["risk_score"].apply(intervention_tier)
)

In [74]:
def intervention_recommendation(row):
    actions = []

    # Sleep-based interventions
    if row.get("poor_sleep_flag", 0) == 1:
        actions.append("Improve sleep schedule and duration")
    if row.get("fragmented_sleep_flag", 0) == 1:
        actions.append("Reduce nighttime disruptions")

    # Activity-based interventions
    if row.get("low_activity_flag", 0) == 1:
        actions.append("Increase light physical activity")
    if row.get("high_activity_flag", 0) == 1:
        actions.append("Reduce physical overexertion")

    # Heart-based interventions
    if row.get("high_hr_variability_flag", 0) == 1:
        actions.append("Incorporate stress-reduction techniques")
    if row.get("elevated_hr_flag", 0) == 1:
        actions.append("Monitor heart rate and avoid exertion")

    # Temporal escalation
    if row.get("early_warning_flag", 0) == 1:
        actions.append("Close monitoring recommended over next few days")

    if not actions:
        return "No immediate intervention required"

    return "; ".join(actions)

In [75]:
health_timeline["intervention_recommendation"] = (
    health_timeline.apply(intervention_recommendation, axis=1)
)

In [76]:
health_timeline["is_risky_day"] = (
    health_timeline["health_risk_state"] != "Stable"
).astype(int)

# Compute streak
streak = 0
streaks = []

for val in health_timeline["is_risky_day"]:
    if val == 1:
        streak += 1
    else:
        streak = 0
    streaks.append(streak)

health_timeline["risk_streak_days"] = streaks

In [77]:
health_timeline["is_elevated_day"] = (
    health_timeline["health_risk_state"] == "Elevated Risk"
).astype(int)

streak = 0
elevated_streaks = []

for val in health_timeline["is_elevated_day"]:
    if val == 1:
        streak += 1
    else:
        streak = 0
    elevated_streaks.append(streak)

health_timeline["elevated_risk_streak_days"] = elevated_streaks

In [78]:
health_timeline["risk_days_last_14"] = (
    health_timeline["is_risky_day"]
    .rolling(window=14, min_periods=5)
    .sum()
)

In [79]:
health_timeline["risk_escalation_flag"] = (
    (health_timeline["risk_streak_days"] >= 3) |
    (health_timeline["elevated_risk_streak_days"] >= 2) |
    (health_timeline["risk_days_last_14"] >= 6)
).astype(int)

In [80]:
def refined_intervention(row):
    if row["risk_escalation_flag"] == 1:
        return "Clinical Attention Recommended"
    return row["intervention_tier"]

health_timeline["final_intervention_tier"] = (
    health_timeline.apply(refined_intervention, axis=1)
)

In [81]:
export_cols = [
    "date",
    "sleep_available", "steps_available", "hr_available",

    # Sleep
    "total_sleep_min", "sleep_efficiency", "deep_pct", "rem_pct",
    "fragmented_sleep_flag", "sleep_extreme_flag", "stage_available_flag",

    # Activity
    "total_steps", "intervals",
    "low_activity_flag", "high_activity_flag", "false_spm_flag",

    # Heart Rate
    "avg_hr", "min_hr", "max_hr", "hr_std",
    "high_hr_minutes", "low_hr_minutes",
    "elevated_hr_flag", "high_hr_variability_flag",

    # Intelligence
    "health_risk_state", "daily_risk_score", "risk_score",
    "early_warning_flag",
    "risk_streak_days", "elevated_risk_streak_days",
    "risk_days_last_14",
    "risk_escalation_flag",
    "final_intervention_tier",
    "intervention_recommendation",
    "risk_trajectory_state",
    "clinical_summary"
]

health_export = health_timeline[export_cols].copy()

In [82]:
health_export.shape

(1390, 36)

In [83]:
health_export.isna().mean().sort_values(ascending=False)

sleep_efficiency               0.815827
total_sleep_min                0.702878
deep_pct                       0.702878
rem_pct                        0.702878
fragmented_sleep_flag          0.702878
sleep_extreme_flag             0.702878
stage_available_flag           0.702878
hr_std                         0.587770
max_hr                         0.584892
avg_hr                         0.584892
high_hr_variability_flag       0.584892
elevated_hr_flag               0.584892
low_hr_minutes                 0.584892
high_hr_minutes                0.584892
min_hr                         0.584892
low_activity_flag              0.005036
false_spm_flag                 0.005036
high_activity_flag             0.005036
intervals                      0.005036
total_steps                    0.005036
risk_days_last_14              0.002878
final_intervention_tier        0.000000
risk_streak_days               0.000000
intervention_recommendation    0.000000
risk_trajectory_state          0.000000


In [84]:
health_export["final_intervention_tier"].value_counts()

final_intervention_tier
Clinical Attention Recommended    880
Stable                            404
Lifestyle Adjustment               96
Monitor Closely                    10
Name: count, dtype: int64

In [85]:
health_export.sample(5)

Unnamed: 0,date,sleep_available,steps_available,hr_available,total_sleep_min,sleep_efficiency,deep_pct,rem_pct,fragmented_sleep_flag,sleep_extreme_flag,...,risk_score,early_warning_flag,risk_streak_days,elevated_risk_streak_days,risk_days_last_14,risk_escalation_flag,final_intervention_tier,intervention_recommendation,risk_trajectory_state,clinical_summary
540,2023-09-05,0,1,0,,,,,,,...,0.0,0,0,0,0.0,0,Stable,Reduce physical overexertion,Stable,Health parameters appear stable.
1067,2025-02-13,0,1,0,,,,,,,...,0.0,0,0,0,9.0,1,Clinical Attention Recommended,No immediate intervention required,Recovering,Recovery trend observed. Physiological signals...
640,2023-12-14,0,1,1,,,,,,,...,30.0,0,3,0,3.0,1,Clinical Attention Recommended,Reduce physical overexertion; Incorporate stre...,Stable,Health parameters appear stable.
96,2022-04-17,0,1,1,,,,,,,...,0.0,0,0,0,6.0,1,Clinical Attention Recommended,Reduce physical overexertion,Recovering,Recovery trend observed. Physiological signals...
95,2022-04-16,0,1,1,,,,,,,...,0.0,0,0,0,6.0,1,Clinical Attention Recommended,Reduce physical overexertion,Recovering,Recovery trend observed. Physiological signals...


In [86]:
health_export

Unnamed: 0,date,sleep_available,steps_available,hr_available,total_sleep_min,sleep_efficiency,deep_pct,rem_pct,fragmented_sleep_flag,sleep_extreme_flag,...,risk_score,early_warning_flag,risk_streak_days,elevated_risk_streak_days,risk_days_last_14,risk_escalation_flag,final_intervention_tier,intervention_recommendation,risk_trajectory_state,clinical_summary
0,2021-10-06,1,1,0,215.883333,0.420853,0.0,0.0,1.0,0.0,...,30.0,0,1,0,,0,Lifestyle Adjustment,Improve sleep schedule and duration; Reduce ni...,Stable,Health parameters appear stable.
1,2021-10-07,1,1,0,248.950000,0.861468,0.0,0.0,0.0,0.0,...,30.0,0,2,0,,0,Lifestyle Adjustment,Improve sleep schedule and duration,Stable,Health parameters appear stable.
2,2021-10-08,0,1,0,,,,,,,...,0.0,0,0,0,,0,Stable,Reduce physical overexertion,Stable,Health parameters appear stable.
3,2021-10-09,1,1,0,525.850000,0.984769,0.0,0.0,0.0,0.0,...,0.0,0,0,0,,0,Stable,No immediate intervention required,Stable,Health parameters appear stable.
4,2021-10-10,1,1,0,621.650000,0.993078,0.0,0.0,0.0,0.0,...,0.0,0,0,0,2.0,0,Stable,No immediate intervention required,Stable,Health parameters appear stable.
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1385,2025-12-28,0,1,0,,,,,,,...,0.0,0,0,0,7.0,1,Clinical Attention Recommended,No immediate intervention required,Recovering,Recovery trend observed. Physiological signals...
1386,2025-12-29,0,1,0,,,,,,,...,20.0,0,1,0,7.0,1,Clinical Attention Recommended,Increase light physical activity,Stable,Health parameters appear stable.
1387,2025-12-30,0,1,0,,,,,,,...,20.0,0,2,0,7.0,1,Clinical Attention Recommended,Increase light physical activity,Stable,Health parameters appear stable.
1388,2025-12-31,0,1,0,,,,,,,...,20.0,0,3,0,8.0,1,Clinical Attention Recommended,Increase light physical activity,Stable,Health parameters appear stable.


In [87]:
health_export.to_excel(
    "health_timeline_intelligence.xlsx",
    index=False
)