In [2]:
import json
import pandas as pd
import os
from datetime import datetime, timezone


In [3]:
DATA_DIR = "../data/"
garmin_file = os.path.join(DATA_DIR, "garmin_health_data.json")
csv_filename = os.path.join(DATA_DIR, "garmin_data.csv")

In [4]:
print(garmin_file)

../data/garmin_health_data.json


In [5]:
with open(garmin_file, "r") as f:
    garmin_data = json.load(f)
print("✅ Loaded Garmin health data")

✅ Loaded Garmin health data


In [6]:
processed_data = []

# Iterate through Garmin data (by date)
for date, health in garmin_data.items():
    # Safely get heart rate values
    heart_rate_values = health.get("heart_rate", [])
    if not heart_rate_values:
        continue  # Skip days with no heart rate data

    # Convert heart rate timestamps to UTC
    hr_data = {
        datetime.fromtimestamp(ts / 1000, tz=timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ"): hr
        for ts, hr in heart_rate_values
    }

    # Helper function for extracting time series data
    def extract_time_series(data, key):
        if not data or key not in data or data[key] is None:
            return {}
        return {
            datetime.fromtimestamp(ts / 1000, tz=timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ"): value
            for ts, value in data[key]
        }

    # Extract all health metrics
    stress_data = extract_time_series(health.get("stress", {}), "stressValuesArray")
    respiration_data = extract_time_series(health.get("respiration", {}), "respirationValuesArray")
    body_battery_data = extract_time_series(health.get("body_battery", [{}])[0], "bodyBatteryValuesArray")
    spo2_data = extract_time_series(health.get("spo2"), "spO2HourlyAverages")

    # Extract HRV readings
    hrv_data = {
        entry["readingTimeGMT"]: entry["hrvValue"]
        for entry in health.get("hrvReadings", [])
    }

    # Get sleep score
    sleep_score = health.get("sleep_score")

    # Initialize last known values
    last_stress = last_resp = last_body_battery = last_spo2 = last_hrv = None

    # Merge data by timestamps
    for timestamp, heart_rate in hr_data.items():
        # Update last known values
        last_stress = stress_data.get(timestamp, last_stress)
        last_resp = respiration_data.get(timestamp, last_resp)
        last_body_battery = body_battery_data.get(timestamp, last_body_battery)
        last_spo2 = spo2_data.get(timestamp, last_spo2)
        last_hrv = hrv_data.get(timestamp, last_hrv)

        # Store processed data
        processed_data.append({
            "timestamp": timestamp,
            "heart_rate": heart_rate,
            "stress": last_stress,
            "respiration": last_resp,
            "body_battery": last_body_battery,
            "spo2": last_spo2,
            "hrv": last_hrv,
            "sleep_score": sleep_score
        })

print("✅ Processed Garmin health data")

✅ Processed Garmin health data


In [7]:
df = pd.DataFrame(processed_data)
display(df.head())
print("\nDataset shape:", df.shape)

Unnamed: 0,timestamp,heart_rate,stress,respiration,body_battery,spo2,hrv,sleep_score
0,2025-04-01T22:00:00Z,62.0,31.0,,,94.0,,50.0
1,2025-04-01T22:02:00Z,64.0,31.0,18.0,,94.0,,50.0
2,2025-04-01T22:04:00Z,63.0,31.0,17.0,,94.0,,50.0
3,2025-04-01T22:06:00Z,64.0,28.0,16.0,,94.0,,50.0
4,2025-04-01T22:08:00Z,65.0,28.0,17.0,,94.0,,50.0



Dataset shape: (22005, 8)


In [8]:
print("Summary statistics:")
display(df.describe())

print("\nMissing values:")
display(df.isnull().sum())

Summary statistics:


Unnamed: 0,heart_rate,stress,respiration,body_battery,spo2,sleep_score
count,21961.0,22001.0,21959.0,18941.0,16301.0,19030.0
mean,71.203952,27.865233,12.344278,44.137955,94.26072,74.729427
std,18.846895,26.647508,6.101613,28.200988,2.323868,14.898444
min,40.0,-2.0,-2.0,5.0,85.0,35.0
25%,58.0,6.0,12.0,20.0,93.0,63.0
50%,67.0,22.0,14.0,42.0,94.0,80.0
75%,81.0,43.0,16.0,62.0,96.0,86.0
max,199.0,99.0,25.0,100.0,100.0,95.0



Missing values:


timestamp           0
heart_rate         44
stress              4
respiration        46
body_battery     3064
spo2             5704
hrv             22005
sleep_score      2975
dtype: int64

In [9]:
df.to_csv(csv_filename, index=False)
print(f"✅ Garmin health data saved to {csv_filename}")

✅ Garmin health data saved to ../data/garmin_data.csv
