In [155]:
import json
import pandas as pd
import os
from datetime import datetime, timezone
import pytz


In [156]:
# Folder (relative to this notebook) holding both the raw export and the generated CSV.
DATA_DIR = "../data/"

# Input JSON export and output CSV paths, both resolved against DATA_DIR.
garmin_file, csv_filename = (
    os.path.join(DATA_DIR, file_name)
    for file_name in ("garmin_health_data.json", "garmin_data.csv")
)

In [157]:
# Sanity check: show the resolved input path before attempting to open it.
print(garmin_file)

../data/garmin_health_data.json


In [158]:
# Load the raw Garmin export into memory as plain Python objects.
# The encoding is pinned to UTF-8: when it is omitted, open() falls back to the
# platform's locale encoding, which can mis-decode non-ASCII JSON content.
with open(garmin_file, "r", encoding="utf-8") as f:
    garmin_data = json.load(f)
print("✅ Loaded Garmin health data")

✅ Loaded Garmin health data


In [159]:
def _ms_to_utc_iso(ts_ms):
    """Format an epoch-millisecond timestamp as a UTC ``YYYY-MM-DDTHH:MM:SSZ`` string."""
    return datetime.fromtimestamp(ts_ms / 1000, tz=timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")


def extract_time_series(data, key):
    """Turn the ``[timestamp_ms, value]`` pairs stored under ``data[key]`` into a dict.

    Args:
        data: Mapping that may contain ``key``; may also be ``None`` or empty.
        key: Name of the entry holding the list of ``[timestamp_ms, value]`` pairs.

    Returns:
        ``{utc_iso_timestamp: value}``; an empty dict when ``data`` is falsy or
        ``data[key]`` is missing or ``None``.
    """
    if not data or key not in data or data[key] is None:
        return {}
    return {_ms_to_utc_iso(ts): value for ts, value in data[key]}


processed_data = []

# Iterate through Garmin data (one entry per date key)
for date, health in garmin_data.items():
    # Safely get heart rate values
    heart_rate_values = health.get("heart_rate", [])
    if not heart_rate_values:
        continue  # Skip days with no heart rate data

    # Heart-rate samples keyed by UTC timestamp; these keys drive the output rows.
    hr_data = {_ms_to_utc_iso(ts): hr for ts, hr in heart_rate_values}

    # Extract the other time series for this entry.
    stress_data = extract_time_series(health.get("stress", {}), "stressValuesArray")
    respiration_data = extract_time_series(health.get("respiration", {}), "respirationValuesArray")
    # "body_battery" is indexed at [0]; fall back to an empty record when the
    # list is missing, None, or empty instead of raising IndexError/TypeError.
    body_battery_days = health.get("body_battery") or [{}]
    body_battery_data = extract_time_series(body_battery_days[0], "bodyBatteryValuesArray")
    spo2_data = extract_time_series(health.get("spo2"), "spO2HourlyAverages")

    # Per-entry scalars repeated on every row; None (not {}) when absent so the
    # resulting DataFrame columns never receive dict objects.
    hrv_avg = health.get("hrv_avg")
    sleep_score = health.get("sleep_score")

    # Individual "hrvReadings" are not emitted (only hrv_avg is), so they are not parsed.

    # Last known values, reset for every date entry
    last_stress = last_resp = last_body_battery = last_spo2 = None

    # Merge data by timestamps, carrying the last matched value forward.
    # NOTE(review): a reading is only picked up when its timestamp exactly equals a
    # heart-rate timestamp; readings that fall between heart-rate samples are
    # ignored. Confirm the sampling intervals line up, or switch to an as-of merge.
    for timestamp, heart_rate in hr_data.items():
        # Update last known values
        last_stress = stress_data.get(timestamp, last_stress)
        last_resp = respiration_data.get(timestamp, last_resp)
        last_body_battery = body_battery_data.get(timestamp, last_body_battery)
        last_spo2 = spo2_data.get(timestamp, last_spo2)

        # Store processed data
        processed_data.append({
            "timestamp": timestamp,
            "heart_rate": heart_rate,
            "stress": last_stress,
            "respiration": last_resp,
            "body_battery": last_body_battery,
            "spo2": last_spo2,
            "sleep_score": sleep_score,
            "hrv_avg": hrv_avg
        })

print("✅ Processed Garmin health data")

✅ Processed Garmin health data


In [160]:
# One row per heart-rate sample; columns come from the record dict keys.
df = pd.DataFrame(processed_data)

# Add local time column: parse the UTC strings, convert to Europe/Madrid, and
# format without the UTC offset. This uses the vectorised .dt accessor instead of
# a per-row Python strptime/astimezone call.
df['local_time'] = (
    pd.to_datetime(df['timestamp'], format="%Y-%m-%dT%H:%M:%SZ", utc=True)
    .dt.tz_convert('Europe/Madrid')
    .dt.strftime("%Y-%m-%dT%H:%M:%S")  # no %z: the timezone offset is deliberately excluded
)

display(df.head())
print("\nDataset shape:", df.shape)

Unnamed: 0,timestamp,heart_rate,stress,respiration,body_battery,spo2,sleep_score,hrv_avg,local_time
0,2025-04-08T22:00:00Z,69.0,50.0,,,94.0,77.0,46.0,2025-04-09T00:00:00
1,2025-04-08T22:02:00Z,68.0,50.0,17.0,,94.0,77.0,46.0,2025-04-09T00:02:00
2,2025-04-08T22:04:00Z,72.0,50.0,17.0,,94.0,77.0,46.0,2025-04-09T00:04:00
3,2025-04-08T22:06:00Z,73.0,50.0,17.0,,94.0,77.0,46.0,2025-04-09T00:06:00
4,2025-04-08T22:08:00Z,74.0,50.0,17.0,,94.0,77.0,46.0,2025-04-09T00:08:00



Dataset shape: (24089, 9)


In [161]:
# Descriptive statistics as produced by DataFrame.describe().
summary_stats = df.describe()
print("Summary statistics:")
display(summary_stats)

# Count of null entries per column.
missing_counts = df.isnull().sum()
print("\nMissing values:")
display(missing_counts)

Summary statistics:


Unnamed: 0,heart_rate,stress,respiration,body_battery,spo2,sleep_score,hrv_avg
count,24043.0,24081.0,24038.0,20532.0,18330.0,20345.0,17201.0
mean,71.165911,28.05249,12.384724,44.406049,94.181724,74.84232,74.803965
std,18.711374,26.541341,6.105029,27.758542,2.33675,14.96346,17.029675
min,40.0,-2.0,-2.0,5.0,85.0,35.0,46.0
25%,58.0,6.0,12.0,23.0,93.0,63.0,62.0
50%,67.0,22.0,14.0,40.0,94.0,80.0,73.0
75%,81.0,43.0,16.0,62.0,96.0,86.0,85.0
max,199.0,99.0,25.0,100.0,100.0,95.0,107.0



Missing values:


timestamp          0
heart_rate        46
stress             8
respiration       51
body_battery    3557
spo2            5759
sleep_score     3744
hrv_avg         6888
local_time         0
dtype: int64

In [162]:
# Persist the merged table; index=False keeps the row index out of the CSV.
df.to_csv(path_or_buf=csv_filename, index=False)
print(f"✅ Garmin health data saved to {csv_filename}")

✅ Garmin health data saved to ../data/garmin_data.csv
