In [1]:
# Cell 1: Import required libraries
import json
import pandas as pd
import os
from datetime import datetime, timezone


In [2]:
# Cell 2: Define file paths
# Both files live in the same data directory, which is given relative to the
# kernel's working directory: the JSON file is the input that gets loaded,
# the CSV file is the output that gets written.
DATA_DIR = "../data/"
garmin_file, csv_filename = (
    os.path.join(DATA_DIR, filename)
    for filename in ("garmin_health_data.json", "garmin_health_data.csv")
)

In [4]:
# Debug aid: show the resolved path of the Garmin JSON input file.
print(garmin_file)

data/garmin_health_data.json


In [3]:
# Cell 3: Load Garmin health data
# The encoding is passed explicitly so decoding does not depend on the
# platform's locale default (JSON text is UTF-8).
try:
    with open(garmin_file, "r", encoding="utf-8") as f:
        garmin_data = json.load(f)
except FileNotFoundError as exc:
    # Relative paths resolve against the kernel's working directory, so report
    # both the absolute path that was tried and that directory.
    raise FileNotFoundError(
        f"Garmin export not found at {os.path.abspath(garmin_file)!r} "
        f"(working directory: {os.getcwd()!r}); check DATA_DIR."
    ) from exc
print("✅ Loaded Garmin health data")

FileNotFoundError: [Errno 2] No such file or directory: 'data/garmin_health_data.json'

In [None]:
# Cell 4: Process Garmin data
# Output format shared by every timestamp produced from epoch milliseconds.
TIMESTAMP_FORMAT = "%Y-%m-%dT%H:%M:%SZ"


def _format_epoch_ms(ts):
    """Format an epoch timestamp given in milliseconds as a UTC string."""
    return datetime.fromtimestamp(ts / 1000, tz=timezone.utc).strftime(TIMESTAMP_FORMAT)


def extract_time_series(data, key):
    """Return ``{utc_timestamp: value}`` built from the pairs stored in ``data[key]``.

    Args:
        data: Mapping for one metric; may be ``None`` or empty.
        key: Name of the entry in ``data`` that holds ``(epoch_ms, value)`` pairs.

    Returns:
        A dict keyed by the formatted UTC timestamp. Empty when ``data`` is
        falsy, ``key`` is absent, or ``data[key]`` is ``None``.
    """
    if not data or key not in data or data[key] is None:
        return {}
    return {_format_epoch_ms(ts): value for ts, value in data[key]}


def _process_day(health):
    """Build the merged records for one day's health data.

    One record is produced per distinct formatted heart-rate timestamp. Each
    other metric contributes its value when it has an entry at exactly that
    timestamp; otherwise the last value seen earlier in the same day is
    carried forward (``None`` until a first match).

    Args:
        health: Mapping holding one day's metrics.

    Returns:
        A list of record dicts; empty when the day has no heart-rate data.
    """
    heart_rate_values = health.get("heart_rate", [])
    if not heart_rate_values:
        return []  # Skip days with no heart rate data

    # Convert heart rate timestamps to UTC
    hr_data = {_format_epoch_ms(ts): hr for ts, hr in heart_rate_values}

    # "body_battery" may be missing, None, or an empty list; fall back to a
    # single empty entry so indexing [0] can never fail.
    body_battery_entries = health.get("body_battery") or [{}]

    # Insertion order here fixes the column order of the output records.
    series_by_metric = {
        "stress": extract_time_series(health.get("stress", {}), "stressValuesArray"),
        "respiration": extract_time_series(health.get("respiration", {}), "respirationValuesArray"),
        "body_battery": extract_time_series(body_battery_entries[0], "bodyBatteryValuesArray"),
        "spo2": extract_time_series(health.get("spo2"), "spO2HourlyAverages"),
        # NOTE(review): HRV readings are keyed by the raw "readingTimeGMT"
        # string, so they only merge when that string has exactly the
        # TIMESTAMP_FORMAT shape - confirm against the export.
        "hrv": {
            entry["readingTimeGMT"]: entry["hrvValue"]
            for entry in health.get("hrvReadings") or []  # tolerate an explicit None
        },
    }

    sleep_score = health.get("sleep_score", "No Data")

    # Last known value per metric, reset for every day.
    last_known = dict.fromkeys(series_by_metric)

    records = []
    for timestamp, heart_rate in hr_data.items():
        for metric, series in series_by_metric.items():
            last_known[metric] = series.get(timestamp, last_known[metric])
        records.append({
            "timestamp": timestamp,
            "heart_rate": heart_rate,
            **last_known,
            "sleep_score": sleep_score,
        })
    return records


# List to store processed data (all days, in the order they appear in garmin_data)
processed_data = []
for health in garmin_data.values():
    processed_data.extend(_process_day(health))

print("✅ Processed Garmin health data")

In [None]:
# Cell 5: Convert to DataFrame and display first few rows
# Name the columns explicitly so the frame keeps the same schema even when
# processed_data is empty; a frame without columns cannot be described.
RECORD_COLUMNS = [
    "timestamp",
    "heart_rate",
    "stress",
    "respiration",
    "body_battery",
    "spo2",
    "hrv",
    "sleep_score",
]
df = pd.DataFrame(processed_data, columns=RECORD_COLUMNS)
display(df.head())
print("\nDataset shape:", df.shape)

In [None]:
# Cell 6: Basic data analysis
# Descriptive statistics for the frame.
print("Summary statistics:")
summary_stats = df.describe()
display(summary_stats)

# Number of missing entries in each column.
print("\nMissing values:")
missing_per_column = df.isna().sum()
display(missing_per_column)

In [None]:
# Cell 7: Save to CSV
# Write the frame without its positional index so the file holds only the
# data columns.
df.to_csv(path_or_buf=csv_filename, index=False)
print(f"✅ Garmin health data saved to {csv_filename}")