In [67]:
import subprocess

# Run the Garmin download script as a child process.
# check=True raises CalledProcessError on a non-zero exit status, so a failed
# fetch stops the notebook here instead of letting later cells read stale or
# partial JSON files. On success the CompletedProcess is still the cell's value.
# NOTE(review): "python3" is resolved via PATH and may not be the kernel's
# interpreter; the script path is machine-specific — confirm before sharing.
subprocess.run(
    ["python3", "/Users/tolgasabanoglu/Desktop/github/spatiotemporal/scripts/parse_garmin.py"],
    check=True,
)

🔐 Logging into Garmin Connect...

📆 Fetching Garmin data from 2025-02-25 to 2025-06-29


📅 2025-02-25
📦 Fetching steps...
✅ Saved steps → steps_2025-02-25.json
📦 Fetching sleep...
✅ Saved sleep → sleep_2025-02-25.json
📦 Fetching stress...
✅ Saved stress → stress_2025-02-25.json
📦 Fetching body_battery...
✅ Saved body_battery → body_battery_2025-02-25.json
📦 Fetching heart_rate...
✅ Saved heart_rate → heart_rate_2025-02-25.json

📅 2025-02-26
📦 Fetching steps...
✅ Saved steps → steps_2025-02-26.json
📦 Fetching sleep...
✅ Saved sleep → sleep_2025-02-26.json
📦 Fetching stress...
✅ Saved stress → stress_2025-02-26.json
📦 Fetching body_battery...
✅ Saved body_battery → body_battery_2025-02-26.json
📦 Fetching heart_rate...
✅ Saved heart_rate → heart_rate_2025-02-26.json

📅 2025-02-27
📦 Fetching steps...
✅ Saved steps → steps_2025-02-27.json
📦 Fetching sleep...
✅ Saved sleep → sleep_2025-02-27.json
📦 Fetching stress...
✅ Saved stress → stress_2025-02-27.json
📦 Fetching body_battery...
✅ Saved 

CompletedProcess(args=['python3', '/Users/tolgasabanoglu/Desktop/github/spatiotemporal/scripts/parse_garmin.py'], returncode=0)

In [68]:
import os
import json
from glob import glob
import pandas as pd

# Location of the raw Garmin JSON exports, listed in sorted order
raw_dir = "/Users/tolgasabanoglu/Desktop/github/spatiotemporal/data/raw/"
json_files = sorted(glob(os.path.join(raw_dir, "*.json")))

# Filename prefixes of the Garmin exports to audit
file_types = ["steps", "stress", "sleep", "body_battery", "heart_rate"]

# Date-bearing keys to probe, in priority order, for each top-level JSON shape
date_keys_by_format = {
    "list": ("calendarDate", "startGMT", "startTimestampGMT"),
    "dict": ("calendarDate", "date", "startTimestampGMT"),
}

audit_rows = []

for ftype in file_types:
    matching = [path for path in json_files if os.path.basename(path).startswith(ftype)]
    entry = dict(
        type=ftype,
        file_count=len(matching),
        format="❓",
        has_date=False,
        sample_date="—",
        top_keys=[],
    )

    # Inspect files until one yields a non-empty list or a dict payload;
    # only that first usable file is described in the audit row.
    for path in matching:
        try:
            with open(path) as handle:
                payload = json.load(handle)

            if isinstance(payload, list) and payload:
                shape = "list"
            elif isinstance(payload, dict):
                shape = "dict"
            else:
                # Empty list or unexpected top-level type: try the next file
                continue

            entry["format"] = shape
            # For list payloads the first element stands in for the whole file
            record = payload[0] if shape == "list" else payload
            entry["top_keys"] = list(record.keys())

            # First truthy value among the candidate date keys, if any
            found = None
            for key in date_keys_by_format[shape]:
                found = record.get(key)
                if found:
                    break

            if found:
                entry["has_date"] = True
                entry["sample_date"] = str(found)[:10]
            break
        except Exception as e:
            # Record the failure and keep looking; a later file may overwrite it
            entry["top_keys"] = [f"⚠️ Error: {e}"]

    audit_rows.append(entry)

# Assemble the audit table with a fixed column order
audit_columns = ["type", "file_count", "format", "has_date", "sample_date", "top_keys"]
df_audit = pd.DataFrame(audit_rows)
df_audit = df_audit[audit_columns]

# Plain-text rendering of the audit table
print("\n📊 Garmin JSON File Audit:\n")
print(df_audit.to_string(index=False))



📊 Garmin JSON File Audit:

        type  file_count format  has_date sample_date                                                                                                                                                                                                                                                                                                top_keys
       steps         124   list      True  2025-02-24                                                                                                                                                                                                                          [startGMT, endGMT, steps, pushes, primaryActivityLevel, activityLevelConstant]
      stress         125   dict      True  2025-02-25 [userProfilePK, calendarDate, startTimestampGMT, endTimestampGMT, startTimestampLocal, endTimestampLocal, maxStressLevel, avgStressLevel, stressChartValueOffset, stressChartYAxisOrigin, stressValueDescriptorsDTOList, s

In [69]:
import os
import json
import pandas as pd
from glob import glob
from collections import defaultdict

# ---- Setup ----
raw_dir = "/Users/tolgasabanoglu/Desktop/github/spatiotemporal/data/raw/"
json_files = sorted(glob(os.path.join(raw_dir, "*.json")))
summary = defaultdict(dict)

# ---- Parse Files ----
for file in json_files:
    fname = os.path.basename(file)

    try:
        with open(file) as f:
            data = json.load(f)
    except Exception as e:
        print(f"⚠️ Could not load {fname}: {e}")
        continue

    # ---- STEPS ----
    if fname.startswith("steps") and isinstance(data, list) and data:
        date = data[0].get("startGMT", "")[:10]
        steps = sum(item.get("steps", 0) for item in data if isinstance(item, dict))
        summary[date]["steps"] = steps

    # ---- STRESS ----
    elif fname.startswith("stress") and isinstance(data, dict):
        date = data.get("calendarDate")
        if date:
            summary[date]["avg_stress"] = data.get("avgStressLevel")
            summary[date]["max_stress"] = data.get("maxStressLevel")

    # ---- BODY BATTERY ----
    elif fname.startswith("body_battery"):
        if isinstance(data, list):
            for entry in data:
                if isinstance(entry, dict):
                    date = entry.get("date")
                    if date:
                        summary[date]["body_charged"] = entry.get("charged")
                        summary[date]["body_drained"] = entry.get("drained")
        elif isinstance(data, dict):
            date = data.get("date")
            if date:
                summary[date]["body_charged"] = data.get("charged")
                summary[date]["body_drained"] = data.get("drained")

    # ---- HEART RATE ----
    elif fname.startswith("heart_rate") and isinstance(data, dict):
        date = data.get("calendarDate")
        if date:
            summary[date]["resting_hr"] = data.get("restingHeartRate")

    # ---- SLEEP ----
    elif fname.startswith("sleep") and isinstance(data, dict):
        record = {}
        date = None

        daily = data.get("dailySleepDTO")
        if isinstance(daily, dict):
            date = daily.get("calendarDate") or daily.get("sleepEndTimestampGMT", "")[:10]
            record.update({
                "sleep_score": daily.get("overallSleepScore"),
                "total_sleep_s": daily.get("sleepTimeSeconds"),
                "deep_sleep_s": daily.get("deepSleepSeconds"),
                "light_sleep_s": daily.get("lightSleepSeconds"),
                "rem_sleep_s": daily.get("remSleepSeconds"),
                "awake_s": daily.get("awakeSleepSeconds"),
                "restless_moments": daily.get("restlessMomentsCount")
            })

        rem = data.get("remSleepData")
        if isinstance(rem, dict):
            record.update({
                "rem_count": rem.get("remSleepCount"),
                "avg_rem_duration_s": rem.get("avgRemSleepSeconds")
            })

        if date:
            summary[date].update(record)

# ---- Create DataFrame ----
df = pd.DataFrame.from_dict(summary, orient="index").reset_index()
df = df.rename(columns={"index": "date"})
df["date"] = pd.to_datetime(df["date"], errors="coerce")
df = df.sort_values("date")

# ---- Convert seconds → hours ----
sleep_cols = ["total_sleep_s", "deep_sleep_s", "light_sleep_s", "rem_sleep_s", "awake_s", "avg_rem_duration_s"]
for col in sleep_cols:
    if col in df.columns:
        df[col.replace("_s", "_h")] = df[col] / 3600

# ---- Preview ----
print(df.head(10))


          date  body_charged  body_drained  resting_hr  sleep_score  \
125 2025-02-24           NaN           NaN         NaN          NaN   
0   2025-02-25           0.0          13.0        75.0          NaN   
1   2025-02-26          51.0          51.0        64.0          NaN   
2   2025-02-27          50.0          23.0        68.0          NaN   
3   2025-02-28          70.0          71.0        59.0          NaN   
4   2025-03-01          53.0          68.0        60.0          NaN   
5   2025-03-02          26.0          37.0        65.0          NaN   
6   2025-03-03          72.0          48.0        65.0          NaN   
7   2025-03-04          42.0          65.0        59.0          NaN   
8   2025-03-05          75.0          67.0        57.0          NaN   

     total_sleep_s  deep_sleep_s  light_sleep_s  rem_sleep_s  awake_s  \
125            NaN           NaN            NaN          NaN      NaN   
0              NaN           NaN            NaN          NaN      NaN   

In [70]:


# Clean the Garmin daily summary.
# Relies on `df` (the per-date summary DataFrame) and `pd` already being
# defined by an earlier cell.

# ---- 1. Remove unwanted columns ----
# errors="ignore" already skips labels that are not present
columns_to_drop = ["sleep_score", "restless_moments"]
df = df.drop(columns=columns_to_drop, errors="ignore")

# ---- 2. Ensure date is datetime and sorted ----
df["date"] = pd.to_datetime(df["date"], errors="coerce")
df = df.sort_values("date")

# ---- 3. Forward-fill then backward-fill missing values ----
# .ffill()/.bfill() replace the deprecated fillna(method=...) form.
# NOTE: after this step gaps hold values copied from neighbouring days
# (leading gaps take the first observed value), not real measurements.
df = df.ffill().bfill()

# ---- 4. Preview cleaned data ----
print(df.head())

# Optional: save to CSV
# df.to_csv("garmin_cleaned.csv", index=False)


          date  body_charged  body_drained  resting_hr  total_sleep_s  \
125 2025-02-24           0.0          13.0        75.0        23340.0   
0   2025-02-25           0.0          13.0        75.0        23340.0   
1   2025-02-26          51.0          51.0        64.0        23340.0   
2   2025-02-27          50.0          23.0        68.0        31440.0   
3   2025-02-28          70.0          71.0        59.0        38985.0   

     deep_sleep_s  light_sleep_s  rem_sleep_s  awake_s    steps  avg_stress  \
125        3300.0        14280.0       5760.0     60.0    525.0        54.0   
0          3300.0        14280.0       5760.0     60.0   9953.0        54.0   
1          3300.0        14280.0       5760.0     60.0   2926.0        46.0   
2          4380.0        23400.0       3660.0   4680.0   8521.0        43.0   
3          3120.0        23880.0      12000.0    540.0  17681.0        25.0   

     max_stress  total_hleep_h  deep_hleep_h  light_hleep_h  rem_hleep_h  \
125       


DataFrame.fillna with 'method' is deprecated and will raise in a future version. Use obj.ffill() or obj.bfill() instead.



In [71]:
import pandas as pd
import plotly.graph_objects as go
import plotly.subplots as sp
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score

# Scatter each numeric metric against avg_stress, with a fitted
# least-squares line and its R² shown in every subplot.
# Relies on `df` being defined by an earlier cell.

# Ensure the avg_stress column (the regression predictor) is numeric
df["avg_stress"] = pd.to_numeric(df["avg_stress"], errors="coerce")

# Select numeric columns to compare with avg_stress
columns_to_plot = [
    col for col in df.columns
    if col not in ["date", "avg_stress"] and pd.api.types.is_numeric_dtype(df[col])
]

# Keep only columns with at least two complete (avg_stress, column) pairs,
# the minimum needed to fit a line
valid_pairs = [
    col for col in columns_to_plot
    if len(df[["avg_stress", col]].dropna()) >= 2
]

if not valid_pairs:
    # make_subplots rejects rows=0, so report the situation instead of raising
    print("No numeric column has at least two paired observations with avg_stress; nothing to plot.")
else:
    # One subplot row per metric
    fig = sp.make_subplots(
        rows=len(valid_pairs), cols=1, shared_xaxes=False,
        subplot_titles=[f"Avg Stress vs. {col.replace('_', ' ').title()}" for col in valid_pairs]
    )

    # Plot with regression
    for i, col in enumerate(valid_pairs, start=1):
        temp_df = df[["avg_stress", col]].dropna()
        X = temp_df["avg_stress"].values.reshape(-1, 1)
        y = temp_df[col].values

        model = LinearRegression()
        model.fit(X, y)
        y_pred = model.predict(X)
        # R² is computed on the same points the line was fitted to
        r2 = r2_score(y, y_pred)

        fig.add_trace(
            go.Scatter(x=temp_df["avg_stress"], y=temp_df[col], mode='markers', name=col),
            row=i, col=1
        )

        fig.add_trace(
            go.Scatter(x=temp_df["avg_stress"], y=y_pred, mode='lines',
                       line=dict(dash='dash'), name=f"{col} Fit"),
            row=i, col=1
        )

        # Anchor the label to this subplot's own axis domain so it stays in
        # the top-right corner regardless of row count or subplot spacing
        fig.add_annotation(
            xref="x domain", yref="y domain",
            x=0.95, y=0.95,
            text=f"R² = {r2:.2f}", showarrow=False, font=dict(size=12),
            row=i, col=1
        )

    fig.update_layout(
        height=350 * len(valid_pairs),
        width=1000,
        title_text="Avg Stress vs Other Metrics with Regression Lines and R²",
        showlegend=False
    )

    fig.show()
