In [None]:
import json
import os
import sys
import pandas as pd

# Put the parent of the current working directory at the front of the module
# search path; the later `from app...` imports presumably rely on this.
parent_dir = os.path.dirname(os.getcwd())
sys.path.insert(0, parent_dir)

In [None]:
def load_data(path):
    """Read a JSON file and return the decoded Python object.

    Args:
        path: Location of the JSON file to read.

    Returns:
        Whatever ``json.load`` produces for the file's contents.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the contents are not valid JSON.
    """
    # Pin the encoding to UTF-8 instead of relying on the platform's locale
    # default, so non-ASCII text decodes the same way on every machine.
    with open(path, "r", encoding="utf-8") as f:
        return json.load(f)

In [None]:
from app.oura_client import HeartRateData, HeartRateSample, _parse_sleep_data
# Read the raw sleep export, then run every entry under its "data" key through
# _parse_sleep_data.
sleep_payload = load_data("./user_data/sleep.json")
sleep = list(map(_parse_sleep_data, sleep_payload['data']))


In [None]:
# Build one HeartRateSample per raw entry (absent fields fall back to 0 / ""),
# then wrap the collected samples in a HeartRateData container.
heartrate_payload = load_data("./user_data/heartrate.json")
heartrate_samples = []
for entry in heartrate_payload.get("data", []):
    heartrate_samples.append(
        HeartRateSample(
            bpm=entry.get("bpm", 0),
            source=entry.get("source", ""),
            timestamp=entry.get("timestamp", ""),
        )
    )
heartrate = HeartRateData(data=heartrate_samples)

In [None]:
from app.analytics import oura_sleep_to_dataframe, oura_heartrate_to_dataframe

# Turn the parsed records into the DataFrames that the later cells work on.
heartrate_df = oura_heartrate_to_dataframe(heartrate)
sleep_df = oura_sleep_to_dataframe(sleep)

In [None]:
# Diagnostic: compare the time span covered by the heart-rate samples with the
# span covered by the long_sleep records, then list the most recent nights.
from datetime import timedelta


def _clock(value):
    """Format a single bedtime value as HH:MM."""
    return pd.to_datetime(value).strftime('%H:%M')


# Heart-rate coverage
hr_timestamps = heartrate_df['timestamp']
hr_start, hr_end = hr_timestamps.min(), hr_timestamps.max()

# Sleep coverage, restricted to rows whose type is 'long_sleep'
long_sleep_df = sleep_df[sleep_df['type'] == 'long_sleep']
bedtime_starts = pd.to_datetime(long_sleep_df['bedtime_start'])
sleep_start = bedtime_starts.min()
sleep_end = pd.to_datetime(long_sleep_df['bedtime_end']).max()

print(
    "Heart Rate Data Range:",
    f"  From: {hr_start}",
    f"  To:   {hr_end}",
    f"  Total days: {(hr_end - hr_start).days}",
    sep="\n",
)

print(
    "\nSleep Data Range (long_sleep only):",
    f"  From: {sleep_start}",
    f"  To:   {sleep_end}",
    f"  Total nights: {len(long_sleep_df)}",
    sep="\n",
)

# Nights whose bedtime start falls within 28 days of the newest HR timestamp
last_28_start = hr_end - timedelta(days=28)
last_28_sleep = long_sleep_df[bedtime_starts >= last_28_start]
print(f"\nSleep records in last 28 days: {len(last_28_sleep)}")
print("Expected: ~28 nights")

# List each recent night so missing dates are easy to spot by eye
if not last_28_sleep.empty:
    print("\nLast 28 days sleep coverage:")
    nights = zip(
        last_28_sleep['day'],
        last_28_sleep['bedtime_start'],
        last_28_sleep['bedtime_end'],
    )
    for day, began, ended in nights:
        print(f"  {day}: {_clock(began)} - {_clock(ended)}")

In [None]:
# Count how many heart-rate rows carry each distinct value of the "source" column.
heartrate_df["source"].value_counts()

In [None]:
# Scratch arithmetic: 60000 / 1440, roughly 41.67.
# NOTE(review): presumably 12,000 samples at 5-minute spacing converted to days
# (60 * 24 minutes per day) — confirm the intent or replace with named constants.
(12000*5)/(60*24)

In [None]:
from datetime import datetime, timedelta
from app.analytics import get_sleep_intervals, filter_hr_outside_sleep, resample_heartrate
import matplotlib.pyplot as plt


def _show_histogram(values, xlabel, title, bins, **hist_kwargs):
    """Draw one labelled histogram on its own 12x6 figure and display it.

    Extra keyword arguments (for example ``range``) are forwarded to ``plt.hist``.
    """
    plt.figure(figsize=(12, 6))
    plt.hist(values, bins=bins, edgecolor='black', alpha=0.7, **hist_kwargs)
    plt.xlabel(xlabel)
    plt.ylabel('Frequency')
    plt.title(title)
    plt.grid(True, alpha=0.3)
    plt.tight_layout()
    plt.show()


# Window: the 28 days leading up to the newest heart-rate timestamp
end_date = heartrate_df['timestamp'].max()
start_date = end_date - timedelta(days=28)
last_28_days_hr = heartrate_df[heartrate_df['timestamp'] >= start_date].copy()

# Filter out sleep periods and resample using the analytics functions.
# start_date and end_date are passed to get_sleep_intervals to include fallback
# intervals for missing nights.
sleep_intervals = get_sleep_intervals(sleep_df, start_date.date(), end_date.date())
filtered_hr, _unused = filter_hr_outside_sleep(last_28_days_hr, sleep_intervals)
waking_hr = resample_heartrate(filtered_hr).set_index('timestamp')
waking_hr['hour_of_day'] = waking_hr.index.hour

# Only rows that carry a bpm value count as samples. Every analysis below uses
# this same subset so the per-hour counts, gaps and hour-of-day totals agree
# with the histogram and summary statistics (rows with a missing bpm would
# otherwise inflate counts and mask gaps between readings).
waking_samples = waking_hr[waking_hr['bpm'].notna()]
resampled_hr = waking_samples['bpm']

# Heart-rate distribution
_show_histogram(
    resampled_hr.values,
    xlabel='Heart Rate (bpm)',
    title=f'Waking Heart Rate Distribution (Last 28 Days)\n{len(resampled_hr)} samples',
    bins=50,
)

# Samples per clock hour, ignoring hours without any sample
hourly_counts = waking_samples.resample('h').size()
hourly_counts = hourly_counts[hourly_counts > 0]

_show_histogram(
    hourly_counts.values,
    xlabel='Samples per Hour',
    title=f'Distribution of Heart Rate Samples per Hour (Last 28 Days)\n{len(hourly_counts)} hours with data',
    bins=30,
)

# Gap (in minutes) between consecutive samples
time_diffs = waking_samples.index.to_series().diff()
gaps_minutes = time_diffs.dt.total_seconds() / 60
gaps_minutes = gaps_minutes[gaps_minutes.notna() & (gaps_minutes > 0)]

# The histogram is clipped to 0-60 minutes; the statistics printed below still
# cover every gap.
_show_histogram(
    gaps_minutes.values,
    xlabel='Gap Between Samples (minutes)',
    title=f'Distribution of Gaps Between Consecutive Heart Rate Samples (Last 28 Days)\n{len(gaps_minutes)} gaps',
    bins=50,
    range=(0, 60),
)

print(f"Date range: {start_date.date()} to {end_date.date()}")
print("\nGap statistics:")
print(f"Median gap: {gaps_minutes.median():.1f} minutes")
print(f"Mean gap: {gaps_minutes.mean():.1f} minutes")
print(f"Max gap: {gaps_minutes.max():.1f} minutes")

# Samples per hour of day (0-23)
hourly_distribution = waking_samples['hour_of_day'].value_counts().sort_index()

# Ensure all 24 hours are represented, with 0 for hours that have no sample
all_hours = pd.Series(0, index=range(24))
all_hours.update(hourly_distribution)

plt.figure(figsize=(12, 6))
plt.bar(range(24), all_hours.values, edgecolor='black', alpha=0.7)
plt.xlabel('Hour of Day')
plt.ylabel('Number of Samples')
plt.title('Heart Rate Samples by Hour of Day (Last 28 Days)')
plt.xticks(range(24))
plt.grid(True, alpha=0.3, axis='y')
plt.tight_layout()
plt.show()

print(f"Total waking HR samples (resampled): {len(resampled_hr)}")
print(f"Mean HR: {resampled_hr.mean():.1f} bpm")
print(f"Median HR: {resampled_hr.median():.1f} bpm")
print(f"Min HR: {resampled_hr.min():.0f} bpm")
print(f"Max HR: {resampled_hr.max():.0f} bpm")

In [None]:
from app.analytics import get_sleep_intervals, filter_hr_outside_sleep, resample_heartrate
import matplotlib.pyplot as plt
import numpy as np
from numpy.polynomial import polynomial as P  # NOTE(review): P is not referenced in this cell


# (column in monthly_stats, legend label, line colour) for each percentile curve
PERCENTILE_SERIES = [
    ('p20', 'P20', 'C0'),
    ('p50', 'P50 (Median)', 'C1'),
    ('p80', 'P80', 'C2'),
    ('p95', 'P95', 'C3'),
]
# Number of consecutive months covered by each rolling trend line
ROLLING_WINDOW_MONTHS = 6


def _linear_trend(x, y):
    """Fit a least-squares line through the finite (x, y) points.

    Returns the fitted line evaluated at every x, or None when fewer than two
    finite points are available (a line cannot be fitted in that case).
    """
    y = np.asarray(y, dtype=float)
    finite = np.isfinite(y)
    if finite.sum() < 2:
        return None
    coef = np.polyfit(x[finite], y[finite], 1)
    return np.polyval(coef, x)


# Pass start and end dates to get_sleep_intervals to generate fallback intervals
# for missing nights.
sleep_intervals = get_sleep_intervals(sleep_df, heartrate_df.day.min(), heartrate_df.day.max())
filtered_hr, _unused = filter_hr_outside_sleep(heartrate_df, sleep_intervals)
waking_hr_all = resample_heartrate(filtered_hr).set_index('timestamp')

# Add 'day' (date part of the timestamp) and 'month' (monthly period) columns
waking_hr_all['day'] = waking_hr_all.index.date
waking_hr_all['month'] = waking_hr_all.index.to_period('M')

# Calculate p20, p50, p80 and p95 of bpm per month
monthly_stats = waking_hr_all.groupby('month')['bpm'].agg([
    ('p20', lambda x: x.quantile(0.20)),
    ('p50', lambda x: x.quantile(0.50)),
    ('p80', lambda x: x.quantile(0.80)),
    ('p95', lambda x: x.quantile(0.95))
]).reset_index()

# Convert period to timestamp for plotting
monthly_stats['month'] = monthly_stats['month'].dt.to_timestamp()
monthly_stats_sorted = monthly_stats.sort_values('month')

plt.figure(figsize=(14, 7))

# Percentile curves
for column, label, color in PERCENTILE_SERIES:
    plt.plot(monthly_stats['month'], monthly_stats[column], marker='o', label=label, color=color, linewidth=2)

# Overall linear trend per percentile (dashed)
x_numeric = np.arange(len(monthly_stats))
for column, _label, color in PERCENTILE_SERIES:
    overall_trend = _linear_trend(x_numeric, monthly_stats[column])
    if overall_trend is not None:
        plt.plot(monthly_stats['month'], overall_trend, '--', alpha=0.5, color=color, linewidth=1)

# Rolling 6-month trends per percentile (dotted). These must be drawn before
# plt.show(): anything plotted after show() lands on a new, unlabeled figure
# instead of this chart.
x_window = np.arange(ROLLING_WINDOW_MONTHS)
for last in range(ROLLING_WINDOW_MONTHS - 1, len(monthly_stats_sorted)):
    window = monthly_stats_sorted.iloc[last - ROLLING_WINDOW_MONTHS + 1:last + 1]
    for column, _label, color in PERCENTILE_SERIES:
        window_trend = _linear_trend(x_window, window[column])
        if window_trend is not None:
            plt.plot(window['month'], window_trend, ':', alpha=0.3, color=color, linewidth=1.5)

# Shade the band between P20 and P80
plt.fill_between(monthly_stats['month'], monthly_stats['p20'], monthly_stats['p80'], alpha=0.2)
plt.xlabel('Month')
plt.ylabel('Heart Rate (bpm)')
plt.title('Monthly Waking Heart Rate Percentiles (P20, P50, P80, P95)')
plt.legend()
plt.grid(True, alpha=0.3)
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()

print(f"Total months: {len(monthly_stats)}")
print("\nMonthly statistics:")
print(monthly_stats.to_string(index=False))