In [None]:
import json
import os
import sys
import pandas as pd

# Put the parent of the current working directory at the front of the import
# search path.
# NOTE(review): presumably this is what makes the `app` package imported by
# later cells resolvable when the notebook runs from a subdirectory - confirm.
sys.path.insert(0, os.path.dirname(os.getcwd()))

In [None]:
def load_data(path):
    """Read a JSON file and return its decoded contents.

    Args:
        path: Location of the JSON file to read.

    Returns:
        The Python object produced by ``json.load`` (dict, list, ...).

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # Decode as UTF-8 explicitly instead of relying on the platform's locale
    # encoding, which varies between machines and can garble non-ASCII text.
    with open(path, "r", encoding="utf-8") as f:
        return json.load(f)

In [None]:
from app.oura_client import HeartRateData, HeartRateSample, _parse_sleep_data

# Read the raw sleep export, then run every entry under its 'data' key through
# the parser; `sleep` ends up as a list of parsed records.
sleep = load_data("./user_data/sleep.json")
sleep = list(map(_parse_sleep_data, sleep['data']))


In [None]:
# Read the raw heart-rate export and wrap every entry under 'data' in a
# HeartRateSample; absent fields fall back to 0 (bpm) or an empty string
# (source, timestamp). The samples are then bundled into one HeartRateData.
heartrate = HeartRateData(
    data=[
        HeartRateSample(
            bpm=entry.get("bpm", 0),
            source=entry.get("source", ""),
            timestamp=entry.get("timestamp", ""),
        )
        for entry in load_data("./user_data/heartrate.json").get("data", [])
    ]
)

In [None]:
from app.analytics import oura_sleep_to_dataframe, oura_heartrate_to_dataframe

# Convert the parsed records into the two frames every later cell works on.
# Later cells read the columns 'type', 'day', 'bedtime_start', 'bedtime_end'
# and 'total_sleep_duration' from sleep_df, and 'timestamp' and 'source' from
# heartrate_df.
sleep_df = oura_sleep_to_dataframe(sleep)
heartrate_df = oura_heartrate_to_dataframe(heartrate)

In [None]:
# Diagnostic: compare the time spans covered by the sleep and heart-rate data
from datetime import timedelta

# Heart-rate span: earliest and latest sample timestamp
hr_timestamps = heartrate_df['timestamp']
hr_start = hr_timestamps.min()
hr_end = hr_timestamps.max()
hr_span = hr_end - hr_start

# Sleep span, restricted to records whose type is 'long_sleep'
long_sleep_df = sleep_df[sleep_df['type'] == 'long_sleep']
long_sleep_starts = pd.to_datetime(long_sleep_df['bedtime_start'])
sleep_start = long_sleep_starts.min()
sleep_end = pd.to_datetime(long_sleep_df['bedtime_end']).max()

print("Heart Rate Data Range:")
print(f"  From: {hr_start}")
print(f"  To:   {hr_end}")
print(f"  Total days: {hr_span.days}")

print("\nSleep Data Range (long_sleep only):")
print(f"  From: {sleep_start}")
print(f"  To:   {sleep_end}")
print(f"  Total nights: {len(long_sleep_df)}")

# Long-sleep records whose bedtime starts within 28 days of the newest HR sample
last_28_start = hr_end - timedelta(days=28)
last_28_sleep = long_sleep_df[long_sleep_starts >= last_28_start]
print(f"\nSleep records in last 28 days: {len(last_28_sleep)}")
print(f"Expected: ~28 nights")

# List each of those nights so holes in the coverage are easy to spot
if not last_28_sleep.empty:
    print(f"\nLast 28 days sleep coverage:")
    for _, row in last_28_sleep.iterrows():
        bed_start = pd.to_datetime(row['bedtime_start']).strftime('%H:%M')
        bed_end = pd.to_datetime(row['bedtime_end']).strftime('%H:%M')
        print(f"  {row['day']}: {bed_start} - {bed_end}")

In [None]:
# Diagnostic for a suspected timezone mismatch between sleep intervals and
# heart-rate timestamps
from app.analytics import get_sleep_intervals

sleep_intervals = get_sleep_intervals(sleep_df)

# Everything below inspects the last interval in the list; nothing is printed
# when the list is empty.
if sleep_intervals:
    example_start, example_end = sleep_intervals[-1]
    print(f"Example sleep interval (most recent):")
    print(f"  Start: {example_start} (tz: {example_start.tz})")
    print(f"  End:   {example_end} (tz: {example_end.tz})")

    # Show the last heart-rate timestamp and its tz for comparison
    latest_hr_timestamp = heartrate_df['timestamp'].iloc[-1]
    print(f"\nExample HR timestamp:")
    print(f"  {latest_hr_timestamp} (tz: {latest_hr_timestamp.tz})")

    # Describe the hypothesis being tested
    print(f"\nTimezone comparison issue:")
    print(f"  If sleep is 23:00 local (+01:00), that's 22:00 UTC")
    print(f"  But the code compares 23:00 vs 22:00 directly (as naive times)")

    # Count samples inside the interval, comparing against its bounds as-is.
    # Only samples from five hours before the interval start are considered.
    lookback_start = example_start - pd.Timedelta(hours=5)
    recent_hr = heartrate_df[heartrate_df['timestamp'] >= lookback_start]
    in_interval_as_is = (recent_hr['timestamp'] >= example_start) & (recent_hr['timestamp'] <= example_end)
    during_sleep = recent_hr[in_interval_as_is]
    print(f"\nHR samples that WOULD be filtered for most recent sleep:")
    print(f"  {len(during_sleep)} samples")

    # Repeat the count after converting the interval bounds to UTC
    example_start_utc = example_start.tz_convert('UTC')
    example_end_utc = example_end.tz_convert('UTC')
    print(f"\nWith timezone conversion to UTC:")
    print(f"  Start: {example_start_utc}")
    print(f"  End:   {example_end_utc}")
    in_interval_utc = (recent_hr['timestamp'] >= example_start_utc) & (recent_hr['timestamp'] <= example_end_utc)
    during_sleep_correct = recent_hr[in_interval_utc]
    print(f"  HR samples that SHOULD be filtered: {len(during_sleep_correct)}")

In [None]:
# Let's see which hours have the most "waking" samples after filtering
from app.analytics import get_sleep_intervals, filter_hr_outside_sleep, resample_heartrate

# Restrict the heart-rate frame to the 28 days before its newest sample.
# pd.Timedelta is used so this cell does not depend on a `timedelta` import
# that lives in a different cell.
end_date = heartrate_df['timestamp'].max()
start_date = end_date - pd.Timedelta(days=28)
last_28_days_hr = heartrate_df[heartrate_df['timestamp'] >= start_date].copy()

sleep_intervals = get_sleep_intervals(sleep_df)

# Current filtering (may have timezone issues)
filtered_hr, _ = filter_hr_outside_sleep(last_28_days_hr, sleep_intervals)
waking_hr = resample_heartrate(filtered_hr)
waking_hr.set_index('timestamp', inplace=True)

# Check samples by hour
waking_hr['hour'] = waking_hr.index.hour
hourly_counts = waking_hr.groupby('hour').size()

print("Samples per hour (current filtering):")
for hour in range(24):
    count = hourly_counts.get(hour, 0)
    bar = '█' * (count // 20)  # one bar glyph per 20 samples
    # Highlight suspicious hours (typical sleep time 23:00 - 09:00)
    marker = " ⚠️" if hour in [23, 0, 1, 2, 3, 4, 5, 6, 7, 8] and count > 50 else ""
    print(f"  {hour:02d}:00 - {count:4d} {bar}{marker}")

# Now let's check what the actual filtered samples look like for a specific night
print("\n\nChecking samples around a specific sleep period...")
if not sleep_intervals:
    # Same guard as the timezone diagnostic cell: nothing to inspect without intervals.
    print("No sleep intervals available - skipping the per-night check.")
else:
    # Prefer the interval five entries from the end ("a few days back"); when
    # fewer than five exist, fall back to the oldest one instead of raising
    # IndexError.
    test_interval = sleep_intervals[-min(5, len(sleep_intervals))]
    test_start, test_end = test_interval
    print(f"Sleep period: {test_start} to {test_end}")

    # Get HR samples in a window around this sleep (two hours either side)
    window_start = test_start - pd.Timedelta(hours=2)
    window_end = test_end + pd.Timedelta(hours=2)

    # Convert to UTC for comparison
    test_start_utc = test_start.tz_convert('UTC')
    test_end_utc = test_end.tz_convert('UTC')
    window_start_utc = window_start.tz_convert('UTC')
    window_end_utc = window_end.tz_convert('UTC')

    hr_window = last_28_days_hr[(last_28_days_hr['timestamp'] >= window_start_utc) &
                                (last_28_days_hr['timestamp'] <= window_end_utc)].copy()
    hr_window['should_filter'] = (hr_window['timestamp'] >= test_start_utc) & (hr_window['timestamp'] <= test_end_utc)

    # Same comparison against the unconverted interval bounds
    hr_window['current_filters'] = (hr_window['timestamp'] >= test_start) & (hr_window['timestamp'] <= test_end)

    should_filter_count = hr_window['should_filter'].sum()
    current_filter_count = hr_window['current_filters'].sum()

    print(f"\nIn the window ({window_start_utc} to {window_end_utc}):")
    print(f"  Total HR samples: {len(hr_window)}")
    print(f"  Should be filtered (UTC-aware): {should_filter_count}")
    print(f"  Currently filtered: {current_filter_count}")
    print(f"  Difference: {should_filter_count - current_filter_count}")

In [None]:
# Find the days in the last 28 that have no 'long_sleep' record
from datetime import timedelta

end_date = heartrate_df['timestamp'].max()
start_date = end_date - timedelta(days=28)

# Every calendar day in the 28-day window
all_dates = pd.date_range(start=start_date.date(), end=end_date.date(), freq='D')

# Days that do have a 'long_sleep' record, taken from the 'day' column
long_sleep = sleep_df[sleep_df['type'] == 'long_sleep'].copy()
sleep_dates = set(pd.to_datetime(long_sleep['day']).dt.date)

# Window days without such a record, in chronological order
missing_dates = []
for window_day in all_dates:
    calendar_day = window_day.date()
    if calendar_day not in sleep_dates:
        missing_dates.append(calendar_day)

print(f"Missing sleep records for these dates (last 28 days):")
for d in sorted(missing_dates):
    print(f"  {d}")

print(f"\nTotal missing: {len(missing_dates)} nights")

# Tag every HR sample of the window with its calendar date and hour of day
waking_hr_debug = last_28_days_hr.copy()
waking_hr_debug['date'] = waking_hr_debug['timestamp'].dt.date
waking_hr_debug['hour'] = waking_hr_debug['timestamp'].dt.hour

# Keep only samples whose hour is 22, 23 or 0-9
sleep_hours = [*range(0, 10), 22, 23]
during_sleep_hours = waking_hr_debug[waking_hr_debug['hour'].isin(sleep_hours)]

# Per missing date: samples from the previous evening plus that morning
print("\nHR samples during typical sleep hours (22:00-10:00) by date:")
for d in sorted(missing_dates):
    evening_before = d - timedelta(days=1)

    # Evening part: hours 22-23 of the preceding day
    is_prior_evening = (
        (during_sleep_hours['date'] == evening_before)
        & (during_sleep_hours['hour'].isin([22, 23]))
    )
    evening_samples = during_sleep_hours[is_prior_evening]

    # Morning part: hours 0-9 of the missing date itself
    is_same_morning = (
        (during_sleep_hours['date'] == d)
        & (during_sleep_hours['hour'].isin(range(0, 10)))
    )
    morning_samples = during_sleep_hours[is_same_morning]

    evening_count = len(evening_samples)
    morning_count = len(morning_samples)
    total = evening_count + morning_count
    print(f"  {d}: {total} samples not filtered (evening: {evening_count}, morning: {morning_count})")

In [None]:
# Check what sleep types exist for the missing dates
missing_dates_set = set(missing_dates)


def _has_value(value):
    """Return True when value is neither missing (None/NaN/NaT) nor falsy.

    A plain truthiness test is not enough for values read from a DataFrame:
    NaN and NaT are truthy, so they would slip through and later print as
    'nan' or make strftime raise.
    """
    return bool(pd.notna(value) and value)


def _as_clock_time(value):
    """Format a bedtime value as HH:MM, or 'N/A' when the value is absent."""
    if not _has_value(value):
        return 'N/A'
    return pd.to_datetime(value).strftime('%H:%M')


# Get all sleep records (not just long_sleep) for missing dates
all_sleep_for_missing = sleep_df[pd.to_datetime(sleep_df['day']).dt.date.isin(missing_dates_set)]
# Calendar date of each selected record, computed once instead of per loop pass
missing_record_days = pd.to_datetime(all_sleep_for_missing['day']).dt.date

print("Sleep records for missing dates (all types):\n")
if len(all_sleep_for_missing) == 0:
    print("No sleep records at all for these dates!")
else:
    for d in sorted(missing_dates):
        day_records = all_sleep_for_missing[missing_record_days == d]
        if len(day_records) == 0:
            print(f"{d}: NO RECORDS")
        else:
            for _, row in day_records.iterrows():
                # total_sleep_duration is divided by 3600 to report hours
                duration_hrs = row['total_sleep_duration'] / 3600 if _has_value(row['total_sleep_duration']) else 0
                start = _as_clock_time(row['bedtime_start'])
                end = _as_clock_time(row['bedtime_end'])
                print(f"{d}: type='{row['type']}', {start}-{end}, duration={duration_hrs:.1f}h")

In [None]:
# Exercise the fallback-interval filtering and compare it with the plain one
from importlib import reload
import app.analytics
reload(app.analytics)  # re-execute the module so the imports below see its current code

from app.analytics import (
    filter_hr_outside_sleep,
    generate_fallback_sleep_intervals,
    get_monthly_avg_sleep_times,
    get_sleep_intervals,
    resample_heartrate,
)

# Monthly average sleep times, last six entries in sorted key order
monthly_avgs = get_monthly_avg_sleep_times(sleep_df)
print("Monthly average sleep times:")
recent_months = sorted(monthly_avgs.items())[-6:]
for month, (start, end) in recent_months:
    # Split the fractional-hour values into whole hours and minutes.
    # NOTE(review): the start hour is wrapped with % 24 but the end hour is
    # not - presumably only `start` can fall outside 0-24; confirm against
    # get_monthly_avg_sleep_times.
    start_h, start_m = int(start % 24), int((start % 1) * 60)
    end_h, end_m = int(end), int((end % 1) * 60)
    print(f"  {month}: {start_h:02d}:{start_m:02d} - {end_h:02d}:{end_m:02d}")

print("\n" + "="*60)

# Heart-rate samples from the 28 days before the newest sample
end_date = heartrate_df['timestamp'].max()
start_date = end_date - timedelta(days=28)
last_28_days_hr = heartrate_df[heartrate_df['timestamp'] >= start_date].copy()
range_first_day = start_date.date()
range_last_day = end_date.date()

# Old behaviour: no date range is passed to get_sleep_intervals
sleep_intervals_no_fallback = get_sleep_intervals(sleep_df)
filtered_no_fallback, hours_no_fallback = filter_hr_outside_sleep(
    last_28_days_hr, sleep_intervals_no_fallback
)

# New behaviour: the date range is passed as well
sleep_intervals_with_fallback = get_sleep_intervals(sleep_df, range_first_day, range_last_day)
filtered_with_fallback, hours_with_fallback = filter_hr_outside_sleep(
    last_28_days_hr, sleep_intervals_with_fallback
)

kept_without = len(filtered_no_fallback)
kept_with = len(filtered_with_fallback)
print(f"\nFiltering comparison (last 28 days):")
print(f"  Without fallback: {kept_without} samples ({hours_no_fallback:.1f}h sleep filtered)")
print(f"  With fallback:    {kept_with} samples ({hours_with_fallback:.1f}h sleep filtered)")
print(f"  Difference:       {kept_without - kept_with} additional samples filtered")

# List the fallback intervals themselves, ordered by their start
fallback_intervals = generate_fallback_sleep_intervals(
    range_first_day, range_last_day, sleep_df, sleep_intervals_no_fallback
)
print(f"\nFallback intervals generated: {len(fallback_intervals)}")
for start, end in sorted(fallback_intervals, key=lambda interval: interval[0]):
    print(f"  {start.date()}: {start.strftime('%H:%M')} - {end.strftime('%H:%M')}")

In [None]:
# Side-by-side hourly sample counts with and without fallback intervals
import matplotlib.pyplot as plt

# Resample each filtered frame, index it by timestamp and tag the hour of day
waking_no_fallback = resample_heartrate(filtered_no_fallback)
waking_no_fallback.set_index('timestamp', inplace=True)
waking_no_fallback['hour'] = waking_no_fallback.index.hour

waking_with_fallback = resample_heartrate(filtered_with_fallback)
waking_with_fallback.set_index('timestamp', inplace=True)
waking_with_fallback['hour'] = waking_with_fallback.index.hour

# Rows per hour of day
hourly_no_fallback = waking_no_fallback.groupby('hour').size()
hourly_with_fallback = waking_with_fallback.groupby('hour').size()

# Start from zero for all 24 hours so hours without rows still appear
all_hours_no_fb = pd.Series(0, index=range(24))
all_hours_no_fb.update(hourly_no_fallback)
all_hours_with_fb = pd.Series(0, index=range(24))
all_hours_with_fb.update(hourly_with_fallback)

# One bar chart per variant, drawn with identical styling
fig, axes = plt.subplots(1, 2, figsize=(16, 6))
panels = [
    (axes[0], all_hours_no_fb, 'WITHOUT Fallback (Old Behavior)', 'salmon'),
    (axes[1], all_hours_with_fb, 'WITH Fallback (New Behavior)', 'lightgreen'),
]
for ax, hour_counts, panel_title, bar_color in panels:
    ax.bar(range(24), hour_counts.values, edgecolor='black', alpha=0.7, color=bar_color)
    ax.set_xlabel('Hour of Day')
    ax.set_ylabel('Number of Samples')
    ax.set_title(panel_title)
    ax.set_xticks(range(24))
    ax.grid(True, alpha=0.3, axis='y')
    # Shade the hours 23 and 0-8 in blue
    for h in [23, 0, 1, 2, 3, 4, 5, 6, 7, 8]:
        ax.axvspan(h-0.4, h+0.4, alpha=0.2, color='blue')

plt.suptitle('Heart Rate Samples by Hour of Day (Last 28 Days)\nBlue shading = typical sleep hours', fontsize=12)
plt.tight_layout()
plt.show()

# Same numbers as a table; rows that dropped by more than 20 get an arrow
print("\nSamples per hour comparison:")
print(f"{'Hour':<6} {'No Fallback':<15} {'With Fallback':<15} {'Difference':<12}")
print("-" * 50)
for hour in range(24):
    no_fb = all_hours_no_fb.get(hour, 0)
    with_fb = all_hours_with_fb.get(hour, 0)
    diff = no_fb - with_fb
    if diff > 20:
        marker = " ⬇️"
    else:
        marker = ""
    print(f"{hour:02d}:00  {no_fb:<15} {with_fb:<15} {diff:<12}{marker}")

In [None]:
# Preview the first rows of the sleep frame.
sleep_df.head()

In [None]:
# Count heart-rate samples per distinct value of the 'source' column.
heartrate_df["source"].value_counts()

In [None]:
# Scratch arithmetic: 12000 * 5 divided by the number of minutes in a day
# (60 * 24), i.e. roughly 41.7.
# NOTE(review): presumably 12,000 samples at 5-minute spacing expressed in
# days - confirm what the two constants stand for.
(12000*5)/(60*24)

In [None]:
from datetime import datetime, timedelta
from app.analytics import get_sleep_intervals, filter_hr_outside_sleep, resample_heartrate
import matplotlib.pyplot as plt


def _show_histogram(values, bins, xlabel, title, **hist_kwargs):
    """Draw one 12x6 histogram with this cell's shared styling and show it."""
    plt.figure(figsize=(12, 6))
    plt.hist(values, bins=bins, edgecolor='black', alpha=0.7, **hist_kwargs)
    plt.xlabel(xlabel)
    plt.ylabel('Frequency')
    plt.title(title)
    plt.grid(True, alpha=0.3)
    plt.tight_layout()
    plt.show()


# Heart-rate samples from the 28 days before the newest sample
end_date = heartrate_df['timestamp'].max()
start_date = end_date - timedelta(days=28)
last_28_days_hr = heartrate_df[heartrate_df['timestamp'] >= start_date].copy()

# Drop samples inside sleep intervals, then resample. The date range is passed
# to get_sleep_intervals so fallback intervals for missing nights are included.
sleep_intervals = get_sleep_intervals(sleep_df, start_date.date(), end_date.date())
filtered_hr, _ = filter_hr_outside_sleep(last_28_days_hr, sleep_intervals)
waking_hr = resample_heartrate(filtered_hr)
waking_hr.set_index('timestamp', inplace=True)
resampled_hr = waking_hr['bpm'].dropna()

# Distribution of the bpm values
_show_histogram(
    resampled_hr.values,
    50,
    'Heart Rate (bpm)',
    f'Waking Heart Rate Distribution (Last 28 Days)\n{len(resampled_hr)} samples',
)

# Rows per clock hour, ignoring hours without any row
hourly_counts = waking_hr.resample('h').size()
hourly_counts = hourly_counts[hourly_counts > 0]

_show_histogram(
    hourly_counts.values,
    30,
    'Samples per Hour',
    f'Distribution of Heart Rate Samples per Hour (Last 28 Days)\n{len(hourly_counts)} hours with data',
)

# Minutes between consecutive rows; the first (NaN) and non-positive gaps are dropped
time_diffs = waking_hr.index.to_series().diff()
gaps_minutes = time_diffs.dt.total_seconds() / 60
is_real_gap = gaps_minutes.notna() & (gaps_minutes > 0)
gaps_minutes = gaps_minutes[is_real_gap]

# Histogram bins cover 0-60 minutes only
_show_histogram(
    gaps_minutes.values,
    50,
    'Gap Between Samples (minutes)',
    f'Distribution of Gaps Between Consecutive Heart Rate Samples (Last 28 Days)\n{len(gaps_minutes)} gaps',
    range=(0, 60),
)

print(f"Date range: {start_date.date()} to {end_date.date()}")
print(f"\nGap statistics:")
print(f"Median gap: {gaps_minutes.median():.1f} minutes")
print(f"Mean gap: {gaps_minutes.mean():.1f} minutes")
print(f"Max gap: {gaps_minutes.max():.1f} minutes")

# Rows per hour of day (0-23)
waking_hr['hour_of_day'] = waking_hr.index.hour
hourly_distribution = waking_hr['hour_of_day'].value_counts().sort_index()

# Start from zero for all 24 hours so hours without rows still get a bar
all_hours = pd.Series(0, index=range(24))
all_hours.update(hourly_distribution)

plt.figure(figsize=(12, 6))
plt.bar(range(24), all_hours.values, edgecolor='black', alpha=0.7)
plt.xlabel('Hour of Day')
plt.ylabel('Number of Samples')
plt.title(f'Heart Rate Samples by Hour of Day (Last 28 Days)')
plt.xticks(range(24))
plt.grid(True, alpha=0.3, axis='y')
plt.tight_layout()
plt.show()

# Summary statistics of the resampled bpm values
print(f"Total waking HR samples (resampled): {len(resampled_hr)}")
print(f"Mean HR: {resampled_hr.mean():.1f} bpm")
print(f"Median HR: {resampled_hr.median():.1f} bpm")
print(f"Min HR: {resampled_hr.min():.0f} bpm")
print(f"Max HR: {resampled_hr.max():.0f} bpm")

In [None]:
from app.analytics import get_sleep_intervals, filter_hr_outside_sleep, resample_heartrate
import matplotlib.pyplot as plt
import numpy as np
from numpy.polynomial import polynomial as P


# Pass start and end dates to get_sleep_intervals to generate fallback intervals for missing nights.
# NOTE(review): this reads a `day` column on heartrate_df, while the other
# cells derive the date range from `timestamp` - confirm the column exists.
sleep_intervals = get_sleep_intervals(sleep_df, heartrate_df.day.min(), heartrate_df.day.max())
filtered_hr, _ = filter_hr_outside_sleep(heartrate_df, sleep_intervals)
waking_hr_all = resample_heartrate(filtered_hr)
waking_hr_all.set_index('timestamp', inplace=True)

# Add 'day' column (date part of timestamp)
waking_hr_all['day'] = waking_hr_all.index.date
# Add month column
waking_hr_all['month'] = waking_hr_all.index.to_period('M')

# Calculate p20, p50, p80 and p95 of bpm per month
monthly_stats = waking_hr_all.groupby('month')['bpm'].agg([
    ('p20', lambda x: x.quantile(0.20)),
    ('p50', lambda x: x.quantile(0.50)),
    ('p80', lambda x: x.quantile(0.80)),
    ('p95', lambda x: x.quantile(0.95))
]).reset_index()

# Convert period to timestamp for plotting
monthly_stats['month'] = monthly_stats['month'].dt.to_timestamp()

# Plot the four percentile series; colors are pinned so every trend line
# below can reuse the color of its series.
plt.figure(figsize=(14, 7))
plt.plot(monthly_stats['month'], monthly_stats['p20'], marker='o', label='P20', linewidth=2, color='C0')
plt.plot(monthly_stats['month'], monthly_stats['p50'], marker='o', label='P50 (Median)', linewidth=2, color='C1')
plt.plot(monthly_stats['month'], monthly_stats['p80'], marker='o', label='P80', linewidth=2, color='C2')
plt.plot(monthly_stats['month'], monthly_stats['p95'], marker='o', label='P95', linewidth=2, color='C3')

# Linear trend over all months for each percentile (dashed)
x_numeric = np.arange(len(monthly_stats))

# P20 trend
p20_coef = np.polyfit(x_numeric, monthly_stats['p20'], 1)
p20_trend = np.polyval(p20_coef, x_numeric)
plt.plot(monthly_stats['month'], p20_trend, '--', alpha=0.5, color='C0', linewidth=1)

# P50 trend
p50_coef = np.polyfit(x_numeric, monthly_stats['p50'], 1)
p50_trend = np.polyval(p50_coef, x_numeric)
plt.plot(monthly_stats['month'], p50_trend, '--', alpha=0.5, color='C1', linewidth=1)

# P80 trend
p80_coef = np.polyfit(x_numeric, monthly_stats['p80'], 1)
p80_trend = np.polyval(p80_coef, x_numeric)
plt.plot(monthly_stats['month'], p80_trend, '--', alpha=0.5, color='C2', linewidth=1)

# P95 trend
p95_coef = np.polyfit(x_numeric, monthly_stats['p95'], 1)
p95_trend = np.polyval(p95_coef, x_numeric)
plt.plot(monthly_stats['month'], p95_trend, '--', alpha=0.5, color='C3', linewidth=1)

plt.fill_between(monthly_stats['month'], monthly_stats['p20'], monthly_stats['p80'], alpha=0.2)

# 6-month rolling trend lines (dotted). These must be drawn BEFORE plt.show():
# anything plotted after show() lands on a new, unlabeled figure instead of
# this chart.
monthly_stats_sorted = monthly_stats.sort_values('month')
x_6m = np.arange(6)
rolling_series = (('p20', 'C0'), ('p50', 'C1'), ('p80', 'C2'), ('p95', 'C3'))
# A window needs six months, so the first complete one ends at position 5.
for i in range(5, len(monthly_stats_sorted)):
    # Last 6 months including the current one
    last_6_months = monthly_stats_sorted.iloc[i-5:i+1]
    for column, color in rolling_series:
        coef_6m = np.polyfit(x_6m, last_6_months[column], 1)
        trend_6m = np.polyval(coef_6m, x_6m)
        plt.plot(last_6_months['month'], trend_6m, ':', alpha=0.3, color=color, linewidth=1.5)

plt.xlabel('Month')
plt.ylabel('Heart Rate (bpm)')
plt.title('Monthly Waking Heart Rate Percentiles (P20, P50, P80, P95)')
plt.legend()
plt.grid(True, alpha=0.3)
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()

print(f"Total months: {len(monthly_stats)}")
print(f"\nMonthly statistics:")
print(monthly_stats.to_string(index=False))