# Glucose Tracker - Data Exploration & Dashboard

This notebook provides an interactive way to explore your glucose tracking data and create customized dashboards.

## 📋 Contents
1. [Setup & Data Loading](#setup)
2. [Data Quality Assessment](#quality)
3. [Exploratory Data Analysis](#eda)
4. [Glucose Pattern Analysis](#patterns)
5. [Activity Impact Analysis](#activity)
6. [Interactive Dashboard Creation](#dashboard)
7. [Export & Reporting](#export)

## 1. Setup & Data Loading <a id="setup"></a>

Let's start by importing the necessary libraries and loading our data.

In [None]:
# Import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import warnings
from datetime import datetime, timedelta
from pathlib import Path

# Configure display options: show every column, but cap printed frames at 10 rows
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', 10)
# NOTE: this silences *every* warning for the rest of the session, including
# deprecation notices from the libraries above; narrow or remove it when debugging.
warnings.filterwarnings('ignore')

# Set up plotting style
# 'seaborn-v0_8' only exists in matplotlib >= 3.6; earlier releases ship the
# same style sheet under the name 'seaborn', so fall back to it when needed.
_mpl_style = 'seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn'
plt.style.use(_mpl_style)
sns.set_palette("husl")

print("📚 Libraries imported successfully!")
print(f"📅 Analysis date: {datetime.now().strftime('%Y-%m-%d %H:%M')}")

In [None]:
# Configuration
# Input files are read from DATA_DIR; exported artefacts are written to OUTPUT_DIR,
# which is created here if it does not exist yet.
DATA_DIR = Path("data") / "sample"
OUTPUT_DIR = Path("output")
OUTPUT_DIR.mkdir(exist_ok=True)

# Glucose target ranges (mmol/L), each expressed as (lower bound, upper bound)
GLUCOSE_TARGET_RANGE = (3.9, 10.0)
GLUCOSE_OPTIMAL_RANGE = (4.0, 7.8)

# Colors for visualization, keyed by the kind of series being drawn
COLORS = dict(
    glucose='#2E86AB',
    sleep='#A23B72',
    workout='#F18F01',
    nutrition='#C73E1D',
    target='#59CD90',
)

print(f"📁 Data directory: {DATA_DIR}")
print(f"📂 Output directory: {OUTPUT_DIR}")

In [None]:
# Data loading functions
def load_glucose_data(filepath):
    """Load and preprocess glucose data.

    Reads the CSV at *filepath*, indexes it by the parsed ``Device Timestamp``
    column in chronological order, treats zero readings as missing, and
    collapses rows that share a timestamp into the mean of their values.

    Args:
        filepath: Path or file-like object accepted by ``pandas.read_csv``.

    Returns:
        DataFrame holding the numeric columns of the file, indexed by unique,
        sorted timestamps. Zeros are replaced by NaN; non-numeric columns are
        dropped because they cannot be averaged.

    Raises:
        KeyError: If the file has no ``Device Timestamp`` column.
    """
    df = pd.read_csv(filepath)
    df['Device Timestamp'] = pd.to_datetime(df['Device Timestamp'])
    df = df.set_index('Device Timestamp').sort_index()

    # Only numeric columns can be averaged; recent pandas raises on text
    # columns instead of silently skipping them, so select explicitly.
    df = df.select_dtypes(include='number')

    # Blank out zeros *before* merging duplicates: a 0 next to a real reading
    # at the same timestamp must not drag the averaged value down.
    df = df.replace(0, np.nan)

    # Remove duplicates: one row per timestamp, NaNs ignored by the mean
    df = df.groupby(level=0).mean()
    return df

def load_sleep_data(filepath):
    """Read the semicolon-separated sleep log and parse its session bounds.

    Args:
        filepath: Path or file-like object accepted by ``pandas.read_csv``.

    Returns:
        DataFrame with every column of the file; ``Start`` and ``End`` are
        converted to datetimes.
    """
    sessions = pd.read_csv(filepath, sep=';')
    # Both session boundaries arrive as text and get the same conversion
    for bound in ('Start', 'End'):
        sessions[bound] = pd.to_datetime(sessions[bound])
    return sessions

def load_workout_data(filepath):
    """Read the workout log and parse the start/end time of each session.

    Args:
        filepath: Path or file-like object accepted by ``pandas.read_csv``.

    Returns:
        DataFrame with every column of the file; ``start_time`` and
        ``end_time`` are converted to datetimes.
    """
    workouts = pd.read_csv(filepath)
    # Overwrite the two text columns in place (column order is unchanged)
    return workouts.assign(
        start_time=pd.to_datetime(workouts['start_time']),
        end_time=pd.to_datetime(workouts['end_time']),
    )

def load_nutrition_data(filepath):
    """Read the food log and index it by a combined date-and-time stamp.

    The ``Date`` and ``Time`` text columns are joined with a space and parsed
    day-first into a new ``Datetime`` column, which becomes the index.

    Args:
        filepath: Path or file-like object accepted by ``pandas.read_csv``.

    Returns:
        DataFrame indexed by ``Datetime``; the original ``Date`` and ``Time``
        columns are kept.
    """
    meals = pd.read_csv(filepath)
    stamp_text = meals['Date'] + ' ' + meals['Time']
    # dayfirst=True: ambiguous dates such as 02/01/2024 are read as 2 January
    meals['Datetime'] = pd.to_datetime(stamp_text, dayfirst=True)
    return meals.set_index('Datetime')

# Confirmation message printed once the loader definitions above have run
print("🔧 Data loading functions defined!")

In [None]:
# Load all data
# Each loader reads one file from DATA_DIR. A missing file is reported with a
# hint instead of a traceback; in that case the frames are not (all) defined.
try:
    glucose_df = load_glucose_data(DATA_DIR / 'glucose_data.csv')
    sleep_df = load_sleep_data(DATA_DIR / 'sleepdata.csv')
    workout_df = load_workout_data(DATA_DIR / 'workout_data.csv')
    nutrition_df = load_nutrition_data(DATA_DIR / 'food_log.csv')
except FileNotFoundError as missing:
    print(f"❌ Data files not found: {missing}")
    print("💡 Run the quickstart.py script first to generate sample data")
    print("   python quickstart.py --create-sample")
else:
    # Reached only when all four files were read
    print("✅ All data loaded successfully!")
    print(f"📊 Glucose records: {len(glucose_df):,}")
    print(f"😴 Sleep sessions: {len(sleep_df)}")
    print(f"💪 Workout sessions: {len(workout_df)}")
    print(f"🍽️ Nutrition entries: {len(nutrition_df)}")

## 2. Data Quality Assessment <a id="quality"></a>

Let's examine the quality and completeness of our data.

In [None]:
# Data quality assessment
def assess_data_quality(df, name):
    """Print a short quality report for *df* and return its summary statistics.

    The report covers the frame's shape, the smallest and largest index
    values, the total number of missing cells, the number of repeated index
    entries and, when the index exposes a ``freq`` attribute, that frequency.

    Args:
        df: DataFrame to inspect.
        name: Label used in the report heading.

    Returns:
        The result of ``df.describe()``.
    """
    idx = df.index

    print(f"\n📋 {name} Data Quality:")
    print(f"   Shape: {df.shape}")

    earliest, latest = idx.min(), idx.max()
    print(f"   Date range: {earliest} to {latest}")

    missing_cells = df.isna().sum().sum()
    print(f"   Missing values: {missing_cells}")

    repeated_labels = idx.duplicated().sum()
    print(f"   Duplicates: {repeated_labels}")

    # Not every index type has a `freq` attribute; report it only when it does
    try:
        index_freq = idx.freq
    except AttributeError:
        pass
    else:
        print(f"   Frequency: {index_freq}")

    return df.describe()

# Assess each dataset
glucose_stats = assess_data_quality(glucose_df, "Glucose")
print("\n📊 Glucose Statistics:")
display(glucose_stats)

# Check data gaps in glucose monitoring: time between consecutive non-missing
# readings, keeping only the intervals longer than 15 minutes
glucose_data = glucose_df['Historic Glucose mmol/L'].dropna()
time_gaps = glucose_data.index.to_series().diff()
large_gaps = time_gaps.loc[time_gaps > pd.Timedelta(minutes=15)]

print("\n⏰ Glucose Monitoring Gaps:")
print(f"   Gaps > 15 minutes: {large_gaps.size}")
if not large_gaps.empty:
    # Both figures are computed over the large gaps only, not over all intervals
    print(f"   Largest gap: {large_gaps.max()}")
    print(f"   Average gap: {large_gaps.mean()}")

In [None]:
# Create interactive dashboard with Plotly
def create_interactive_dashboard():
    """Build the interactive Plotly figure of the glucose readings.

    Reads the non-missing ``Historic Glucose mmol/L`` values from the global
    ``glucose_df``, draws them as a line over time, and shades the band
    between the two bounds of ``GLUCOSE_TARGET_RANGE`` behind the line.

    Returns:
        The configured ``go.Figure``; it is not shown by this function.
    """
    readings = glucose_df['Historic Glucose mmol/L'].dropna()

    # Glucose line, with a compact hover label (timestamp + one-decimal value)
    glucose_trace = go.Scatter(
        x=readings.index,
        y=readings.values,
        mode='lines',
        name='Glucose',
        line={'color': COLORS['glucose'], 'width': 1},
        hovertemplate='<b>%{x}</b><br>Glucose: %{y:.1f} mmol/L<extra></extra>',
    )

    fig = go.Figure()
    fig.add_trace(glucose_trace)

    # Target range: borderless, translucent band drawn beneath the trace
    target_low = GLUCOSE_TARGET_RANGE[0]
    target_high = GLUCOSE_TARGET_RANGE[1]
    fig.add_hrect(
        y0=target_low,
        y1=target_high,
        fillcolor=COLORS['target'],
        opacity=0.2,
        layer="below",
        line_width=0,
    )

    # Titles, theme and size
    layout_options = {
        'title': '🩸 Interactive Glucose Tracking Dashboard',
        'xaxis_title': 'Time',
        'yaxis_title': 'Glucose (mmol/L)',
        'template': 'plotly_white',
        'height': 600,
    }
    fig.update_layout(**layout_options)

    return fig

# Create and display dashboard
# The figure is kept in `dashboard` and rendered via its own show() method.
dashboard = create_interactive_dashboard()
dashboard.show()

print("✅ Interactive dashboard created!")

## 7. Export & Reporting <a id="export"></a>

Export analysis results and create summary reports.

In [None]:
# Export analysis results
def export_analysis_results():
    """Write the glucose summary tables to OUTPUT_DIR and return the summary.

    Works on the non-missing ``Historic Glucose mmol/L`` values of the global
    ``glucose_df`` and writes three CSV files into ``OUTPUT_DIR``:

    * ``glucose_analysis_summary.csv`` - overall metrics (mean, median,
      standard deviation, coefficient of variation, and the percentage of
      readings inside / below / above ``GLUCOSE_TARGET_RANGE``);
    * ``hourly_glucose_patterns.csv`` - mean/std/min/max/count per hour of day;
    * ``daily_glucose_summary.csv`` - the same aggregates per calendar date.

    Returns:
        DataFrame with the columns ``metric`` and ``value`` holding the
        overall metrics.
    """
    readings = glucose_df['Historic Glucose mmol/L'].dropna()
    n_readings = len(readings)
    target_low = GLUCOSE_TARGET_RANGE[0]
    target_high = GLUCOSE_TARGET_RANGE[1]

    def share_of_readings(mask):
        # Percentage of readings for which the boolean mask is True
        return mask.sum() / n_readings * 100

    average = readings.mean()
    spread = readings.std()
    within_target = (readings >= target_low) & (readings <= target_high)

    # Create summary statistics as ordered (label, value) pairs
    metric_rows = [
        ('Mean Glucose (mmol/L)', average),
        ('Median Glucose (mmol/L)', readings.median()),
        ('Standard Deviation', spread),
        ('Coefficient of Variation (%)', spread / average * 100),
        ('Time in Target Range (%)', share_of_readings(within_target)),
        ('Time Below Target (%)', share_of_readings(readings < target_low)),
        ('Time Above Target (%)', share_of_readings(readings > target_high)),
    ]
    summary_df = pd.DataFrame({
        'metric': [label for label, _ in metric_rows],
        'value': [value for _, value in metric_rows],
    })

    # Export to CSV
    summary_df.to_csv(OUTPUT_DIR / 'glucose_analysis_summary.csv', index=False)

    # Export hourly averages, then daily summaries: same aggregates, different key
    aggregates = ['mean', 'std', 'min', 'max', 'count']
    grouped_exports = (
        ('hourly_glucose_patterns.csv', readings.index.hour),
        ('daily_glucose_summary.csv', readings.index.date),
    )
    for filename, group_key in grouped_exports:
        readings.groupby(group_key).agg(aggregates).to_csv(OUTPUT_DIR / filename)

    print("📊 Analysis results exported:")
    print(f"   - Summary statistics: {OUTPUT_DIR / 'glucose_analysis_summary.csv'}")
    print(f"   - Hourly patterns: {OUTPUT_DIR / 'hourly_glucose_patterns.csv'}")
    print(f"   - Daily summaries: {OUTPUT_DIR / 'daily_glucose_summary.csv'}")

    return summary_df

# Export results
# The returned summary table is kept in `summary_results` and displayed inline.
summary_results = export_analysis_results()
display(summary_results)

In [None]:
# Generate final summary report
print("📋 GLUCOSE TRACKING ANALYSIS SUMMARY")
print("=" * 50)
print(f"📅 Analysis Period: {glucose_df.index.min().date()} to {glucose_df.index.max().date()}")
print(f"📊 Total Glucose Readings: {len(glucose_df['Historic Glucose mmol/L'].dropna()):,}")
# Whole days elapsed between the first and the last timestamp in the index
print(f"⏱️ Data Collection Days: {(glucose_df.index.max() - glucose_df.index.min()).days}")
print("\n🎯 KEY METRICS:")
for metric, value in zip(summary_results['metric'], summary_results['value']):
    # Metrics whose label contains 'Time' are percentages of readings: one
    # decimal plus a percent sign; everything else is printed to two decimals.
    if 'Time' not in metric:
        print(f"   {metric}: {value:.2f}")
    else:
        print(f"   {metric}: {value:.1f}%")

print("\n✅ Analysis completed successfully!")
print(f"📁 Results saved to: {OUTPUT_DIR}")