# DiaTrend LLM Pipeline Demo

This notebook demonstrates how to use the LLM pipeline to generate personalized insights from DiaTrend glucose and insulin data.

In [None]:
# Import necessary libraries
import sys
import os
import json
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime, timedelta

# Make the `src` package importable. Note that this appends the *parent*
# directory ('../', resolved against the kernel's current working directory),
# not `src` itself — presumably the project root; TODO confirm the notebook is
# always launched from its own folder.
sys.path.append('../')
# NOTE(review): DataLoader and FeatureEngineer are not referenced by the cells
# visible in this notebook — confirm whether they are still needed.
from src.data_loader import DataLoader
from src.feature_engineer import FeatureEngineer
from src.insight_prompt_builder import InsightPromptBuilder
from src.llm_engine import LLMEngine

# Set up plotting: enable the inline backend via the IPython magic, then apply
# the 'ggplot' style and a default figure size of (12, 6) for later figures.
%matplotlib inline
plt.style.use('ggplot')
plt.rcParams['figure.figsize'] = (12, 6)

## 1. Load Processed DiaTrend Data

First, let's load the processed DiaTrend data and user goals that we saved in the previous notebook.

In [None]:
# Define paths (relative to the kernel's working directory)
processed_dir = '../data/processed'
outputs_dir = '../outputs'
os.makedirs(outputs_dir, exist_ok=True)

# Load daily features: a mapping of day (string key) -> dict of feature values.
# JSON is read explicitly as UTF-8 so the result does not depend on the
# platform's default encoding.
try:
    with open(os.path.join(processed_dir, 'diatrend_daily_features.json'), 'r', encoding='utf-8') as f:
        daily_features = json.load(f)
    print(f"Loaded features for {len(daily_features)} days")
except FileNotFoundError:
    print("Daily features file not found. Please run the data preparation notebook first.")
    # Create sample data for demonstration so the rest of the notebook still runs
    daily_features = {
        "2023-01-01": {
            "avg_glucose": 156.3,
            "min_glucose": 72.0,
            "max_glucose": 243.0,
            "glucose_range": 171.0,
            "glucose_std": 48.2,
            "hypo_events": 1,
            "hyper_events": 5,
            "time_in_range_percent": 62.5,
            "total_daily_insulin": 42.5,
            "insulin_doses_count": 5,
            "glucose_volatility_2hr_mean": 35.2,
            "glucose_volatility_2hr_max": 68.7,
            "glucose_rate_of_change_mean": 0.8,
            "glucose_rate_of_change_max": 3.2,
            "glucose_rate_of_change_min": -2.1,
            "pre_meal_glucose_slope_mean": 0.3,
            "post_meal_glucose_slope_mean": 1.8,
            "comment_sentiment_score": -0.2,
            "tag_exercise": 1
        }
    }

# Load user goals
try:
    with open(os.path.join(processed_dir, 'diatrend_user_goals.json'), 'r', encoding='utf-8') as f:
        user_goals = json.load(f)
    # .get avoids an uncaught KeyError when the goals file has no "name" field
    print(f"Loaded user goals for {user_goals.get('name', 'unknown user')}")
except FileNotFoundError:
    print("User goals file not found. Using default goals.")
    user_goals = {
        "name": "Alex",
        "primary_goals": [
            {"area": "glucose", "goal": "Reduce post-meal glucose spikes"},
            {"area": "insulin", "goal": "Optimize insulin timing for better glucose control"},
            {"area": "lifestyle", "goal": "Understand how exercise affects glucose levels"}
        ],
        "diabetes_type": "Type 1",
        "target_glucose_range": "70-180 mg/dL"
    }

# An empty features file would otherwise surface as an opaque IndexError below.
if not daily_features:
    raise ValueError(
        "The daily features file contains no days. "
        "Please re-run the data preparation notebook."
    )

# Select a sample day for analysis: the first key in the mapping's iteration order
sample_day = next(iter(daily_features))
sample_features = daily_features[sample_day]

print(f"\nSelected sample day: {sample_day}")
print(f"Average glucose: {sample_features['avg_glucose']:.1f} mg/dL")
print(f"Time in range: {sample_features['time_in_range_percent']:.1f}%")
print(f"Total insulin: {sample_features['total_daily_insulin']:.1f} units")

## 2. Initialize LLM Components

Now, let's initialize the prompt builder and LLM engine.

In [None]:
# Initialize prompt builder and LLM engine
prompt_builder = InsightPromptBuilder()
llm_engine = LLMEngine()

# Check if API key is set. An empty or whitespace-only value is treated as
# "not set": otherwise the later cells would take the real-API branch without
# a usable key instead of falling back to simulated insights.
api_key_set = bool(os.environ.get("OPENAI_API_KEY", "").strip())
if not api_key_set:
    print("WARNING: OpenAI API key not set. Insights will be simulated.")
    print("To set the API key, run: export OPENAI_API_KEY='your-api-key'")

## 3. Generate Insights for a Single Day

Let's generate insights for our sample day using different tones.

In [None]:
# Define the tones to use
tones = ['coach', 'medical', 'neutral']

# The glucose goal does not depend on the tone, so resolve it once up front.
# .get() keeps this working when the goals file lacks 'primary_goals' or an
# entry lacks 'area'; the literal below is the fallback goal text.
glucose_goal = next(
    (goal['goal'] for goal in user_goals.get('primary_goals', []) if goal.get('area') == 'glucose'),
    "Reduce post-meal glucose spikes"
)

# Canned responses used when no API key is available. These are fixed texts:
# the numbers inside them are hard-coded and are NOT derived from
# `sample_features`.
simulated_insights = {
    'coach': "Great job on keeping your time in range above 60%! I notice your post-meal glucose slopes are quite steep (1.8 mg/dL/min), which suggests your meal insulin timing might need adjustment. Try taking insulin 15-20 minutes before eating to better match the carb absorption. Also, I see you exercised today - awesome! Exercise can help improve insulin sensitivity, but watch for delayed hypoglycemia 6-12 hours after activity.",
    'medical': "Analysis indicates moderate glycemic variability with a standard deviation of 48.2 mg/dL. Time in range is 62.5%, below the recommended target of >70%. Post-prandial glucose excursions show a mean slope of 1.8 mg/dL/min, suggesting suboptimal meal insulin timing. Consider pre-bolusing insulin 15-20 minutes before meals to improve post-prandial control. Exercise-related glucose patterns indicate potential for improved insulin sensitivity but also risk for delayed hypoglycemia.",
    'neutral': "Your average glucose was 156.3 mg/dL with a range from 72.0 to 243.0 mg/dL. You spent 62.5% of time in the target range (70-180 mg/dL). You had 1 hypoglycemic event and 5 hyperglycemic events. Your total insulin for the day was 42.5 units across 5 doses. Glucose rose at an average rate of 1.8 mg/dL/min after meals, which is relatively fast. Exercise was noted in your comments for the day.",
}

# Generate insights for each tone: tone -> {"insight": ..., "metadata": ...}
insights = {}

for tone in tones:
    # Build prompt
    prompt = prompt_builder.build_diatrend_prompt(
        features=sample_features,
        tone=tone,
        user_goal=glucose_goal
    )

    # Show only the first 300 characters of each prompt part to keep output short
    print(f"\n=== {tone.upper()} TONE PROMPT ===\n")
    print(f"System prompt:\n{prompt['system'][:300]}...\n")
    print(f"User prompt:\n{prompt['user'][:300]}...\n")

    # Generate insight
    if api_key_set:
        insight, metadata = llm_engine.generate_insight(prompt)
    else:
        # Simulate insight; any tone without a canned text falls back to the neutral one
        insight = simulated_insights.get(tone, simulated_insights['neutral'])
        metadata = {
            "model": "simulated",
            "timestamp": datetime.now().isoformat()
        }

    # Store insight
    insights[tone] = {
        "insight": insight,
        "metadata": metadata
    }

    print(f"\n=== {tone.upper()} TONE INSIGHT ===\n")
    print(insight)
    print("\n" + "-"*80)

## 4. Compare Insights Across Different Tones

Let's compare the insights generated with different tones.

In [None]:
# Print every stored insight, one tone after another, for side-by-side reading
for tone_name, tone_result in insights.items():
    print(f"\n=== {tone_name.upper()} TONE ===\n")
    print(tone_result['insight'])
    print("\n" + "-"*80)

# Persist the per-tone insights as JSON; the file name embeds the sample day
insights_path = os.path.join(outputs_dir, f'diatrend_insights_{sample_day}.json')
with open(insights_path, 'w') as out_file:
    json.dump(insights, out_file, indent=2)

print(f"\nSaved insights to {insights_path}")

## 5. Generate Goal-Specific Insights

Now, let's generate insights tailored to specific user goals.

In [None]:
# Canned coach-style texts used when no API key is set, keyed by goal area.
# Any area other than 'glucose' or 'insulin' receives the lifestyle text.
simulated_by_area = {
    'glucose': "Looking at your post-meal glucose spikes, I notice they rise at 1.8 mg/dL/min on average. To reduce these spikes, consider: 1) Pre-bolusing insulin 15-20 minutes before eating, 2) Starting meals with protein or fat before carbs to slow absorption, and 3) Taking short walks after meals to help glucose uptake. Your current time in range is 62.5% - implementing these strategies could help you get closer to the recommended 70%+.",
    'insulin': "To optimize your insulin timing, I see two key opportunities: 1) Your post-meal glucose rises quickly at 1.8 mg/dL/min, suggesting you might benefit from taking insulin 15-20 minutes before meals rather than at mealtime. 2) Your total daily insulin of 42.5 units was spread across 5 doses - consider whether smaller, more frequent boluses might better match your eating patterns. Also, your glucose volatility (48.2 mg/dL standard deviation) suggests your basal/bolus balance might need adjustment.",
}
simulated_lifestyle = "I see you exercised today - that's great! Exercise typically improves insulin sensitivity, which I can see in your data as your glucose levels dropped afterward. To better understand this relationship: 1) Try to log the type, intensity, and duration of exercise, 2) Watch for improved insulin sensitivity for up to 24 hours post-exercise, and 3) Be aware that high-intensity exercise can sometimes cause glucose to rise initially. For your next workout, consider reducing meal boluses by 25% if eating within 2 hours after exercise to prevent lows."

# Results keyed by goal area: area -> {"goal", "insight", "metadata"}
goal_insights = {}

for entry in user_goals['primary_goals']:
    goal_area, goal_text = entry['area'], entry['goal']

    print(f"\n=== GOAL: {goal_text} ===\n")

    # Same sample day and coach tone for every goal; only the goal text varies
    prompt = prompt_builder.build_diatrend_prompt(
        features=sample_features,
        tone='coach',
        user_goal=goal_text
    )

    if not api_key_set:
        # No API key: pick the canned text for this area
        insight = simulated_by_area.get(goal_area, simulated_lifestyle)
        metadata = {
            "model": "simulated",
            "timestamp": datetime.now().isoformat()
        }
    else:
        insight, metadata = llm_engine.generate_insight(prompt)

    # A later goal with the same area replaces the earlier entry
    goal_insights[goal_area] = {
        "goal": goal_text,
        "insight": insight,
        "metadata": metadata
    }

    print(insight)
    print("\n" + "-"*80)

# Write the goal-specific insights next to the other outputs
goal_insights_path = os.path.join(outputs_dir, f'diatrend_goal_insights_{sample_day}.json')
with open(goal_insights_path, 'w') as out_file:
    json.dump(goal_insights, out_file, indent=2)

print(f"\nSaved goal-specific insights to {goal_insights_path}")

## 6. Generate Insights for Multiple Days

Let's generate insights for several days (up to the first five available) to see trends over time.

In [None]:
# Maximum number of days (taken from the start of `daily_features`) to analyze
MAX_DAYS_TO_ANALYZE = 5


def format_metric(value, unit=""):
    """Return `value` with one decimal place followed by `unit`, or 'N/A' if it is None."""
    return f"{value:.1f}{unit}" if value is not None else "N/A"


def simulate_daily_insight(features):
    """Build a rule-based stand-in insight for one day's feature dict.

    Used only when no API key is available. Missing 'avg_glucose' and
    'time_in_range_percent' fall back to 150 and 60 respectively, so the text
    may contain placeholder numbers for incomplete days.

    Args:
        features: dict of feature name -> value for a single day.

    Returns:
        The insight text as a single string.
    """
    avg_glucose = features.get('avg_glucose', 150)
    time_in_range = features.get('time_in_range_percent', 60)

    # Bucket the day by time in range
    if time_in_range > 70:
        quality = "excellent"
    elif time_in_range > 60:
        quality = "good"
    elif time_in_range > 50:
        quality = "moderate"
    else:
        quality = "challenging"

    text = f"Your glucose management was {quality} today with {time_in_range:.1f}% time in range and an average of {avg_glucose:.1f} mg/dL. "

    # Add some variety based on features
    hypo_events = features.get('hypo_events', 0)
    if hypo_events > 0:
        # Singular/plural agreement: "1 low glucose event" vs "2 low glucose events"
        noun = "event" if hypo_events == 1 else "events"
        text += f"You had {hypo_events} low glucose {noun} which might indicate a need to adjust insulin dosing. "

    if features.get('post_meal_glucose_slope_mean', 0) > 1.5:
        text += "Your post-meal glucose rises quickly, suggesting earlier pre-meal insulin timing might help. "

    if features.get('tag_exercise', 0) == 1:
        text += "Exercise appears to have positively impacted your glucose levels today. "

    return text


# Slicing already clamps to the available length, so no min() is needed
days_to_analyze = list(daily_features)[:MAX_DAYS_TO_ANALYZE]
multi_day_insights = {}

for day in days_to_analyze:
    features = daily_features[day]

    # Use .get here as well, so a day with a missing metric prints 'N/A'
    # instead of raising KeyError before the insight is generated.
    print(f"\n=== ANALYZING DAY: {day} ===\n")
    print(f"Average glucose: {format_metric(features.get('avg_glucose'), ' mg/dL')}")
    print(f"Time in range: {format_metric(features.get('time_in_range_percent'), '%')}")

    # Build prompt
    prompt = prompt_builder.build_diatrend_prompt(
        features=features,
        tone='coach',
        user_goal="Improve overall glucose management"
    )

    # Generate insight
    if api_key_set:
        insight, metadata = llm_engine.generate_insight(prompt)
    else:
        # Simulate a brief insight
        insight = simulate_daily_insight(features)
        metadata = {
            "model": "simulated",
            "timestamp": datetime.now().isoformat()
        }

    # Store insight
    multi_day_insights[day] = {
        "insight": insight,
        "metadata": metadata
    }

    print(f"\nInsight: {insight}\n")

# Save multi-day insights
multi_day_path = os.path.join(outputs_dir, 'diatrend_multi_day_insights.json')
with open(multi_day_path, 'w') as f:
    json.dump(multi_day_insights, f, indent=2)

print(f"\nSaved multi-day insights to {multi_day_path}")

## 7. Generate Weekly Summary

Finally, let's generate a weekly summary, provided at least three days of data are available.

In [None]:
# Minimum number of days required before a summary is attempted
MIN_DAYS_FOR_SUMMARY = 3


def format_weekly_value(averages, key):
    """Return averages[key] with one decimal place, or 'N/A' when the key is absent.

    Formatting the fallback string 'N/A' with ':.1f' raises ValueError, so the
    missing case has to be handled before formatting.
    """
    value = averages.get(key)
    return f"{value:.1f}" if value is not None else "N/A"


# Check if we have enough days for a weekly summary
if len(daily_features) >= MIN_DAYS_FOR_SUMMARY:
    print("\n=== GENERATING WEEKLY SUMMARY ===\n")

    # Calculate averages over ALL loaded days (not only the last seven).
    # Days that lack a feature, or hold None for it, are left out of that
    # feature's average; a feature with no values gets no entry at all.
    weekly_avg = {}
    numeric_features = ['avg_glucose', 'time_in_range_percent', 'glucose_std', 'total_daily_insulin',
                        'hypo_events', 'hyper_events']

    for feature in numeric_features:
        values = [day_features[feature] for day_features in daily_features.values()
                  if day_features.get(feature) is not None]
        if values:
            weekly_avg[feature] = sum(values) / len(values)

    print("Weekly Averages:")
    for feature, value in weekly_avg.items():
        print(f"- {feature}: {value:.1f}")

    # Guard against a goals file without goals instead of raising KeyError/IndexError
    primary_goals = user_goals.get('primary_goals', [])
    focus_area = primary_goals[0].get('area', 'overall') if primary_goals else 'overall'

    # Build a custom prompt for weekly summary
    # NOTE(review): this calls a private (underscore-prefixed) method of the
    # prompt builder — confirm whether a public equivalent exists.
    weekly_prompt = {
        'system': prompt_builder._build_diatrend_system_prompt('coach'),
        'user': f"""Please provide a weekly summary based on the following diabetes management data:

USER PROFILE:
- Name: {user_goals.get('name', 'N/A')}
- Type: {user_goals.get('diabetes_type', 'Type 1 Diabetes')}
- Primary Goals: {', '.join(goal['goal'] for goal in primary_goals)}

WEEKLY AVERAGES:
- Days analyzed: {len(daily_features)}
- Average glucose: {format_weekly_value(weekly_avg, 'avg_glucose')} mg/dL
- Time in range: {format_weekly_value(weekly_avg, 'time_in_range_percent')}%
- Glucose standard deviation: {format_weekly_value(weekly_avg, 'glucose_std')} mg/dL
- Average daily insulin: {format_weekly_value(weekly_avg, 'total_daily_insulin')} units
- Hypoglycemic events per day: {format_weekly_value(weekly_avg, 'hypo_events')}
- Hyperglycemic events per day: {format_weekly_value(weekly_avg, 'hyper_events')}

Please provide:
1. A summary of the overall weekly glucose management
2. Key patterns or trends observed
3. 2-3 specific recommendations for the coming week
4. Areas of success to celebrate

Format your response in a clear, encouraging coaching style.
"""
    }

    # Generate weekly summary
    if api_key_set:
        weekly_summary, metadata = llm_engine.generate_insight(weekly_prompt)
    else:
        # Simulate weekly summary. Missing averages fall back to placeholder
        # values (60, 150, 0.5, 40), so the text may not reflect real data.
        time_in_range = weekly_avg.get('time_in_range_percent', 60)
        avg_glucose = weekly_avg.get('avg_glucose', 150)
        hypo_events = weekly_avg.get('hypo_events', 0.5)
        glucose_std = weekly_avg.get('glucose_std', 40)
        daily_insulin = weekly_avg.get('total_daily_insulin', 40)

        weekly_summary = f"""# Your Weekly Diabetes Management Summary

## Overall Assessment
This week, your glucose management showed {"strong" if time_in_range > 70 else "moderate"} results with an average time in range of {time_in_range:.1f}% and an average glucose of {avg_glucose:.1f} mg/dL. You experienced about {hypo_events:.1f} hypoglycemic events per day, which {"is within a reasonable range" if hypo_events < 1 else "suggests we should focus on reducing lows"}.

## Key Patterns Observed
- Your glucose variability (standard deviation of {glucose_std:.1f} mg/dL) {"is within target range" if glucose_std < 50 else "indicates opportunities to smooth glucose curves"}
- Your daily insulin needs averaged {daily_insulin:.1f} units
- Post-meal glucose rises appear to be a significant contributor to time spent above range

## Recommendations for Next Week
1. Consider pre-bolusing insulin 15-20 minutes before meals to better match carbohydrate absorption
2. Track the relationship between exercise and glucose levels more closely to optimize activity timing
3. Review overnight basal rates/doses as morning glucose patterns suggest potential for optimization

## Celebrate These Wins!
- You've consistently tracked your data, which is the foundation for improvement
- Your {"limited hypoglycemic events" if hypo_events < 1 else "proactive management of glucose levels"} shows your attention to safety
- You're making progress toward your goals, especially in {focus_area} management

Keep up the great work! Small, consistent improvements lead to significant long-term results.
"""

        metadata = {
            "model": "simulated",
            "timestamp": datetime.now().isoformat()
        }

    print("\nWeekly Summary:")
    print(weekly_summary)

    # Save weekly summary together with the averages it was based on
    weekly_summary_path = os.path.join(outputs_dir, 'diatrend_weekly_summary.json')
    with open(weekly_summary_path, 'w') as f:
        json.dump({
            "summary": weekly_summary,
            "metadata": metadata,
            "weekly_averages": weekly_avg
        }, f, indent=2)

    print(f"\nSaved weekly summary to {weekly_summary_path}")
else:
    print("Not enough days available for a meaningful weekly summary. Need at least 3 days.")

## 8. Summary

In this notebook, we've demonstrated how to:

1. Load processed DiaTrend data and user goals
2. Generate insights using different tones (coach, medical, neutral)
3. Create goal-specific insights tailored to user needs
4. Generate insights for multiple days
5. Create a weekly summary of diabetes management

These insights can be used to provide personalized feedback to users, helping them better understand their glucose patterns and make informed decisions about their diabetes management.