# 01 — Exploratory Data Analysis

> ⚠️ **DISCLAIMER**: This is an educational prototype. Output is NOT medical advice.

In [None]:
import sys

# '../src' is resolved against the current working directory, so this cell
# expects to be run from the notebook's own folder.
sys.path.insert(0, '../src')

from diabetes_explainer.synth_data import generate
from diabetes_explainer import data_schema

# Generate the synthetic dataset (14 days, fixed seed) and pass it straight
# through the schema validator; `df` is what the remaining cells work with.
df = data_schema.validate(generate(n_days=14, seed=42))
print(f'Shape: {df.shape}')

# Last expression of the cell: the notebook renders the leading rows.
df.head()

In [None]:
# Per-column summary statistics of the validated frame, shown as the cell's
# output — a quick sanity check before plotting.
df.describe()

In [None]:
import matplotlib.pyplot as plt

# Three stacked panels sharing one time axis: glucose, carbs, steps.
fig, axes = plt.subplots(nrows=3, ncols=1, figsize=(14, 9), sharex=True)
glucose_ax, carbs_ax, steps_ax = axes

# Keep only the leading rows so individual features stay readable.
# NOTE(review): 3 * 24 * 12 rows equals 3 days only if the data has 12 samples
# per hour (5-minute cadence) — confirm against the generator.
n = 3 * 24 * 12
subset = df.iloc[:n]
timestamps = subset['timestamp']

# Top panel: glucose trace with dashed reference lines at 70 and 180 mg/dL.
glucose_ax.plot(timestamps, subset['glucose_mgdl'], color='steelblue', lw=1)
for level, line_color, line_label in (
    (70, 'orange', 'Low (70)'),
    (180, 'red', 'High (180)'),
):
    glucose_ax.axhline(level, color=line_color, ls='--', lw=0.8, label=line_label)
glucose_ax.set_ylabel('Glucose (mg/dL)')
glucose_ax.set_title('Synthetic Glucose Time Series (first 3 days)')
glucose_ax.legend(fontsize=8)

# Middle panel: carbohydrate intake as thin bars.
# `width` is expressed in x-axis data units — presumably days on a datetime
# axis, i.e. a few minutes per bar; verify against the timestamp dtype.
carbs_ax.bar(timestamps, subset['carbs_g'], color='green', width=0.003, label='Carbs (g)')
carbs_ax.set_ylabel('Carbs (g)')
carbs_ax.legend(fontsize=8)

# Bottom panel: activity steps; it also carries the shared x-axis label.
steps_ax.plot(timestamps, subset['activity_steps'], color='purple', lw=1)
steps_ax.set_ylabel('Steps')
steps_ax.set_xlabel('Time')

plt.tight_layout()
plt.show()

In [None]:
# Histogram of every glucose reading in the dataset (all rows, 50 bins).
fig, ax = plt.subplots(figsize=(8, 4))

glucose = df['glucose_mgdl']
glucose.hist(ax=ax, bins=50, color='steelblue', edgecolor='white')

# Axis labels and title applied in a single call.
ax.set(
    xlabel='Glucose (mg/dL)',
    ylabel='Count',
    title='Distribution of Synthetic Glucose Values',
)

fig.tight_layout()
plt.show()