# 01: Synthetic Data Generation

This notebook generates synthetic patient cohort data for chronotherapy research.

**Note:** All data produced here is synthetic and intended for demonstration only. Working with real patient data requires IRB approval.

In [None]:
import sys
from pathlib import Path
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Put the parent of the current working directory first on the import
# path so that the `src` package imported below can be resolved.
parent_dir = Path('..')
sys.path.insert(0, str(parent_dir))

from src.data.simulate_cohort import SyntheticCohortGenerator

## Generate Synthetic Cohort

In [None]:
# Cohort dimensions: number of patients and treatments per patient
n_patients = 200
n_treatments = 4

# Generator built with a fixed seed (42)
# NOTE(review): presumably this makes generation reproducible across runs --
# confirm in SyntheticCohortGenerator.
generator = SyntheticCohortGenerator(seed=42)

print(f"Generating {n_patients} patients with {n_treatments} treatments each...")

# `cohort` is indexed by string key in later cells (e.g. 'demographics')
cohort = generator.generate_cohort(n_patients=n_patients,
                                   n_treatments_per_patient=n_treatments,
                                   n_days_circadian=30)

## Explore Generated Data

In [None]:
# Preview the demographics entry of the cohort and report how many rows it has
demographics_preview_source = cohort['demographics']

print("Demographics:")
print(demographics_preview_source.head())
print(f"\nTotal patients: {len(demographics_preview_source)}")

In [None]:
# Save to data directory (path is relative to the current working directory)
output_dir = Path('../data/synthetic')

# Make sure the target directory exists before saving; on a fresh checkout
# it may be missing. The call does nothing when the directory already exists.
# NOTE(review): save_cohort may already create the directory itself -- this
# call is harmless in that case.
output_dir.mkdir(parents=True, exist_ok=True)

generator.save_cohort(cohort, output_dir)

print(f"\nData saved to {output_dir}")