In [None]:
# One-time environment setup. Left commented out so that "Run All" does not
# reinstall packages on every run. If you do need to install from inside the
# notebook, prefer the `%pip` magic over `!pip` so the packages are installed
# into the environment of the running kernel.
# !pip install -r requirements-py37.txt
# !pip install -e .

In [1]:
from pathlib import Path
from backend.dataaccess import DataAccess
from core.mediator import Mediator
import pandas as pd

In [2]:
# Paths
# All paths are relative, so they resolve against the kernel's current working
# directory (presumably the repository root — confirm before running).
KB_PATH = Path("core/knowledge-base")

# Single source of truth for the data folder shared by the database and the
# input CSV, so the two locations cannot drift apart.
DATA_DIR = Path("backend/data")
DB_PATH = DATA_DIR / "mediator.db"
CSV_PATH = DATA_DIR / "synthetic_input_data.csv"

In [3]:
# Option 1: open the database that already exists at DB_PATH.
da = DataAccess(db_path=str(DB_PATH))

# Option 2: let DataAccess create the database instead (and optionally drop an
# existing one). Uncomment to use:
# da = DataAccess(db_path=str(DB_PATH), auto_create=True)

# Print one summary line per table: row count and number of patients.
stats = da.get_table_stats()
for table in stats:
    info = stats[table]
    print(f"{table}: {info['rows']} rows, {info['n_patients']} patients")

InputPatientData: 1778 rows, 12 patients
OutputPatientData: 0 rows, 0 patients
PatientQAScores: 0 rows, 0 patients


In [None]:
# Load CSV into InputPatientData — but only when the table is still empty.
#
# The load uses if_exists='append', so re-running this cell (for example via
# Restart & Run All) against an already-populated database would presumably
# add the same CSV rows a second time. The guard below makes the cell safe to
# re-run; set FORCE_CSV_LOAD = True to append regardless.
FORCE_CSV_LOAD = False

table_stats_before_load = da.get_table_stats()
if "InputPatientData" in table_stats_before_load:
    existing_input_rows = table_stats_before_load["InputPatientData"]["rows"]
else:
    existing_input_rows = 0

if existing_input_rows and not FORCE_CSV_LOAD:
    # Nothing loaded in this run; keep `total_rows` defined for later cells.
    total_rows = 0
    print(f"InputPatientData already has {existing_input_rows} rows; skipping CSV load")
else:
    total_rows = da.load_csv_to_input(
        csv_path=str(CSV_PATH),
        if_exists='append',           # 'append' or 'replace'
        clear_output_and_qa=False,    # Set True to clear outputs
        yes=True                      # Auto-confirm
    )
    print(f"Loaded {total_rows} rows")

In [4]:
# Create the mediator from the knowledge-base folder and the open data-access object.
mediator = Mediator(knowledge_base_path=KB_PATH, data_access=da)

# Build the TAK repository from the knowledge base.
repo = mediator.build_repository()

# Summary: overall TAK count, followed by one line per TAK family.
print(f"✅ Loaded {len(repo.taks)} TAKs:")
print(f"  - Raw Concepts: {len(mediator.raw_concepts)}")
print(f"  - Events:       {len(mediator.events)}")
print(f"  - States:       {len(mediator.states)}")
print(f"  - Trends:       {len(mediator.trends)}")
print(f"  - Contexts:     {len(mediator.contexts)}")
print(f"  - Patterns:     {len(mediator.patterns)}")

# Alphabetical listing of every TAK name in the repository.
print("\nTAK Names:")
for tak_name in sorted(repo.taks):
    print(f"  - {tak_name}")


PHASE 1: Building TAK Repository


Loading TAKs:   0%|          | 0/37 [00:00<?, ?file/s, Raw Concepts: ADMISSION]

Loading TAKs:  51%|█████▏    | 19/37 [00:00<00:00, 168.97file/s, States: ANTIDIABETIC_DRUGS_IV_BITZUA_STATE]BASAL_BITZUA_STATE: attribute idx=0 ('BASAL_DOSAGE', numeric) has no range covering -infinity. Very low values will be filtered out.
Loading TAKs:  54%|█████▍    | 20/37 [00:00<00:00, 168.97file/s, States: BASAL_BITZUA_STATE]                BOLUS_BITZUA_STATE: attribute idx=0 ('BOLUS_DOSAGE', numeric) has no range covering -infinity. Very low values will be filtered out.
Loading TAKs:  57%|█████▋    | 21/37 [00:00<00:00, 168.97file/s, States: BOLUS_BITZUA_STATE]CREATININE_SERUM_MEASURE_STATE: attribute idx=0 ('CREATININE_SERUM_MEASURE', numeric) has no range covering -infinity. Very low values will be filtered out.
Loading TAKs:  59%|█████▉    | 22/37 [00:00<00:00, 168.97file/s, States: CREATININE_SERUM_MEASURE_STATE]GLUCOSE_MEASURE_STATE: attribute idx=0 ('GLUCOSE_MEASURE', numeric) has no range covering -infinity. Very low values will be filtered out.
Loading TAKs: 100%|███████


[Validation] Running business-logic checks on TAK repository...

✅ TAK Repository Built Successfully
  Raw Concepts: 15
  Events:       4
  States:       5
  Trends:       2
  Contexts:     6
  Patterns:     5
  TOTAL TAKs:   37

✅ Loaded 37 TAKs:
  - Raw Concepts: 15
  - Events:       4
  - States:       5
  - Trends:       2
  - Contexts:     6
  - Patterns:     5

TAK Names:
  - ADMISSION
  - ADMISSION_EVENT
  - ANTIDIABETIC_DRUGS_IV_BITZUA
  - ANTIDIABETIC_DRUGS_IV_BITZUA_CONTEXT
  - ANTIDIABETIC_DRUGS_IV_BITZUA_STATE
  - BASAL_BITZUA
  - BASAL_BITZUA_CONTEXT
  - BASAL_BITZUA_STATE
  - BMI_MEASURE
  - BMI_MEASURE_ON_ADMISSION
  - BOLUS_BITZUA
  - BOLUS_BITZUA_CONTEXT
  - BOLUS_BITZUA_STATE
  - CREATININE_MEASURE_ON_ADMISSION
  - CREATININE_SERUM_MEASURE
  - CREATININE_SERUM_MEASURE_STATE
  - CREATININE_SERUM_MEASURE_TREND
  - DEATH
  - DEATH_EVENT
  - DIABETES_DIAGNOSIS
  - DIABETES_DIAGNOSIS_CONTEXT
  - DISGLYCEMIA_EVENT
  - GLUCOSE_MEASURE
  - GLUCOSE_MEASURE_ON_ADMISSION_PATTER




In [14]:
# Process specific patients (Jupyter-compatible)
# patient_ids = [1000, 1001, 1002]
patient_stats = await mediator.run_async(
    max_concurrent=4,
    patient_subset=None
)

# Print results
for pid, stats in patient_stats.items():
    if "error" in stats:
        print(f"❌ Patient {pid}: {stats['error']}")
    else:
        total = sum(v for k, v in stats.items() if isinstance(v, int))
        print(f"✅ Patient {pid}: {total} output rows")


PHASE 1: Building TAK Repository


Loading TAKs:  51%|█████▏    | 19/37 [00:00<00:00, 208.75file/s, States: ANTIDIABETIC_DRUGS_IV_BITZUA_STATE]BASAL_BITZUA_STATE: attribute idx=0 ('BASAL_DOSAGE', numeric) has no range covering -infinity. Very low values will be filtered out.
Loading TAKs:  54%|█████▍    | 20/37 [00:00<00:00, 205.57file/s, States: BASAL_BITZUA_STATE]                BOLUS_BITZUA_STATE: attribute idx=0 ('BOLUS_DOSAGE', numeric) has no range covering -infinity. Very low values will be filtered out.
Loading TAKs:  59%|█████▉    | 22/37 [00:00<00:00, 218.50file/s, States: BOLUS_BITZUA_STATE]CREATININE_SERUM_MEASURE_STATE: attribute idx=0 ('CREATININE_SERUM_MEASURE', numeric) has no range covering -infinity. Very low values will be filtered out.
Loading TAKs:  59%|█████▉    | 22/37 [00:00<00:00, 218.50file/s, States: CREATININE_SERUM_MEASURE_STATE]GLUCOSE_MEASURE_STATE: attribute idx=0 ('GLUCOSE_MEASURE', numeric) has no range covering -infinity. Very low values will be filtered out.
Loading TAKs: 100%|███████


[Validation] Running business-logic checks on TAK repository...

✅ TAK Repository Built Successfully
  Raw Concepts: 60
  Events:       16
  States:       20
  Trends:       8
  Contexts:     24
  Patterns:     20
  TOTAL TAKs:   37


PHASE 2: Processing 12 Patients (max_concurrent=4)



Processing patients: 100%|██████████| 12/12 [00:05<00:00,  2.40patient/s]


✅ Patient Processing Complete
  Patients processed: 12
  Total rows written: 2742
  Errors:             0

✅ Patient 1000: 83 output rows
✅ Patient 1001: 66 output rows
✅ Patient 1002: 345 output rows
✅ Patient 1003: 366 output rows
✅ Patient 1004: 122 output rows
✅ Patient 1005: 303 output rows
✅ Patient 1006: 348 output rows
✅ Patient 1007: 229 output rows
✅ Patient 1008: 366 output rows
✅ Patient 1009: 150 output rows
✅ Patient 1010: 287 output rows
✅ Patient 1011: 77 output rows





In [15]:
# Read every row of OutputPatientData (no WHERE clause) into a DataFrame and
# display it as the cell's output.
query = """
SELECT PatientId, ConceptName, StartDateTime, EndDateTime, Value
FROM OutputPatientData
"""
df_results = pd.read_sql_query(sql=query, con=da.conn)
df_results

Unnamed: 0,PatientId,ConceptName,StartDateTime,EndDateTime,Value
0,1002,DISGLYCEMIA_EVENT,2025-01-06 20:00:00,2025-01-06 20:00:01,Hyperglycemia
1,1000,DISGLYCEMIA_EVENT,2025-01-22 16:00:00,2025-01-22 16:00:01,Hypoglycemia
2,1000,DISGLYCEMIA_EVENT,2025-01-22 20:00:00,2025-01-22 20:00:01,Hypoglycemia
3,1000,DISGLYCEMIA_EVENT,2025-01-23 08:00:00,2025-01-23 08:00:01,Hypoglycemia
4,1000,DISGLYCEMIA_EVENT,2025-01-24 06:15:00,2025-01-24 06:15:01,Hypoglycemia
...,...,...,...,...,...
1022,1005,MEAL_CONTEXT,2025-01-23 06:00:00,2025-01-23 10:00:01,Breakfast
1023,1005,MEAL_CONTEXT,2025-01-23 11:00:00,2025-01-23 15:00:01,Lunch
1024,1005,MEAL_CONTEXT,2025-01-23 17:00:00,2025-01-23 21:00:01,Dinner
1025,1005,MEAL_CONTEXT,2025-01-24 06:00:00,2025-01-24 10:00:01,Breakfast


In [18]:
# Show up to the first 50 output rows whose PatientId is 1005.
df_results.loc[df_results['PatientId'].isin([1005])].head(50)

Unnamed: 0,PatientId,ConceptName,StartDateTime,EndDateTime,Value
870,1005,DISGLYCEMIA_EVENT,2025-01-17 08:00:00,2025-01-17 08:00:01,Hypoglycemia
871,1005,DISGLYCEMIA_EVENT,2025-01-17 20:00:00,2025-01-17 20:00:01,Hypoglycemia
872,1005,DISGLYCEMIA_EVENT,2025-01-19 00:00:00,2025-01-19 00:00:01,Hypoglycemia
873,1005,DISGLYCEMIA_EVENT,2025-01-22 06:15:00,2025-01-22 06:15:01,Hypoglycemia
874,1005,DISGLYCEMIA_EVENT,2025-01-23 16:00:00,2025-01-23 16:00:01,Hypoglycemia
875,1005,DISGLYCEMIA_EVENT,2025-01-24 08:00:00,2025-01-24 08:00:01,Hypoglycemia
885,1005,DISGLYCEMIA_EVENT,2025-01-15 04:00:00,2025-01-15 04:00:01,Hyperglycemia
886,1005,DISGLYCEMIA_EVENT,2025-01-16 20:00:00,2025-01-16 20:00:01,Hyperglycemia
889,1005,DISGLYCEMIA_EVENT,2025-01-22 04:00:00,2025-01-22 04:00:01,Hyperglycemia
890,1005,DISGLYCEMIA_EVENT,2025-01-23 04:00:00,2025-01-23 04:00:01,Hyperglycemia
