In [1]:
# Cell 1: Setup and Test Connection
# Makes the project's client module importable, checks that BigQuery is
# reachable, and creates the `client` object used by the following cells.
import sys

# Relative to the kernel's working directory. Guarded so that re-running this
# cell does not keep appending the same entry to sys.path.
SRC_DIR = '../src'
if SRC_DIR not in sys.path:
    sys.path.append(SRC_DIR)

from mimic_client import MIMICClient, test_connection

print("🔍 Testing MIMIC-IV BigQuery connection...")
if not test_connection():
    print("❌ Connection failed. Check your setup.")
    # Fail fast: every later cell uses `client`. Without this, a failed
    # connection would surface downstream as an unrelated NameError.
    raise RuntimeError(
        "MIMIC-IV BigQuery connection test failed; `client` was not created."
    )

client = MIMICClient()
print("✅ Ready to explore MIMIC-IV data!")

🔍 Testing MIMIC-IV BigQuery connection...
✅ Query executed: 1 rows returned
✅ Connection successful! Found 364,627 patients
✅ Ready to explore MIMIC-IV data!


In [2]:
# Cell 2: Explore Dataset Overview
# Requires `client` from Cell 1. The heading is printed before the call because
# explore_dataset() emits its own progress lines while it runs.
print("\n📊 MIMIC-IV Dataset Overview:")
overview = client.explore_dataset()

# Render the summary as plain text without the index column.
# NOTE(review): presumably a pandas DataFrame — confirm in mimic_client.
overview_table = overview.to_string(index=False)
print(overview_table)




📊 MIMIC-IV Dataset Overview:
✅ Query executed: 1 rows returned
✅ Query executed: 1 rows returned
✅ Query executed: 1 rows returned
✅ Query executed: 1 rows returned
    table_name row_count      status
      patients   364,627 ✅ Available
    admissions   546,028 ✅ Available
 diagnoses_icd 6,364,488 ✅ Available
procedures_icd   859,655 ✅ Available


In [3]:
# Cell 3: Sample Data from Key Tables
# Requires `client` from Cell 1. Pulls five rows from each table and prints
# only the columns listed below.
# NOTE(review): this prints row-level records into the saved notebook output;
# confirm that is acceptable under the dataset's usage terms before sharing.
patient_columns = ['subject_id', 'gender', 'anchor_age']
diagnosis_columns = ['subject_id', 'icd_code', 'icd_version']

print("\n👥 PATIENTS Sample:")
patients_sample = client.get_table_sample('patients', limit=5)
patients_view = patients_sample[patient_columns]
print(patients_view.to_string(index=False))

print("\n🏥 DIAGNOSES Sample:")
diagnoses_sample = client.get_table_sample('diagnoses_icd', limit=5)
diagnoses_view = diagnoses_sample[diagnosis_columns]
print(diagnoses_view.to_string(index=False))




👥 PATIENTS Sample:
✅ Query executed: 5 rows returned
 subject_id gender  anchor_age
   10078138      F          18
   10180372      M          18
   10686175      M          18
   10851602      F          18
   10902424      F          18

🏥 DIAGNOSES Sample:
✅ Query executed: 5 rows returned
 subject_id icd_code  icd_version
   10000935    78052            9
   10000980    44021            9
   10000980    27800            9
   10000980    V8522            9
   10000980    72992            9


In [4]:
# Cell 4: Look for Oncology Cases
# Requires `client` from Cell 1. Counts ICD-10 diagnosis rows whose code starts
# with one of the lymphoma / myeloma / leukemia prefixes below and keeps the
# 20 most frequent codes.
#
# Notes on the query:
#   * Only the prefixes C81, C82, C83, C90, C91 and C92 are matched; other
#     oncology codes are not part of this preview.
#   * The WHERE clause admits exactly the prefixes handled by the CASE, so the
#     ELSE 'Other' label cannot appear in the result; it is kept as a safety net.
#   * LIMIT 20 truncates the result, so anything summed from it covers only the
#     top 20 codes, not every matching diagnosis.
oncology_preview = client.query("""
SELECT 
    icd_code,
    COUNT(*) as frequency,
    CASE 
        WHEN icd_code LIKE 'C81%' THEN 'Hodgkin Lymphoma'
        WHEN icd_code LIKE 'C82%' OR icd_code LIKE 'C83%' THEN 'Non-Hodgkin Lymphoma'
        WHEN icd_code LIKE 'C90%' THEN 'Multiple Myeloma'
        WHEN icd_code LIKE 'C91%' OR icd_code LIKE 'C92%' THEN 'Leukemia'
        ELSE 'Other'
    END as malignancy_type
FROM `physionet-data.mimiciv_3_1_hosp.diagnoses_icd`
WHERE icd_version = 10 
    AND (icd_code LIKE 'C81%' OR icd_code LIKE 'C82%' OR 
         icd_code LIKE 'C83%' OR icd_code LIKE 'C90%' OR
         icd_code LIKE 'C91%' OR icd_code LIKE 'C92%')
GROUP BY icd_code, malignancy_type
ORDER BY frequency DESC
LIMIT 20
""")

print("\n🔬 Oncology ICD Codes Found:")
print(oncology_preview.to_string(index=False))

# The total below is over the rows returned above (at most 20 codes), so the
# message says so rather than presenting it as a dataset-wide count.
top_codes_total = oncology_preview['frequency'].sum()
print("\n✅ Data exploration complete!")
print(
    f"Top {len(oncology_preview)} hematologic malignancy ICD-10 codes "
    f"account for {top_codes_total:,} diagnoses in MIMIC-IV"
)

✅ Query executed: 20 rows returned

🔬 Oncology ICD Codes Found:
icd_code  frequency      malignancy_type
   C9000       1541     Multiple Myeloma
   C9110        976             Leukemia
   C8339        842 Non-Hodgkin Lymphoma
   C9200        803             Leukemia
   C8338        680 Non-Hodgkin Lymphoma
   C9201        429             Leukemia
   C8330        340 Non-Hodgkin Lymphoma
   C9202        288             Leukemia
   C9002        246     Multiple Myeloma
   C9100        244             Leukemia
   C9210        219             Leukemia
   C8333        162 Non-Hodgkin Lymphoma
   C8318        147 Non-Hodgkin Lymphoma
   C8331        144 Non-Hodgkin Lymphoma
   C8190        129     Hodgkin Lymphoma
   C9001        112     Multiple Myeloma
   C9101        112             Leukemia
   C8332        104 Non-Hodgkin Lymphoma
   C8290        101 Non-Hodgkin Lymphoma
   C9111         98             Leukemia

✅ Data exploration complete!
Found 7,717 oncology diagnoses in MIMIC-IV
