# TempoQL Demo

This example uses the open-source MIMIC-IV Demo Dataset in the OMOP Common Data Model.

In [None]:
!pip install tempo-ql
!pip install duckdb

In [None]:
from tempo_ql import GenericDataset, formats, QueryEngine, FileVariableStore
import duckdb
import numpy as np
import os
from pathlib import Path
import pandas as pd
import requests
import zipfile
import warnings
warnings.filterwarnings("ignore", category=UserWarning, module='duckdb')

In [None]:
# Download the MIMIC-IV Demo Data in OMOP format
# (skipped entirely when the extracted CSV directory already exists)

# set path to MIMIC demo data
base_path = Path('mimic-iv-demo-data-in-the-omop-common-data-model-0.9') / '1_omop_data_csv'
url = "https://physionet.org/content/mimic-iv-demo-omop/get-zip/0.9/"
zip_path = Path("mimic-iv-demo-0.9.zip")

if not base_path.exists():
    print(f"Downloading {url} -> {zip_path} ...")
    # The context manager releases the streamed connection even if the download
    # fails midway; the (connect, read) timeout keeps a stalled connection from
    # hanging the notebook indefinitely.
    with requests.get(url, stream=True, timeout=(10, 60)) as resp:
        resp.raise_for_status()
        with open(zip_path, "wb") as fh:
            for chunk in resp.iter_content(chunk_size=8192):
                if chunk:  # skip empty chunks
                    fh.write(chunk)
    print("Download complete, extracting...")
    # Extracts into the current working directory, which is where base_path is resolved.
    with zipfile.ZipFile(zip_path, "r") as z:
        z.extractall()
    print("Extraction complete.")

In [None]:
# make sure this is the correct path to your MIMIC demo data
if not base_path.exists():
    raise ValueError("The path does not exist - are you sure you placed the MIMIC demo data in the correct directory?")

# Optional: to try the LLM-assisted authoring workflow, set the GEMINI_API_KEY
# environment variable before starting the kernel. Reading it from the environment
# keeps the secret out of the saved notebook; it falls back to "" when unset.
gemini_api_key = os.environ.get("GEMINI_API_KEY", "")

In [None]:
# Add data from the MIMIC-IV OMOP dataset
# Each CSV in base_path becomes one DuckDB table named after the file stem
# (with any "2b_" substring removed). The database is only built when
# example.db does not exist yet.

if not os.path.exists("example.db"):
    local_db = duckdb.connect("example.db")
    db_complete = False
    try:
        for csv_file in base_path.glob('*.csv'):
            table_name = csv_file.stem.replace("2b_", "")
            local_db.execute(f"create table {table_name} as select * from read_csv_auto('{csv_file}', header=true, ignore_errors=true, parallel=false)")
        db_complete = True
    finally:
        # Always release the connection, even when an import fails.
        local_db.close()
        if not db_complete:
            # A half-built database would pass the existence check above on the
            # next run and be reused silently, so discard it.
            for leftover in ("example.db", "example.db.wal"):
                if os.path.exists(leftover):
                    os.remove(leftover)

# Set up the variable store (backed by 'example_cache') and a query engine
# over the DuckDB file, using the OMOP format keyed on person_id.
var_store = FileVariableStore('example_cache')
omop_format = formats.omop(
    id_field='person_id',
    use_source_concept_ids=True,
    concept_id_field='concept_id',
)
omop_dataset = GenericDataset("duckdb:///example.db", omop_format)
query_engine = QueryEngine(omop_dataset, variable_stores=[var_store])

In [None]:
# List the data elements available in the 'Procedure' scope (with counts requested)
# and show the first 20.
# To see which scopes exist, run: print(query_engine.dataset.get_scopes())
names = query_engine.dataset.list_data_elements(
    scope='Procedure',
    return_counts=True,
)
names.head(20)

In [None]:
# Get all IDs known to the query engine.
# NOTE(review): the dataset format is configured with id_field='person_id', so these
# are presumably person IDs rather than visit occurrence IDs - confirm.
all_ids = query_engine.get_ids()
all_ids

## Interactive Python Widget

Looking for inspiration? Try out the following queries. (Note that the MIMIC-IV OMOP sample has some format inconsistencies, so some concepts that are available in the traditional MIMIC format may not be queryable here. See the full MIMIC-IV example to try queries that require more data.)

**Basic**
* How long are the ICU stays? `duration({Visit})`
* Which concepts are used to label sodium chloride infusions? `{name contains /sodium chloride/i; scope = Drug}`

**Aggregations**
* How often do patients have AFib recorded? `exists {AF (Atrial Fibrillation); scope = Condition} from #now - 24 hours to #now every 24 hours`
* How often are there recent temperature measurements? `count {Temperature Celsius; scope = Measurement} from #now - 4 hours to #now every 4 hours`
* What's the patient's global minimum mean blood pressure? `min {Non Invasive Blood Pressure mean; scope = Measurement} from #mintime to #maxtime`

In [None]:
# Launch the interactive widget; the LLM assistant is used when an API key is provided.
widget = query_engine.interactive(
    height=600,
    api_key=gemini_api_key,
)
widget

In [None]:
# Same widget, but backed by a JSON file so queries are saved (e.g. for version control).
file_widget = query_engine.interactive(
    file_path='test_queries.json',
    height=600,
    api_key=gemini_api_key,
)
file_widget

In [None]:
# Later, run the queries stored in your saved file.
# NOTE(review): presumably requires test_queries.json to exist already - confirm.
saved_query_results = query_engine.query_from('test_queries.json')
saved_query_results

## Running Queries in Code

In [None]:
# Run a one-off query: Drug-scope concepts whose name matches /insulin/i.
insulin_query = "{name contains /insulin/i; scope = Drug}"
insulin_results = query_engine.query(insulin_query)
insulin_results

In [None]:
# Show the SQL queries that were run after your last query_engine.query() call.
# Note: _captured_queries is underscore-prefixed, i.e. private by convention.
captured_sql = query_engine.dataset._captured_queries
print(captured_sql)

In [None]:
# Run future queries only within a random subset if desired
candidate_ids = query_engine.get_ids()
# Seeded generator so Restart & Run All selects the same subset; change the seed
# for a different sample.
rng = np.random.default_rng(42)
# replace=False gives distinct IDs (the default samples with replacement and would
# yield duplicates); min() keeps this valid when fewer than 50 IDs are available.
random_sample = rng.choice(candidate_ids, size=min(50, len(candidate_ids)), replace=False)
query_engine.dataset.set_trajectory_ids(random_sample)

In [None]:
# Variables can be stored by name. This one combines Celsius readings with
# Fahrenheit readings converted via (F - 32) * 5 / 9, with a `#value < 50` condition.
simple_temperature_query = "union({Temperature Celsius}, ({Temperature Fahrenheit} - 32) * 5 / 9) where #value < 50"
var_store['SimpleTemperature'] = query_engine.query(simple_temperature_query)

In [None]:
# ... and then refer to the stored variable by name in a later query.
last_temperature_query = "last SimpleTemperature before #now every 24 hours"
query_engine.query(last_temperature_query, return_subqueries=True)