# TempoQL Demo on Full MIMIC-IV Dataset

Before running, install TempoQL and its dependencies in the virtual environment of your choice: either `cd` into this repo and run `pip install .`, or run the installation cell below to install the released package from PyPI.

This example uses the full MIMIC-IV dataset available [in BigQuery through PhysioNet](https://physionet.org/content/mimiciv/3.1/). Please ensure that you have been granted access to the dataset through PhysioNet, and set the `project_id` variable below to the ID of the GCP project that should be billed for the queries.

In [None]:
!pip install tempo-ql
!pip install sqlalchemy-bigquery

In [None]:
from tempo_ql import GenericDataset, formats, QueryEngine, FileVariableStore
import numpy as np
import os
import pandas as pd
import time

In [None]:
# GCP project in which to run queries - make sure it has access to MIMIC-IV through physionet.org
project_id = None
# name of an EXISTING dataset within your GCP project to store temporary results. Required if you plan to subset the data to run queries
scratch_dataset = "tempo_ql_scratch_mimic"
# directory to store temporary variables
variable_store_dir = "mimiciv_data"

# Gemini API key for the LLM-assisted authoring workflow. It is read from the
# GEMINI_API_KEY environment variable so the secret does not have to be saved
# inside the notebook; when the variable is unset this stays an empty string.
gemini_api_key = os.environ.get("GEMINI_API_KEY", "")

In [None]:
# Initialize query engine and variable store

# Fail fast with a clear message: formatting an unset project_id into the
# connection string would otherwise silently produce "bigquery://None".
if not project_id:
    raise ValueError("Set `project_id` to your GCP project ID before initializing the dataset.")

# The scratch schema is only passed when a scratch dataset name is configured.
dataset = GenericDataset(f'bigquery://{project_id}', formats.mimiciv(), 
                        scratch_schema_name=f'{project_id}.{scratch_dataset}' if scratch_dataset is not None else None)

# Create the variable store directory if needed; exist_ok avoids the
# check-then-create race and makedirs also handles nested paths.
os.makedirs(variable_store_dir, exist_ok=True)
var_store = FileVariableStore(variable_store_dir)
query_engine = QueryEngine(dataset, variable_stores=[var_store])

In [None]:
# Run a TempoQL query for the `Anchor Age` field with the `patient` scope,
# then display the result as the cell output.
anchor_age = query_engine.query("{Anchor Age; scope = patient}")
anchor_age

In [None]:
# Run a TempoQL query for the `Respiratory Rate` field with the `chartevents`
# scope, then display the result as the cell output.
respiratory_rate = query_engine.query("{Respiratory Rate; scope = chartevents}")
respiratory_rate

In [None]:
# Equivalent BigQuery SQL code, executed directly through pandas-gbq so it can
# be compared with the TempoQL `Respiratory Rate` query.

import pandas_gbq

# The SQL looks up the item IDs labelled 'Respiratory Rate' in d_items, then
# selects stay ID, chart time, item ID and value from chartevents, joined to
# icustays and to those item IDs, ordered by stay and time.
respiratory_rate_sql = """
    WITH matching_eventids AS (
        SELECT DISTINCT d.itemid AS itemid FROM `physionet-data.mimiciv_3_1_icu.d_items` d
        WHERE d.label = 'Respiratory Rate'
    )
    SELECT ce.stay_id AS stay_id, 
                    ce.charttime AS time, 
                    ce.itemid AS eventtype,
                    ce.value AS value
                FROM `physionet-data.mimiciv_3_1_icu.chartevents` ce
                INNER JOIN `physionet-data.mimiciv_3_1_icu.icustays` stays
                ON ce.stay_id = stays.stay_id
                INNER JOIN matching_eventids 
                ON ce.itemid = matching_eventids.itemid
                ORDER BY stay_id, time ASC
"""

# Queries are run in (and billed to) the configured GCP project.
df = pandas_gbq.read_gbq(respiratory_rate_sql, project_id=project_id)
df

In [None]:
# Optional: restrict queries to a sample of the trajectory IDs so they run
# faster. This needs the scratch dataset configured earlier to exist in your
# GCP project, because the sample is stored inside the database to speed up
# queries.
all_trajectory_ids = dataset.get_ids()
dataset.set_trajectory_ids(all_trajectory_ids, sample_size=0.1)

# The chosen sample persists across sessions. To go back to the full set of
# trajectories, call:
# dataset.reset_trajectory_ids()

In [None]:
# Start the interactive query interface, handing it the Gemini API key used for
# the LLM-assisted authoring workflow, and display whatever it returns.
interactive_session = query_engine.interactive(api_key=gemini_api_key)
interactive_session