In [None]:
import json
import os
from pathlib import Path
from urllib.parse import quote_plus

import numpy as np
import pandas as pd

from tempo_ql import GenericDataset, formats, QueryEngine, FileVariableStore

In [22]:
# Local directory that holds the notebook's cached variables and data elements.
local_cache_dir = './_cache'
# exist_ok=True removes the check-then-create race and keeps the cell re-runnable.
os.makedirs(local_cache_dir, exist_ok=True)

In [23]:
# Start from the stock OMOP format with no table prefix and person_id as the ID field.
db_format = formats.omop(table_prefix='',
                         id_field='person_id')

# Override the concept-id column of the drug and condition tables. Tables are
# looked up by their 'source' name rather than by list position, so the override
# still reaches the right table if the order of db_format.tables ever changes.
concept_id_overrides = {
    'drug_exposure': 'drug_concept_id',
    'condition_occurrence': 'condition_concept_id',
}
tables_by_source = {table.get('source'): table for table in db_format.tables}
for source, concept_id_field in concept_id_overrides.items():
    if source not in tables_by_source:
        # Fail loudly instead of silently patching nothing (or the wrong table).
        raise KeyError(f"OMOP format has no table with source {source!r}")
    tables_by_source[source]['concept_id_field'] = concept_id_field

print(db_format.tables)

[{'source': 'drug_exposure', 'type': 'interval', 'id_field': 'person_id', 'concept_id_field': 'drug_concept_id', 'start_time_field': 'drug_exposure_start_datetime', 'end_time_field': 'drug_exposure_end_datetime', 'default_value_field': 'quantity', 'scope': 'Drug'}, {'source': 'condition_occurrence', 'type': 'interval', 'id_field': 'person_id', 'concept_id_field': 'condition_concept_id', 'start_time_field': 'condition_start_datetime', 'end_time_field': 'condition_end_datetime', 'scope': 'Condition'}, {'source': 'procedure_occurrence', 'type': 'event', 'id_field': 'person_id', 'concept_id_field': 'procedure_concept_id', 'time_field': 'procedure_datetime', 'scope': 'Procedure'}, {'source': 'observation', 'type': 'event', 'id_field': 'person_id', 'concept_id_field': 'observation_concept_id', 'time_field': 'observation_datetime', 'default_value_field': 'value_as_string', 'scope': 'Observation'}, {'source': 'measurement', 'type': 'event', 'id_field': 'person_id', 'concept_id_field': 'measure

In [None]:
# Define connection string for database.
# Credentials are read from db-details.json, which must provide the keys
# PG_USER, PG_PASS, DB_HOST and PORT.
with open("db-details.json", "r") as creds_file:  # context manager closes the handle
    creds = json.load(creds_file)

# Percent-encode user and password so characters such as '@', ':' or '/' in the
# credentials cannot be misread as URL delimiters.
pg_user = quote_plus(str(creds['PG_USER']))
pg_pass = quote_plus(str(creds['PG_PASS']))
# <DB_NAME> is a placeholder: replace it with the name of the database to use.
connection_string = f"postgresql+psycopg2://{pg_user}:{pg_pass}@{creds['DB_HOST']}:{creds['PORT']}/<DB_NAME>"

In [None]:
# Set up the variable store, the dataset and the query engine.

# Variable store rooted in a sub-directory of the local cache.
variables_dir = os.path.join(local_cache_dir, 'variables')
var_store = FileVariableStore(variables_dir)

# Dataset built from the connection string and the database format.
# <SCHEMA_NAME> and <USER> are placeholders to fill in before running.
dataset_options = dict(
    schema_name='<SCHEMA_NAME>',
    scratch_schema_name='scratch_<USER>',
    data_elements_cache_dir=os.path.join(local_cache_dir, 'data_elements'),
    verbose=True,
)
dataset = GenericDataset(connection_string, db_format, **dataset_options)

# Query engine over the dataset, with var_store as its only variable store.
query_engine = QueryEngine(dataset, variable_stores=[var_store])

In [7]:
# List the scopes the dataset exposes; these are the names used as `scope = ...` in queries.
query_engine.dataset.get_scopes()

['Condition',
 'Device',
 'Drug',
 'Measurement',
 'Observation',
 'Observation Period',
 'Person',
 'Procedure',
 'Visit']

In [None]:
# List the data elements available in the Drug scope, requesting their counts as well
names = query_engine.dataset.list_data_elements(scope='Drug', return_counts=True)
names

In [None]:
# Get semaglutide exposures: Drug-scope elements whose name matches
# "semaglutide" or "ozempic" (the trailing /i is presumably a case-insensitive
# flag — confirm against the TempoQL documentation).
sglt_oi = query_engine.query("{name contains /semaglutide|ozempic/i; scope = Drug}")
sglt_oi

In [None]:
# Outcome of interest: Condition-scope records with concept id 197320
# (Acute renal failure syndrome, SNOMED).
adr_oi = query_engine.query("{id equals 197320; scope = Condition}")
adr_oi

In [None]:
# Save both query results in the variable store under descriptive names,
# so that later queries can refer to them as semaglutide_rx and aki_outcome.
var_store['semaglutide_rx'] = sglt_oi
var_store['aki_outcome'] = adr_oi

In [13]:
### Cohort: renal failure within 90 days after the first semaglutide exposure, where that renal failure is the first known one (none recorded before the exposure)

In [None]:
# Cohort-of-interest query, written against the stored variables:
#   - first_rx is defined as `first starttime(semaglutide_rx)` taken from
#     #mintime to #maxtime;
#   - the main expression checks whether an aki_outcome exists from first_rx
#     to first_rx + 90 days;
#   - the where-clause requires that no aki_outcome exists before first_rx.
# NOTE(review): how rows rejected by `where` are represented in the result
# (missing vs. false) is not visible here — confirm in the TempoQL docs.
coi = query_engine.query('''
((exists aki_outcome 
  from first_rx to first_rx + 90 days) 
where (not exists aki_outcome before first_rx))
with first_rx as (
  first starttime(semaglutide_rx) 
  from #mintime to #maxtime
)
''')

Querying primary ID table (observation_period) to get min times
Querying primary ID table (observation_period) to get max times


In [15]:
# Count the entries of the cohort result whose value is greater than zero
(coi.get_values() > 0).sum()

np.int64(3)

In [None]:
# Number of distinct IDs appearing in the stored aki_outcome result
len(var_store["aki_outcome"].get_ids().unique())

59188

In [None]:
# Number of distinct IDs appearing in the stored semaglutide_rx result
len(var_store["semaglutide_rx"].get_ids().unique())

129

In [18]:
## Store the query results under more readable variable names
# NOTE(review): these are the same two assignments already made in an earlier
# cell of this notebook; this cell looks redundant — confirm and consider removing.
var_store['semaglutide_rx'] = sglt_oi
var_store['aki_outcome'] = adr_oi

## Other tests

In [None]:
# Test: does an aki_outcome record exist within 180 days after the start of a
# semaglutide_rx record?
# The query uses the names that this notebook stores in var_store
# (semaglutide_rx / aki_outcome). The former names adr_oi / sglt_oi are never
# stored here, so they only resolved when left over in the on-disk cache.
slgt_before_adr = query_engine.query('''
 exists aki_outcome from starttime(semaglutide_rx) to starttime(semaglutide_rx) + 180 days
''')

In [None]:
# Mean of the result values (the fraction of positives if the values are 0/1 — TODO confirm)
slgt_before_adr.get_values().mean()

In [None]:
# Display the query result
slgt_before_adr

In [None]:
# Keep only the entries whose value is greater than zero
slgt_before_adr.filter(slgt_before_adr.get_values() > 0)

In [None]:
# Renal-failure records of one hard-coded ID, read back from the variable store.
# Uses the stored name aki_outcome; "adr_oi" is never written to var_store by
# this notebook, so looking it up only worked with a stale on-disk cache.
aki_records = var_store["aki_outcome"]
aki_records.filter(aki_records.get_ids() == 8444035800)

In [None]:
# Semaglutide records of one hard-coded ID, read back from the variable store.
# Uses the stored name semaglutide_rx; "sglt_oi" is never written to var_store
# by this notebook, so looking it up only worked with a stale on-disk cache.
semaglutide_records = var_store["semaglutide_rx"]
semaglutide_records.filter(semaglutide_records.get_ids() == 8444035800)

In [None]:
# Same test with a shorter window: does an aki_outcome record exist within
# 30 days after the start of a semaglutide_rx record?
# The query uses the names that this notebook stores in var_store
# (semaglutide_rx / aki_outcome) instead of the never-stored adr_oi / sglt_oi.
slgt_before_adr = query_engine.query('''
 exists aki_outcome from starttime(semaglutide_rx) to starttime(semaglutide_rx) + 30 days
''')

In [None]:
# Mean of the result values (the fraction of positives if the values are 0/1 — TODO confirm)
slgt_before_adr.get_values().mean()

In [None]:
# Inspect the table definitions of the database format
db_format.tables

### Scratch

In [None]:
# Get all IDs known to the query engine (person IDs, given id_field='person_id' in db_format)
query_engine.get_ids()

In [None]:
# Run future queries only within a random subset if desired.
# The generator is seeded so the subset is reproducible, and IDs are drawn
# without replacement so the sample never contains the same ID twice
# (np.random.choice samples with replacement by default).
sample_size = 100
rng = np.random.default_rng(seed=0)
all_ids = np.asarray(query_engine.get_ids())
# Cap the size at the population so a small dataset does not raise.
random_sample = rng.choice(all_ids, size=min(sample_size, len(all_ids)), replace=False)
query_engine.dataset.set_trajectory_ids(random_sample)

In [None]:
# Perform one-off queries: here, Measurement-scope elements whose name matches "blood pressure".
# NOTE(review): the result holds blood-pressure measurements, not visits —
# consider renaming `visits` to something like `blood_pressure`.
visits = query_engine.query("{name contains /blood pressure/i; scope = Measurement}")
visits