In [1]:
import root_config as rc
from detectdd import config
import pandas as pd

# NOTE(review): kept ahead of the remaining detectdd imports, as in the original;
# presumably it prepares paths/environment they rely on -- confirm before reordering.
rc.configure()

from detectdd.auth_bigquery import BigQueryClient
from detectdd.serializer import Serializer

# Load both halves of the cohort written by 01-cohort.ipynb.
# Only the file reads can raise FileNotFoundError, so only they sit inside the try.
serializer = Serializer()
try:
    cohort_with_icd = serializer.read_cohort()
    cohort_without_icd = serializer.read_cohort_with_no_icd()
except FileNotFoundError as err:
    # Re-raise with an actionable message while keeping the original error as the cause.
    raise FileNotFoundError(
        "Need to run [01-cohort.ipynb] at least once to create the cohort file in the /out directory"
    ) from err

print(len(cohort_with_icd))
print(len(cohort_without_icd))

# ignore_index=True gives the combined frame a fresh 0..n-1 index instead of
# carrying over the two frames' original labels, which may repeat across them.
cohort = pd.concat([cohort_with_icd, cohort_without_icd], ignore_index=True)

# Authenticated BigQuery client used by the query cells below.
big_query = BigQueryClient.auth()

cohort.describe()

Loaded cohort from ..\out\cohort-full.out
23924
Loaded cohort from ..\out\cohort-no-icd-file.out
34705
<google.oauth2.credentials.Credentials object at 0x0000020236FB10F0> mimic-iv-desktop


Unnamed: 0,subject_id,hadm_id,stay_id,dose_b_time,event_count,num_icd_codes
count,58629.0,58629.0,58629.0,58629,58629.0,23924.0
mean,14997054.585342,25003742.782787,34987309.864197,2154-02-17 19:00:19.010388,3.192976,1.841331
min,10001884.0,20001361.0,30000484.0,2110-01-18 21:00:00,0.0,1.0
25%,12498965.0,22536779.0,32498449.0,2133-12-03 22:32:00,1.0,1.0
50%,15014371.0,25015072.0,34993600.0,2153-12-24 22:32:00,2.0,1.0
75%,17470891.0,27515027.0,37452361.0,2174-08-12 08:30:00,4.0,2.0
max,19999828.0,29999098.0,39996783.0,2209-05-30 09:27:00,20.0,10.0
std,2868790.150191,2880294.195572,2874105.296021,,2.563357,1.103696


In [None]:

from detectdd.query_multiplexer import WhereClauseGenerator
from detectdd.query_multiplexer import QueryMultiplexer
import pandas as pd
from detectdd.auth_bigquery import BigQueryClient

# Fetch vital signs recorded after each cohort dose event and save them to CSV.
# Relies on `cohort`, `big_query` and `config` defined in the first cell.

# Length of the observation window that starts at each dose_b_time (720 min = 12 h).
VITALS_WINDOW_MINUTES = 720

query_multiplexer = QueryMultiplexer(big_query)

# Query template. `$where` is a placeholder, presumably filled in by the
# multiplexer with the predicates generated from `where_fragment` -- confirm
# against QueryMultiplexer. Rows where all four vitals are NULL are excluded.
query = """
SELECT stay_id, subject_id, charttime, heart_rate, sbp, dbp, mbp
FROM `physionet-data.mimiciv_derived.vitalsign`
WHERE ($where) 
    AND (heart_rate IS NOT NULL OR sbp IS NOT NULL OR dbp IS NOT NULL OR mbp IS NOT NULL)
"""

# Predicate for one (stay_id, dose_b_time) pair: chart times strictly after the
# dose and strictly before the end of the observation window.
where_fragment = (
    "(stay_id= $stay_id AND charttime > '$dose_b_time' "
    f"AND charttime < DATETIME_ADD('$dose_b_time', INTERVAL {VITALS_WINDOW_MINUTES} MINUTE))"
)

# Map every stay_id to the list of its dose_b_time values.
multimap_data = {
    stay_id: dose_times.tolist()
    for stay_id, dose_times in cohort.groupby('stay_id')['dose_b_time']
}

vitals_data = query_multiplexer.multiplex_query(
    query,
    multi_map_data=multimap_data,
    where_clause=WhereClauseGenerator(where_fragment, "stay_id", "dose_b_time"),
)

# Persist the vitals so downstream notebooks can load them without re-querying.
vitals_data.to_csv(config.out_dir / 'vitals_data.csv', index=False)
 

In [None]:
# Show the vitals frame as this cell's output (a bare last expression is rendered by Jupyter).
vitals_data

In [None]:

# Write the vitals frame to 'vitals_data-10.csv' in the output directory, omitting the index column.
vitals_copy_path = config.out_dir / 'vitals_data-10.csv'
vitals_data.to_csv(vitals_copy_path, index=False)