In [2]:
import root_config as rc

rc.configure()

from detectdd.auth_bigquery import BigQueryClient
from detectdd.serializer import Serializer

try:
    cohort = Serializer().read_cohort()  # need to run 01-cohort.ipynb to produce the cohort
except FileNotFoundError:
    raise Exception("Need to run [01-cohort.ipynb] at least once to create the cohort file in the /out directory")


big_query = BigQueryClient.auth()

cohort.describe()


Loaded cohort from ..\out\cohort-full.out
<google.oauth2.credentials.Credentials object at 0x0000023169B17220> mimic-iv-desktop


Unnamed: 0,subject_id,hadm_id,stay_id,dose_b_time,event_count,num_icd_codes
count,23924.0,23924.0,23924.0,23924,23924.0,23924.0
mean,14952657.24373,25047174.107256,34995468.839534,2154-08-17 15:48:47.302291,3.019855,1.841331
min,10001884.0,20022095.0,30002654.0,2110-02-10 06:30:00,0.0,1.0
25%,12418872.25,22585455.0,32500877.0,2134-04-14 20:37:30,1.0,1.0
50%,15013503.0,25099207.0,34974279.0,2154-11-04 05:00:00,2.0,1.0
75%,17444849.0,27582114.25,37503072.0,2175-05-29 13:51:45,4.0,2.0
max,19995595.0,29997500.0,39995213.0,2209-05-30 09:27:00,17.0,10.0
std,2885815.078681,2884855.279498,2889069.445647,,2.558822,1.103696


## kidney diagnoses

In [3]:
kidney_diagnoses_query = """
SELECT d.subject_id, d.hadm_id, d.icd_code, i.long_title
FROM `physionet-data.mimiciv_hosp.diagnoses_icd` d
JOIN `physionet-data.mimiciv_hosp.d_icd_diagnoses` i ON d.icd_code = i.icd_code
WHERE i.long_title LIKE '%kidney%'
"""

kidney_diagnoses_results = big_query.query(kidney_diagnoses_query).result().to_dataframe()


In [4]:
kidney_diagnoses_results

Unnamed: 0,subject_id,hadm_id,icd_code,long_title
0,10004235,24181354,58389,"Nephritis and nephropathy, not specified as ac..."
1,10030753,26429826,V1651,Family history of malignant neoplasm of kidney
2,10116621,28927488,7944,Nonspecific abnormal results of function study...
3,10172206,26783176,5839,"Nephritis and nephropathy, not specified as ac..."
4,10597808,23151286,5932,"Cyst of kidney, acquired"
...,...,...,...,...
198914,19297319,27429566,I130,Hypertensive heart and chronic kidney disease ...
198915,12489419,20908680,I130,Hypertensive heart and chronic kidney disease ...
198916,16134144,27744298,I130,Hypertensive heart and chronic kidney disease ...
198917,19312736,21311611,I130,Hypertensive heart and chronic kidney disease ...


## kidney prescriptions

In [5]:
kidney_prescriptions_query = """
WITH kidney_patients AS (
    SELECT DISTINCT d.subject_id, d.hadm_id
    FROM `physionet-data.mimiciv_hosp.diagnoses_icd` d
    JOIN `physionet-data.mimiciv_hosp.d_icd_diagnoses` i ON d.icd_code = i.icd_code
    WHERE i.long_title LIKE '%kidney%'
    LIMIT 100000
)

SELECT p.subject_id, p.hadm_id, p.drug
FROM `physionet-data.mimiciv_hosp.prescriptions` p
JOIN kidney_patients kp ON p.subject_id = kp.subject_id AND p.hadm_id = kp.hadm_id
LIMIT 100000
"""

kidney_prescriptions_results = big_query.query(kidney_prescriptions_query).result().to_dataframe()


In [6]:
kidney_prescriptions_results

Unnamed: 0,subject_id,hadm_id,drug
0,16536691,25068958,Heparin
1,19295613,25201441,Albuterol 0.083% Neb Soln
2,15483933,25067431,Aspirin
3,10099032,24194487,Calcium Gluconate
4,16758327,22779550,Calcium Gluconate
...,...,...,...
99995,17277385,25913000,Carvedilol
99996,19907026,24069513,Carvedilol
99997,17906511,22562301,Carvedilol
99998,19714547,24785207,Carvedilol


## kidney lab

In [7]:
kidney_lab_query = """
WITH kidney_patients AS (
    SELECT DISTINCT d.subject_id, d.hadm_id
    FROM `physionet-data.mimiciv_hosp.diagnoses_icd` d
    JOIN `physionet-data.mimiciv_hosp.d_icd_diagnoses` i ON d.icd_code = i.icd_code
    WHERE i.long_title LIKE '%kidney%'
    LIMIT 100000
)

SELECT l.subject_id, l.hadm_id, i.label, l.value, l.valuenum, l.valueuom
FROM `physionet-data.mimiciv_hosp.labevents` l
JOIN `physionet-data.mimiciv_hosp.d_labitems` i ON l.itemid = i.itemid
WHERE i.label IN ('Creatinine', 'BUN') 
AND l.subject_id IN (SELECT subject_id FROM kidney_patients)
AND l.hadm_id IN (SELECT hadm_id FROM kidney_patients)
LIMIT 100000
"""

kidney_lab_results = big_query.query(kidney_lab_query).result().to_dataframe()


In [8]:
kidney_lab_results

Unnamed: 0,subject_id,hadm_id,label,value,valuenum,valueuom
0,10015860,25103777,Creatinine,8.1,8.1,mg/dL
1,10016084,23267624,Creatinine,4.5,4.5,mg/dL
2,10067389,23577021,Creatinine,4.2,4.2,mg/dL
3,10094811,29711874,Creatinine,5.7,5.7,mg/dL
4,10108523,26060973,Creatinine,5.2,5.2,mg/dL
...,...,...,...,...,...,...
99995,15622498,21977260,Creatinine,1.8,1.8,mg/dL
99996,15788134,25022809,Creatinine,1.8,1.8,mg/dL
99997,15814592,29816032,Creatinine,1.8,1.8,mg/dL
99998,15906662,24072687,Creatinine,1.8,1.8,mg/dL


In [9]:
merged_data = kidney_diagnoses_results.merge(kidney_lab_results, on=['subject_id', 'hadm_id'], how='left').merge(kidney_prescriptions_results, on=['subject_id', 'hadm_id'], how='left')

In [10]:
merged_data

Unnamed: 0,subject_id,hadm_id,icd_code,long_title,label,value,valuenum,valueuom,drug
0,10004235,24181354,58389,"Nephritis and nephropathy, not specified as ac...",Creatinine,7.3,7.3,mg/dL,Amiodarone
1,10004235,24181354,58389,"Nephritis and nephropathy, not specified as ac...",Creatinine,7.3,7.3,mg/dL,Bisacodyl
2,10004235,24181354,58389,"Nephritis and nephropathy, not specified as ac...",Creatinine,1.8,1.8,mg/dL,Amiodarone
3,10004235,24181354,58389,"Nephritis and nephropathy, not specified as ac...",Creatinine,1.8,1.8,mg/dL,Bisacodyl
4,10030753,26429826,V1651,Family history of malignant neoplasm of kidney,,,,,NIFEdipine CR
...,...,...,...,...,...,...,...,...,...
589702,19312736,21311611,I130,Hypertensive heart and chronic kidney disease ...,Creatinine,1.3,1.3,mg/dL,Vitamin D
589703,19312736,21311611,I130,Hypertensive heart and chronic kidney disease ...,Creatinine,1.3,1.3,mg/dL,Meropenem
589704,19312736,21311611,I130,Hypertensive heart and chronic kidney disease ...,Creatinine,1.3,1.3,mg/dL,PredniSONE
589705,19312736,21311611,I130,Hypertensive heart and chronic kidney disease ...,Creatinine,1.3,1.3,mg/dL,QUEtiapine Fumarate


In [11]:
# merged_data.to_csv('merged_kidney_data.csv', index=False)

In [13]:
drug_combinations_query = """
WITH kidney_patients AS (
    SELECT DISTINCT d.subject_id, d.hadm_id
    FROM `physionet-data.mimiciv_hosp.diagnoses_icd` d
    JOIN `physionet-data.mimiciv_hosp.d_icd_diagnoses` i ON d.icd_code = i.icd_code
    WHERE i.long_title LIKE '%kidney%'
),

drugs_within_same_hour AS (
    SELECT 
        p1.subject_id,
        p1.hadm_id,
        p1.drug AS drug_a,
        p2.drug AS drug_b,
        p1.starttime AS starttime_a,
        p2.starttime AS starttime_b,
        COUNT(*) OVER (PARTITION BY p1.subject_id, p1.hadm_id, p1.drug, p2.drug) AS event_count
    FROM `physionet-data.mimiciv_hosp.prescriptions` p1
    JOIN `physionet-data.mimiciv_hosp.prescriptions` p2 
        ON p1.subject_id = p2.subject_id AND p1.hadm_id = p2.hadm_id
    WHERE p1.drug <> p2.drug 
        AND ABS(TIMESTAMP_DIFF(p1.starttime, p2.starttime, MINUTE)) <= 60
        AND p1.subject_id IN (SELECT subject_id FROM kidney_patients)
        AND p1.hadm_id IN (SELECT hadm_id FROM kidney_patients)
)

SELECT count(*) FROM drugs_within_same_hour
--LIMIT 100000
"""

drugs_combinations_results = big_query.query(drug_combinations_query).result().to_dataframe()

drugs_combinations_results.describe()


Unnamed: 0,f0_
count,1.0
mean,40442822.0
std,
min,40442822.0
25%,40442822.0
50%,40442822.0
75%,40442822.0
max,40442822.0


In [None]:
drugs_combinations_results