In [1]:
import root_config as rc

rc.configure()

from detectdd.auth_bigquery import BigQueryClient
from detectdd.serializer import Serializer

try:
    cohort = Serializer().read_cohort()  # need to run 01-cohort.ipynb to produce the cohort
except FileNotFoundError:
    raise Exception("Need to run [01-cohort.ipynb] at least once to create the cohort file in the /out directory")


big_query = BigQueryClient.auth()

cohort.describe()


Loaded cohort from ..\out\cohort-full.out
<google.oauth2.credentials.Credentials object at 0x00000149A09A0DC0> lucky-curve-395616


Unnamed: 0,subject_id,hadm_id,stay_id,dose_b_time,event_count,num_icd_codes
count,13918.0,13918.0,13918.0,13918,13918.0,13918.0
mean,14962882.262107,24985829.341213,34988872.550582,2154-11-03 04:32:33.138382,2.140609,1.859103
min,10001884.0,20022095.0,30004144.0,2110-02-10 06:30:00,0.0,1.0
25%,12504186.0,22503715.0,32511600.0,2133-06-25 10:45:00,1.0,1.0
50%,14978206.0,25025631.0,34986873.0,2155-12-23 12:44:30,2.0,2.0
75%,17399295.0,27440064.0,37510396.25,2176-03-09 16:44:00,3.0,2.0
max,19995595.0,29997500.0,39987478.0,2208-06-15 19:48:00,9.0,10.0
std,2838772.908197,2855966.941545,2884324.657779,,1.619336,1.087754


## kidney diagnoses

In [2]:
kidney_diagnoses_query = """
SELECT d.subject_id, d.hadm_id, d.icd_code, i.long_title
FROM `physionet-data.mimiciv_hosp.diagnoses_icd` d
JOIN `physionet-data.mimiciv_hosp.d_icd_diagnoses` i ON d.icd_code = i.icd_code
WHERE i.long_title LIKE '%kidney%'
"""

kidney_diagnoses_results = big_query.query(kidney_diagnoses_query).result().to_dataframe()


In [3]:
kidney_diagnoses_results

Unnamed: 0,subject_id,hadm_id,icd_code,long_title
0,10004235,24181354,58389,"Nephritis and nephropathy, not specified as ac..."
1,10030753,26429826,V1651,Family history of malignant neoplasm of kidney
2,10116621,28927488,7944,Nonspecific abnormal results of function study...
3,10172206,26783176,5839,"Nephritis and nephropathy, not specified as ac..."
4,10597808,23151286,5932,"Cyst of kidney, acquired"
...,...,...,...,...
198914,19297319,27429566,I130,Hypertensive heart and chronic kidney disease ...
198915,12489419,20908680,I130,Hypertensive heart and chronic kidney disease ...
198916,16134144,27744298,I130,Hypertensive heart and chronic kidney disease ...
198917,19312736,21311611,I130,Hypertensive heart and chronic kidney disease ...


## kidney prescriptions

In [4]:
kidney_prescriptions_query = """
WITH kidney_patients AS (
    SELECT DISTINCT d.subject_id, d.hadm_id
    FROM `physionet-data.mimiciv_hosp.diagnoses_icd` d
    JOIN `physionet-data.mimiciv_hosp.d_icd_diagnoses` i ON d.icd_code = i.icd_code
    WHERE i.long_title LIKE '%kidney%'
    LIMIT 100000
)

SELECT p.subject_id, p.hadm_id, p.drug
FROM `physionet-data.mimiciv_hosp.prescriptions` p
JOIN kidney_patients kp ON p.subject_id = kp.subject_id AND p.hadm_id = kp.hadm_id
LIMIT 100000
"""

kidney_prescriptions_results = big_query.query(kidney_prescriptions_query).result().to_dataframe()


In [5]:
kidney_prescriptions_results

Unnamed: 0,subject_id,hadm_id,drug
0,10771068,24650455,Heparin
1,12723597,28612481,Heparin
2,15533039,28396110,Albuterol 0.083% Neb Soln 1Neb
3,10386303,23372208,Calcium Gluconate
4,11166600,25863147,EPINEPHrine (for dilution)
...,...,...,...
99995,18376342,23636482,Calcium Acetate
99996,16273932,25757574,Calcium Acetate
99997,17341130,28017025,Calcium Acetate
99998,18300417,26122760,Calcium Acetate


## kidney lab

In [6]:
kidney_lab_query = """
WITH kidney_patients AS (
    SELECT DISTINCT d.subject_id, d.hadm_id
    FROM `physionet-data.mimiciv_hosp.diagnoses_icd` d
    JOIN `physionet-data.mimiciv_hosp.d_icd_diagnoses` i ON d.icd_code = i.icd_code
    WHERE i.long_title LIKE '%kidney%'
    LIMIT 100000
)

SELECT l.subject_id, l.hadm_id, i.label, l.value, l.valuenum, l.valueuom
FROM `physionet-data.mimiciv_hosp.labevents` l
JOIN `physionet-data.mimiciv_hosp.d_labitems` i ON l.itemid = i.itemid
WHERE i.label IN ('Creatinine', 'BUN') 
AND l.subject_id IN (SELECT subject_id FROM kidney_patients)
AND l.hadm_id IN (SELECT hadm_id FROM kidney_patients)
LIMIT 100000
"""

kidney_lab_results = big_query.query(kidney_lab_query).result().to_dataframe()


In [7]:
kidney_lab_results

Unnamed: 0,subject_id,hadm_id,label,value,valuenum,valueuom
0,10014136,24097334,Creatinine,11.8,11.8,mg/dL
1,10015860,22413744,Creatinine,4.1,4.1,mg/dL
2,10049941,21629437,Creatinine,7.3,7.3,mg/dL
3,10099032,24194487,Creatinine,6.6,6.6,mg/dL
4,10119992,27298430,Creatinine,4.5,4.5,mg/dL
...,...,...,...,...,...,...
99995,15360211,28546355,Creatinine,1.8,1.8,mg/dL
99996,15519663,29912576,Creatinine,___,1.8,mg/dL
99997,15575292,26609301,Creatinine,1.8,1.8,mg/dL
99998,15624993,24300186,Creatinine,1.8,1.8,mg/dL


In [8]:
merged_data = kidney_diagnoses_results.merge(kidney_lab_results, on=['subject_id', 'hadm_id'], how='left').merge(kidney_prescriptions_results, on=['subject_id', 'hadm_id'], how='left')

In [9]:
merged_data

Unnamed: 0,subject_id,hadm_id,icd_code,long_title,label,value,valuenum,valueuom,drug
0,10004235,24181354,58389,"Nephritis and nephropathy, not specified as ac...",Creatinine,4.8,4.8,mg/dL,Bag
1,10004235,24181354,58389,"Nephritis and nephropathy, not specified as ac...",Creatinine,2.5,2.5,mg/dL,Bag
2,10030753,26429826,V1651,Family history of malignant neoplasm of kidney,,,,,Zolpidem Tartrate
3,10116621,28927488,7944,Nonspecific abnormal results of function study...,Creatinine,1.5,1.5,mg/dL,
4,10172206,26783176,5839,"Nephritis and nephropathy, not specified as ac...",Creatinine,1.2,1.2,mg/dL,Bag
...,...,...,...,...,...,...,...,...,...
582925,19312736,21311611,I130,Hypertensive heart and chronic kidney disease ...,Creatinine,0.9,0.9,mg/dL,Metoprolol Tartrate
582926,19312736,21311611,I130,Hypertensive heart and chronic kidney disease ...,Creatinine,0.9,0.9,mg/dL,Pantoprazole
582927,19312736,21311611,I130,Hypertensive heart and chronic kidney disease ...,Creatinine,0.9,0.9,mg/dL,QUEtiapine Fumarate
582928,19312736,21311611,I130,Hypertensive heart and chronic kidney disease ...,Creatinine,0.9,0.9,mg/dL,Amiodarone


In [12]:
merged_data.to_csv('merged_kidney_data.csv', index=False)