In [1]:
from google.cloud import bigquery
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
import os

# Colab form field naming the Google Cloud project used for this session.
project_id = 'icu-data-260103'  # @param

# Export the project id through the environment before the client is built
# (presumably so the client can infer its default project -- confirm).
os.environ.update({"GOOGLE_CLOUD_PROJECT": project_id})

# BigQuery client bound to the US location.
client = bigquery.Client(location="US")

In [3]:
# Pull every column of the glucose/insulin table.
query = """
    SELECT *
    FROM `icu-data-260103.mimiciii_glucose.glucose_insulin_realtime`
"""

# Submitting the query is the API request; the location has to match the
# location of the dataset(s) referenced in the query.
query_job = client.query(query, location="US")

# Materialise the result and normalise all column names to lower case.
df = query_job.to_dataframe().rename(columns=str.lower)
print(df.shape)

(318998, 17)


### Create time variables

In [4]:
def hours_since_admission(timestamps, admission):
    """Express ``timestamps`` as hours elapsed since ``admission``.

    The difference is first floored to whole minutes (matching the legacy
    ``astype('timedelta64[m]')`` cast, which pandas >= 2.0 rejects), then
    divided by 60 and rounded to two decimals.

    Parameters
    ----------
    timestamps : pandas.Series
        Datetime-like values; missing entries (NaT) come back as NaN.
    admission : pandas.Series
        Datetime-like reference values aligned with ``timestamps``.

    Returns
    -------
    pandas.Series
        Float hours, rounded to 2 decimal places.
    """
    elapsed_minutes = np.floor((timestamps - admission).dt.total_seconds() / 60.)
    return (elapsed_minutes / 60.).round(2)


# Every timestamp column that is re-expressed relative to ICU admission.
# 'icu_admissiontime' itself is included and therefore becomes 0.0.
time_columns = ['timer', 'timer_dt', 'starttime', 'endtime',
                'icu_dischargetime', 'icu_admissiontime']

# Once converted, the columns hold floats; skip the conversion in that case so
# re-running this cell does not corrupt (or crash on) already-converted data.
if not pd.api.types.is_numeric_dtype(df['icu_admissiontime']):
    # Capture the reference once so the result does not depend on the order
    # in which the columns are overwritten.
    admission = df['icu_admissiontime'].copy()
    for column in time_columns:
        df[column] = hours_since_admission(df[column], admission)

### Set missing treatment values to zero

In [5]:
# Treatment columns whose missing entries are replaced by 0.
treatment_cols = [
    'input_short_injection',
    'input_short_push',
    'input_intermediate',
    'input_long',
    'input_hrs',
]
df.loc[:, treatment_cols] = df.loc[:, treatment_cols].fillna(0)

### Masks

In [6]:
# 1 where the current glucose value (glc) is missing, 0 otherwise.
df['msk0'] = df['glc'].isna().astype(int)

In [7]:
# 1 where the glc_dt value is missing, 0 otherwise.
df['msk'] = df['glc_dt'].isna().astype(int)

### Exclude if first row has no glucose

In [8]:
# Keep a row when at least one of glc / glc_dt is present; rows missing both
# are dropped. The deep copy detaches the result from the unfiltered frame.
has_any_glucose = df.glc.notnull() | df.glc_dt.notnull()
df = df.loc[has_any_glucose, :].copy(deep=True)

### Sort

In [9]:
# Order rows by ICU stay and then by time within the stay, and renumber the
# index 0..n-1 so it follows the sorted order.
df = (
    df
    .sort_values(by=['icustay_id', 'timer'])
    .reset_index(drop=True)
)

### View

In [10]:
# Show the cleaned and sorted frame (pandas renders a truncated preview).
df

Unnamed: 0,subject_id,hadm_id,icustay_id,icu_admissiontime,icu_dischargetime,timer,timer_dt,glc,glc_dt,input_short_injection,input_short_push,input_intermediate,input_long,starttime,endtime,input_hrs,infxstop,msk0,msk
0,55973,152234,200001,0.0,73.88,2.88,20.78,118.0,72.0,0.0,0.0,0.0,0.0,,,0.0,,0,0
1,55973,152234,200001,0.0,73.88,20.78,26.88,72.0,135.0,0.0,0.0,0.0,0.0,,,0.0,,0,0
2,55973,152234,200001,0.0,73.88,26.88,36.88,135.0,106.0,0.0,0.0,0.0,0.0,,,0.0,,0,0
3,55973,152234,200001,0.0,73.88,36.88,44.88,106.0,125.0,0.0,0.0,0.0,0.0,,,0.0,,0,0
4,55973,152234,200001,0.0,73.88,44.88,50.88,125.0,101.0,0.0,0.0,0.0,0.0,,,0.0,,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
314195,69587,158288,299998,0.0,46.60,10.88,17.72,196.0,235.0,4.0,0.0,0.0,0.0,,,0.0,,0,0
314196,69587,158288,299998,0.0,46.60,17.72,23.37,235.0,162.0,6.0,0.0,0.0,0.0,,,0.0,,0,0
314197,69587,158288,299998,0.0,46.60,23.37,29.20,162.0,137.0,3.0,0.0,0.0,0.0,,,0.0,,0,0
314198,69587,158288,299998,0.0,46.60,29.20,35.55,137.0,189.0,0.0,0.0,0.0,0.0,,,0.0,,0,0


In [11]:
# Inspect the rows whose glc_dt value is missing (the rows flagged msk == 1).
df[df['glc_dt'].isna()]

Unnamed: 0,subject_id,hadm_id,icustay_id,icu_admissiontime,icu_dischargetime,timer,timer_dt,glc,glc_dt,input_short_injection,input_short_push,input_intermediate,input_long,starttime,endtime,input_hrs,infxstop,msk0,msk
9,11861,192256,200010,0.0,23.17,17.13,22.40,253.0,,6.0,0.0,0.0,0.0,,,0.0,,0,1
14,76603,179633,200024,0.0,9.13,0.43,1.33,81.0,,0.0,0.0,0.0,0.0,,,0.0,,0,1
16,41710,181955,200028,0.0,69.68,5.02,13.28,258.0,,6.0,0.0,0.0,40.0,,,0.0,,0,1
19,41710,181955,200028,0.0,69.68,23.83,29.12,335.0,,10.0,0.0,0.0,0.0,,,0.0,,0,1
21,41710,181955,200028,0.0,69.68,36.77,38.77,288.0,,0.0,0.0,0.0,0.0,,,0.0,,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
314149,58128,108958,299956,0.0,654.45,563.17,568.62,183.0,,6.0,0.0,0.0,0.0,,,0.0,,0,1
314152,58128,108958,299956,0.0,654.45,580.08,582.53,185.0,,0.0,0.0,0.0,0.0,,,0.0,,0,1
314170,50334,129555,299957,0.0,50.15,5.30,5.48,87.0,,0.0,0.0,0.0,0.0,4.57,5.48,1.0,0.0,0,1
314179,50334,129555,299957,0.0,50.15,13.30,13.35,105.0,,0.0,2.0,0.0,0.0,10.30,13.35,2.0,0.0,0,1


### Save

In [12]:
# Write the prepared frame to disk without the row index.
output_path = '../../data/treatment_only_analysis.csv'
df.to_csv(output_path, index=False)

In [13]:
# Display the frame once more after it has been written to CSV.
df

Unnamed: 0,subject_id,hadm_id,icustay_id,icu_admissiontime,icu_dischargetime,timer,timer_dt,glc,glc_dt,input_short_injection,input_short_push,input_intermediate,input_long,starttime,endtime,input_hrs,infxstop,msk0,msk
0,55973,152234,200001,0.0,73.88,2.88,20.78,118.0,72.0,0.0,0.0,0.0,0.0,,,0.0,,0,0
1,55973,152234,200001,0.0,73.88,20.78,26.88,72.0,135.0,0.0,0.0,0.0,0.0,,,0.0,,0,0
2,55973,152234,200001,0.0,73.88,26.88,36.88,135.0,106.0,0.0,0.0,0.0,0.0,,,0.0,,0,0
3,55973,152234,200001,0.0,73.88,36.88,44.88,106.0,125.0,0.0,0.0,0.0,0.0,,,0.0,,0,0
4,55973,152234,200001,0.0,73.88,44.88,50.88,125.0,101.0,0.0,0.0,0.0,0.0,,,0.0,,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
314195,69587,158288,299998,0.0,46.60,10.88,17.72,196.0,235.0,4.0,0.0,0.0,0.0,,,0.0,,0,0
314196,69587,158288,299998,0.0,46.60,17.72,23.37,235.0,162.0,6.0,0.0,0.0,0.0,,,0.0,,0,0
314197,69587,158288,299998,0.0,46.60,23.37,29.20,162.0,137.0,3.0,0.0,0.0,0.0,,,0.0,,0,0
314198,69587,158288,299998,0.0,46.60,29.20,35.55,137.0,189.0,0.0,0.0,0.0,0.0,,,0.0,,0,0
