<a href="https://colab.research.google.com/github/thxsxth/RLMimic/blob/master/Model/Model_building.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Let's start by importing the CSV files

In [1]:
cd 'drive/My Drive/sepsis3-cohort'

/content/drive/My Drive/sepsis3-cohort


In [0]:
import torch
import numpy as np
import pandas as pd
import datetime as dt
import random
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset,DataLoader
import torch.nn.functional as F
import os
import glob

In [0]:
# Pivoted vitals; `charttime` is parsed as a datetime.
vitals = pd.read_csv(
    '../Vitals/Vitals.csv',
    parse_dates=['charttime'],
)
# Pivoted SOFA table; both interval bounds are parsed as datetimes.
sofa = pd.read_csv(
    '../pivoted_sofa/pivoted_sofa.csv',
    parse_dates=['endtime', 'starttime'],
)


In [0]:
# Cohort + demographics; `intime` / `outtime` are parsed as datetimes.
co = pd.read_csv(
    'sepsis3_adults.csv',
    parse_dates=['intime', 'outtime'],
)

In [0]:
# Fluid inputs from the CV source, timestamped by `charttime`.
input_cv = pd.read_csv(
    '../Fluids/cleaned_input_cv.csv',
    parse_dates=['charttime'],
)
# Fluid inputs from the MV source, recorded as start/end intervals.
input_mv = pd.read_csv(
    '../Fluids/input_eventsMV.csv',
    parse_dates=['starttime', 'endtime'],
)

In [0]:
## Keep only the rows whose ICU stay belongs to the cohort
vitals = vitals.loc[vitals['icustay_id'].isin(set(co['icustay_id']))]
sofa = sofa.loc[sofa['icustay_id'].isin(set(co['icustay_id']))]

In [0]:
# vitals.to_csv('vitals_demo.csv')

### Cleaning Dataframes

In [0]:
# 
# Replace missing values in the four vasopressor rate columns with 0.
sofa = sofa.fillna(
    dict.fromkeys(
        ['rate_epinephrine', 'rate_norepinephrine', 'rate_dopamine', 'rate_dobutamine'],
        0,
    )
)

In [10]:
# Total vasopressor rate: the four individual rates added together.
sofa['vaso_rate'] = (
    sofa['rate_epinephrine']
    .add(sofa['rate_norepinephrine'])
    .add(sofa['rate_dobutamine'])
    .add(sofa['rate_dopamine'])
)
sofa.vaso_rate.describe()

count    3.821099e+06
mean     1.534592e-01
std      1.389909e+00
min      0.000000e+00
25%      0.000000e+00
50%      0.000000e+00
75%      0.000000e+00
max      5.247707e+02
Name: vaso_rate, dtype: float64

In [0]:
# Narrow the SOFA frame to the id, timestamp, treatment-rate and score columns kept from here on.
sofa = sofa.loc[:, [
    'icustay_id',
    'endtime',
    'vaso_rate',
    'rate_norepinephrine',
    'rate_dopamine',
    'rate_dobutamine',
    'urineoutput',
    'cardiovascular_24hours',
    'liver_24hours',
    'cns_24hours',
    'renal_24hours',
    'SOFA_24hours',
]]

In [0]:
## Cleaning and combining the fluid inputs from the CV and MV sources
input_cv = input_cv[['icustay_id', 'charttime', 'tev']]

# MV events are keyed on `endtime`; copy it to `charttime` so both sources share
# a merge key, and rename `tev` so the two volume columns stay distinguishable.
# Built with rename/assign rather than column assignment on a sliced frame,
# which avoids chained-assignment (SettingWithCopy) warnings.
input_mv = (
    input_mv[['icustay_id', 'endtime', 'tev']]
    .rename(columns={'tev': 'tev_mv'})
    .assign(charttime=lambda frame: frame['endtime'])
)

# The outer merge keeps events that appear in only one of the two sources.
input_fluids = input_mv.merge(
    input_cv, on=['icustay_id', 'charttime'], how='outer'
)[['icustay_id', 'charttime', 'tev', 'tev_mv']]

# A volume missing from one source contributes nothing to the total.
input_fluids = input_fluids.fillna({'tev': 0, 'tev_mv': 0})
input_fluids = input_fluids.assign(volume=input_fluids['tev'] + input_fluids['tev_mv'])

# Restrict to ICU stays that belong to the cohort.
input_fluids = input_fluids[input_fluids.icustay_id.isin(set(co.icustay_id))]

Include age, gender, and BMI in the vitals DataFrame

In [0]:
## Index the cohort by ICU stay id so demographics can be looked up per vitals row
# Guarded so that re-running this cell does not fail once `icustay_id` is
# already the index (set_index would raise KeyError on the second run).
if co.index.name != 'icustay_id':
    co = co.set_index('icustay_id')

# A single label lookup for all three columns; rows come back in the same order
# as `vitals`, so the raw values can be attached positionally.
demographics = co.loc[vitals['icustay_id'], ['age', 'is_male', 'bmi']]

# `assign` returns a new frame, avoiding column assignment on the filtered slice.
vitals = vitals.assign(
    age=demographics['age'].values,
    gender=demographics['is_male'].values,
    bmi=demographics['bmi'].values,
)


In [23]:
# Preview the first five rows of each prepared frame.
(sofa.head(n=5), vitals.head(n=5), co.head(n=5))

(   icustay_id             endtime  ...  renal_24hours  SOFA_24hours
 0      200001 2181-11-25 19:00:00  ...              2             3
 1      200001 2181-11-25 20:00:00  ...              2             3
 2      200001 2181-11-25 21:00:00  ...              2             3
 3      200001 2181-11-25 22:00:00  ...              2             3
 4      200001 2181-11-25 23:00:00  ...              3             4
 
 [5 rows x 11 columns],
    subject_id  icustay_id           charttime  ...   age  gender       bmi
 0       55973      200001 2181-11-25 19:06:00  ...  61.0       0  21.06264
 1       55973      200001 2181-11-25 19:07:00  ...  61.0       0  21.06264
 2       55973      200001 2181-11-25 19:08:00  ...  61.0       0  21.06264
 3       55973      200001 2181-11-25 19:14:00  ...  61.0       0  21.06264
 4       55973      200001 2181-11-25 19:16:00  ...  61.0       0  21.06264
 
 [5 rows x 14 columns],
             Unnamed: 0  hadm_id  excluded  ... abx_poe sepsis-3 sofa>=2
 icus

### Necessary Imports

In [0]:
import torch
import numpy as np
import pandas as pd
import datetime as dt
import random
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset,DataLoader
import torch.nn.functional as F
import os
import glob

In [0]:
# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

#### Let's define training and validation cohorts

In [0]:
# Reproducible 80/20 split of the ICU stays. A dedicated, seeded generator keeps
# the split identical across kernel restarts without touching NumPy's global
# random state.
SPLIT_SEED = 0
split_rng = np.random.RandomState(SPLIT_SEED)

cohort_ids = list(co.index)
training_cohort = split_rng.choice(cohort_ids, int(0.8 * len(cohort_ids)), replace=False)
# Every stay not drawn for training; sorted so the list order is deterministic too.
valid_cohort = sorted(set(cohort_ids) - set(training_cohort))

In [36]:
# Sanity check: the cohorts share no ids, and their sizes add up to the full cohort.
(
    set(valid_cohort) & set(training_cohort),
    len(valid_cohort) + len(training_cohort) == len(co.index),
)

(set(), True)

As expected, the two cohorts have no elements in common, and every ICU stay is accounted for.

In [0]:
class MyDataLoader():
  """
  Iterable that yields batches of patient trajectories and treatments.

  Every pass over the loader shuffles the ICU stay ids, walks them in
  consecutive groups of ``batch_size`` and, for each stay, builds an
  hourly-resampled frame from the vitals, fluid-input and SOFA tables.
  Only full batches are produced; a trailing partial batch is dropped.
  """

  # Columns of each trajectory array, in output order.
  STATE_COLUMNS=['age','bmi','gender','HeartRate','SysBP','DiasBP','MeanBP','RespRate','TempC','SpO2',
                 'liver_24hours','cardiovascular_24hours','cns_24hours','renal_24hours','SOFA_24hours']
  # Columns of each treatment array, in output order.
  ACTION_COLUMNS=['vaso_rate','volume']

  def __init__(self,sofa_df=sofa,vitals_df=vitals,input_df=input_fluids,cohort=co,batch_size=16,icustay_list=training_cohort):
    """
    sofa_df (pd.DataFrame): Pivoted SOFA dataframe (also includes vasopressors)
    vitals_df (pd.DataFrame): Pivoted vitals
    input_df (pd.DataFrame): Input fluids (CV and MV combined)
    cohort (pd.DataFrame): Cohort dataframe (contains some demographics)
    batch_size (int): Number of ICU stays per batch
    icustay_list (iterable): ICU stay ids to draw batches from

    Note: the defaults are the module-level objects as they exist when this
    class statement runs; re-run the class cell after rebuilding them.
    """
    self.sofa=sofa_df
    self.vitals=vitals_df
    self.batch_size=batch_size
    self.icustays=icustay_list
    self.input_fluids=input_df
    self.cohort=cohort

  def _patient_frame(self,pat):
    """
    Build the hourly frame for one ICU stay.

    Rows of the vitals, fluid-input and SOFA tables for ``pat`` are stacked on
    a shared datetime index (``charttime`` / ``endtime``) and resampled to
    hourly bins with ``.last()``.
    """
    ## TODO: restrict to the data points after suspected infection
    frames=[self.vitals[self.vitals.icustay_id==pat].set_index('charttime'),
            self.input_fluids[self.input_fluids.icustay_id==pat].set_index('charttime'),
            self.sofa[self.sofa.icustay_id==pat].set_index('endtime')]
    # NOTE(review): `.last()` keeps a single `volume` entry per hour; confirm
    # whether fluid volumes recorded within the same hour should be summed instead.
    return pd.concat(frames).resample('H').last()

  def __iter__(self):
    """
    Yield ``(trajectories, treatments)`` for one shuffled pass over the stays.

    Both are lists with one array per stay in the batch: ``trajectories[i]``
    has one row per hourly bin and one column per entry of ``STATE_COLUMNS``;
    ``treatments[i]`` has the same rows and the ``ACTION_COLUMNS`` columns.
    Arrays are not padded, so their lengths can differ within a batch.
    """
    # Shuffle a copy so the caller's id container (by default the shared
    # training cohort) is not reordered as a side effect of iterating.
    patients=np.random.permutation(self.icustays)

    # `+1` keeps the last full batch when the number of stays is an exact
    # multiple of batch_size; a trailing partial batch is still dropped.
    for k in range(0,len(patients)-self.batch_size+1,self.batch_size):
      batch_patients=patients[k:k+self.batch_size]   # batch_size ICU stay ids
      trajectories=[]
      treatments=[]

      for pat in batch_patients:
        df=self._patient_frame(pat)
        trajectories.append(df[self.STATE_COLUMNS].values)
        treatments.append(df[self.ACTION_COLUMNS].values)

      ## TODO: probably pad trajectories (and return lengths) so a batch can be stacked into a tensor
      yield trajectories, treatments

              

         
     


#### Testing the data loader

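The loader works when it returns lists: the `trajectories` list has length L (one entry per patient in the batch), and each `trajectory[i]` has shape T×D (T is the number of hourly time steps, D is the feature dimension).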

In [0]:
# Loader built entirely from the constructor defaults (training cohort, batch size 16).
training_loader = MyDataLoader()

In [82]:
# Peek at the first five batches: list length and the shape of the first array in each.
for i, (trajectory, treatment) in zip(range(5), training_loader):
  print('Batch number {}'.format(i))
  print(len(trajectory), trajectory[0].shape)
  print(len(treatment), treatment[0].shape)

Batch number 0
16 (51, 13)
16 (51, 2)
Batch number 1
16 (56, 13)
16 (56, 2)
Batch number 2
16 (28, 13)
16 (28, 2)
Batch number 3
16 (69, 13)
16 (69, 2)
Batch number 4
16 (1125, 13)
16 (1125, 2)
