<a href="https://colab.research.google.com/github/thxsxth/RLMimic/blob/master/Dataset_and_DataLoader.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Switch the working directory to the cohort folder; the relative CSV paths used by the loading cells below are resolved from here.
cd 'drive/My Drive/sepsis3-cohort'

/content/drive/My Drive/sepsis3-cohort


## We will test trajectories and analyze any issues

In [None]:
import torch
import numpy as np
import pandas as pd
import datetime as dt
import random
import time
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset,DataLoader
import torch.nn.functional as F
import os
import glob
from torch.nn.utils.rnn import pad_sequence, pack_padded_sequence,pad_packed_sequence

### Making the necessary Imports

In [None]:
# Read the pivoted clinical tables; their timestamp columns are parsed while loading.
vitals=pd.read_csv(
    '../Vitals/Vitals.csv',
    parse_dates=['charttime'],
) #pivoted vitals
sofa=pd.read_csv(
    '../pivoted_sofa/pivoted_sofa.csv',
    parse_dates=['endtime','starttime'],
) #pivoted sofa
labs=pd.read_csv(
    '../pivoted_labs/Pivoted_labs.csv',
    parse_dates=['charttime'],
) #pivoted labs


In [None]:
# Carry the last recorded temperature / GCS value forward over missing readings.
# NOTE(review): the fill runs down the whole table rather than per icustay_id,
# so if a table holds several stays a value can carry over from the rows of a
# different stay — confirm this is intended.
vitals['TempC']=vitals.TempC.ffill()
sofa['GCS_min']=sofa.GCS_min.ffill()

# Lower-case copy of the stay id so `labs` can be filtered like the other tables.
labs['icustay_id']=labs.ICUSTAY_ID

# A missing vasopressor rate is treated as zero; 'Vaso' is the sum of the four rates.
vaso_rate_cols=['rate_epinephrine','rate_norepinephrine','rate_dopamine','rate_dobutamine']
sofa[vaso_rate_cols]=sofa[vaso_rate_cols].fillna(0)
sofa['Vaso']=sofa.rate_epinephrine+sofa.rate_norepinephrine+sofa.rate_dobutamine+sofa.rate_dopamine

In [None]:
# Cohort + demographics, indexed by ICU stay so a patient row can be fetched with co.loc[icustay_id].
co=pd.read_csv(
    'sepsis3_adults.csv',
    parse_dates=['intime','outtime','suspected_infection_time_poe'],
).set_index('icustay_id')

In [None]:
# Admissions table, indexed and sorted by ICU stay id.
admissions=(
    pd.read_csv('admissions.csv',parse_dates=['ADMITTIME','DISCHTIME','DEATHTIME'])
    .set_index('icustay_id')
    .sort_index()
)
# Column assignment aligns on the index, so each cohort row receives the
# DEATHTIME recorded under the same icustay_id.
co['death_time']=admissions['DEATHTIME']

In [None]:
# Fluid input events come from two source files with different timestamp columns.
input_cv=pd.read_csv(
    '../Fluids/cleaned_input_cv.csv',
    parse_dates=['charttime'],
)
input_mv=pd.read_csv(
    '../Fluids/input_eventsMV.csv',
    parse_dates=['starttime','endtime'],
)

In [None]:
# Combine the two fluid-input tables into one (icustay_id, charttime, volume) table.
input_cv=input_cv[['icustay_id','charttime','tev']]

# Build the MV frame with copy-returning methods: adding columns to a column
# slice of the original frame triggers pandas' SettingWithCopyWarning.
# The MV `endtime` serves as that table's charttime.
input_mv=(
    input_mv[['icustay_id','endtime','tev']]
    .rename(columns={'tev':'tev_mv'})
    .assign(charttime=lambda mv: mv['endtime'])
)

# The outer merge keeps events that appear in only one source; the missing side
# is filled with 0 so `volume` is the sum of both sources for a given
# (icustay_id, charttime) pair.
input_fluids=(
    input_mv.merge(input_cv,on=['icustay_id','charttime'],how='outer')
    [['icustay_id','charttime','tev','tev_mv']]
    .fillna({'tev':0,'tev_mv':0})
    .assign(volume=lambda fluids: fluids['tev']+fluids['tev_mv'])
    .drop(['tev','tev_mv'],axis=1)
)

In [None]:
# Preview the first rows of the combined fluid table.
input_fluids.head()

Unnamed: 0,icustay_id,charttime,volume
0,200001,2181-11-25 22:55:00,50.0
1,200001,2181-11-27 15:47:00,200.0
2,200001,2181-11-27 23:26:00,250.0
3,200010,2132-08-05 02:10:00,17.708
4,200010,2132-08-05 01:36:00,1000.0


### Let's look at trajectory lengths

### Helper Functions

In [2]:
# Reward weights: C1 scales the change in SOFA between consecutive steps,
# C2 is added when the score is unchanged and still above zero.
C1,C2=-0.125,-0.025

# `device` is used by the helpers and the dataset below, but no earlier cell
# defines it; set it here so the notebook runs from a fresh kernel.
device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')

def get_rewards(df,dead,terminal_reward=15):
  """
  Build the reward tensor for one patient trajectory.

  df: pandas DataFrame with a `SOFA_24hours` column, one row per time step
      in chronological order.
  dead (bool): True if the trajectory ends in death, False if it ends in survival.
  terminal_reward: magnitude of the final reward; -terminal_reward is appended
      when `dead` is true and +terminal_reward otherwise.

  Returns a float32 tensor on `device` holding len(df)-1 transition rewards
  followed by the terminal reward (for a non-empty df this is one value per
  row). Each transition reward is C1*(SOFA_{t+1}-SOFA_t), plus C2 when the
  score did not change and is still greater than zero.
  """
  sofa_scores=df.SOFA_24hours.values
  previous,current=sofa_scores[:-1],sofa_scores[1:]

  # Reward for the change in SOFA between consecutive steps
  change_reward=C1*(current-previous)
  # Extra term for steps where the score stays the same at a non-zero level
  plateau_reward=C2*((current==previous)&(current>0)).astype('int')

  terminal=-terminal_reward if dead else terminal_reward
  rewards=np.concatenate([change_reward+plateau_reward,[terminal]])

  return torch.as_tensor(rewards,dtype=torch.float32,device=device)
  
  


In [None]:
def get_mini_batch_mask(mini_batch, seq_lengths):
    """
    Build a 0/1 padding mask for a batch of padded sequences.

    mini_batch: tensor whose first two dimensions are (batch, time).
    seq_lengths: valid length of each sequence, one entry per batch row.

    Returns a float tensor of shape mini_batch.shape[0:2] with ones in the
    first seq_lengths[b] positions of row b and zeros elsewhere. The mask is
    created on the same device as `mini_batch`, so it does not depend on a
    notebook-level `device` variable.
    """
    batch_size, max_len = mini_batch.shape[0:2]
    lengths = torch.as_tensor(seq_lengths, device=mini_batch.device)[:batch_size].unsqueeze(1)
    positions = torch.arange(max_len, device=mini_batch.device).unsqueeze(0)
    # Broadcasting (1, time) against (batch, 1) marks every position that lies
    # before the end of its own sequence.
    return (positions < lengths).to(torch.get_default_dtype())

### Dataset class

In [None]:
class patient_dataset(Dataset):
  """
  Dataset of hourly per-patient trajectories.

  Items are assembled on demand from the notebook-level tables `vitals`,
  `sofa`, `input_fluids` and `co`, and use the notebook-level `device` and
  `get_rewards`, so those cells must have been run first.
  """
  # State features, in the column order of the returned trajectory tensor.
  _VITAL_COLS=['HeartRate','SysBP','DiasBP','MeanBP','RespRate','SpO2','TempC']
  _SOFA_COLS=['liver_24hours','cardiovascular_24hours','cns_24hours',
              'renal_24hours','SOFA_24hours']
  # Treatment features, in the column order of the returned treatments tensor.
  _TREATMENT_COLS=['Vaso','volume']

  def __init__(self,patient_ids,train=True):
    """
    patient_ids: list/np.array of icustay_id values, one per dataset item.
    train: stored on the instance; not read by the methods of this class.
    """
    self.ids=patient_ids
    self.train=train

  def __len__(self):
    """Number of patients in the dataset."""
    return len(self.ids)

  def __getitem__(self,idx):
    """
    Build the hourly trajectory of the patient at position `idx`.

    Returns (trajectory,treatments,rewards,dead):
      trajectory: float32 tensor on `device`, one row per hour, columns
                  _VITAL_COLS followed by _SOFA_COLS.
      treatments: float32 tensor on `device`, columns ['Vaso','volume'].
      rewards:    output of get_rewards for the same hourly frame.
      dead:       True when the cohort row has HOSPITAL_EXPIRE_FLAG == 1.
    """
    # TODO: also return age, gender and possibly weight/height.
    pat=self.ids[idx]
    hourly_cols=self._SOFA_COLS+self._TREATMENT_COLS

    pat_fluids=input_fluids[input_fluids.icustay_id==pat].set_index('charttime')
    pat_sofa=sofa[sofa.icustay_id==pat].set_index('endtime')
    # Sum per hour so several fluid events inside one hour add up. Only the
    # columns used below are kept before summing: summing ids is meaningless
    # and timestamp columns cannot be summed on recent pandas versions.
    pat_sofa=pd.concat([pat_sofa,pat_fluids])[hourly_cols].resample('h').sum()

    pat_vitals=vitals[vitals.icustay_id==pat].set_index('charttime')
    # Last non-missing value per hour for every feature ('h' is the hourly
    # alias; the upper-case 'H' spelling is deprecated in recent pandas).
    pat_df=pd.concat([pat_vitals,pat_sofa]).resample('h').last()[self._VITAL_COLS+hourly_cols]

    patient_row=co.loc[pat]
    dead=patient_row.HOSPITAL_EXPIRE_FLAG==1
    # Drop hours after the recorded death; without a recorded death time there
    # is no cut-off to apply.
    if dead and pd.notna(patient_row.death_time):
      pat_df=pat_df.truncate(after=patient_row.death_time)

    # Forward-fill gaps, then drop the leading hours that still lack a value.
    pat_df=pat_df.ffill().dropna()
    rewards=get_rewards(pat_df,dead)
    treatments=torch.as_tensor(pat_df[self._TREATMENT_COLS].values,
                               dtype=torch.float32,device=device)
    trajectory=torch.as_tensor(pat_df[self._VITAL_COLS+self._SOFA_COLS].values,
                               dtype=torch.float32,device=device)

    return trajectory,treatments, rewards,dead






In [None]:
def collate_train(batch_data):
  """
  Merge per-patient samples into one padded batch.

  batch_data: list with one (trajectory,treatments,rewards,dead) tuple per
  patient, so len(batch_data) is the batch size.

  Returns (padded_trajectories,padded_treatments,padded_rewards,mask,dead_):
  the three tensor sequences zero-padded to the longest sequence with the
  batch dimension first, the mask returned by get_mini_batch_mask for the
  unpadded trajectory lengths, and the list of `dead` flags in batch order.
  """
  trajectories,treatments,rewards,dead_=[],[],[],[]

  # Split the samples into one list per field
  for trajectory,treatment,reward,dead in batch_data:
    trajectories.append(trajectory)
    treatments.append(treatment)
    rewards.append(reward)
    dead_.append(dead)

  # Unpadded length of every trajectory, needed to build the mask
  seq_lens=[sequence.shape[0] for sequence in trajectories]

  padded_trajectories,padded_treatments,padded_rewards=[
      pad_sequence(sequences,batch_first=True)
      for sequences in (trajectories,treatments,rewards)
  ]
  mask=get_mini_batch_mask(padded_trajectories,seq_lens)

  return padded_trajectories,padded_treatments,padded_rewards,mask,dead_
