how to run things

A simple(r) version of mimic.py

In [1]:
import os
import sys
module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)

In [2]:
# data related
import pandas as pd
from src.data.data_loader import MIMICDataModule
import json
from sklearn.model_selection import KFold
import numpy as np

# use lightning framework
import pytorch_lightning as pl
from pytorch_lightning import seed_everything
from pytorch_lightning.loggers import CSVLogger
from pytorch_lightning.callbacks import LearningRateMonitor
from pytorch_lightning.callbacks.early_stopping import EarlyStopping
from pytorch_lightning.callbacks import ModelCheckpoint

# import models
from src.models.models import *
from src.models.output import *

# other
import os

# plotting
import matplotlib.pyplot as plt

  from .autonotebook import tqdm as notebook_tqdm
  warn(f"Failed to load image Python extension: {e}")


In [3]:
seed_everything(1)

Global seed set to 1


1

### Data

In [4]:
df = pd.read_csv('../data/mimic.csv')
df.sort_values(by=['stay_id','timer'],inplace=True)
df.reset_index(drop=True,inplace=True)
df.head()

Unnamed: 0,rn,subject_id,hadm_id,stay_id,timer,timer_next,timer_prev,glc,glc_next,glc_lab,...,tpn_rate,dextrose_fluid,delta_t,msk0,sum_msk0,n,gender_m,msk,timer_d,timer_next_d
0,0,12466550,23998182,30000153,2174-09-29 20:12:00+00:00,2174-09-30 13:00:00+00:00,2174-09-29 12:09:00+00:00,185.0,139.0,,...,0.0,0.0,16.8,0.0,0.0,1,1.0,0.0,8.05,24.85
1,1,12466550,23998182,30000153,2174-09-30 13:00:00+00:00,2174-09-30 20:00:00+00:00,2174-09-29 20:12:00+00:00,139.0,137.0,,...,0.0,0.0,7.0,0.0,0.0,2,1.0,0.0,24.85,31.85
2,3,13180007,27543152,30000213,2162-06-21 06:06:00+00:00,2162-06-21 07:00:00+00:00,2162-06-21 05:40:00+00:00,125.0,108.0,,...,0.0,0.0,0.9,0.0,1.0,1,1.0,0.0,0.466667,1.366667
3,4,13180007,27543152,30000213,2162-06-21 07:00:00+00:00,2162-06-21 07:45:00+00:00,2162-06-21 06:06:00+00:00,108.0,97.0,,...,0.0,0.0,0.75,0.0,1.0,2,1.0,0.0,1.366667,2.116667
4,5,13180007,27543152,30000213,2162-06-21 07:45:00+00:00,2162-06-21 08:15:00+00:00,2162-06-21 07:00:00+00:00,97.0,156.0,,...,0.0,0.0,0.5,0.0,1.0,3,1.0,0.0,2.116667,2.616667


In [5]:
df.loc[df.stay_id == 30000213,:]

Unnamed: 0,rn,subject_id,hadm_id,stay_id,timer,timer_next,timer_prev,glc,glc_next,glc_lab,...,tpn_rate,dextrose_fluid,delta_t,msk0,sum_msk0,n,gender_m,msk,timer_d,timer_next_d
2,3,13180007,27543152,30000213,2162-06-21 06:06:00+00:00,2162-06-21 07:00:00+00:00,2162-06-21 05:40:00+00:00,125.0,108.0,,...,0.0,0.0,0.9,0.0,1.0,1,1.0,0.0,0.466667,1.366667
3,4,13180007,27543152,30000213,2162-06-21 07:00:00+00:00,2162-06-21 07:45:00+00:00,2162-06-21 06:06:00+00:00,108.0,97.0,,...,0.0,0.0,0.75,0.0,1.0,2,1.0,0.0,1.366667,2.116667
4,5,13180007,27543152,30000213,2162-06-21 07:45:00+00:00,2162-06-21 08:15:00+00:00,2162-06-21 07:00:00+00:00,97.0,156.0,,...,0.0,0.0,0.5,0.0,1.0,3,1.0,0.0,2.116667,2.616667
5,6,13180007,27543152,30000213,2162-06-21 08:15:00+00:00,2162-06-21 18:41:00+00:00,2162-06-21 07:45:00+00:00,156.0,164.0,,...,0.0,0.0,10.433333,0.0,1.0,4,1.0,0.0,2.616667,13.05
6,7,13180007,27543152,30000213,2162-06-21 18:41:00+00:00,2162-06-21 23:00:00+00:00,2162-06-21 08:15:00+00:00,164.0,225.0,,...,0.0,0.0,4.316667,0.0,1.0,5,1.0,0.0,13.05,17.366667
7,8,13180007,27543152,30000213,2162-06-21 23:00:00+00:00,2162-06-22 08:00:00+00:00,2162-06-21 18:41:00+00:00,225.0,296.0,,...,0.0,0.0,9.0,0.0,1.0,6,1.0,0.0,17.366667,26.366667
8,9,13180007,27543152,30000213,2162-06-22 08:00:00+00:00,2162-06-22 12:00:00+00:00,2162-06-21 23:00:00+00:00,296.0,239.0,,...,0.0,0.0,4.0,0.0,1.0,7,1.0,0.0,26.366667,30.366667


In [6]:
# subsample the data
icu_sample = np.random.choice(df.stay_id.unique(),200)
df = df.loc[df.stay_id.isin(icu_sample),:]
print(df.shape)

(4075, 54)


In [7]:
with open('../data/feature_sets.json', 'r') as f:
    feature_sets = json.load(f)
features = feature_sets['test_features']

In [8]:
features['timevarying'][0]

'glc'

In [9]:
dims = {'input_dim_t':len(features['timevarying']),
             'input_dim_0':len(features['static']),
             'input_dim_i':len(features['intervention']),
             'hidden_dim_t':8,
             'hidden_dim_0':None,
             'hidden_dim_i':4,
             'input_size_update':len(features['timevarying'])+len(features['static'])}

In [16]:
mimic = MIMICDataModule(features,df,df,batch_size=128,testing = False)
mimic.prepare_data()
mimic.setup()

180it [00:00, 458.55it/s]
20it [00:00, 447.82it/s]
200it [00:00, 460.25it/s]


In [17]:
next(iter(mimic.train_dataloader()))

(tensor([[[ 0.8902,  0.2001],
          [ 0.7621,  0.4504],
          [ 0.7436,  0.3501],
          ...,
          [ 0.0000,  0.0000],
          [ 0.0000,  0.0000],
          [ 0.0000,  0.0000]],
 
         [[ 0.1643,  0.0000],
          [ 0.0623,  0.0000],
          [-0.2688,  0.0000],
          ...,
          [ 0.0000,  0.0000],
          [ 0.0000,  0.0000],
          [ 0.0000,  0.0000]],
 
         [[-0.1795,  0.0000],
          [ 0.0212,  0.0000],
          [ 0.0000,  0.0000],
          ...,
          [ 0.0000,  0.0000],
          [ 0.0000,  0.0000],
          [ 0.0000,  0.0000]],
 
         ...,
 
         [[ 0.0351,  0.0000],
          [-0.2054,  0.0000],
          [-0.0364,  0.0000],
          ...,
          [ 0.0000,  0.0000],
          [ 0.0000,  0.0000],
          [ 0.0000,  0.0000]],
 
         [[-0.2054,  0.0000],
          [-0.2595,  0.0000],
          [ 0.3911,  0.0000],
          ...,
          [ 0.0000,  0.0000],
          [ 0.0000,  0.0000],
          [ 0.0000,  0.0000

### Models

In [32]:
from src.models.base import BaseModel,BaseModel
import torchctrnn as ct

from torch.nn.utils.parametrizations import spectral_norm

def ginv(x):
    x = x.copy()
    x = np.exp(x + np.log(140))
    return x

def g(x):
    x = x.copy()
    x = np.log(x) - np.log(140)
    return x

class Func(nn.Module):
    def __init__(self,input_dim,hidden_dim,output_dim):
        super().__init__()
        
        self.hidden_dim = hidden_dim
        self.input_dim = input_dim
        self.layers = nn.Sequential(
            spectral_norm(nn.Linear(input_dim, hidden_dim)),
            nn.Tanh(),
            spectral_norm(nn.Linear(hidden_dim, output_dim)),
        )

    def forward(self,hidden):
        output = self.layers(hidden)
        return output


class ctGRUModel(BaseModel):

    def __init__(self,dims,outputNN,preNN=nn.Identity(),NN0=nn.Identity(),learning_rate=1e-1,update_loss=0.1,merror=1e-2):
        func = Func(dims['hidden_dim_t'],50,dims['hidden_dim_t'])
        odenet = ct.NeuralODE(func,time_func='tanh',time_dependent=False,data_dependent=False,
                            solver='euler',solver_options={'step_size':1e-2})
        odernn = ct.ODEGRUCell(odenet,dims['input_size_update'],dims['hidden_dim_t'])
        outNN = outputNN(dims['hidden_dim_t'])
        super().__init__(odernn,outNN,preNN,NN0,dims,learning_rate,update_loss,merror)
        
        #self.save_hyperparameters({'net':'ctGRUModel'})


In [33]:
outputNN = GaussianOutputNNLL
model = ctGRUModel(dims,outputNN)

NeuralODE's forward method missing args: ['input', 't']. These are assumed not applicable


In [34]:
df.loc[df.glc.isnull(),:]

Unnamed: 0,rn,subject_id,hadm_id,stay_id,timer,timer_next,timer_prev,glc,glc_next,glc_lab,...,tpn_rate,dextrose_fluid,delta_t,msk0,sum_msk0,n,gender_m,msk,timer_d,timer_next_d


In [35]:
print('no change prediction error: {}'.format(np.sqrt(np.mean((df.glc - df.glc_next)**2))))

no change prediction error: 42.35775216538079


### Split

In [36]:
kf = KFold(n_splits=3)
ids_ = df.stay_id.unique()
splits = kf.split(ids_)

### Train

In [37]:
for i,(train_ids, test_ids) in enumerate(splits):

    df_test = df.loc[df[features['id']].isin(ids_[test_ids])].copy(deep=True)
    df_train = df.loc[df[features['id']].isin(ids_[train_ids])].copy(deep=True)

    mimic3 = MIMICDataModule(features,df_train,df_test,batch_size=32,testing = False)
    mimic3.prepare_data()
    mimic3.setup()
    
    checkpoint_callback = ModelCheckpoint(monitor='val_loss',save_top_k=1)

    # train
    early_stopping = EarlyStopping(monitor="val_loss",mode="min",verbose=True,patience=10,min_delta=0.0)  # mostly defaults
    trainer = pl.Trainer(max_epochs=1)
    trainer.fit(model, mimic3)
    trainer.test(model,mimic3)

    break

119it [00:00, 274.76it/s]
14it [00:00, 444.30it/s]
67it [00:00, 453.26it/s]
GPU available: True, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
Missing logger folder: /home/jupyter/irregular-ts/scripts/lightning_logs
107it [00:00, 433.48it/s]
12it [00:00, 451.70it/s]

  | Name     | Type               | Params
------------------------------------------------
0 | RNN      | ODEGRUCell         | 1.2 K 
1 | OutputNN | GaussianOutputNNLL | 82    
2 | preNN    | Identity           | 0     
3 | NN0      | Identity           | 0     
------------------------------------------------
1.3 K     Trainable params
0         Non-trainable params
1.3 K     Total params
0.005     Total estimated model params size (MB)


Sanity Checking DataLoader 0:   0%|          | 0/1 [00:00<?, ?it/s]

TypeError: 'float' object is not callable

In [38]:
predictions = trainer.predict(model,mimic3.test_dataloader())

Predicting: 3it [00:10,  3.38s/it]


In [None]:
predictions[0][:,0]

In [None]:
df.head()

In [40]:
len(predictions)

3

In [46]:
preds = torch.cat(predictions,dim=0).numpy()

In [54]:
df = pd.DataFrame(preds,columns=['rn','mu','sigma'])
df['rn'] = df.rn.astype(int)

In [55]:
df

Unnamed: 0,rn,mu,sigma
0,139,-0.224051,0.694301
1,141,-0.177733,0.679537
2,142,-0.166634,0.679351
3,143,-0.161637,0.681995
4,144,-0.164831,0.685558
...,...,...,...
1777,255894,-0.215576,0.677997
1778,255895,-0.217490,0.680621
1779,255896,-0.215389,0.678711
1780,255897,-0.216886,0.679910


0          139
1          141
2          142
3          143
4          144
         ...  
1777    255894
1778    255895
1779    255896
1780    255897
1781    255898
Name: rn, Length: 1782, dtype: int64