In [95]:
from __future__ import absolute_import
from __future__ import print_function

import numpy as np
import argparse
import os
import imp
import re

from mimic3models.patient_embedding import utils
from mimic3benchmark.readers import PatientEmbeddingReader, InHospitalMortalityReader

from mimic3models.preprocessing import DiscretizerContinuous, Normalizer
from mimic3models import common_utils

from mimic3models.pytorch_models.embedding.dataset.utils import PatientEmbeddingDataset
from mimic3models.pytorch_models.classification.dataset.utils import ClassificationDataset
from mimic3models.pytorch_models.embedding.train.train import EmbeddingTrainer

import torch

from torch.utils.data import DataLoader

In [84]:
# Root directory of the in-hospital-mortality data; later cells join split
# sub-directories onto it with os.path.join.
# NOTE(review): hard-coded absolute, user-specific path — consider making it configurable.
data = "/home/neil.jethani/patient_embedding/data/in_hospital_mortality"

In [88]:
# Build readers, discretizers, normalizers

# NOTE(review): the reader named `train_reader` reads the 'val' split and
# `val_reader` reads 'val_test' — confirm this pairing is intentional.
train_reader = InHospitalMortalityReader(
    dataset_dir=os.path.join(data, 'val'),
    listfile=os.path.join(data, 'val', 'listfile.csv'),
    period_length=48.0,
)

val_reader = InHospitalMortalityReader(
    dataset_dir=os.path.join(data, 'val_test'),
    listfile=os.path.join(data, 'val_test', 'listfile.csv'),
    period_length=48.0,
)

print("Initializing Discretizer and Normalizer")
discretizer = DiscretizerContinuous(
    timestep=1.0,
    store_masks=False,
    impute_strategy='previous',
    start_time='zero',
)

# Element [1] of the transform() result for the first example is used as the header.
discretizer_header = discretizer.transform(train_reader.read_example(0)["X"])[1]
# Keep the positions of header entries whose text does not contain "->".
cont_channels = [pos for pos, column in enumerate(discretizer_header) if "->" not in column]

normalizer = Normalizer(fields=cont_channels)  # choose here which columns to standardize
# Path of the saved normalizer parameters that are loaded below.
normalizer_state = os.path.join(
    "/home/neil.jethani/patient_embedding/src/mimic3models/patient_embedding",
    'ptemb_ts{}.input_str:{}.start_time:zero.normalizer'.format(1.0, 'previous'),
)
normalizer.load_params(normalizer_state)

Initializing Discretizer and Normalizer


In [125]:
#Create Dataset + DataLoader
print("Building Dataset")

# Both datasets share the same discretizer, normalizer and embed_method;
# they differ only in the reader and in the return_name flag.
train_dataset = ClassificationDataset(
    reader=train_reader,
    discretizer=discretizer,
    normalizer=normalizer,
    return_name=False,
    embed_method='DAE',
)
val_dataset = ClassificationDataset(
    reader=val_reader,
    discretizer=discretizer,
    normalizer=normalizer,
    return_name=True,
    embed_method='DAE',
)

Building Dataset


In [131]:
val_dataset[0]['X'].shape

(408,)

In [12]:
import pandas as pd

In [50]:
pd.DataFrame(x['src'])

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16
0,-0.047935,0.113479,-0.454836,-2.235415,-1.776871,-2.599113,-1.296513,-0.3705,0.791892,0.027862,0.167818,-0.00093,0.903197,1.002486,0.425492,-1.629359,0.058328
1,-0.047935,0.096025,-0.454836,-2.235415,-1.776871,-2.599113,-1.296513,-0.3705,0.791892,0.027862,0.147034,-0.00093,1.694514,0.956628,0.425492,-1.629359,0.067352
2,-0.047935,0.096025,-0.454836,-0.321417,0.449932,0.210765,0.840339,-0.3705,1.032938,0.027862,0.15223,-0.00093,0.903197,1.185922,0.09538,-1.629359,0.067352
3,-0.047935,0.106498,-0.454836,-0.321417,0.449932,0.210765,0.840339,-0.3705,0.88831,0.027862,0.167818,-0.000307,0.11188,1.231781,0.09538,-1.629359,0.067352
4,-0.047935,0.134425,-0.454836,-0.321417,0.449932,0.210765,0.840339,-0.792414,1.177566,0.027862,0.198995,-0.000307,0.428406,1.140063,0.09538,-1.629359,0.067352
5,-0.047935,0.130934,-0.454836,-0.321417,0.449932,0.210765,0.840339,-0.792414,0.791892,0.027862,0.204191,-0.00093,0.428406,1.277639,0.09538,-1.629359,0.067352
6,-0.047935,0.123952,-0.454836,0.635583,0.449932,0.522974,0.840339,-0.792414,0.93652,0.027862,0.214583,0.000316,-0.362911,1.736228,0.323918,-1.629359,0.067352
7,-0.047935,0.123952,-0.454836,0.635583,0.449932,0.522974,0.840339,-0.792414,0.791892,0.027862,0.193799,-0.001553,3.435411,1.185922,0.323918,-1.629359,0.067352
8,-0.047935,0.144897,-0.454836,0.635583,0.449932,0.522974,0.840339,-0.792414,0.743683,0.027862,0.235367,0.000316,3.435411,1.64451,0.323918,-1.629359,0.067352
9,-0.047935,0.134425,-0.454836,0.635583,0.449932,0.522974,0.840339,-0.792414,0.88831,0.027862,0.209387,-0.00093,0.270143,1.461075,0.323918,-1.629359,0.067352


In [51]:
# Sum, over every row of x['src'], the squared difference to the first row,
# then divide by the fixed constant 23*17.
# NOTE(review): 23 and 17 look like (rows - 1) and feature count — confirm against x['src'].
reference_row = x['src'][0]
y = 0
for t in x['src']:
    squared_diff = (t - reference_row) ** 2
    y += sum(squared_diff)

y / (23 * 17)

1.6294472507200672

In [48]:
((.011/45)*1024)*10

2.503111111111111

In [71]:
from torch.utils.data import Sampler

class Selector(Sampler):
    """Sampler yielding a contiguous run of entries from an index list.

    The run is addressed in multiples of ``bs``: positions ``mn * bs`` up to
    (but not including) ``mx * bs`` of ``indices`` are yielded in order.

    Args:
        dataset: object whose ``len()`` defines the default index list;
            only consulted when ``indices`` is None.
        mn: start of the run, in units of ``bs``.
        mx: end of the run (exclusive), in units of ``bs``.
        bs: multiplier applied to ``mn`` and ``mx`` (default 1024).
        indices: optional explicit index list to draw from.
    """

    def __init__(self, dataset, mn, mx, bs=1024, indices=None):
        self.min = mn * bs
        self.max = mx * bs
        if indices is None:
            # Default: every position of the dataset, in order.
            indices = list(range(len(dataset)))
        self.indices = indices

    def __iter__(self):
        # Out-of-range positions raise IndexError when they are reached.
        for position in range(self.min, self.max):
            yield self.indices[position]

    def __len__(self):
        return self.max - self.min

class Selector2(Sampler):
    """Sampler yielding ``count`` consecutive entries of an index list.

    Args:
        dataset: object whose ``len()`` defines the default index list;
            only consulted when ``indices`` is None.
        ind: position in the index list at which the run starts.
        indices: optional explicit index list to draw from.
        count: number of consecutive entries to yield (default 30, the
            value that was previously hard-coded).

    Raises:
        ValueError: if ``count`` is negative.
    """

    def __init__(self, dataset, ind, indices=None, count=30):
        if count < 0:
            raise ValueError("count must be non-negative, got {}".format(count))
        self.indices = list(range(len(dataset))) \
            if indices is None else indices
        self.ind = ind
        self.count = count

    def __iter__(self):
        # Out-of-range positions raise IndexError when they are reached.
        return (self.indices[i] for i in range(self.ind, self.ind + self.count))

    def __len__(self):
        # Must equal the number of indices __iter__ yields; DataLoader derives
        # its own length from this value.
        return self.count

In [None]:
299*1024+110+16

In [132]:
print("Building DataLoader")
# Training loader: sequential batches of 512 over the whole training dataset.
trainLoader = DataLoader(train_dataset, batch_size=512, shuffle=False, num_workers=1)

# Validation loader: one sample per batch, restricted by Selector2 to a short
# run of consecutive indices starting at 306286. The sampler is built from
# val_dataset (the dataset this loader wraps) so its index list matches the
# data being indexed; it was previously built from train_dataset.
# NOTE(review): confirm the start offset lies within len(val_dataset).
valLoader = DataLoader(val_dataset, batch_size=1, shuffle=False, num_workers=1,
                       sampler=Selector2(val_dataset, 306286))


Building DataLoader


In [75]:
trainLoader.dataset.get_number_of_visits()

7915

In [None]:
#Train Model
print("Creating Trainer")
# `model` is given as a path to a saved DAE checkpoint file and `output_dir`
# as a directory path; the remaining keywords are passed through unchanged.
trainer = EmbeddingTrainer(
    model="/home/neil.jethani/patient_embedding/models/patient_embedding/DAE/2019-07-22/best/DAE.ep49",
    output_dir="/home/neil.jethani/patient_embedding/models/patient_embedding/DAE/delete",
    train_dataloader=trainLoader,
    test_dataloader=valLoader,
    embed_method="DAE",
    layers=0,
    heads=0,
    MSprop=0,
    lr=0.01,
    betas=(0.9, 0.98),
    eps=1e-9,
    factor=2,
    warmup=4000,
    log_freq=1,
    with_cuda=False,
)

In [133]:
# Grab only the first batch of the training loader for inspection.
# next(iter(...)) is used instead of a `for i, data in ...: break` loop so the
# notebook-level name `data` (the dataset root path set in an earlier cell)
# is not rebound to a batch, which would break re-running the reader cell.
x = next(iter(trainLoader))

In [135]:
x['X'].size()

torch.Size([512, 408])

In [None]:
trainer.test(0)

In [None]:
import random
# random.shuffle reorders its argument in place and returns None, so its
# result must not be assigned back (that would replace the list with None).
# NOTE(review): assumes trainer.dataset_list['test'] is a mutable sequence — confirm.
random.shuffle(trainer.dataset_list['test'])
trainer.dataset_list['test']

In [None]:
trainer.dataset_list['train']

In [None]:
print("Training Start")
# NOTE(review): `args` is not defined in any visible cell (argparse is imported
# but never invoked here) — confirm where args.epochs is meant to come from.
for epoch in range(args.epochs):
    # One pass over the training data, then checkpoint this epoch.
    trainer.train(epoch)
    trainer.save(epoch)
    if valLoader is None:
        continue
    # Evaluate only when a validation loader exists.
    trainer.test(epoch)
trainer.save_best()
trainer.write_loss()

In [None]:
import torch
import torch.nn.functional as F
import torch.nn as nn

In [None]:
# Random (512, 24, 17) tensor, then one all-zero row prepended along dim 1.
# The pad tuple runs from the last dimension backwards:
# (last-dim left, last-dim right, dim-1 front, dim-1 back, dim-0 front, dim-0 back).
x = torch.randn(512, 24, 17)
x = F.pad(x, pad=(0, 0, 1, 0, 0, 0), mode='constant', value=0)
print(x.size())
x[1, 0, :]

In [None]:
x[:,-1, :].unsqueeze(-2).expand(-1, 24, -1).size()

In [None]:
y = x[:, 1:, :]
y.size()[1]

In [None]:
emb = x[:, 0, :].unsqueeze(-2).expand(-1, y.size()[1], -1)
emb.size()

In [None]:
emb = nn.Embedding(3, 10, padding_idx=0)
emb(torch.LongTensor([0,1,2]))

In [None]:
torch.LongTensor(range(1,25))

In [99]:
t = torch.rand(10, 24, 17)

In [77]:
t[:, 23, :].unsqueeze(1).expand(-1, 24, -1)

tensor([[[0.5433, 0.5669, 0.1180, 0.4833, 0.2528, 0.1582, 0.5227, 0.3581,
          0.7977, 0.3902, 0.2812, 0.1074, 0.6871, 0.7518, 0.1470, 0.1667,
          0.7554],
         [0.5433, 0.5669, 0.1180, 0.4833, 0.2528, 0.1582, 0.5227, 0.3581,
          0.7977, 0.3902, 0.2812, 0.1074, 0.6871, 0.7518, 0.1470, 0.1667,
          0.7554],
         [0.5433, 0.5669, 0.1180, 0.4833, 0.2528, 0.1582, 0.5227, 0.3581,
          0.7977, 0.3902, 0.2812, 0.1074, 0.6871, 0.7518, 0.1470, 0.1667,
          0.7554],
         [0.5433, 0.5669, 0.1180, 0.4833, 0.2528, 0.1582, 0.5227, 0.3581,
          0.7977, 0.3902, 0.2812, 0.1074, 0.6871, 0.7518, 0.1470, 0.1667,
          0.7554],
         [0.5433, 0.5669, 0.1180, 0.4833, 0.2528, 0.1582, 0.5227, 0.3581,
          0.7977, 0.3902, 0.2812, 0.1074, 0.6871, 0.7518, 0.1470, 0.1667,
          0.7554],
         [0.5433, 0.5669, 0.1180, 0.4833, 0.2528, 0.1582, 0.5227, 0.3581,
          0.7977, 0.3902, 0.2812, 0.1074, 0.6871, 0.7518, 0.1470, 0.1667,
          0.7554]

In [79]:
val_dataset.get_number_of_visits()

7844

In [107]:
torch.cat((t, t.mean(1).unsqueeze(1)), dim=1)[0,:,:]

tensor([[0.1432, 0.4390, 0.4101, 0.4472, 0.2437, 0.1816, 0.7163, 0.9704, 0.7701,
         0.5479, 0.7096, 0.7371, 0.5672, 0.8374, 0.0588, 0.2799, 0.9094],
        [0.1458, 0.9845, 0.3800, 0.9496, 0.6697, 0.9460, 0.0100, 0.3126, 0.4404,
         0.4333, 0.8974, 0.6661, 0.0474, 0.0040, 0.2338, 0.6218, 0.7950],
        [0.7391, 0.1776, 0.3844, 0.5500, 0.5094, 0.7308, 0.3808, 0.3324, 0.5538,
         0.0131, 0.3966, 0.6333, 0.0750, 0.4559, 0.4892, 0.8924, 0.1431],
        [0.8294, 0.7924, 0.5776, 0.8835, 0.5446, 0.1719, 0.4953, 0.5111, 0.0217,
         0.9746, 0.7972, 0.8229, 0.7079, 0.1877, 0.4452, 0.0889, 0.6590],
        [0.5213, 0.3955, 0.4710, 0.5380, 0.6827, 0.5926, 0.5466, 0.1783, 0.4587,
         0.5938, 0.0411, 0.3161, 0.9585, 0.1430, 0.8473, 0.0125, 0.6892],
        [0.9770, 0.8905, 0.7875, 0.9030, 0.9668, 0.9870, 0.7614, 0.2302, 0.4598,
         0.9460, 0.9996, 0.4139, 0.7679, 0.5740, 0.3945, 0.9917, 0.4071],
        [0.9779, 0.6108, 0.1203, 0.1470, 0.7982, 0.3797, 0.9532, 0.509

In [123]:
z = torch.ones(5,5,5)

In [122]:
# Probe: build a banded matrix from z by keeping everything on or above the
# first sub-diagonal and then zeroing everything on or above the second
# super-diagonal.
z_np = z.numpy()
print(np.triu(z_np, -1))
print(np.triu(z_np, 2))
test = np.triu(z_np, -1)
beyond_band = np.triu(z_np, 2) == 1
test[beyond_band] = 0
print(test)

[[1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1.]
 [0. 1. 1. 1. 1.]
 [0. 0. 1. 1. 1.]
 [0. 0. 0. 1. 1.]]
[[0. 0. 1. 1. 1.]
 [0. 0. 0. 1. 1.]
 [0. 0. 0. 0. 1.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]]
[[1. 1. 0. 0. 0.]
 [1. 1. 1. 0. 0.]
 [0. 1. 1. 1. 0.]
 [0. 0. 1. 1. 1.]
 [0. 0. 0. 1. 1.]]


In [124]:
print(np.triu(z.numpy(), -1))

[[[1. 1. 1. 1. 1.]
  [1. 1. 1. 1. 1.]
  [0. 1. 1. 1. 1.]
  [0. 0. 1. 1. 1.]
  [0. 0. 0. 1. 1.]]

 [[1. 1. 1. 1. 1.]
  [1. 1. 1. 1. 1.]
  [0. 1. 1. 1. 1.]
  [0. 0. 1. 1. 1.]
  [0. 0. 0. 1. 1.]]

 [[1. 1. 1. 1. 1.]
  [1. 1. 1. 1. 1.]
  [0. 1. 1. 1. 1.]
  [0. 0. 1. 1. 1.]
  [0. 0. 0. 1. 1.]]

 [[1. 1. 1. 1. 1.]
  [1. 1. 1. 1. 1.]
  [0. 1. 1. 1. 1.]
  [0. 0. 1. 1. 1.]
  [0. 0. 0. 1. 1.]]

 [[1. 1. 1. 1. 1.]
  [1. 1. 1. 1. 1.]
  [0. 1. 1. 1. 1.]
  [0. 0. 1. 1. 1.]
  [0. 0. 0. 1. 1.]]]


In [139]:
z[:, :-1, :].size()

torch.Size([5, 4, 5])

In [143]:
import torch.nn.functional as F

In [148]:
# Banded mask over 24 positions for a batch of 1024: entry (i, j) is 1 only
# when j is within one step of i.

#Add Connections to Neighbors
# triu(k=-1) keeps j >= i - 1 and tril(k=1) keeps j <= i + 1; both act on the
# last two axes of the 3-D array.
src_mask = np.tril(np.triu(np.ones((1024, 24, 24)), -1), 1)
src_mask = torch.tensor(src_mask).float()

#Append Connections to Embedding Token
# One extra column and one extra row, filled with 1, are appended at the end
# of the last two dimensions; the batch dimension is left unpadded.
src_mask = F.pad(src_mask, pad=(0, 1, 0, 1, 0, 0), mode='constant', value=1)
print(src_mask.size())

torch.Size([1024, 25, 25])


In [152]:
src_mask[0][10:, 10:]

tensor([[1., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1.],
        [1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1.],
        [0., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1.],
        [0., 0., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1.],
        [0., 0., 0., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0., 1.],
        [0., 0., 0., 0., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 1.],
        [0., 0., 0., 0., 0., 1., 1., 1., 0., 0., 0., 0., 0., 0., 1.],
        [0., 0., 0., 0., 0., 0., 1., 1., 1., 0., 0., 0., 0., 0., 1.],
        [0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 0., 0., 0., 0., 1.],
        [0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 0., 0., 0., 1.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 0., 0., 1.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 0., 1.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 1.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1.],
        [1., 1., 1.,