In [1]:
from __future__ import absolute_import
from __future__ import print_function

import numpy as np
import argparse
import os
import imp
import re

from mimic3models.patient_embedding import utils
from mimic3benchmark.readers import DecompensationReader

from mimic3models.preprocessing import DiscretizerContinuous, Normalizer
from mimic3models import common_utils

from mimic3models.pytorch_models.classification.dataset.utils import DecompensationDataset
from mimic3models.pytorch_models.classification.train.train import ClassificationTrainer

from torch.utils.data import DataLoader

In [2]:
# Build readers, discretizer, normalizer, datasets and data loaders.
#
# Every tunable value is named once below so that the discretizer settings and
# the normalizer-state filename derived from them cannot drift apart.
# NOTE(review): the paths are absolute and machine-specific; edit them here
# when running the notebook elsewhere.
data = "/home/neil.jethani/patient_embedding/data/decompensation"
NORMALIZER_DIR = "/home/neil.jethani/patient_embedding/src/mimic3models/patient_embedding"
TIMESTEP = 1.0
IMPUTE_STRATEGY = 'previous'
PERIOD_LENGTH = 24.0
BATCH_SIZE = 512

print("Creating Data File Reader")
# NOTE(review): the "train" reader points at the 'val' split and the
# validation reader at 'val_test' -- presumably intentional; confirm.
train_dir = os.path.join(data, 'val')
train_reader = DecompensationReader(dataset_dir=train_dir,
                                    listfile=os.path.join(train_dir, 'listfile.csv'),
                                    period_length=PERIOD_LENGTH)

val_dir = os.path.join(data, 'val_test')
val_reader = DecompensationReader(dataset_dir=val_dir,
                                  listfile=os.path.join(val_dir, 'listfile.csv'),
                                  period_length=PERIOD_LENGTH)

print("Initializing Discretizer and Normalizer")
discretizer = DiscretizerContinuous(timestep=TIMESTEP,
                                    store_masks=False,
                                    impute_strategy=IMPUTE_STRATEGY,
                                    start_time='zero')

# Element [1] of the transform result is used as the column header; columns
# whose header contains "->" are excluded from the list handed to the
# normalizer.
discretizer_header = discretizer.transform(train_reader.read_example(0)["X"])[1]
cont_channels = [i for (i, x) in enumerate(discretizer_header) if "->" not in x]

normalizer = Normalizer(fields=cont_channels)  # choose here which columns to standardize
# The state filename encodes the discretizer configuration used above.
normalizer_state = os.path.join(
    NORMALIZER_DIR,
    'ptemb_ts{}.input_str:{}.start_time:zero.normalizer'.format(TIMESTEP, IMPUTE_STRATEGY))
normalizer.load_params(normalizer_state)

# Create Dataset + DataLoader
print("Building Dataset")
# return_name=True on the training dataset only: the inspection cell below
# reads data['name'] from its samples.
train_dataset = DecompensationDataset(reader=train_reader, discretizer=discretizer,
                                      normalizer=normalizer, return_name=True,
                                      embed_method='TRANS')
val_dataset = DecompensationDataset(reader=val_reader, discretizer=discretizer,
                                    normalizer=normalizer, return_name=False,
                                    embed_method='TRANS')
print("Building DataLoader")
trainLoader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=1)
valLoader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=1)

Creating Data File Reader
Initializing Discretizer and Normalizer
Building Dataset
Building DataLoader


In [4]:
# Sanity check: print the length of sample['X'] for the first few training
# samples, and identify any sample whose length differs from the expected one.
EXPECTED_SEQ_LEN = 24
N_SAMPLES_TO_CHECK = 100

for i in range(N_SAMPLES_TO_CHECK):
    # Use a dedicated name: `data` already holds the dataset root path from
    # the setup cell and must not be rebound to a sample dict.
    sample = train_dataset[i]
    seq_len = len(sample['X'])
    print(seq_len)
    if seq_len != EXPECTED_SEQ_LEN:
        # The length was already printed above; only add the identifying info.
        print('name:{}, t:{}'.format(sample['name'], sample['t']))


24
24
24
24
24
24
24
24
24
24
24
24
24
24
24
24
24
24
24
24
24
24
24
24
24
24
24
24
24
24
24
24
24
24
24
24
24
24
24
24
24
24
24
24
24
24
24
24
24
24
24
24
24
24
24
24
24
24
24
24
24
24
24
24
24
24
24
24
24
24
24
24
24
24
24
24
24
24
24
24
24
24
24
24
24
24
24
24
24
24
24
24
24
24
24
24
24
24
24
24
