In [1]:
import sys
#import cPickle as pickle
import pickle
from datetime import datetime

In [2]:
def convert_to_icd9(dxStr):
	if dxStr.startswith('E'):
		if len(dxStr) > 4: return dxStr[:4] + '.' + dxStr[4:]
		else: return dxStr
	else:
		if len(dxStr) > 3: return dxStr[:3] + '.' + dxStr[3:]
		else: return dxStr

In [3]:
def convert_to_3digit_icd9(dxStr):
	if dxStr.startswith('E'):
		if len(dxStr) > 4: return dxStr[:4]
		else: return dxStr
	else:
		if len(dxStr) > 3: return dxStr[:3]
		else: return dxStr


# preprocess main

In [40]:
admissionFile = "./mimic_demo/data/ADMISSIONS.csv"
diagnosisFile = "./mimic_demo/data/DIAGNOSES_ICD.csv"
outFile = "./mimic_demo/outdemo"

In [41]:
print('Building pid-admission mapping, admission-date mapping')
pidAdmMap = {}
admDateMap = {}
infd = open(admissionFile, 'r')
infd.readline()
for line in infd:
    tokens = line.strip().split(',')
    pid = int(tokens[1])
    admId = int(tokens[2])
    admTime = datetime.strptime(tokens[3], '%Y-%m-%d %H:%M:%S')
    admDateMap[admId] = admTime
    if pid in pidAdmMap: pidAdmMap[pid].append(admId)
    else: pidAdmMap[pid] = [admId]
infd.close()
#pidAdmMap把病人的ID 和 他到院紀錄 做成dic
#admDateMap把每個到院記錄存成時間

Building pid-admission mapping, admission-date mapping


In [42]:
print('Building admission-dxList mapping')
admDxMap = {}
admDxMap_3digit = {}
infd = open(diagnosisFile, 'r')
infd.readline()
for line in infd:
    tokens = line.strip().split(',')
    admId = int(tokens[2])
    dxStr = 'D_' + convert_to_icd9(tokens[4][1:-1]) ############## Uncomment this line and comment the line below, if you want to use the entire ICD9 digits.
    dxStr_3digit = convert_to_3digit_icd9(tokens[4])

    if admId in admDxMap: 
        admDxMap[admId].append(dxStr)
    else: 
        admDxMap[admId] = [dxStr]

    if admId in admDxMap_3digit: 
        admDxMap_3digit[admId].append(dxStr_3digit)
    else: 
        admDxMap_3digit[admId] = [dxStr_3digit]
infd.close()

#tokens[4]後面原先有一個[1:-1], 看不出作用所以被我移除了


Building admission-dxList mapping


In [43]:
print('Building pid-sortedVisits mapping')
pidSeqMap = {}
pidSeqMap_3digit = {}
for pid, admIdList in pidAdmMap.items():
    if len(admIdList) < 2: 
        continue

    sortedList = sorted([(admDateMap[admId], admDxMap[admId]) for admId in admIdList])
    pidSeqMap[pid] = sortedList

    sortedList_3digit = sorted([(admDateMap[admId], admDxMap_3digit[admId]) for admId in admIdList])
    pidSeqMap_3digit[pid] = sortedList_3digit
#visit to disease

Building pid-sortedVisits mapping


In [45]:
print('Building pids, dates, strSeqs')
pids = []
dates = []
seqs = []
for pid, visits in pidSeqMap.items():
    pids.append(pid)
    seq = []
    date = []
    for visit in visits:
        date.append(visit[0])
        seq.append(visit[1])
    dates.append(date)
    seqs.append(seq)

Building pids, dates, strSeqs


In [46]:
print('Building pids, dates, strSeqs for 3digit ICD9 code')
seqs_3digit = []
for pid, visits in pidSeqMap_3digit.items():
    seq = []
    for visit in visits:
        seq.append(visit[1])
    seqs_3digit.append(seq)

Building pids, dates, strSeqs for 3digit ICD9 code


In [47]:
print('Converting strSeqs to intSeqs, and making types')
types = {}
newSeqs = []
for patient in seqs:
    newPatient = []
    for visit in patient:
        newVisit = []
        for code in visit:
            if code in types:
                newVisit.append(types[code])
            else:
                types[code] = len(types)
                newVisit.append(types[code])
        newPatient.append(newVisit)
    newSeqs.append(newPatient)

Converting strSeqs to intSeqs, and making types


In [49]:
print('Converting strSeqs to intSeqs, and making types for 3digit ICD9 code')
types_3digit = {}
newSeqs_3digit = []
for patient in seqs_3digit:
    newPatient = []
    for visit in patient:
        newVisit = []
        for code in set(visit):
            if code in types_3digit:
                newVisit.append(types_3digit[code])
            else:
                types_3digit[code] = len(types_3digit)
                newVisit.append(types_3digit[code])
        newPatient.append(newVisit)
    newSeqs_3digit.append(newPatient)

Converting strSeqs to intSeqs, and making types for 3digit ICD9 code


In [50]:
print('Re-formatting to Med2Vec dataset')
seqs = []
for patient in newSeqs:
    seqs.extend(patient)
    seqs.append([-1])
seqs = seqs[:-1]

seqs_3digit = []
for patient in newSeqs_3digit:
    seqs_3digit.extend(patient)
    seqs_3digit.append([-1])
seqs_3digit = seqs_3digit[:-1]



Re-formatting to Med2Vec dataset


In [51]:
pickle.dump(seqs, open(outFile+'.seqs', 'wb'), -1)
pickle.dump(types, open(outFile+'.types', 'wb'), -1)
pickle.dump(seqs_3digit, open(outFile+'.3digitICD9.seqs', 'wb'), -1)
pickle.dump(types_3digit, open(outFile+'.3digitICD9.types', 'wb'), -1)

In [52]:
#這邊輸出檔案其實沒問題，但不能用jupyter 讀取 會變成亂碼(就這樣)
pickle.load(open('./mimic_demo/outdemo.3digitICD9.seqs', 'rb'))

[[0, 1, 2, 3, 4, 5, 6, 7, 8],
 [9, 0, 10, 1, 11, 3, 2, 4, 5, 7, 12, 13, 14],
 [-1],
 [15, 16, 17, 10, 11, 18, 19, 20, 21, 22, 23, 24, 25, 8, 12, 14],
 [10,
  2,
  26,
  27,
  25,
  28,
  29,
  30,
  31,
  32,
  33,
  23,
  34,
  8,
  16,
  35,
  36,
  17,
  20,
  37,
  21],
 [15, 17, 38, 27, 11, 18, 35, 21, 33, 22, 23, 24, 34, 39, 12, 14],
 [-1],
 [16, 40, 41, 2, 42, 43, 44, 45, 23, 46, 47, 48],
 [29, 16, 40, 49, 11, 45, 23, 14],
 [-1],
 [50, 16, 51, 6, 52],
 [50, 16, 9, 10, 51, 41, 53, 54, 55],
 [-1],
 [50, 16, 10, 2, 26, 56, 57, 8, 58, 59],
 [60, 61, 26, 8, 62],
 [-1],
 [9, 40, 26, 2, 18, 63, 22, 64, 23, 45, 65, 66, 67, 68, 69],
 [16, 70, 10, 2, 26, 41, 43, 18, 11, 71, 45, 14, 23, 21, 72, 73, 74, 68],
 [-1],
 [16, 41, 75, 2, 22, 76, 8],
 [10, 41, 75, 2, 76, 22, 77],
 [-1],
 [10,
  78,
  2,
  79,
  80,
  81,
  82,
  83,
  84,
  42,
  85,
  86,
  46,
  87,
  88,
  89,
  16,
  90,
  91,
  92,
  93,
  94,
  95,
  43,
  96,
  73,
  12,
  97,
  98,
  69],
 [30, 99, 10, 26, 42, 43, 21, 100,

In [53]:
len(types)

163

# med2vec main

In [54]:
import sys, random
import numpy as np
import pickle
from collections import OrderedDict
import argparse

import theano
import theano.tensor as T
from theano import config



In [55]:
def numpy_floatX(data):
	return np.asarray(data, dtype=config.floatX)

def unzip(zipped):
	new_params = OrderedDict()
	for k, v in zipped.items():
		new_params[k] = v.get_value()
	return new_params


In [56]:
def init_params(options):
	params = OrderedDict()

	numXcodes = options['numXcodes']
	numYcodes = options['numYcodes']
	embDimSize= options['embDimSize']
	demoSize = options['demoSize']
	hiddenDimSize = options['hiddenDimSize']

	params['W_emb'] = np.random.uniform(-0.01, 0.01, (numXcodes, embDimSize)).astype(config.floatX) #emb matrix needs an extra dimension for the time
	params['b_emb'] = np.zeros(embDimSize).astype(config.floatX)
	params['W_hidden'] = np.random.uniform(-0.01, 0.01, (embDimSize+demoSize, hiddenDimSize)).astype(config.floatX) #emb matrix needs an extra dimension for the time
	params['b_hidden'] = np.zeros(hiddenDimSize).astype(config.floatX)
	if numYcodes > 0:
		params['W_output'] = np.random.uniform(-0.01, 0.01, (hiddenDimSize, numYcodes)).astype(config.floatX) #emb matrix needs an extra dimension for the time
		params['b_output'] = np.zeros(numYcodes).astype(config.floatX)
	else:
		params['W_output'] = np.random.uniform(-0.01, 0.01, (hiddenDimSize, numXcodes)).astype(config.floatX) #emb matrix needs an extra dimension for the time
		params['b_output'] = np.zeros(numXcodes).astype(config.floatX)

	return params

In [57]:
def load_params(options):
	params = np.load(options['modelFile'])
	return params

def init_tparams(params):
	tparams = OrderedDict()
	for k, v in params.items():
		tparams[k] = theano.shared(v, name=k)
	return tparams

In [58]:
def build_model(tparams, options):
	x = T.matrix('x', dtype=config.floatX)
	d = T.matrix('d', dtype=config.floatX)
	y = T.matrix('y', dtype=config.floatX)
	mask = T.vector('mask', dtype=config.floatX)

	logEps = options['logEps']

	emb = T.maximum(T.dot(x, tparams['W_emb']) + tparams['b_emb'],0)
	if options['demoSize'] > 0: emb = T.concatenate((emb, d), axis=1)
	visit = T.maximum(T.dot(emb, tparams['W_hidden']) + tparams['b_hidden'],0)
	results = T.nnet.softmax(T.dot(visit, tparams['W_output']) + tparams['b_output'])
	
	mask1 = (mask[:-1] * mask[1:])[:,None]
	mask2 = (mask[:-2] * mask[1:-1] * mask[2:])[:,None]
	mask3 = (mask[:-3] * mask[1:-2] * mask[2:-1] * mask[3:])[:,None]
	mask4 = (mask[:-4] * mask[1:-3] * mask[2:-2] * mask[3:-1] * mask[4:])[:,None]
	mask5 = (mask[:-5] * mask[1:-4] * mask[2:-3] * mask[3:-2] * mask[4:-1] * mask[5:])[:,None]

	t = None
	if options['numYcodes'] > 0: t = y
	else: t = x

	forward_results =  results[:-1] * mask1
	forward_cross_entropy = -(t[1:] * T.log(forward_results + logEps) + (1. - t[1:]) * T.log(1. - forward_results + logEps))

	forward_results2 =  results[:-2] * mask2
	forward_cross_entropy2 = -(t[2:] * T.log(forward_results2 + logEps) + (1. - t[2:]) * T.log(1. - forward_results2 + logEps))

	forward_results3 =  results[:-3] * mask3
	forward_cross_entropy3 = -(t[3:] * T.log(forward_results3 + logEps) + (1. - t[3:]) * T.log(1. - forward_results3 + logEps))

	forward_results4 =  results[:-4] * mask4
	forward_cross_entropy4 = -(t[4:] * T.log(forward_results4 + logEps) + (1. - t[4:]) * T.log(1. - forward_results4 + logEps))

	forward_results5 =  results[:-5] * mask5
	forward_cross_entropy5 = -(t[5:] * T.log(forward_results5 + logEps) + (1. - t[5:]) * T.log(1. - forward_results5 + logEps))

	backward_results =  results[1:] * mask1
	backward_cross_entropy = -(t[:-1] * T.log(backward_results + logEps) + (1. - t[:-1]) * T.log(1. - backward_results + logEps))

	backward_results2 =  results[2:] * mask2
	backward_cross_entropy2 = -(t[:-2] * T.log(backward_results2 + logEps) + (1. - t[:-2]) * T.log(1. - backward_results2 + logEps))

	backward_results3 =  results[3:] * mask3
	backward_cross_entropy3 = -(t[:-3] * T.log(backward_results3 + logEps) + (1. - t[:-3]) * T.log(1. - backward_results3 + logEps))

	backward_results4 =  results[4:] * mask4
	backward_cross_entropy4 = -(t[:-4] * T.log(backward_results4 + logEps) + (1. - t[:-4]) * T.log(1. - backward_results4 + logEps))

	backward_results5 =  results[5:] * mask5
	backward_cross_entropy5 = -(t[:-5] * T.log(backward_results5 + logEps) + (1. - t[:-5]) * T.log(1. - backward_results5 + logEps))

	visit_cost1 = (forward_cross_entropy.sum(axis=1).sum(axis=0) + backward_cross_entropy.sum(axis=1).sum(axis=0)) / (mask1.sum() + logEps)
	visit_cost2 = (forward_cross_entropy2.sum(axis=1).sum(axis=0) + backward_cross_entropy2.sum(axis=1).sum(axis=0)) / (mask2.sum() + logEps)
	visit_cost3 = (forward_cross_entropy3.sum(axis=1).sum(axis=0) + backward_cross_entropy3.sum(axis=1).sum(axis=0)) / (mask3.sum() + logEps)
	visit_cost4 = (forward_cross_entropy4.sum(axis=1).sum(axis=0) + backward_cross_entropy4.sum(axis=1).sum(axis=0)) / (mask4.sum() + logEps)
	visit_cost5 = (forward_cross_entropy5.sum(axis=1).sum(axis=0) + backward_cross_entropy5.sum(axis=1).sum(axis=0)) / (mask5.sum() + logEps)

	windowSize = options['windowSize']
	visit_cost = visit_cost1
	if windowSize == 2:
		visit_cost = visit_cost1 + visit_cost2
	elif windowSize == 3:
		visit_cost = visit_cost1 + visit_cost2 + visit_cost3
	elif windowSize == 4:
		visit_cost = visit_cost1 + visit_cost2 + visit_cost3 + visit_cost4
	elif windowSize == 5:
		visit_cost = visit_cost1 + visit_cost2 + visit_cost3 + visit_cost4 + visit_cost5

	iVector = T.vector('iVector', dtype='int32')
	jVector = T.vector('jVector', dtype='int32')
	preVec = T.maximum(tparams['W_emb'],0)
	norms = (T.exp(T.dot(preVec, preVec.T))).sum(axis=1)
	emb_cost = -T.log((T.exp((preVec[iVector] * preVec[jVector]).sum(axis=1)) / norms[iVector]) + logEps)

	total_cost = visit_cost + T.mean(emb_cost) + options['L2_reg'] * (tparams['W_emb'] ** 2).sum()

	if options['demoSize'] > 0 and options['numYcodes'] > 0: return x, d, y, mask, iVector, jVector, total_cost
	elif options['demoSize'] == 0 and options['numYcodes'] > 0: return x, y, mask, iVector, jVector, total_cost
	elif options['demoSize'] > 0 and options['numYcodes'] == 0: return x, d, mask, iVector, jVector, total_cost
	else: return x, mask, iVector, jVector, total_cost

In [59]:
def adadelta(tparams, grads, x, mask, iVector, jVector, cost, options, d=None, y=None):
	zipped_grads = [theano.shared(p.get_value() * numpy_floatX(0.), name='%s_grad' % k) for k, p in tparams.items()]
	running_up2 = [theano.shared(p.get_value() * numpy_floatX(0.), name='%s_rup2' % k) for k, p in tparams.items()]
	running_grads2 = [theano.shared(p.get_value() * numpy_floatX(0.), name='%s_rgrad2' % k) for k, p in tparams.items()]

	zgup = [(zg, g) for zg, g in zip(zipped_grads, grads)]
	rg2up = [(rg2, 0.95 * rg2 + 0.05 * (g ** 2)) for rg2, g in zip(running_grads2, grads)]

	if options['demoSize'] > 0 and options['numYcodes'] > 0:
		f_grad_shared = theano.function([x, d, y, mask, iVector, jVector], cost, updates=zgup + rg2up, name='adadelta_f_grad_shared')
	elif options['demoSize'] == 0 and options['numYcodes'] > 0:
		f_grad_shared = theano.function([x, y, mask, iVector, jVector], cost, updates=zgup + rg2up, name='adadelta_f_grad_shared')
	elif options['demoSize'] > 0 and options['numYcodes'] == 0:
		f_grad_shared = theano.function([x, d, mask, iVector, jVector], cost, updates=zgup + rg2up, name='adadelta_f_grad_shared')
	else:
		f_grad_shared = theano.function([x, mask, iVector, jVector], cost, updates=zgup + rg2up, name='adadelta_f_grad_shared')

	updir = [-T.sqrt(ru2 + 1e-6) / T.sqrt(rg2 + 1e-6) * zg for zg, ru2, rg2 in zip(zipped_grads, running_up2, running_grads2)]
	ru2up = [(ru2, 0.95 * ru2 + 0.05 * (ud ** 2)) for ru2, ud in zip(running_up2, updir)]
	param_up = [(p, p + ud) for p, ud in zip(tparams.values(), updir)]

	f_update = theano.function([], [], updates=ru2up + param_up, on_unused_input='ignore', name='adadelta_f_update')

	return f_grad_shared, f_update

In [60]:
def load_data(xFile,dFile,yFile):
	seqX = np.array(pickle.load(open(xFile, 'rb')))
	seqD = []
	if len(dFile) > 0: seqD = np.asarray(pickle.load(open(dFile, 'rb')), dtype=config.floatX)
	seqY = []
	if len(yFile) > 0: seqY = np.array(pickle.load(open(yFile, 'rb')))
	return seqX, seqD, seqY

In [61]:
def pickTwo(codes, iVector, jVector):
	for first in codes:
		for second in codes:
			if first == second: continue
			iVector.append(first)
			jVector.append(second)

In [62]:
def padMatrix(seqs, labels, options):
	n_samples = len(seqs)
	iVector = []
	jVector = []
	numXcodes = options['numXcodes']
	numYcodes = options['numYcodes']

	if numYcodes > 0:
		x = np.zeros((n_samples, numXcodes)).astype(config.floatX)
		y = np.zeros((n_samples, numYcodes)).astype(config.floatX)
		mask = np.zeros((n_samples,)).astype(config.floatX)
		for idx, (seq, label) in enumerate(zip(seqs, labels[0].values())):
#		for idx, (seq, label) in enumerate(zip(seqs, labels)):
			if not seq[0] == -1:
				x[idx][seq] = 1.
				y[idx][label] = 1.
				pickTwo(seq, iVector, jVector)
				mask[idx] = 1.
		return x, y, mask, iVector, jVector
	else:
		x = np.zeros((n_samples, numXcodes)).astype(config.floatX)
		mask = np.zeros((n_samples,)).astype(config.floatX)
		for idx, seq in enumerate(seqs):
			if not seq[0] == -1:
				x[idx][seq] = 1.
				pickTwo(seq, iVector, jVector)
				mask[idx] = 1.
		return x, mask, iVector, jVector


In [73]:
def train_med2vec(seqFile='seqFile.txt', 
				demoFile='demoFile.txt',
				labelFile='labelFile.txt',
				outFile='outFile.txt',
				modelFile='modelFile.txt',
				L2_reg=0.001,
				numXcodes=20000, 
				numYcodes=20000, 
				embDimSize=1000,
				hiddenDimSize=2000,
				batchSize=100,
				demoSize=2,
				logEps=1e-8,
				windowSize=1,
				verbose=False,
				maxEpochs=1000):

	options = locals().copy()
	print('initializing parameters')
	params = init_params(options)
	#params = load_params(options)
	tparams = init_tparams(params)

	print('building models')
	f_grad_shared = None
	f_update = None
	if demoSize > 0 and numYcodes > 0:
		x, d, y, mask, iVector, jVector, cost = build_model(tparams, options)
		grads = T.grad(cost, wrt=list(tparams.values()))
		f_grad_shared, f_update = adadelta(tparams, grads, x, mask, iVector, jVector, cost, options, d=d, y=y)
	elif demoSize == 0 and numYcodes > 0:
		x, y, mask, iVector, jVector, cost = build_model(tparams, options)
		grads = T.grad(cost, wrt=list(tparams.values()))
		f_grad_shared, f_update = adadelta(tparams, grads, x, mask, iVector, jVector, cost, options, y=y)
	elif demoSize > 0 and numYcodes == 0:
		x, d, mask, iVector, jVector, cost = build_model(tparams, options)
		grads = T.grad(cost, wrt=list(tparams.values()))
		f_grad_shared, f_update = adadelta(tparams, grads, x, mask, iVector, jVector, cost, options, d=d)
	else:
		x, mask, iVector, jVector, cost = build_model(tparams, options)
		grads = T.grad(cost, wrt=list(tparams.values()))
		f_grad_shared, f_update = adadelta(tparams, grads, x, mask, iVector, jVector, cost, options)

	print('loading data')
	seqs, demos, labels = load_data(seqFile, demoFile, labelFile)
	n_batches = int(np.ceil(float(len(seqs)) / float(batchSize)))

	print('training start')
	for epoch in range(maxEpochs):
		iteration = 0
		costVector = []
		for index in random.sample(range(n_batches), n_batches):
			batchX = seqs[batchSize*index:batchSize*(index+1)]
			batchY = []
			batchD = []
			if demoSize > 0 and numYcodes > 0:
				batchY = [labels.tolist()][batchSize*index:batchSize*(index+1)]
				x, y, mask, iVector, jVector = padMatrix(batchX, batchY, options)
				batchD = demos[batchSize*index:batchSize*(index+1)]
				cost = f_grad_shared(x, batchD, y, mask, iVector, jVector)
			elif demoSize == 0 and numYcodes > 0:
#				batchY = labels[batchSize*index:batchSize*(index+1)]
				batchY = [labels.tolist()][batchSize*index:batchSize*(index+1)]
				x, y, mask, iVector, jVector = padMatrix(batchX, batchY, options)
				cost = f_grad_shared(x, y, mask, iVector, jVector)
			elif demoSize > 0 and numYcodes == 0:
				x, mask, iVector, jVector = padMatrix(batchX, batchY, options)
				batchD = demos[batchSize*index:batchSize*(index+1)]
				cost = f_grad_shared(x, batchD, mask, iVector, jVector)
			else:
				x, mask, iVector, jVector = padMatrix(batchX, batchY, options)
				cost = f_grad_shared(x, mask, iVector, jVector)
			costVector.append(cost)
			f_update()
			if (iteration % 10 == 0) and verbose: print('epoch:%d, iteration:%d/%d, cost:%f' % (epoch, iteration, n_batches, cost))
			iteration += 1
		print('epoch:%d, mean_cost:%f' % (epoch, np.mean(costVector)))
		tempParams = unzip(tparams)
		np.savez_compressed(outFile + '.' + str(epoch), **tempParams)

# main

In [74]:
train_med2vec(seqFile='./mimic_demo/outdemo.seqs',
              demoFile='',
              labelFile='./mimic_demo/outdemo.types', 
              outFile='./mimic_demo/outresult.txt',
              numXcodes=20000,
              numYcodes=20000, 
              embDimSize=1000, 
              hiddenDimSize=2000, 
              batchSize=100, 
              maxEpochs=10,
              L2_reg=0.001,
              demoSize=0,
              windowSize=1,
              logEps=1e-8,
              verbose='store_true')

initializing parameters
building models
loading data
training start
{'D_71': 0, 'D_84': 1, 'D_707': 2, 'D_80': 3, 'D_87': 4, 'D_89': 5, 'D_562': 6, 'D_72': 7, 'D_01': 8, 'D_5': 9, 'D_38': 10, 'D_959': 11, 'D_855': 12, 'D_188': 13, 'D_85': 14, 'D_99': 15, 'D_28': 16, 'D_76': 17, 'D_78': 18, 'D_41': 19, 'D_458': 20, 'D_140': 21, 'D_1': 22, 'D_120': 23, 'D_43': 24, 'D_107': 25, 'D_07': 26, 'D_028': 27, 'D_9': 28, 'D_411': 29, 'D_09': 30, 'D_278': 31, 'D_32': 32, 'D_904': 33, 'D_37': 34, 'D_403': 35, 'D_621': 36, 'D_44': 37, 'D_852': 38, 'D_27': 39, 'D_8': 40, 'D_53': 41, 'D_97': 42, 'D_070': 43, 'D_55': 44, 'D_500': 45, 'D_94': 46, 'D_084': 47, 'D_29': 48, 'D_90': 49, 'D_030': 50, 'D_966': 51, 'D_12': 52, 'D_428': 53, 'D_75': 54, 'D_273': 55, 'D_77': 56, 'D_18': 57, 'D_498': 58, 'D_586': 59, 'D_324': 60, 'D_314': 61, 'D_202': 62, 'D_283': 63, 'D_981': 64, 'D_98': 65, 'D_885': 66, 'D_330': 67, 'D_039': 68, 'D_878': 69, 'D_509': 70, 'D_381': 71, 'D_16': 72, 'D_272': 73, 'D_780': 74, 'D_541'

IndexError: too many indices for array

In [75]:
seqs, demos, labels = load_data('./mimic_demo/outdemo.seqs','','./mimic_demo/outdemo.types')

In [78]:
len(seqs)

56

In [82]:
len(labels.tolist())

163