In [1]:
# coding: utf-8

import os
import re
import numpy as np
import pandas as pd
import ujson as json

patient_ids = []

for filename in os.listdir('./raw'):
    # the patient data in PhysioNet contains 6-digits
    match = re.search('\d{6}', filename)
    if match:
        id_ = match.group()
        patient_ids.append(id_)

out = pd.read_csv('./raw/Outcomes-a.txt').set_index('RecordID')['In-hospital_death']
print ('done')

done


In [30]:
# we select 35 attributes which contains enough non-values
attributes = ['DiasABP', 'HR', 'Na', 'Lactate', 'NIDiasABP', 'PaO2', 'WBC', 'pH', 'Albumin', 'ALT', 'Glucose', 'SaO2',
              'Temp', 'AST', 'Bilirubin', 'HCO3', 'BUN', 'RespRate', 'Mg', 'HCT', 'SysABP', 'FiO2', 'K', 'GCS',
              'Cholesterol', 'NISysABP', 'TroponinT', 'MAP', 'TroponinI', 'PaCO2', 'Platelets', 'Urine', 'NIMAP',
              'Creatinine', 'ALP']

# mean and std of 35 attributes
mean = np.array([59.540976152469405, 86.72320413227443, 139.06972964987443, 2.8797765291788986, 58.13833409690321,
                 147.4835678885565, 12.670222585415166, 7.490957887101613, 2.922874149659863, 394.8899400819931,
                 141.4867570064675, 96.66380228136883, 37.07362841054398, 505.5576196473552, 2.906465787821709,
                 23.118951553526724, 27.413004968675743, 19.64795551193981, 2.0277491155660416, 30.692432164676188,
                 119.60137167841977, 0.5404785381886381, 4.135790642787733, 11.407767149315339, 156.51746031746032,
                 119.15012244292181, 1.2004983498349853, 80.20321011673151, 7.127188940092161, 40.39875518672199,
                 191.05877024038804, 116.1171573535279, 77.08923183026529, 1.5052390166989214, 116.77122488658458])

std = np.array(
    [13.01436781437145, 17.789923096504985, 5.185595006246348, 2.5287518090506755, 15.06074282896952, 85.96290370390257,
     7.649058756791069, 8.384743923130074, 0.6515057685658769, 1201.033856726966, 67.62249645388543, 3.294112002091972,
     1.5604879744921516, 1515.362517984297, 5.902070316876287, 4.707600932877377, 23.403743427107095, 5.50914416318306,
     0.4220051299992514, 5.002058959758486, 23.730556355204214, 0.18634432509312762, 0.706337033602292,
     3.967579823394297, 45.99491531484596, 21.97610723063014, 2.716532297586456, 16.232515568438338, 9.754483687298688,
     9.062327978713556, 106.50939503021543, 170.65318497610315, 14.856134327604906, 1.6369529387005546,
     133.96778334724377])

fs = open('./json/json', 'w')

def to_time_bin(x):
    h, m = map(int, x.split(':'))
    return h


def parse_data(x):
    x = x.set_index('Parameter').to_dict()['Value']

    values = []

    for attr in attributes:
#         if x.has_key(attr):
        if attr in x:
            values.append(x[attr])
        else:
            values.append(np.nan)
    return values


def parse_delta(masks, dir_):
    if dir_ == 'backward':
        masks = masks[::-1]

    deltas = []

    for h in range(48):
        if h == 0:
            deltas.append(np.ones(35))
        else:
            deltas.append(np.ones(35) + (1 - masks[h]) * deltas[-1])

    return np.array(deltas)


def parse_rec(values, masks, evals, eval_masks, dir_):
    deltas = parse_delta(masks, dir_)

    # only used in GRU-D
    forwards = pd.DataFrame(values).fillna(method='ffill').fillna(0.0).as_matrix()

    rec = {}

    rec['values'] = np.nan_to_num(values).tolist()
    rec['masks'] = masks.astype('float64').tolist()
#     rec['masks'] = masks.astype('int32').tolist()
    # imputation ground-truth
    rec['evals'] = np.nan_to_num(evals).tolist()
#     rec['eval_masks'] = eval_masks.astype('int32').tolist()
    rec['eval_masks'] = eval_masks.astype('float64').tolist()
    rec['forwards'] = forwards.tolist()
    rec['deltas'] = deltas.tolist()

    return rec


def parse_id(id_):
    data = pd.read_csv('./raw/{}.txt'.format(id_))
    # accumulate the records within one hour
    data['Time'] = data['Time'].apply(lambda x: to_time_bin(x))

    evals = []

    # merge all the metrics within one hour
    for h in range(48):
        evals.append(parse_data(data[data['Time'] == h]))

    evals = (np.array(evals) - mean) / std

    shp = evals.shape

    evals = evals.reshape(-1)

    # randomly eliminate 10% values as the imputation ground-truth
    indices = np.where(~np.isnan(evals))[0].tolist()
    indices = np.random.choice(indices, len(indices) // 10)

    values = evals.copy()
    values[indices] = np.nan

    masks = ~np.isnan(values)
    eval_masks = (~np.isnan(values)) ^ (~np.isnan(evals))

    evals = evals.reshape(shp)
    values = values.reshape(shp)
    
#     print (masks.dtype)
    
    masks = masks.reshape(shp)
    eval_masks = eval_masks.reshape(shp)

#     label = out.loc[int(id_)]
    label = int(out.loc[int(id_)])
#     print (label.dtype)

    rec = {'label': label}

    # prepare the model for both directions
    rec['forward'] = parse_rec(values, masks, evals, eval_masks, dir_='forward')
    rec['backward'] = parse_rec(values[::-1], masks[::-1], evals[::-1], eval_masks[::-1], dir_='backward')
    
#     print (rec)
    
    rec = json.dumps(rec)

    fs.write(rec + '\n')

print ('functions loaded')

functions loaded


In [31]:
parse_id(patient_ids[0])

In [32]:
for id_ in patient_ids:
    print('Processing patient {}'.format(id_))
    try:
        parse_id(id_)
    except Exception as e:
        print(e)
        continue

fs.close()

Processing patient 134847
Processing patient 132949
Processing patient 134758
Processing patient 136516
Processing patient 141925
Processing patient 133040
Processing patient 139210
Processing patient 135276
Processing patient 137422
Processing patient 139126
Processing patient 137407
Processing patient 136365
Processing patient 135111
Processing patient 139644
Processing patient 141302
Processing patient 139354
Processing patient 137515
Processing patient 135346
Processing patient 137588
Processing patient 134686
Processing patient 141915
Processing patient 132637
Processing patient 137452
Processing patient 134034
Processing patient 135961
Processing patient 136655
Processing patient 141116
Processing patient 135677
Processing patient 141269
Processing patient 134007
Processing patient 141264
a must be non-empty
Processing patient 141832
Processing patient 141642
Processing patient 137224
Processing patient 135698
Processing patient 135662
Processing patient 140867
Processing patient

Processing patient 134398
Processing patient 140230
Processing patient 139241
Processing patient 134038
Processing patient 140751
Processing patient 139474
Processing patient 133228
Processing patient 135993
Processing patient 138089
Processing patient 136346
Processing patient 139682
Processing patient 133706
Processing patient 141330
Processing patient 139987
Processing patient 141090
Processing patient 136749
Processing patient 142033
Processing patient 135723
Processing patient 141782
Processing patient 135994
Processing patient 135498
Processing patient 135981
Processing patient 139723
Processing patient 133488
Processing patient 137333
Processing patient 136786
Processing patient 139910
Processing patient 138228
Processing patient 141795
Processing patient 138532
Processing patient 142561
Processing patient 134147
Processing patient 133740
Processing patient 133816
Processing patient 140048
Processing patient 133092
Processing patient 136995
Processing patient 141194
Processing p

Processing patient 134003
Processing patient 133026
Processing patient 135027
Processing patient 140662
Processing patient 133506
Processing patient 136887
Processing patient 140889
Processing patient 142603
Processing patient 132662
Processing patient 132958
Processing patient 140289
Processing patient 135728
Processing patient 137631
Processing patient 141560
Processing patient 135159
Processing patient 133428
Processing patient 139610
Processing patient 137979
Processing patient 137766
Processing patient 142661
Processing patient 141256
Processing patient 140646
Processing patient 140711
Processing patient 139768
Processing patient 138638
Processing patient 138485
Processing patient 141610
Processing patient 135355
Processing patient 138604
Processing patient 137099
Processing patient 139865
Processing patient 136599
Processing patient 133865
Processing patient 141185
Processing patient 142523
Processing patient 137193
Processing patient 136895
Processing patient 133245
Processing p

Processing patient 135357
Processing patient 139975
Processing patient 142514
Processing patient 134749
Processing patient 136572
Processing patient 142312
Processing patient 132766
Processing patient 136889
Processing patient 133152
Processing patient 136389
Processing patient 134864
Processing patient 134108
Processing patient 133219
Processing patient 133325
Processing patient 139842
Processing patient 141119
Processing patient 134650
Processing patient 142215
Processing patient 134732
Processing patient 135983
Processing patient 138551
Processing patient 134521
Processing patient 141790
Processing patient 139677
Processing patient 138861
Processing patient 137548
Processing patient 134029
Processing patient 136271
Processing patient 136692
Processing patient 132777
Processing patient 136640
Processing patient 132821
Processing patient 134900
Processing patient 135415
Processing patient 137657
Processing patient 139919
Processing patient 132845
Processing patient 134634
Processing p

Processing patient 137914
Processing patient 141300
Processing patient 133643
Processing patient 133280
Processing patient 133540
Processing patient 136155
Processing patient 136828
Processing patient 133213
Processing patient 132577
Processing patient 134522
Processing patient 138751
Processing patient 133274
Processing patient 139803
Processing patient 133925
Processing patient 137335
Processing patient 132893
Processing patient 134466
Processing patient 137793
Processing patient 132791
Processing patient 135290
Processing patient 139159
Processing patient 140901
Processing patient 134100
Processing patient 141235
Processing patient 133237
Processing patient 137723
Processing patient 132891
Processing patient 138893
Processing patient 136641
Processing patient 140877
Processing patient 140331
Processing patient 137904
Processing patient 139216
Processing patient 136488
Processing patient 135145
Processing patient 133636
Processing patient 138480
Processing patient 132738
Processing p

Processing patient 132933
Processing patient 133770
Processing patient 141000
Processing patient 141229
Processing patient 137639
Processing patient 132895
Processing patient 138310
Processing patient 140806
Processing patient 136417
Processing patient 136661
Processing patient 136802
Processing patient 133295
Processing patient 134335
Processing patient 135962
Processing patient 133846
Processing patient 141279
Processing patient 137713
Processing patient 141127
Processing patient 136561
Processing patient 140827
Processing patient 138621
Processing patient 139853
Processing patient 132688
Processing patient 135380
Processing patient 140807
Processing patient 140657
Processing patient 133287
Processing patient 141283
Processing patient 137217
Processing patient 133989
Processing patient 140832
Processing patient 139794
Processing patient 142557
Processing patient 139438
Processing patient 138697
Processing patient 137258
Processing patient 141999
Processing patient 141951
Processing p

Processing patient 134520
Processing patient 134213
Processing patient 135178
Processing patient 140347
Processing patient 133514
Processing patient 134709
Processing patient 142485
Processing patient 132540
Processing patient 139928
Processing patient 142583
Processing patient 141168
Processing patient 139257
Processing patient 141136
Processing patient 137383
Processing patient 136236
Processing patient 136110
Processing patient 134460
Processing patient 135210
Processing patient 132772
Processing patient 139831
Processing patient 140468
Processing patient 140339
Processing patient 139981
Processing patient 142107
Processing patient 141511
Processing patient 138068
Processing patient 134786
Processing patient 135506
Processing patient 139090
Processing patient 137332
Processing patient 138187
Processing patient 140034
Processing patient 134042
Processing patient 133715
Processing patient 133267
Processing patient 139433
Processing patient 140037
Processing patient 133516
Processing p

Processing patient 138075
Processing patient 137751
Processing patient 135363
Processing patient 142346
Processing patient 136225
Processing patient 133525
Processing patient 132819
Processing patient 140294
Processing patient 140165
Processing patient 142433
Processing patient 137233
Processing patient 134299
Processing patient 141354
Processing patient 137396
Processing patient 137358
Processing patient 133546
Processing patient 140447
Processing patient 135684
Processing patient 135406
Processing patient 142428
Processing patient 132622
Processing patient 135653
Processing patient 138142
Processing patient 134207
Processing patient 140761
Processing patient 142115
Processing patient 136618
Processing patient 141688
Processing patient 136520
Processing patient 139163
Processing patient 135794
Processing patient 139394
Processing patient 136915
Processing patient 138472
Processing patient 141266
Processing patient 137242
Processing patient 133188
Processing patient 138344
Processing p

Processing patient 132648
Processing patient 140578
Processing patient 135967
Processing patient 137058
Processing patient 141826
Processing patient 135895
Processing patient 140068
Processing patient 137501
Processing patient 140139
Processing patient 139158
Processing patient 134420
Processing patient 141828
Processing patient 140116
Processing patient 140070
Processing patient 136594
Processing patient 137161
Processing patient 136540
Processing patient 139869
Processing patient 142031
Processing patient 138435
Processing patient 133031
Processing patient 136785
Processing patient 135246
Processing patient 134318
Processing patient 137583
Processing patient 141393
Processing patient 134710
Processing patient 134755
Processing patient 139079
Processing patient 141114
Processing patient 140001
Processing patient 132915
Processing patient 141071
Processing patient 138184
Processing patient 138595
Processing patient 136122
Processing patient 140707
Processing patient 141032
Processing p

Processing patient 137243
Processing patient 142290
Processing patient 137139
Processing patient 135902
Processing patient 140214
Processing patient 139661
Processing patient 138838
Processing patient 139295
Processing patient 135080
Processing patient 133512
Processing patient 134551
Processing patient 140933
Processing patient 135343
Processing patient 140382
Processing patient 139847
Processing patient 132852
Processing patient 134112
Processing patient 142386
Processing patient 138369
Processing patient 142404
Processing patient 138254
Processing patient 141786
Processing patient 133303
Processing patient 142429
Processing patient 140030
Processing patient 141590
Processing patient 137518
Processing patient 134464
Processing patient 137187
Processing patient 133632
Processing patient 134069
Processing patient 139954
Processing patient 141449
Processing patient 134600
Processing patient 137635
Processing patient 141866
Processing patient 136874
Processing patient 135963
Processing p

Processing patient 138608
Processing patient 139464
Processing patient 132659
Processing patient 142667
Processing patient 137424
Processing patient 142607
Processing patient 139942
Processing patient 136579
Processing patient 135438
Processing patient 138651
Processing patient 134162
Processing patient 134311
Processing patient 142327
Processing patient 142665
Processing patient 138788
Processing patient 140767
Processing patient 140904
Processing patient 140794
Processing patient 137856
Processing patient 139310
Processing patient 140278
Processing patient 136327
Processing patient 141561
Processing patient 138038
Processing patient 141247
Processing patient 133016
Processing patient 135472
Processing patient 140610
Processing patient 137783
Processing patient 136215
Processing patient 134609
Processing patient 141617
Processing patient 139955
Processing patient 135972
Processing patient 140982
Processing patient 136321
Processing patient 136037
Processing patient 142025
Processing p

Processing patient 136723
Processing patient 137721
Processing patient 135341
Processing patient 138065
Processing patient 142601
Processing patient 140097
Processing patient 135513
Processing patient 138451
Processing patient 134659
Processing patient 141872
Processing patient 132799
Processing patient 140259
Processing patient 133644
Processing patient 136714
Processing patient 134355
Processing patient 132698
Processing patient 137117
Processing patient 134039
Processing patient 136063
Processing patient 141732
Processing patient 133406
Processing patient 134919
Processing patient 137806
Processing patient 142626
Processing patient 137537
Processing patient 136223
Processing patient 135318
Processing patient 136188
Processing patient 136322
Processing patient 133270
Processing patient 133862
Processing patient 136569
Processing patient 133257
Processing patient 133551
Processing patient 134608
Processing patient 134568
Processing patient 134908
Processing patient 140112
Processing p

Processing patient 134694
Processing patient 138376
Processing patient 134028
Processing patient 138793
Processing patient 141672
Processing patient 141062
Processing patient 137070
Processing patient 135672
Processing patient 140845
Processing patient 138963
Processing patient 133370
Processing patient 142359
Processing patient 134330
Processing patient 133570
Processing patient 136886
Processing patient 140166
Processing patient 139613
Processing patient 137177
Processing patient 134893
Processing patient 138466
Processing patient 138652
Processing patient 140335
Processing patient 137215
Processing patient 140320
Processing patient 135427
Processing patient 134676
Processing patient 133875
Processing patient 133431
Processing patient 134882
Processing patient 142037
Processing patient 142206
Processing patient 140248
Processing patient 133372
Processing patient 139643
Processing patient 135236
Processing patient 134465
Processing patient 139573
Processing patient 137637
Processing p

In [1]:
import json
import fancyimpute
import numpy as np
import pandas as pd

X = []
Y = []
Z = []

for ctx in open('json/json'):
    z = json.loads(ctx)['label']
    ctx = json.loads(ctx)['forward']
    x = np.asarray(ctx['values'])
    y = np.asarray(ctx['evals'])


    x_mask = np.asarray(ctx['masks']).astype(np.bool)
    y_mask = np.asarray(ctx['eval_masks']).astype(np.bool)

    x[~x_mask] = np.nan

    y[(~x_mask) & (~y_mask)] = np.nan

    X.append(x)
    Y.append(y)
    Z.append(int(z))

def get_loss(X, X_pred, Y):
    # find ones in Y but not in X (ground truth)
    mask = np.isnan(X) ^ np.isnan(Y)

    X_pred = np.nan_to_num(X_pred)
    pred = X_pred[mask]
    label = Y[mask]

    mae = np.abs(pred - label).sum() / (1e-5 + np.sum(mask))
    mre = np.abs(pred - label).sum() / (1e-5 + np.sum(np.abs(label)))

    return {'mae': mae, 'mre': mre}
print ('loaded')

Using TensorFlow backend.


loaded


In [21]:
X_mice = []

# since the data matrix of one patient is a singular matrix, we merge a batch of matrices and do MICE impute

n = len(X)
batch_size = 128
nb_batch = (n + batch_size - 1) // batch_size

for i in range(nb_batch):
    print('On batch {}'.format(i))
    x = np.concatenate(X[i * batch_size: (i + 1) * batch_size])
    y = np.concatenate(Y[i * batch_size: (i + 1) * batch_size])
    x_mice = fancyimpute.IterativeImputer(verbose=False).fit_transform(x)

    X_mice.append(x_mice)
print ('done')

On batch 0
On batch 1
On batch 2
On batch 3
On batch 4
On batch 5
On batch 6
On batch 7
On batch 8
On batch 9
On batch 10
On batch 11
On batch 12
On batch 13
On batch 14
On batch 15
On batch 16
On batch 17
On batch 18
On batch 19
On batch 20
On batch 21
On batch 22
On batch 23
On batch 24
On batch 25
On batch 26
On batch 27
On batch 28
On batch 29
On batch 30
On batch 31
done


In [26]:
X_mice_2 = np.concatenate(X_mice[:-1], axis=0)
# X_c = np.concatenate(X, axis=0)
# Y_c = np.concatenate(Y, axis=0)
print ('done')

done


In [32]:
print('MICE imputation')
print(get_loss(X_c[:190464], X_mice_2, Y_c[:190464]))

MICE imputation
{'mre': 0.9965220452759496, 'mae': 0.7001357028663372}


In [29]:
X_mice_2.shape

(190464, 35)

In [31]:
Y_c[:190464].shape

(190464, 35)