In [2]:
import sys 
import os 
import numpy as np 
import pandas as pd 
import pickle 

sys.path.append('/data/datasets/rrs-data/rrs_kit')

from rrs_kit.DataClass import DataPath
from rrs_kit.utils import (
  filter_sign,
  adjust_cbc,
  adjust_chem,
  get_target_df,
  get_merge_data,
  make_timestamp,
  make_sequence_data,
  make_2d_data
)


In [2]:
dp = DataPath()

valid_trn_sign = pd.read_csv(
    os.path.join(dp.valid_path, 'trn_abn_flowsheet.csv'),
    encoding = 'CP949'
)

valid_trn_sign_weird = pd.read_csv(
    os.path.join(dp.valid_path, 'trn_abn_flowsheet_weird.csv'),
    encoding = 'CP949'
)


valid_tst_sign = pd.read_csv(
    os.path.join(dp.valid_path, 'tst_abn_flowsheet.csv'),
    encoding = 'CP949'
)

valid_tst_sign_weird = pd.read_csv(
    os.path.join(dp.valid_path, 'tst_abn_flowsheet_weird.csv'),
    encoding = 'CP949'
)

valid_trn_nl_sign = pd.read_csv(
    os.path.join(dp.valid_path, 'trn_nl_flowsheet.csv')
)

valid_tst_nl_sign = pd.read_csv(
    os.path.join(dp.valid_path, 'tst_nl_flowsheet.csv')
)

valid_trn_abn_sample = pd.read_csv(
    '/data/datasets/rrs-data/10yrs_samples/10yrs_trn_abn_200.csv'
)

valid_tst_abn_sample = pd.read_csv(
    '/data/datasets/rrs-data/10yrs_samples/10yrs_tst_abn_200.csv'
)

valid_nl_sample = pd.read_csv(
    '/data/datasets/rrs-data/10yrs_samples/10yrs_trn_nl_sample.csv'
)




In [3]:
filter_time = ['ER', 'intu', 'x', 'PED', 'intu<48hr', 'ped', 'post-op', np.nan]
valid_trn_abn_sample_filter = valid_trn_abn_sample[~valid_trn_abn_sample['detection_time'].isin(filter_time)]
valid_trn_abn_sample_filter = valid_trn_abn_sample_filter[['patient_id', 'event_time', 'detection_time', 'gender', 'birthday']]
valid_trn_abn_sample_filter

Unnamed: 0,patient_id,event_time,detection_time,gender,birthday
1,CSYR0JDA,2015-11-08 1:53,2015-11-07 16:00,F,19460930
3,GLP4GNKW,2016-01-23 19:28,2016-01-22 11:30,M,19570907
6,KKIQQZAT,2015-12-19 16:40,2015-12-18 17:05,M,19690617
8,YOYWOGBG,2014-10-18 20:47,2014-10-16 8:00,M,19460510
10,ZH729YUP,2014-02-20 10:45,2014-02-20 10:00,M,19470520
...,...,...,...,...,...
190,GPU1APUY,2014-09-27 15:15,2014-09-25 8:30,M,19610802
191,TI5LIME7,2014-02-07 11:58,2014-02-07 8:00,F,19660113
192,VZ551HO6,2010-07-21 9:19,2010-07-20 8:30,M,19340502
194,OCBYNZGD,2015-05-08 14:16,2015-05-08 13:10,F,19361229


In [4]:
filter_time = ['post-op', 'intubation', 'intubation ','intu<48hr', 'intu', 'x', 'ER', 'ped', 'PED', np.nan]
valid_tst_abn_sample_filter = valid_tst_abn_sample[~valid_tst_abn_sample['detection_time'].isin(filter_time)]
valid_tst_abn_sample_filter = valid_tst_abn_sample_filter[['patient_id', 'event_time', 'detection_time', 'gender', 'birthday']]
valid_tst_abn_sample_filter

Unnamed: 0,patient_id,event_time,detection_time,gender,birthday
1,B6C79PUT,2012-07-05 21:47,2012-07-05 16:00,F,19411117
2,BKIMZ4N0,2013-06-22 2:48,2013-06-21 2:30,M,19390115
3,BRD8WZWE,2015-12-19 11:38,2015-12-19 11:20,F,19471116
4,BROKS3HV,2015-03-21 13:23,2015-03-21 13:00,M,19370130
5,C2APLJG5,2015-09-04 15:17,2015-08-20 12:10,F,19220720
...,...,...,...,...,...
190,SKEDU6EG,2015-10-22 10:32,2015-10-20 16:00,M,19370410
191,COHCU2OQ,2011-09-14 20:30,2011-09-14 16:00,M,19380114
194,AMW0IOX4,2011-01-01 16:55,2010-12-26 11:00,M,19311220
196,SZGRZC0F,2012-12-25 13:42,2012-12-23 21:30,F,19831228


In [5]:
valid_event = pd.concat([valid_trn_abn_sample_filter, valid_tst_abn_sample_filter])
valid_event

Unnamed: 0,patient_id,event_time,detection_time,gender,birthday
1,CSYR0JDA,2015-11-08 1:53,2015-11-07 16:00,F,19460930
3,GLP4GNKW,2016-01-23 19:28,2016-01-22 11:30,M,19570907
6,KKIQQZAT,2015-12-19 16:40,2015-12-18 17:05,M,19690617
8,YOYWOGBG,2014-10-18 20:47,2014-10-16 8:00,M,19460510
10,ZH729YUP,2014-02-20 10:45,2014-02-20 10:00,M,19470520
...,...,...,...,...,...
190,SKEDU6EG,2015-10-22 10:32,2015-10-20 16:00,M,19370410
191,COHCU2OQ,2011-09-14 20:30,2011-09-14 16:00,M,19380114
194,AMW0IOX4,2011-01-01 16:55,2010-12-26 11:00,M,19311220
196,SZGRZC0F,2012-12-25 13:42,2012-12-23 21:30,F,19831228


In [6]:
verbose = True

valid_sign = pd.concat([valid_trn_sign, valid_tst_sign])

valid_sign['patient_id'] = valid_sign['patient'].apply(lambda x: x[:8])

valid_sign = valid_sign[valid_sign['patient_id'].isin(valid_event['patient_id'])]

valid_sign = valid_sign.rename(columns={
'Temp': 'BT'
})  
valid_sign = valid_sign.drop(['event_time'], axis = 1)

valid_sign['measurement_time'] = pd.to_datetime(valid_sign['measurement_time'])

# filter vital sign 
valid_sign = filter_sign(valid_sign, 'patient_id', time = 'measurement_time')

In [7]:
valid_sign_nl = pd.concat([valid_trn_nl_sign, valid_tst_nl_sign])
valid_sign_nl['patient_id'] = valid_sign_nl['patient'].apply(lambda x: x[:8])
valid_sign_nl = valid_sign_nl[valid_sign_nl['patient_id'].isin(valid_nl_sample['patient_id'])]

valid_sign_nl = valid_sign_nl.rename(columns={
'Temp': 'BT'
})  

valid_sign_nl['measurement_time'] = pd.to_datetime(valid_sign_nl['measurement_time'])

# filter vital sign 
valid_sign_nl = filter_sign(valid_sign_nl, 'patient_id', time = 'measurement_time')


In [8]:

# join two tabels
valid_data = pd.merge(valid_sign, valid_event, left_on = 'patient_id', right_on = 'patient_id', how = 'left')

valid_data['event_time'] = pd.to_datetime(valid_data['event_time'])
valid_data['adjusted_time'] = pd.to_datetime(valid_data['adjusted_time'])


In [9]:
#valid_data['detection_time'].unique()
valid_data['detection_time'][valid_data['detection_time'] == '2012-05-30013:30'] = '2012-05-30 13:30'
valid_data['detection_time'][valid_data['detection_time'] == '2012-02-03 14-55'] = '2012-02-03 14:55'



In [10]:
valid_data['detection_time'] = pd.to_datetime(valid_data['detection_time'].apply(lambda x: str(x)))


In [11]:
valid_nl = pd.merge(valid_sign_nl, valid_nl_sample[['patient_id', 'gender', 'birthday']], left_on = 'patient_id', right_on = 'patient_id', how = 'left')
valid_nl[~valid_nl.duplicated('patient_id')]

Unnamed: 0,patient_id,measurement_time,patient,adjusted_time,HR,RR,SBP,SaO2,BT,gender,birthday
0,A1YFM8RK,2016-12-14 17:00:00,A1YFM8RK_20161214,2016-12-14 17:00:00,72.0,18.0,134.5,99.0,36.6,M,19440428
56,A2URZ0WO,2013-06-06 15:00:00,A2URZ0WO_20130605,2013-06-06 15:00:00,64.0,15.0,151.0,100.0,36.1,M,19511108
114,A466E7ZP,2014-11-27 14:00:00,A466E7ZP_20141126,2014-11-27 14:00:00,99.0,23.0,90.0,100.0,36.4,F,19590408
149,A7L184WN,2015-06-03 19:00:00,A7L184WN_20150602,2015-06-03 19:00:00,62.0,14.0,106.0,98.0,36.4,F,19380104
203,A7OD133R,2014-08-08 22:00:00,A7OD133R_20140808,2014-08-08 22:00:00,90.0,25.0,187.0,100.0,36.0,F,19340926
...,...,...,...,...,...,...,...,...,...,...,...
475779,ZSS6SCRV,2015-11-17 08:00:00,ZSS6SCRV_20151117,2015-11-17 08:00:00,80.0,20.0,109.0,100.0,36.3,M,19470208
475892,ZSWGM94V,2015-05-20 15:00:00,ZSWGM94V_20150514,2015-05-20 15:00:00,101.0,14.0,118.0,95.0,36.3,M,19390110
475950,ZT62F5L9,2013-09-09 22:00:00,ZT62F5L9_20130909,2013-09-09 22:00:00,101.0,29.0,94.0,100.0,37.4,F,19430528
476049,ZUPOSLJF,2011-08-19 12:00:00,ZUPOSLJF_20110818,2011-08-19 12:00:00,124.0,17.0,97.0,95.0,36.7,F,19380705


In [12]:
# target
valid_data = get_target_df(valid_data)
#valid_data.target.value_counts()

In [13]:
valid_data[valid_data['target'] == 0][['adjusted_time', 'event_time', 'detection_time']]

Unnamed: 0,adjusted_time,event_time,detection_time
225,2009-02-17 20:00:00,2009-03-04 12:32:00,2009-03-04 09:50:00
226,2009-02-17 21:00:00,2009-03-04 12:32:00,2009-03-04 09:50:00
227,2009-02-17 22:00:00,2009-03-04 12:32:00,2009-03-04 09:50:00
228,2009-02-17 23:00:00,2009-03-04 12:32:00,2009-03-04 09:50:00
229,2009-02-17 23:00:00,2009-03-04 12:32:00,2009-03-04 09:50:00
...,...,...,...
224561,2010-11-27 03:00:00,2010-11-27 21:54:00,2010-11-27 08:00:00
224562,2010-11-27 04:00:00,2010-11-27 21:54:00,2010-11-27 08:00:00
224563,2010-11-27 05:00:00,2010-11-27 21:54:00,2010-11-27 08:00:00
224564,2010-11-27 06:00:00,2010-11-27 21:54:00,2010-11-27 08:00:00


In [14]:
valid_nl = get_target_df(valid_nl, False)
#get_target_df(valid_data)
valid_nl.target.value_counts()

0    476330
Name: target, dtype: int64

In [15]:
valid_data['gender'] = valid_data['gender'].astype('category').cat.codes
valid_data['TS'] = make_timestamp(valid_data, index = 'patient_id')
valid_data['birthday'] = pd.to_datetime(valid_data['birthday'])
valid_data['Age'] = (valid_data['event_time'] - valid_data['birthday']).astype('timedelta64[D]')
valid_data['Age'] = (valid_data['Age'] / (366)).round().astype(int)
valid_data = valid_data.drop(['patient', 'birthday'], axis = 1)
valid_data = valid_data.rename(columns = {
'gender': 'Gender'
})
print('-' * 50)
print('CBC data')
valid_blood_trn_cbc = pd.read_csv(
    os.path.join(dp.valid_path, 'trn_abn_cbc.csv'),
    encoding = 'CP949'
)
valid_blood_tst_cbc = pd.read_csv(
    os.path.join(dp.valid_path, 'tst_abn_cbc.csv'),
    encoding = 'CP949'
)

--------------------------------------------------
CBC data


In [16]:
valid_nl['gender'] = valid_nl['gender'].astype('category').cat.codes
valid_nl['TS'] = make_timestamp(valid_nl, index = 'patient_id')
valid_nl['birthday'] = pd.to_datetime(valid_nl['birthday'])
valid_nl['Age'] = (valid_nl['measurement_time'] - valid_nl['birthday']).astype('timedelta64[D]')
valid_nl['Age'] = (valid_nl['Age'] / (366)).round().astype(int)
valid_nl = valid_nl.drop(['patient', 'birthday'], axis = 1)
valid_nl = valid_nl.rename(columns = {
    'gender': 'Gender'
})


In [17]:
print('-' * 50)
print('CBC data')
valid_blood_trn_cbc_nl = pd.read_csv(
    os.path.join(dp.valid_path, 'trn_nl_cbc.csv'),
    encoding = 'CP949'
)
valid_blood_trn_cbc_nl

--------------------------------------------------
CBC data


Unnamed: 0,patient,adjusted_time,Hgb,Platelet Count,WBC Count,measurement_time
0,A01J9AP0_20150612,2015-06-12 20:00:00,7.7,12.0,4.0,2015-06-12 19:31:00
1,A01J9AP0_20150612,2015-06-12 21:00:00,7.7,12.0,4.0,2015-06-12 19:31:00
2,A01J9AP0_20150612,2015-06-12 22:00:00,7.7,12.0,4.0,2015-06-12 19:31:00
3,A01J9AP0_20150612,2015-06-12 23:00:00,7.7,12.0,4.0,2015-06-12 19:31:00
4,A01J9AP0_20150612,2015-06-13 00:00:00,7.7,12.0,4.0,2015-06-12 19:31:00
...,...,...,...,...,...,...
4764645,ZZZZRQ96_20180513,2018-05-20 02:00:00,9.4,82.0,16.5,2018-05-19 05:43:00
4764646,ZZZZRQ96_20180513,2018-05-20 03:00:00,9.4,82.0,16.5,2018-05-19 05:43:00
4764647,ZZZZRQ96_20180513,2018-05-20 04:00:00,9.4,82.0,16.5,2018-05-19 05:43:00
4764648,ZZZZRQ96_20180513,2018-05-20 05:00:00,9.4,82.0,16.5,2018-05-19 05:43:00


In [18]:
  valid_blood_cbc = pd.concat([valid_blood_trn_cbc, valid_blood_tst_cbc])
  valid_blood_cbc['patient_id'] = valid_blood_cbc['patient'].apply(lambda x: x[:8])
  valid_blood_cbc = valid_blood_cbc[valid_blood_cbc['patient_id'].isin(valid_event['patient_id'])]
  valid_blood_cbc = valid_blood_cbc.drop(['event_time', 'measurement_time', 'patient'], axis = 1)
  
  # filter cbc value
  valid_blood_cbc = adjust_cbc(valid_blood_cbc)
  valid_blood_cbc = valid_blood_cbc.rename(columns = {
    'Platelet Count': 'platelet'
  })
  ##########
  print('-' * 50)
  print('Chem data')
  valid_blood_trn_lab = pd.read_csv(
    os.path.join(dp.valid_path, 'trn_abn_chem.csv'),
    encoding = 'CP949'
  )
  valid_blood_tst_lab = pd.read_csv(
    os.path.join(dp.valid_path, 'tst_abn_chem.csv'),
    encoding = 'CP949'
  )

  valid_blood_chem = pd.concat([valid_blood_trn_lab, valid_blood_tst_lab])
  valid_blood_chem['patient_id'] = valid_blood_chem['patient'].apply(lambda x: x[:8])
  valid_blood_chem = valid_blood_chem[valid_blood_chem['patient_id'].isin(valid_event['patient_id'])]
  valid_blood_chem = valid_blood_chem.drop(['event_time', 'measurement_time', 'patient'], axis = 1)

  # adjust chem values
  valid_blood_chem = adjust_chem(valid_blood_chem)
  valid_blood_chem = valid_blood_chem.rename(columns = {
    'Total Bilirubin': 'Total bilirubin',
    'Total Protein': 'Total protein',
    'Total Calcium': 'Total calcium',
    'Alkaline Phosphatase' : 'Alkaline phosphatase',
    'Creatinine': 'Creatinin'
  })
  if verbose:
    print('Glucose freq: ')
    print( valid_blood_chem['Glucose'].value_counts())
    print('Sodium counts: ')
    print( valid_blood_chem['Sodium'].value_counts())
    print('Potassium counts: ')
    print( valid_blood_chem['Potassium'].value_counts())
    print('Chloride counts: ')
    print( valid_blood_chem['Chloride'].value_counts())

  # merge blood data

  valid_blood = pd.merge(valid_blood_cbc, valid_blood_chem, how = 'left', \
    on = ['patient_id', 'adjusted_time'])

  blood_properties = ['WBC Count', 'Platelet Count', 'Hgb','BUN', 'creatinin', 'Glucose', 
                  'Sodium', 'Potassium', 'Chloride', 'Total protein', 'Total bilirubin',
                  'Albumin', 'CRP','Total calcium', 'Lactate', 'Alkaline phosphatase',
                  'AST', 'ALT']

  #for p in blood_properties:
  #  valid_blood[p].fillna(round(valid_blood[p].mean(), 1), inplace=True)


--------------------------------------------------
Chem data
Glucose freq: 
2    9792
1     789
0      73
Name: Glucose, dtype: int64
Sodium counts: 
1    6030
0    3963
2     661
Name: Sodium, dtype: int64
Potassium counts: 
1    7311
0    2692
2     651
Name: Potassium, dtype: int64
Chloride counts: 
1    7829
0    1827
2     998
Name: Chloride, dtype: int64


In [19]:
valid_blood_cbc_nl = valid_blood_trn_cbc_nl#pd.concat([valid_blood_trn_cbc_nl])
valid_blood_cbc_nl['patient_id'] = valid_blood_cbc_nl['patient'].apply(lambda x: x[:8])
valid_blood_cbc_nl = valid_blood_cbc_nl[valid_blood_cbc_nl['patient_id'].isin(valid_nl_sample['patient_id'])]
#  valid_blood_cbc_nl = valid_blood_cbc_nl.drop(['measurement_time', 'patient'], axis = 1)
  
  # filter cbc value
valid_blood_cbc_nl = adjust_cbc(valid_blood_cbc_nl)
valid_blood_cbc_nl = valid_blood_cbc_nl.rename(columns = {
'Platelet Count': 'platelet'
})
valid_blood_cbc_nl 

Unnamed: 0,patient,adjusted_time,Hgb,platelet,WBC Count,measurement_time,patient_id
8712,A1YFM8RK_20150910,2015-09-10 15:00:00,11.5,223.0,1,2015-09-10 14:35:00,A1YFM8RK
8713,A1YFM8RK_20151001,2015-10-01 16:00:00,11.2,245.0,1,2015-10-01 15:56:00,A1YFM8RK
8714,A1YFM8RK_20151022,2015-10-22 16:00:00,10.7,189.0,0,2015-10-22 16:16:00,A1YFM8RK
8715,A1YFM8RK_20151022,2015-10-22 17:00:00,10.7,189.0,0,2015-10-22 16:16:00,A1YFM8RK
8716,A1YFM8RK_20151022,2015-10-22 18:00:00,10.7,189.0,0,2015-10-22 16:16:00,A1YFM8RK
...,...,...,...,...,...,...,...
4760584,ZZF3O2KG_20100629,2010-07-05 08:00:00,11.5,262.0,0,2010-07-03 10:58:00,ZZF3O2KG
4760585,ZZF3O2KG_20100629,2010-07-05 09:00:00,11.5,262.0,0,2010-07-03 10:58:00,ZZF3O2KG
4760586,ZZF3O2KG_20100629,2010-07-05 10:00:00,11.5,262.0,0,2010-07-03 10:58:00,ZZF3O2KG
4760587,ZZF3O2KG_20100629,2010-07-05 11:00:00,11.5,262.0,0,2010-07-03 10:58:00,ZZF3O2KG


In [20]:
##########
print('-' * 50)
print('Chem data')
valid_blood_trn_lab_nl = pd.read_csv(
    os.path.join(dp.valid_path, 'trn_nl_chem.csv'),
    encoding = 'CP949'
)

valid_blood_chem_nl = valid_blood_trn_lab_nl #pd.concat([valid_blood_trn_lab, valid_blood_tst_lab])
valid_blood_chem_nl['patient_id'] = valid_blood_chem_nl['patient'].apply(lambda x: x[:8])
valid_blood_chem_nl = valid_blood_chem_nl[valid_blood_chem_nl['patient_id'].isin(valid_nl_sample['patient_id'])]
valid_blood_chem_nl = valid_blood_chem_nl.drop(['measurement_time'], axis = 1)
valid_blood_chem_nl = valid_blood_chem_nl.rename(columns={
    'Chloride (염소)': 'Chloride',
    'CRP (Quantitation)-임상화학': 'CRP'
})


--------------------------------------------------
Chem data


In [21]:
# adjust chem values
valid_blood_chem_nl = adjust_chem(valid_blood_chem_nl)
valid_blood_chem_nl = valid_blood_chem_nl.rename(columns = {
'Total Bilirubin': 'Total bilirubin',
'Total Protein': 'Total protein',
'Total Calcium': 'Total calcium',
'Alkaline Phosphatase' : 'Alkaline phosphatase',
'Creatinine': 'Creatinin'
})

#for p in blood_properties:
#  valid_blood[p].fillna(round(valid_blood[p].mean(), 1), inplace=True)


In [22]:

valid_blood_nl = pd.merge(valid_blood_cbc_nl, valid_blood_chem_nl, how = 'left', \
on = ['patient_id', 'adjusted_time'])

blood_properties = ['WBC Count', 'Platelet Count', 'Hgb','BUN', 'Creatinin', 'Glucose', 
                'Sodium', 'Potassium', 'Chloride', 'Total protein', 'Total bilirubin',
                'Albumin', 'CRP','Total calcium', 'Lactate', 'Alkaline phosphatase',
                'AST', 'ALT']
valid_blood_nl

Unnamed: 0,patient_x,adjusted_time,Hgb,platelet,WBC Count,measurement_time,patient_id,patient_y,ALT,AST,...,CRP,Chloride,Creatinin,Glucose,Lactate,Potassium,Sodium,Total bilirubin,Total calcium,Total protein
0,A1YFM8RK_20150910,2015-09-10 15:00:00,11.5,223.0,1,2015-09-10 14:35:00,A1YFM8RK,,,,...,,,,,,,,,,
1,A1YFM8RK_20151001,2015-10-01 16:00:00,11.2,245.0,1,2015-10-01 15:56:00,A1YFM8RK,,,,...,,,,,,,,,,
2,A1YFM8RK_20151022,2015-10-22 16:00:00,10.7,189.0,0,2015-10-22 16:16:00,A1YFM8RK,,,,...,,,,,,,,,,
3,A1YFM8RK_20151022,2015-10-22 17:00:00,10.7,189.0,0,2015-10-22 16:16:00,A1YFM8RK,,,,...,,,,,,,,,,
4,A1YFM8RK_20151022,2015-10-22 18:00:00,10.7,189.0,0,2015-10-22 16:16:00,A1YFM8RK,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
296287,ZZF3O2KG_20100629,2010-07-05 08:00:00,11.5,262.0,0,2010-07-03 10:58:00,ZZF3O2KG,,,,...,,,,,,,,,,
296288,ZZF3O2KG_20100629,2010-07-05 09:00:00,11.5,262.0,0,2010-07-03 10:58:00,ZZF3O2KG,,,,...,,,,,,,,,,
296289,ZZF3O2KG_20100629,2010-07-05 10:00:00,11.5,262.0,0,2010-07-03 10:58:00,ZZF3O2KG,,,,...,,,,,,,,,,
296290,ZZF3O2KG_20100629,2010-07-05 11:00:00,11.5,262.0,0,2010-07-03 10:58:00,ZZF3O2KG,,,,...,,,,,,,,,,


In [23]:

valid_blood['adjusted_time'] = pd.to_datetime(valid_blood['adjusted_time'])




In [24]:
# merge blood data into valid_data
valid_data = get_merge_data(valid_data, valid_blood)


In [25]:
valid_data = valid_data.rename(columns = {
    'patient_id': 'Patient'
})
valid_data['is_abn'] = 1



In [26]:
valid_data.to_csv(os.path.join(dp.valid_path, 'valid_add_merge.csv'), index = False)



In [27]:
window_len = 8

var_list = list(set(valid_data.columns)
                - set([ 'Patient', 'adjusted_time', 'detection_time', 'measurement_time', 'event_time', 'target', 'is_abn']) )

In [30]:
var_list

['AST',
 'BUN',
 'Total protein',
 'Total bilirubin',
 'Hgb',
 'HR',
 'RR',
 'Alkaline phosphatase',
 'TS',
 'Sodium',
 'Total calcium',
 'platelet',
 'Creatinin',
 'CRP',
 'Glucose',
 'WBC Count',
 'BT',
 'SaO2',
 'ALT',
 'Lactate',
 'Albumin',
 'SBP',
 'Gender',
 'Potassium',
 'Chloride',
 'Age']

In [31]:
valid_blood_nl['adjusted_time'] = pd.to_datetime(valid_blood_nl['adjusted_time'])
valid_nl['adjusted_time'] = pd.to_datetime(valid_nl['adjusted_time'])

# merge blood data into valid_data
valid_data_nl = get_merge_data(valid_nl, valid_blood_nl)
 

In [32]:
valid_data_nl = valid_data_nl.drop(['measurement_time_y', 'patient_x', 'patient_y'], axis = 1)
valid_data_nl = valid_data_nl.rename(columns={
    'measurement_time_x': 'measurement_time'
})

In [33]:
valid_data_nl.columns

Index(['patient_id', 'adjusted_time', 'measurement_time', 'HR', 'RR', 'SBP',
       'SaO2', 'BT', 'Gender', 'target', 'TS', 'Age', 'Hgb', 'platelet',
       'WBC Count', 'ALT', 'AST', 'Albumin', 'Alkaline phosphatase', 'BUN',
       'CRP', 'Chloride', 'Creatinin', 'Glucose', 'Lactate', 'Potassium',
       'Sodium', 'Total bilirubin', 'Total calcium', 'Total protein'],
      dtype='object')

In [34]:
valid_data_nl = valid_data_nl.rename(columns = {
    'patient_id': 'Patient'
})
valid_data_nl['is_abn'] = 0

#valid_data_nl.to_csv(os.path.join(dp.valid_path, 'valid_nl_merge.csv'), index = False)


In [35]:
var_list

['AST',
 'BUN',
 'Total protein',
 'Total bilirubin',
 'Hgb',
 'HR',
 'RR',
 'Alkaline phosphatase',
 'TS',
 'Sodium',
 'Total calcium',
 'platelet',
 'Creatinin',
 'CRP',
 'Glucose',
 'WBC Count',
 'BT',
 'SaO2',
 'ALT',
 'Lactate',
 'Albumin',
 'SBP',
 'Gender',
 'Potassium',
 'Chloride',
 'Age']

In [40]:
len(var_list)

26

In [36]:
valid_nl

Unnamed: 0,patient_id,measurement_time,adjusted_time,HR,RR,SBP,SaO2,BT,Gender,target,TS,Age
0,A1YFM8RK,2016-12-14 17:00:00,2016-12-14 17:00:00,72.0,18.0,134.5,99.0,36.6,1,0,1,47
1,A1YFM8RK,2016-12-14 18:00:00,2016-12-14 18:00:00,62.0,18.0,135.0,99.0,37.0,1,0,2,47
2,A1YFM8RK,2016-12-14 19:00:00,2016-12-14 19:00:00,62.0,22.0,123.0,100.0,37.1,1,0,3,47
3,A1YFM8RK,2016-12-14 20:00:00,2016-12-14 20:00:00,64.0,19.0,118.0,100.0,36.9,1,0,4,47
4,A1YFM8RK,2016-12-14 21:00:00,2016-12-14 21:00:00,69.0,14.0,120.0,100.0,36.6,1,0,5,47
...,...,...,...,...,...,...,...,...,...,...,...,...
476325,ZYEBEIQI,2014-06-17 20:00:00,2014-06-17 13:00:00,103.0,22.0,106.0,98.0,36.4,1,0,168,44
476326,ZYEBEIQI,2014-06-17 21:00:00,2014-06-17 13:00:00,103.0,22.0,106.0,98.0,36.4,1,0,169,44
476327,ZYEBEIQI,2014-06-17 22:00:00,2014-06-17 13:00:00,103.0,22.0,106.0,98.0,36.4,1,0,170,44
476328,ZYEBEIQI,2014-06-17 23:00:00,2014-06-17 13:00:00,103.0,22.0,106.0,98.0,36.4,1,0,171,44


In [37]:
valid_data_nl['detection_time']= np.nan #valid_data_nl['measurement_time']
valid_data_nl['event_time']= np.nan #valid_data_nl['measurement_time']


In [41]:
var_list = list(set(valid_data.columns)
                - set([ 'Patient', 'adjusted_time', 'detection_time', 'measurement_time', 'event_time', 'target', 'is_abn']) )



총 환자수: 238
Window 크기: 8
-------------------- Making Data --------------------
=> 100번 째 환자
==> 200번 째 환자
-------------------- Done --------------------


In [42]:
valid_data = valid_data[((valid_data.detection_time.dt.day - valid_data.measurement_time.dt.day) < 5)]


In [43]:
valid_data.target.value_counts()

0.0    18792
1.0     3520
Name: target, dtype: int64

In [44]:
valid_seq_data = make_sequence_data(valid_data, window_len = window_len, var_list = var_list, index = 'Patient')


총 환자수: 230
Window 크기: 8
-------------------- Making Data --------------------
=> 100번 째 환자
==> 200번 째 환자
-------------------- Done --------------------


In [43]:
var_list

['Sodium',
 'SBP',
 'HR',
 'Gender',
 'Glucose',
 'Lactate',
 'Total bilirubin',
 'AST',
 'SaO2',
 'Alkaline phosphatase',
 'platelet',
 'BT',
 'Creatinin',
 'Age',
 'TS',
 'Total calcium',
 'ALT',
 'Hgb',
 'RR',
 'Chloride',
 'WBC Count',
 'Potassium',
 'BUN',
 'Total protein',
 'Albumin',
 'CRP']

In [45]:
valid_data_nl = valid_data_nl.groupby('Patient', as_index = False).tail(36)

In [48]:
window_len = 8

var_list = list(set(valid_data_nl.columns)
                - set([ 'Patient', 'adjusted_time', 'detection_time', 'measurement_time', 'event_time', 'target', 'is_abn']) )

In [49]:
len(var_list)

26

In [50]:
valid_seq_data_nl = make_sequence_data(valid_data_nl, window_len = window_len, var_list = var_list, index = 'Patient')

총 환자수: 597
Window 크기: 8
-------------------- Making Data --------------------
=> 100번 째 환자
==> 200번 째 환자
===> 300번 째 환자
====> 400번 째 환자
=====> 500번 째 환자
-------------------- Done --------------------


In [52]:
valid_seq = pd.concat([valid_seq_data, valid_seq_data_nl])

In [53]:
path = os.path.join(dp.valid_path, 'valid_add_seq.pickle')
with open(path, 'wb') as f:
    pickle.dump(valid_seq, f)

In [54]:
res_data = make_2d_data(valid_seq, var_list, \
    output_path = dp.valid_path, output_file = 'valid_add_final.csv')

column_name_multi:  ['AST-7', 'BUN-7', 'Total protein-7', 'Total bilirubin-7', 'Hgb-7', 'HR-7', 'RR-7', 'Alkaline phosphatase-7', 'TS-7', 'Sodium-7', 'Total calcium-7', 'platelet-7', 'Creatinin-7', 'CRP-7', 'Glucose-7', 'WBC Count-7', 'BT-7', 'SaO2-7', 'ALT-7', 'Lactate-7', 'Albumin-7', 'SBP-7', 'Gender-7', 'Potassium-7', 'Chloride-7', 'Age-7', 'AST-6', 'BUN-6', 'Total protein-6', 'Total bilirubin-6', 'Hgb-6', 'HR-6', 'RR-6', 'Alkaline phosphatase-6', 'TS-6', 'Sodium-6', 'Total calcium-6', 'platelet-6', 'Creatinin-6', 'CRP-6', 'Glucose-6', 'WBC Count-6', 'BT-6', 'SaO2-6', 'ALT-6', 'Lactate-6', 'Albumin-6', 'SBP-6', 'Gender-6', 'Potassium-6', 'Chloride-6', 'Age-6', 'AST-5', 'BUN-5', 'Total protein-5', 'Total bilirubin-5', 'Hgb-5', 'HR-5', 'RR-5', 'Alkaline phosphatase-5', 'TS-5', 'Sodium-5', 'Total calcium-5', 'platelet-5', 'Creatinin-5', 'CRP-5', 'Glucose-5', 'WBC Count-5', 'BT-5', 'SaO2-5', 'ALT-5', 'Lactate-5', 'Albumin-5', 'SBP-5', 'Gender-5', 'Potassium-5', 'Chloride-5', 'Age-5', '

In [1]:
import pandas as pd
valid_final = pd.read_csv('/media/nghia/Nguyen NghiaW/RRS-2021/20210322_RRS/RRS/10yrs_refined_data/valid_add_final.csv')
valid_final

  interactivity=interactivity, compiler=compiler, result=result)


Unnamed: 0,AST-7,BUN-7,Total protein-7,Total bilirubin-7,Hgb-7,HR-7,RR-7,Alkaline phosphatase-7,Sodium-7,Total calcium-7,...,RoC_Chloride-5,RoC_Chloride-4,RoC_Chloride-3,RoC_Chloride-2,RoC_Chloride-1,target,Patient,measurement_time,detection_time,event_time
0,,,,,,103.0,24.0,,,,...,,,,,,1.0,ABG1W8Y6,2009-09-28 12:00:00,2009-09-28 06:00:00,2009-09-28 21:02:00
1,,,,,,91.0,19.0,,,,...,,,,,,1.0,ABG1W8Y6,2009-09-28 13:00:00,2009-09-28 06:00:00,2009-09-28 21:02:00
2,,,,,13.6,118.0,19.0,,,,...,,,,,,0.0,ARIMNPWE,2009-02-17 20:00:00,2009-03-04 09:50:00,2009-03-04 12:32:00
3,,,,,13.6,120.0,21.0,,,,...,,,,,,0.0,ARIMNPWE,2009-02-17 21:00:00,2009-03-04 09:50:00,2009-03-04 12:32:00
4,,,,,13.6,112.0,22.0,,,,...,,,,,,0.0,ARIMNPWE,2009-02-17 22:00:00,2009-03-04 09:50:00,2009-03-04 12:32:00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
37187,,,,,8.2,99.0,29.0,,,,...,,,,,,0.0,ZYEBEIQI,2014-06-17 12:00:00,,
37188,,,,,8.2,103.0,22.0,,,,...,,,,,,0.0,ZYEBEIQI,2014-06-17 13:00:00,,
37189,,,,,8.2,103.0,22.0,,,,...,,,,,,0.0,ZYEBEIQI,2014-06-17 13:00:00,,
37190,,,,,8.2,103.0,22.0,,,,...,,,,,,0.0,ZYEBEIQI,2014-06-17 13:00:00,,


In [2]:
import pickle
valid_f = pickle.load(open('/media/nghia/Nguyen NghiaW/RRS-2021/20210322_RRS/RRS/10yrs_refined_data/valid_add_final.pickle', 'rb'))
valid_f

Unnamed: 0,AST-7,BUN-7,Total protein-7,Total bilirubin-7,Hgb-7,HR-7,RR-7,Alkaline phosphatase-7,Sodium-7,Total calcium-7,...,RoC_Chloride-5,RoC_Chloride-4,RoC_Chloride-3,RoC_Chloride-2,RoC_Chloride-1,target,Patient,measurement_time,detection_time,event_time
0,,,,,,103.0,24.0,,,,...,,,,,,1.0,ABG1W8Y6,2009-09-28 12:00:00,2009-09-28 06:00:00,2009-09-28 21:02:00
1,,,,,,91.0,19.0,,,,...,,,,,,1.0,ABG1W8Y6,2009-09-28 13:00:00,2009-09-28 06:00:00,2009-09-28 21:02:00
2,,,,,13.6,118.0,19.0,,,,...,,,,,,0.0,ARIMNPWE,2009-02-17 20:00:00,2009-03-04 09:50:00,2009-03-04 12:32:00
3,,,,,13.6,120.0,21.0,,,,...,,,,,,0.0,ARIMNPWE,2009-02-17 21:00:00,2009-03-04 09:50:00,2009-03-04 12:32:00
4,,,,,13.6,112.0,22.0,,,,...,,,,,,0.0,ARIMNPWE,2009-02-17 22:00:00,2009-03-04 09:50:00,2009-03-04 12:32:00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
37187,,,,,8.2,99.0,29.0,,,,...,,,,,,0.0,ZYEBEIQI,2014-06-17 12:00:00,NaT,NaT
37188,,,,,8.2,103.0,22.0,,,,...,,,,,,0.0,ZYEBEIQI,2014-06-17 13:00:00,NaT,NaT
37189,,,,,8.2,103.0,22.0,,,,...,,,,,,0.0,ZYEBEIQI,2014-06-17 13:00:00,NaT,NaT
37190,,,,,8.2,103.0,22.0,,,,...,,,,,,0.0,ZYEBEIQI,2014-06-17 13:00:00,NaT,NaT


In [5]:
import os
import pandas as pd
import numpy as np
import pickle

from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

import torch
import torch.nn as nn

import tensorflow as tf
from tensorflow.keras.models import model_from_json
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras import layers
from tensorflow.keras.layers import Flatten, Dense, Embedding
from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras import metrics
from tensorflow.keras.callbacks import Callback, EarlyStopping

#from rrs_kit.DataClass import DataPath, VarSet
import mews
from mews import load_modeling_data

ModuleNotFoundError: No module named 'mews'