In [None]:
'''
    Preprocessing AJ Ventilator Parameters...

        (1) Input
            (1-1) AJ_total_data.csv (Vital-sign + etc)   
            (1-2) AJ_vent.xlsx (Ventilator parameters)

        (2) Output
            (2-1) 
            


'''

In [8]:
import pandas as pd


data = pd.read_excel('./raw_data/AJ_vent.xlsx')
data

Unnamed: 0,timestamp,observation,observation_value,observation_value_unit
0,20190622152801,VITAL_EVITAPmean,8.00,cmH2O
1,20190622152801,VITAL_EVITAPEEP,5.00,cmH2O
2,20190622152801,VITAL_EVITAPpeak,14.00,cmH2O
3,20190622152801,VITAL_EVITATV,0.42,L
4,20190622152801,VITAL_EVITAMV,7.90,L/min
...,...,...,...,...
40472,20190625190201,VITAL_EVITAPpeak,7.00,cmH2O
40473,20190625190201,VITAL_EVITATV,0.36,L
40474,20190625190201,VITAL_EVITAMV,12.30,L/min
40475,20190625190201,VITAL_EVITARR,30.00,/min


In [9]:
import tqdm

ts_list = []

for idx, value in tqdm.tqdm(data.iterrows()):
    ts = str(value[0])[:-2]

    ts_list.append(ts)

data['timestamp'] = ts_list
data

40477it [00:01, 38411.74it/s]


Unnamed: 0,timestamp,observation,observation_value,observation_value_unit
0,201906221528,VITAL_EVITAPmean,8.00,cmH2O
1,201906221528,VITAL_EVITAPEEP,5.00,cmH2O
2,201906221528,VITAL_EVITAPpeak,14.00,cmH2O
3,201906221528,VITAL_EVITATV,0.42,L
4,201906221528,VITAL_EVITAMV,7.90,L/min
...,...,...,...,...
40472,201906251902,VITAL_EVITAPpeak,7.00,cmH2O
40473,201906251902,VITAL_EVITATV,0.36,L
40474,201906251902,VITAL_EVITAMV,12.30,L/min
40475,201906251902,VITAL_EVITARR,30.00,/min


In [10]:
data['timestamp'] = pd.to_datetime(data['timestamp'])
data

Unnamed: 0,timestamp,observation,observation_value,observation_value_unit
0,2019-06-22 15:28:00,VITAL_EVITAPmean,8.00,cmH2O
1,2019-06-22 15:28:00,VITAL_EVITAPEEP,5.00,cmH2O
2,2019-06-22 15:28:00,VITAL_EVITAPpeak,14.00,cmH2O
3,2019-06-22 15:28:00,VITAL_EVITATV,0.42,L
4,2019-06-22 15:28:00,VITAL_EVITAMV,7.90,L/min
...,...,...,...,...
40472,2019-06-25 19:02:00,VITAL_EVITAPpeak,7.00,cmH2O
40473,2019-06-25 19:02:00,VITAL_EVITATV,0.36,L
40474,2019-06-25 19:02:00,VITAL_EVITAMV,12.30,L/min
40475,2019-06-25 19:02:00,VITAL_EVITARR,30.00,/min


In [11]:
set(data['observation'])

{'VITAL_EVITAFiO2',
 'VITAL_EVITAMV',
 'VITAL_EVITAPEEP',
 'VITAL_EVITAPmean',
 'VITAL_EVITAPpeak',
 'VITAL_EVITAPplat',
 'VITAL_EVITARR',
 'VITAL_EVITATV'}

In [12]:
import tqdm

features = []

for idx, value in tqdm.tqdm(data.iterrows()):
    obs = value[1]

    if obs[-4:] == 'FiO2':
        tmp = 'FiO2'
    elif obs[-2:] == 'MV':
        tmp = 'MV'
    elif obs[-4:] == 'PEEP':
        tmp = 'PEEP'
    elif obs[-5:] == 'Pmean':
        tmp = 'Pmean'
    elif obs[-5:] == 'Ppeak':
        tmp = 'Ppeak'
    elif obs[-5:] == 'Pplat':
        tmp = 'Pplat'
    elif obs[-2:] == 'RR':
        tmp = 'RR'
    elif obs[-2:] == 'TV':
        tmp = 'TV'

    features.append(tmp)

data['features'] = features
data

40477it [00:01, 36451.64it/s]


Unnamed: 0,timestamp,observation,observation_value,observation_value_unit,features
0,2019-06-22 15:28:00,VITAL_EVITAPmean,8.00,cmH2O,Pmean
1,2019-06-22 15:28:00,VITAL_EVITAPEEP,5.00,cmH2O,PEEP
2,2019-06-22 15:28:00,VITAL_EVITAPpeak,14.00,cmH2O,Ppeak
3,2019-06-22 15:28:00,VITAL_EVITATV,0.42,L,TV
4,2019-06-22 15:28:00,VITAL_EVITAMV,7.90,L/min,MV
...,...,...,...,...,...
40472,2019-06-25 19:02:00,VITAL_EVITAPpeak,7.00,cmH2O,Ppeak
40473,2019-06-25 19:02:00,VITAL_EVITATV,0.36,L,TV
40474,2019-06-25 19:02:00,VITAL_EVITAMV,12.30,L/min,MV
40475,2019-06-25 19:02:00,VITAL_EVITARR,30.00,/min,RR


In [13]:
set(data['features'])

{'FiO2', 'MV', 'PEEP', 'Pmean', 'Ppeak', 'Pplat', 'RR', 'TV'}

In [16]:
total_df = pd.read_csv('./raw_data/AJ_total_data.csv', parse_dates=['timestamp'])
total_df

Unnamed: 0,timestamp,SBP_NIBP,DBP_NIBP,MBP_NIBP,ASBP,ADBP,AMBP,HR,RR,BT,...,set_TV,MV,actual_VT,set,Ppeak,Pplat,Pmean,PEEP,label,MV_check
0,2019-06-22 11:22:00,166.0,84.0,,,,,,,,...,,,,,,,,,0,1
1,2019-06-22 12:00:00,,,,138.0,45.0,76.0,60.0,30.0,,...,,,,,,,,,0,1
2,2019-06-22 13:00:00,,,,112.0,36.0,66.0,53.0,24.0,37.1,...,,,,,,,,5.0,0,1
3,2019-06-22 14:00:00,,,,127.0,57.0,80.0,53.0,23.0,,...,,,,,,,,,0,1
4,2019-06-22 15:23:00,,,,116.0,51.0,73.0,51.0,18.0,,...,,,,,,,,,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
134,2019-06-28 01:00:00,,,,101.0,48.0,61.0,44.0,24.0,36.3,...,,,,,,,,,1,0
135,2019-06-28 02:00:00,,,,85.0,52.0,60.0,42.0,20.0,36.5,...,,,,,,,,,1,0
136,2019-06-28 03:00:00,,,,81.0,39.0,51.0,39.0,15.0,36.4,...,,,,,,,,,1,0
137,2019-06-28 04:00:00,,,,148.0,122.0,129.0,20.0,19.0,36.6,...,,,,,,,,,1,0


In [17]:
from datetime import timedelta
import numpy as np

row_idx = 0


obs_dict = dict()

f_list = ['FiO2', 'MV', 'PEEP', 'Pmean', 'Ppeak', 'Pplat', 'RR', 'TV']

for i in f_list:
    obs_dict[i] = []


for idx,values in tqdm.tqdm(total_df.iterrows()):

    if row_idx == len(total_df)-1:
        ts = total_df['timestamp'][row_idx]
        ts_next_row = ts + timedelta(hours=1)

    else:
        ts = total_df['timestamp'][row_idx]
        ts_next_row = total_df['timestamp'][row_idx+1]

    for f in f_list:

        tmp_val = np.nan

        tmp_df = list(data[(data['features']==f) & (data['timestamp'] >= ts) & (data['timestamp'] <= ts_next_row)]['observation_value'])
        tmp_val = round(np.mean(tmp_df),1)
    
        obs_dict[f].append(tmp_val)

    row_idx += 1

total_df['FiO2'] = obs_dict['FiO2']
total_df['set_TV'] = obs_dict['TV']
total_df['MV'] = obs_dict['MV']
total_df['Ppeak'] = obs_dict['Ppeak']
total_df['Pplat'] = obs_dict['Pplat']
total_df['Pmean'] = obs_dict['Pmean']
total_df['PEEP'] = obs_dict['PEEP']
total_df

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
139it [00:02, 46.96it/s]


Unnamed: 0,timestamp,SBP_NIBP,DBP_NIBP,MBP_NIBP,ASBP,ADBP,AMBP,HR,RR,BT,...,set_TV,MV,actual_VT,set,Ppeak,Pplat,Pmean,PEEP,label,MV_check
0,2019-06-22 11:22:00,166.0,84.0,,,,,,,,...,,,,,,,,,0,1
1,2019-06-22 12:00:00,,,,138.0,45.0,76.0,60.0,30.0,,...,,,,,,,,,0,1
2,2019-06-22 13:00:00,,,,112.0,36.0,66.0,53.0,24.0,37.1,...,,,,,,,,,0,1
3,2019-06-22 14:00:00,,,,127.0,57.0,80.0,53.0,23.0,,...,0.3,7.4,,,23.0,,8.7,5.0,0,1
4,2019-06-22 15:23:00,,,,116.0,51.0,73.0,51.0,18.0,,...,0.5,12.8,,,14.8,,8.7,5.4,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
134,2019-06-28 01:00:00,,,,101.0,48.0,61.0,44.0,24.0,36.3,...,,,,,,,,,1,0
135,2019-06-28 02:00:00,,,,85.0,52.0,60.0,42.0,20.0,36.5,...,,,,,,,,,1,0
136,2019-06-28 03:00:00,,,,81.0,39.0,51.0,39.0,15.0,36.4,...,,,,,,,,,1,0
137,2019-06-28 04:00:00,,,,148.0,122.0,129.0,20.0,19.0,36.6,...,,,,,,,,,1,0


In [18]:
total_df.to_csv('./input_data/AJ_results.csv')