In [1]:
import numpy as np
import pandas as pd
import psycopg2
import getpass
# for configuring connection 
from configobj import ConfigObj
import os


In [2]:
# Create a database connection using settings from config file
config='../db/config.ini'

# connection info
conn_info = dict()
if os.path.isfile(config):
    config = ConfigObj(config)
    conn_info["sqluser"] = config['username']
    conn_info["sqlpass"] = config['password']
    conn_info["sqlhost"] = config['host']
    conn_info["sqlport"] = config['port']
    conn_info["dbname"] = config['dbname']
    conn_info["schema_name"] = config['schema_name']
else:
    conn_info["sqluser"] = 'postgres'
    conn_info["sqlpass"] = ''
    conn_info["sqlhost"] = '192.168.60.144'
    conn_info["sqlport"] = 6432
    conn_info["dbname"] = 'eicu'
    conn_info["schema_name"] = 'public,eicu_crd'
    
# Connect to the eICU database
print('Database: {}'.format(conn_info['dbname']))
print('Username: {}'.format(conn_info["sqluser"]))
if conn_info["sqlpass"] == '':
    # try connecting without password, i.e. peer or OS authentication
    try:
        if (conn_info["sqlhost"] == '192.168.60.144') & (conn_info["sqlport"]=='6432'):
            con = psycopg2.connect(dbname=conn_info["dbname"],
                                   user=conn_info["sqluser"])            
        else:
            con = psycopg2.connect(dbname=conn_info["dbname"],
                                   host=conn_info["sqlhost"],
                                   port=conn_info["sqlport"],
                                   user=conn_info["sqluser"])
    except:
        conn_info["sqlpass"] = getpass.getpass('Password: ')

        con = psycopg2.connect(dbname=conn_info["dbname"],
                               host=conn_info["sqlhost"],
                               port=conn_info["sqlport"],
                               user=conn_info["sqluser"],
                               password=conn_info["sqlpass"])
query_schema = 'set search_path to ' + conn_info['schema_name'] + ';'

Database: eicu
Username: postgres


In [3]:
from sqlalchemy import create_engine,text
con= create_engine('postgresql://eicu@192.168.60.144:6432/eicu')

In [4]:
datadir = '/home/mei/nas/docker/dataset/EICU/eicu-collaborative-research-database-2.0/'
csv = '/home/mei/nas/docker/thesis/data/csv/'

## flat_feature table: 
- select patient who comes from direct and emergency and the stay length in icu between 1 and 14 days

In [36]:
create_table_query =  query_schema +"""

drop materialized view if exists flat cascade;
create materialized view flat as
SELECT DISTINCT patientunitstayid, gender, age, apacheadmissiondx, admissionweight,dischargeweight, unitdischargelocation,unitdischargestatus
FROM patient
WHERE unitadmitsource IN ('Emergency Department', 'Direct Admit') 
    AND unitdischargeoffset >=1440 
    AND unitdischargeoffset <=20160
group by patientunitstayid
order by patientunitstayid
"""

In [37]:
with con.begin() as connection:
    connection.execute(text(create_table_query))
    
select_query = "SELECT * FROM flat;"
df_flat = pd.read_sql_query(select_query, con)

In [38]:
df_flat

Unnamed: 0,patientunitstayid,gender,age,apacheadmissiondx,admissionweight,dischargeweight,unitdischargelocation,unitdischargestatus
0,141168,Female,70,"Rhythm disturbance (atrial, supraventricular)",84.30,85.8,Death,Expired
1,141265,Male,67,"CVA, cerebrovascular accident/stroke",100.00,91.8,Floor,Alive
2,141266,Male,73,"Sepsis, renal/UTI (including bladder)",120.40,112.9,Floor,Alive
3,141276,Female,59,"Arrest, respiratory (without cardiac arrest)",156.60,156.6,Home,Alive
4,141284,Male,63,Anemia,,88.5,Floor,Alive
...,...,...,...,...,...,...,...,...
68441,3353147,Male,24,"Overdose, sedatives, hypnotics, antipsychotics...",153.40,151.3,Floor,Alive
68442,3353194,Female,51,Cardiac arrest (with or without respiratory ar...,63.05,65.8,Death,Expired
68443,3353213,Female,51,Coma/change in level of consciousness (for hep...,54.40,59.4,Home,Alive
68444,3353226,Female,79,"Effusions, pleural",58.40,55.1,Death,Expired


##  labels
- unitdischarge location, unitdiscahrges status, actualiculos

In [103]:
create_table_query = query_schema + """
DROP MATERIALIZED VIEW  IF EXISTS labels CASCADE;
CREATE MATERIALIZED VIEW labels AS
SELECT DISTINCT ON (f.patientunitstayid) 
    f.patientunitstayid,
    f.dischargeweight,
    f.unitdischargelocation,
    f.unitdischargestatus,
    apr.actualiculos
FROM flat AS f
INNER JOIN apachepatientresult AS apr 
ON f.patientunitstayid = apr.patientunitstayid;
"""

In [104]:
with con.begin() as connection:
    connection.execute(text(create_table_query)) 
select_query = "SELECT * FROM labels;"
df_labels = pd.read_sql_query(select_query, con)

In [105]:
df_labels

Unnamed: 0,patientunitstayid,dischargeweight,unitdischargelocation,unitdischargestatus,actualiculos
0,141168,85.8,Death,Expired,2.4972
1,141265,91.8,Floor,Alive,4.2138
2,141266,112.9,Floor,Alive,1.0423
3,141276,156.6,Home,Alive,1.1694
4,141284,88.5,Floor,Alive,1.4416
...,...,...,...,...,...
60330,3353147,151.3,Floor,Alive,1.0888
60331,3353194,65.8,Death,Expired,2.4930
60332,3353213,59.4,Home,Alive,3.4118
60333,3353226,55.1,Death,Expired,8.0368


## diagnosis

In [110]:
create_table_query =  query_schema +"""
drop materialized view if exists diagnoses cascade;
create materialized view diagnoses as

  -- for past medical history:
  select ph.patientunitstayid, ph.pasthistorypath as diagnosisstring
    from pasthistory as ph
    inner join labels as l on l.patientunitstayid = ph.patientunitstayid
    where ph.pasthistoryoffset > 0
    and ph.pasthistorypath LIKE '%Organ Systems%'
       
"""

In [111]:
with con.begin() as connection:
    connection.execute(text(create_table_query)) 
    
select_query = "SELECT * FROM diagnoses;"
df_dx = pd.read_sql_query(select_query, con)

In [112]:
df_dx

Unnamed: 0,patientunitstayid,diagnosisstring
0,141168,notes/Progress Notes/Past History/Organ System...
1,141168,notes/Progress Notes/Past History/Organ System...
2,141168,notes/Progress Notes/Past History/Organ System...
3,141168,notes/Progress Notes/Past History/Organ System...
4,141168,notes/Progress Notes/Past History/Organ System...
...,...,...
279753,3353251,notes/Progress Notes/Past History/Organ System...
279754,3353251,notes/Progress Notes/Past History/Organ System...
279755,3353251,notes/Progress Notes/Past History/Organ System...
279756,3353251,notes/Progress Notes/Past History/Organ System...


## medication

In [113]:
query = query_schema + """
select a.patientunitstayid, a.drugname
from admissiondrug a
inner join labels l on a.patientunitstayid = l.patientunitstayid
where a.drugoffset > 0
order by a.patientunitstayid
"""

In [114]:
df_drug = pd.read_sql_query(query,con)

In [115]:
df_drug

Unnamed: 0,patientunitstayid,drugname
0,252784,MULTIVITAMIN ...
1,252784,NITROGLYCERIN ...
2,252784,LISINOPRIL ...
3,252784,LANTUS ...
4,252784,CHOLECALCIFEROL (VITAMIN D3) ...
...,...,...
285418,3348105,METFORMIN HCL ...
285419,3348105,TRIAMTERENE W/HCTZ ...
285420,3348105,DILTIAZEM HCL ...
285421,3348105,TRIAMTERENE W/HCTZ ...


## lab_ ts

In [119]:
create_table_query =  query_schema +"""
drop materialized view if exists timeserieslab cascade;
create materialized view timeserieslab as
  select l.patientunitstayid, l.labresultoffset, l.labname
    from lab as l
    inner join labels as la
      on la.patientunitstayid = l.patientunitstayid -- only extract data for the cohort
    where l.labresultoffset > 0;

"""

In [120]:
with con.begin() as connection:
    connection.execute(text(create_table_query)) 

select_query = "SELECT * FROM timeserieslab;"
df_timeserieslab = pd.read_sql_query(select_query, con)

In [121]:
df_timeserieslab

Unnamed: 0,patientunitstayid,labresultoffset,labname
0,141168,2026,fibrinogen
1,141168,1133,PT - INR
2,141168,2026,magnesium
3,141168,1133,PT
4,141168,2141,pH
...,...,...,...
10434833,3353251,4049,bedside glucose
10434834,3353251,1849,BUN
10434835,3353251,310,pH
10434836,3353251,409,potassium


In [122]:
min_offset = df_timeserieslab["labresultoffset"].min()
max_offset = df_timeserieslab["labresultoffset"].max()

print(f"最小 labresultoffset: {min_offset}")
print(f"最大 labresultoffset: {max_offset}")

# 找到最小和最大的 labresultoffset 对应的索引
min_index = df_timeserieslab["labresultoffset"].idxmin()
max_index = df_timeserieslab["labresultoffset"].idxmax()

# 打印对应的行
print("最小 labresultoffset 对应的行：")
print(df_timeserieslab.loc[min_index])

print("\n最大 labresultoffset 对应的行：")
print(df_timeserieslab.loc[max_index])

最小 labresultoffset: 1
最大 labresultoffset: 526696
最小 labresultoffset 对应的行：
patientunitstayid             143471
labresultoffset                    1
labname              bedside glucose
Name: 12470, dtype: object

最大 labresultoffset 对应的行：
patientunitstayid    2758367
labresultoffset       526696
labname              lactate
Name: 7643877, dtype: object


## vital periodic

In [123]:
create_table_query =  query_schema +"""
drop materialized view if exists timeseriesperiodic cascade;
create materialized view timeseriesperiodic as
  select vp.patientunitstayid, vp.observationoffset, vp.temperature, vp.sao2, vp.heartrate, vp.respiration, vp.cvp,
    vp.systemicsystolic, vp.systemicdiastolic, vp.systemicmean, vp.st1, vp.st2, vp.st3
    from vitalperiodic as vp
    -- select only the patients who are in the cohort
    inner join labels as la
      on la.patientunitstayid = vp.patientunitstayid
    where vp.observationoffset > 0
    order by vp.patientunitstayid, vp.observationoffset;
"""

In [125]:
with con.begin() as connection:
    connection.execute(text(create_table_query)) 

select_query = "SELECT * FROM timeseriesperiodic;"
df_timeseriesperiodic = pd.read_sql_query(select_query, con)

In [127]:
min = df_timeseriesperiodic["observationoffset"].min()
max = df_timeseriesperiodic["observationoffset"].max()

print(f"最小 observationoffset: {min_offset}")
print(f"最大 observationoffset {max_offset}")


最小 observationoffset: 1
最大 observationoffset 526696


In [126]:
df_timeseriesperiodic

Unnamed: 0,patientunitstayid,observationoffset,temperature,sao2,heartrate,respiration,cvp,systemicsystolic,systemicdiastolic,systemicmean,st1,st2,st3
0,141168,119,,93.0,140.0,,,,,,,,
1,141168,124,,,140.0,,,,,,,,
2,141168,129,,,140.0,,,,,,,,
3,141168,134,,,140.0,,,,,,,,
4,141168,139,,,140.0,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
51285007,3353251,16219,,84.0,73.0,,,,,,,,
51285008,3353251,16224,,68.0,74.0,,,,,,,,
51285009,3353251,16229,,82.0,73.0,,,,,,,,
51285010,3353251,16234,,95.0,70.0,,,,,,,,


In [28]:
# df_dx=pd.read_csv(csv+'/diagnoses.csv')
# df_timeserieslab=pd.read_csv(csv+'/timeserieslab.csv')
# df_timeseriesperiodic=pd.read_csv(csv+'/timeseriesperiodic.csv')
# df_labels=pd.read_csv(csv+'/labels.csv')
# df_flat=pd.read_csv(csv+'/flat.csv')


In [128]:
common_ids = set(df_labels['patientunitstayid'].unique()) \
             .intersection(df_dx['patientunitstayid'].unique()) \
             .intersection(df_timeserieslab['patientunitstayid'].unique()) \
             .intersection(df_timeseriesperiodic['patientunitstayid'].unique()) \
             .intersection(df_flat['patientunitstayid'].unique())\
             .intersection(df_drug['patientunitstayid'].unique())

In [130]:
common_ids_df = pd.DataFrame(list(common_ids), columns=['patientunitstayid'])
common_ids_df.to_csv(csv+'/common_ids.csv', index=False)

In [10]:
# common_ids=pd.read_csv(csv+'/common_ids.csv')

In [129]:
len(common_ids)

12260

In [131]:
def filter_and_sort(dataframes, common_ids, id_column='patientunitstayid'):
    """
    Filter and sort dataframes by common
    """
    return [
        df[df[id_column].isin(common_ids)].sort_values(by=id_column)
        for df in dataframes
    ]

In [132]:
dataframes = [df_labels, df_dx, df_timeserieslab, df_timeseriesperiodic, df_flat, df_drug]
filtered_sorted_dataframes = filter_and_sort(dataframes, common_ids)

# back to the original dataframes
df_labels, df_dx, df_timeserieslab, df_timeseriesperiodic, df_flat,df_drug = filtered_sorted_dataframes

In [133]:
def export_table_to_csv(df, output_file):
    """
    Export a SQL query to a CSV file
    """
    df.to_csv(output_file, index=False)
    print(f"Exported {output_file}")


In [134]:
## labels
export_table_to_csv(df_labels, f"{csv}labels.csv")
## static data
export_table_to_csv(df_dx, f"{csv}diagnoses.csv")
export_table_to_csv(df_flat, f"{csv}flat.csv")
export_table_to_csv(df_drug, f"{csv}drug.csv")

## time series data
export_table_to_csv(df_timeserieslab, f"{csv}timeserieslab.csv")
export_table_to_csv(df_timeseriesperiodic, f"{csv}timeseriesperiodic.csv")


Exported /home/mei/nas/docker/thesis/data/csv/labels.csv
Exported /home/mei/nas/docker/thesis/data/csv/diagnoses.csv
Exported /home/mei/nas/docker/thesis/data/csv/flat.csv
Exported /home/mei/nas/docker/thesis/data/csv/drug.csv
Exported /home/mei/nas/docker/thesis/data/csv/timeserieslab.csv
Exported /home/mei/nas/docker/thesis/data/csv/timeseriesperiodic.csv


In [135]:
df_flat

Unnamed: 0,patientunitstayid,gender,age,apacheadmissiondx,admissionweight,dischargeweight,unitdischargelocation,unitdischargestatus
4177,252784,Male,56,Diabetic ketoacidosis,75.00,75.7,Floor,Alive
4189,253331,Male,76,"CHF, congestive heart failure",114.70,114.4,Floor,Alive
4212,255112,Male,52,Cardiac arrest (with or without respiratory ar...,50.60,58.2,Death,Expired
4280,258354,Female,61,"Hypertension, uncontrolled (for cerebrovascula...",74.38,75.3,Floor,Alive
4300,259414,Male,81,"CVA, cerebrovascular accident/stroke",107.50,106.7,Floor,Alive
...,...,...,...,...,...,...,...,...
66723,3247360,Female,25,"Sepsis, renal/UTI (including bladder)",48.90,60.3,Floor,Alive
66726,3247421,Male,59,"Sepsis, pulmonary",76.20,82.1,Floor,Alive
68010,3346588,Female,71,"CHF, congestive heart failure",121.50,115.3,Telemetry,Alive
68073,3347496,Female,56,"Overdose, sedatives, hypnotics, antipsychotics...",71.20,71.3,Floor,Alive


In [136]:
df_labels

Unnamed: 0,patientunitstayid,dischargeweight,unitdischargelocation,unitdischargestatus,actualiculos
4005,252784,75.7,Floor,Alive,2.0500
4016,253331,114.4,Floor,Alive,1.7625
4037,255112,58.2,Death,Expired,10.7381
4102,258354,75.3,Floor,Alive,3.0090
4122,259414,106.7,Floor,Alive,5.8562
...,...,...,...,...,...
58668,3247360,60.3,Floor,Alive,3.0097
58671,3247421,82.1,Floor,Alive,2.9520
59916,3346588,115.3,Telemetry,Alive,2.1826
59978,3347496,71.3,Floor,Alive,1.5340


In [137]:
df_dx

Unnamed: 0,patientunitstayid,diagnosisstring
10195,252784,notes/Progress Notes/Past History/Organ System...
10194,252784,notes/Progress Notes/Past History/Organ System...
10193,252784,notes/Progress Notes/Past History/Organ System...
10335,253331,notes/Progress Notes/Past History/Organ System...
10340,253331,notes/Progress Notes/Past History/Organ System...
...,...,...
278651,3348105,notes/Progress Notes/Past History/Organ System...
278650,3348105,notes/Progress Notes/Past History/Organ System...
278649,3348105,notes/Progress Notes/Past History/Organ System...
278655,3348105,notes/Progress Notes/Past History/Organ System...


In [138]:
df_drug

Unnamed: 0,patientunitstayid,drugname
0,252784,MULTIVITAMIN ...
10,252784,FLAX SEED OIL ...
9,252784,OMEPRAZOLE ...
8,252784,CARVEDILOL ...
7,252784,NOVOLOG ...
...,...,...
285404,3348105,PRAVASTATIN SODIUM ...
285403,3348105,OMEPRAZOLE ...
285402,3348105,RAMIPRIL ...
285411,3348105,METFORMIN HCL ...


In [139]:
df_timeserieslab

Unnamed: 0,patientunitstayid,labresultoffset,labname
650863,252784,1080,bedside glucose
650980,252784,786,bedside glucose
650981,252784,1240,AST (SGOT)
650982,252784,785,BUN
650983,252784,1020,BUN
...,...,...,...
10379171,3348105,4493,glucose
10379172,3348105,685,anion gap
10379173,3348105,2607,bedside glucose
10379204,3348105,4493,sodium


In [140]:
df_timeseriesperiodic

Unnamed: 0,patientunitstayid,observationoffset,temperature,sao2,heartrate,respiration,cvp,systemicsystolic,systemicdiastolic,systemicmean,st1,st2,st3
3252945,252784,10,,100.0,106.0,22.0,,,,,0.05,0.05,0.0
3253330,252784,1940,,97.0,93.0,21.0,,,,,0.00,0.00,0.0
3253331,252784,1945,,98.0,92.0,21.0,,,,,0.00,0.00,0.0
3253332,252784,1950,,98.0,90.0,21.0,,,,,0.00,0.00,0.0
3253333,252784,1955,,98.0,87.0,20.0,,,,,0.00,0.10,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
51005542,3348105,1428,,100.0,73.0,,,,,,,,
51005543,3348105,1433,,100.0,72.0,,,,,,,,
51005544,3348105,1438,,100.0,72.0,,,,,,,,
51005546,3348105,1448,,100.0,72.0,,,,,,,,


## get the common ids in all the processed tables

In [3]:
diagnoses = pd.read_csv(csv+'/preprocessed_diagnoses.csv')
flat_drug = pd.read_csv(csv+'/preprocessed_flat_drug.csv')
flat = pd.read_csv(csv+'/preprocessed_flat.csv')
labels = pd.read_csv(csv+'/preprocessed_labels.csv')
timeseries = pd.read_csv(csv+'/preprocessed_timeseries.csv',low_memory=False)

In [4]:
timeseries.rename(columns={'Unnamed: 1': 'time'}, inplace=True)

In [6]:
flat_drug.drop(columns=['hour'], inplace=True)

In [7]:
common_ids = set(labels['patient'].unique())\
               .intersection(diagnoses['patient'].unique())\
                .intersection(flat_drug['patient'].unique())\
                .intersection(flat['patient'].unique())\
                .intersection(timeseries['patient'].unique())

In [None]:
## save common id
common_ids= pd.DataFrame(list(common_ids), columns=['patientunitstayid'])
common_ids.to_csv(csv+'/common_ids.csv', index=False)

In [10]:
len(common_ids)

9451

In [11]:
def filter_and_sort_patient(dataframes, common_ids, id_column='patient'):
    """
    Filter and sort dataframes by common
    """
    return [
        df[df[id_column].isin(common_ids)].sort_values(by=id_column)
        for df in dataframes
    ]

In [12]:
dataframes = [labels, diagnoses, flat_drug, flat, timeseries]
filtered_sorted_dataframes = filter_and_sort_patient(dataframes, common_ids)

# back to the original dataframes
labels, diagnoses, flat_drug, flat, timeseries= filtered_sorted_dataframes

In [20]:
## labels
export_table_to_csv(labels, f"{csv}/final_labels.csv")

## static data
export_table_to_csv(diagnoses, f"{csv}/final_diagnoses.csv")
export_table_to_csv(flat_drug, f"{csv}/final_flat_drug.csv")
export_table_to_csv(flat, f"{csv}/final_flat.csv")

## time series data
export_table_to_csv(timeseries, f"{csv}/final_timeseries.csv")


Exported /home/mei/nas/docker/thesis/data/csv//final_labels.csv
Exported /home/mei/nas/docker/thesis/data/csv//final_diagnoses.csv
Exported /home/mei/nas/docker/thesis/data/csv//final_flat_drug.csv
Exported /home/mei/nas/docker/thesis/data/csv//final_flat.csv
Exported /home/mei/nas/docker/thesis/data/csv//final_timeseries.csv


In [15]:
labels

Unnamed: 0,patient,actualiculos,unitdischargestatus,dischargeweight,discharge_risk_category
0,252784,2.0500,0,75.7,1
1,253331,1.7625,0,114.4,1
2,255112,10.7381,1,58.2,4
3,258354,3.0090,0,75.3,1
4,259414,5.8562,0,106.7,1
...,...,...,...,...,...
9813,3247094,4.0041,0,90.3,1
9814,3247116,8.1256,1,68.0,4
9816,3247421,2.9520,0,82.1,1
9817,3346588,2.1826,0,115.3,2


In [16]:
diagnoses

Unnamed: 0,patient,Cardiovascular (R),Cardiovascular (R)|AICD,Cardiovascular (R)|Angina,Cardiovascular (R)|Arrhythmias,Cardiovascular (R)|Arrhythmias|atrial fibrillation - chronic,Cardiovascular (R)|Arrhythmias|atrial fibrillation - intermittent,Cardiovascular (R)|Congestive Heart Failure,Cardiovascular (R)|Congestive Heart Failure|CHF,Cardiovascular (R)|Congestive Heart Failure|CHF - severity unknown,...,"apacheadmissiondx_Rhythm disturbance (atrial, supraventricular)",apacheadmissiondx_Rhythm disturbance (conduction defect),apacheadmissiondx_Seizures (primary-no structural brain disease),"apacheadmissiondx_Sepsis, GI","apacheadmissiondx_Sepsis, cutaneous/soft tissue","apacheadmissiondx_Sepsis, other","apacheadmissiondx_Sepsis, pulmonary","apacheadmissiondx_Sepsis, renal/UTI (including bladder)","apacheadmissiondx_Sepsis, unknown","grouped_apacheadmissiondx_Overdose,"
0,252784,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0,0,0,0,0,0,0,0,0,0
1,253331,1.0,1.0,0.0,1.0,0.0,1.0,1.0,1.0,0.0,...,0,0,0,0,0,0,0,0,0,0
2,255112,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0,0,0,0,0,0,0,0,0,0
3,258354,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,...,0,0,0,0,0,0,0,0,0,0
4,259414,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9922,3247094,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0,0,0,0,0,0,0,0,0,0
9923,3247116,1.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,...,0,0,0,0,0,0,0,0,0,0
9925,3247421,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0,0,0,0,0,0,1,0,0,0
9926,3346588,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0,0,0,0,0,0,0,0,0,0


In [None]:
flat_drug

Unnamed: 0,patient,gender,age,admissionweight,> 89,ACETAMINOPHEN,ADVAIR DISKUS,ALBUTEROL,ALBUTEROL SULFATE,ALLOPURINOL,...,TRAMADOL HCL,TRAZODONE HCL,TYLENOL,VITAMIN C,VITAMIN D,VITAMIN D3,WARFARIN SODIUM,XANAX,ZOCOR,ZOFRAN
0,252784,1,56.0,75.0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,253331,1,76.0,114.7,0,0,0,1,0,1,...,0,0,0,0,0,0,1,0,0,0
2,255112,1,52.0,50.6,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,258354,0,61.0,74.4,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,259414,1,81.0,107.5,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9517,3247094,1,22.0,91.2,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
9518,3247116,0,52.0,61.7,0,1,1,1,0,0,...,0,0,0,0,0,0,0,0,0,0
9519,3247421,1,59.0,76.2,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
9520,3346588,0,71.0,121.5,0,0,0,0,0,0,...,0,0,0,0,0,0,1,0,0,0


In [18]:
timeseries

Unnamed: 0,patient,time,-basos,-eos,-lymphs,-monos,-polys,ALT (SGPT),AST (SGOT),BUN,...,sao2_mask,heartrate_mask,respiration_mask,cvp_mask,systemicsystolic_mask,systemicdiastolic_mask,systemicmean_mask,st1_mask,st2_mask,st3_mask
0,252784,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.00,0.00,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0
197,252784,198,0.0,0.0,0.0,0.0,0.0,0.0,0.0,16.0,...,1.00,1.00,1.000000,0.0,0.0,0.0,0.0,1.0,1.0,1.0
196,252784,197,0.0,0.0,0.0,0.0,0.0,0.0,0.0,16.0,...,1.00,1.00,1.000000,0.0,0.0,0.0,0.0,1.0,1.0,1.0
195,252784,196,0.0,0.0,0.0,0.0,0.0,0.0,0.0,16.0,...,1.00,1.00,1.000000,0.0,0.0,0.0,0.0,1.0,1.0,1.0
194,252784,195,0.0,0.0,0.0,0.0,0.0,0.0,0.0,16.0,...,1.00,1.00,1.000000,0.0,0.0,0.0,0.0,1.0,1.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2878923,3347496,94,1.0,1.0,15.0,4.0,0.0,13.0,19.0,42.0,...,1.00,1.00,0.009740,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2878922,3347496,93,1.0,1.0,15.0,4.0,0.0,13.0,19.0,42.0,...,1.00,1.00,0.009868,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2878921,3347496,92,1.0,1.0,15.0,4.0,0.0,13.0,19.0,42.0,...,1.00,1.00,0.010000,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2878900,3347496,71,1.0,1.0,15.0,4.0,0.0,13.0,19.0,42.0,...,1.00,1.00,0.013889,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [19]:
flat

Unnamed: 0,patient,hour,gender,age,admissionweight,> 89
0,252784,15.0,1,56.0,75.0,0
1,253331,1.0,1,76.0,114.7,0
2,255112,5.0,1,52.0,50.6,0
3,258354,19.0,0,61.0,74.4,0
4,259414,1.0,1,81.0,107.5,0
...,...,...,...,...,...,...
9888,3247094,21.0,1,22.0,91.2,0
9889,3247116,21.0,0,52.0,61.7,0
9891,3247421,1.0,1,59.0,76.2,0
9892,3346588,17.0,0,71.0,121.5,0
