In [3]:
# Import libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import psycopg2
import getpass
import pdvega
import seaborn as sns
# for configuring connection 
from configobj import ConfigObj
import os

%matplotlib inline

In [4]:
datadir = '/home/mei/nas/docker/dataset/EICU/eicu-collaborative-research-database-2.0/'
porcesseddir2 = '/home/mei/nas/docker/processedData_2/'

In [3]:
# Create a database connection using settings from config file
config='../db/config.ini'

# connection info
conn_info = dict()
if os.path.isfile(config):
    config = ConfigObj(config)
    conn_info["sqluser"] = config['username']
    conn_info["sqlpass"] = config['password']
    conn_info["sqlhost"] = config['host']
    conn_info["sqlport"] = config['port']
    conn_info["dbname"] = config['dbname']
    conn_info["schema_name"] = config['schema_name']
else:
    conn_info["sqluser"] = 'postgres'
    conn_info["sqlpass"] = ''
    conn_info["sqlhost"] = '192.168.60.144'
    conn_info["sqlport"] = 6432
    conn_info["dbname"] = 'eicu'
    conn_info["schema_name"] = 'public,eicu_crd'
    
# Connect to the eICU database
print('Database: {}'.format(conn_info['dbname']))
print('Username: {}'.format(conn_info["sqluser"]))
if conn_info["sqlpass"] == '':
    # try connecting without password, i.e. peer or OS authentication
    try:
        if (conn_info["sqlhost"] == '192.168.60.144') & (conn_info["sqlport"]=='6432'):
            con = psycopg2.connect(dbname=conn_info["dbname"],
                                   user=conn_info["sqluser"])            
        else:
            con = psycopg2.connect(dbname=conn_info["dbname"],
                                   host=conn_info["sqlhost"],
                                   port=conn_info["sqlport"],
                                   user=conn_info["sqluser"])
    except:
        conn_info["sqlpass"] = getpass.getpass('Password: ')

        con = psycopg2.connect(dbname=conn_info["dbname"],
                               host=conn_info["sqlhost"],
                               port=conn_info["sqlport"],
                               user=conn_info["sqluser"],
                               password=conn_info["sqlpass"])
query_schema = 'set search_path to ' + conn_info['schema_name'] + ';'

Database: eicu
Username: postgres


In [3]:
from sqlalchemy import create_engine,text
con= create_engine('postgresql://eicu@192.168.60.144:6432/eicu')

In [5]:
def round_up(x, base=5):
    return base * round(x/base)

In [6]:
query = query_schema + """
select p.patientunitstayid, p.hospitaladmitOffset,ph.pasthistoryoffset,p.gender, p.age, 
      p.apacheadmissiondx,ph.pasthistorypath,p.unitadmitsource, p.admissionweight,	
      p.dischargeweight, p.unitdischargeoffset, p.unitdischargelocation,	
      p.unitdischargestatus
from pasthistory ph
join patient_2 p
  on ph.patientunitstayid = p.patientunitstayid
  
order by p.patientunitstayid, p.hospitaladmitOffset,ph.pasthistoryoffset
"""

In [7]:
df_past= pd.read_sql_query(query,con)
df_past.head(n=20)

Unnamed: 0,patientunitstayid,hospitaladmitoffset,pasthistoryoffset,gender,age,apacheadmissiondx,pasthistorypath,unitadmitsource,admissionweight,dischargeweight,unitdischargeoffset,unitdischargelocation,unitdischargestatus
0,141168,0,60,Female,70,"Rhythm disturbance (atrial, supraventricular)",notes/Progress Notes/Past History/Organ System...,Direct Admit,84.3,85.8,3596,Death,Expired
1,141168,0,60,Female,70,"Rhythm disturbance (atrial, supraventricular)",notes/Progress Notes/Past History/Organ System...,Direct Admit,84.3,85.8,3596,Death,Expired
2,141168,0,60,Female,70,"Rhythm disturbance (atrial, supraventricular)",notes/Progress Notes/Past History/Organ System...,Direct Admit,84.3,85.8,3596,Death,Expired
3,141168,0,60,Female,70,"Rhythm disturbance (atrial, supraventricular)",notes/Progress Notes/Past History/Organ System...,Direct Admit,84.3,85.8,3596,Death,Expired
4,141168,0,60,Female,70,"Rhythm disturbance (atrial, supraventricular)",notes/Progress Notes/Past History/Past History...,Direct Admit,84.3,85.8,3596,Death,Expired
5,141168,0,60,Female,70,"Rhythm disturbance (atrial, supraventricular)",notes/Progress Notes/Past History/Organ System...,Direct Admit,84.3,85.8,3596,Death,Expired
6,141168,0,60,Female,70,"Rhythm disturbance (atrial, supraventricular)",notes/Progress Notes/Past History/Organ System...,Direct Admit,84.3,85.8,3596,Death,Expired
7,141168,0,60,Female,70,"Rhythm disturbance (atrial, supraventricular)",notes/Progress Notes/Past History/Organ System...,Direct Admit,84.3,85.8,3596,Death,Expired
8,141168,0,60,Female,70,"Rhythm disturbance (atrial, supraventricular)",notes/Progress Notes/Past History/Organ System...,Direct Admit,84.3,85.8,3596,Death,Expired
9,141168,0,114,Female,70,"Rhythm disturbance (atrial, supraventricular)",notes/Progress Notes/Past History/Organ System...,Direct Admit,84.3,85.8,3596,Death,Expired


In [8]:
df=df_past.copy()
# Set indices
df = df.set_index(['patientunitstayid', 'hospitaladmitoffset','pasthistoryoffset'])
df = df.drop(df.index[np.where(df.index.get_level_values('pasthistoryoffset') < 0)])
df.sort_index(level = ['patientunitstayid','hospitaladmitoffset', 'pasthistoryoffset'], inplace = True)
# Resample every 5 mins
df.rename(round_up, level = 'pasthistoryoffset', inplace = True)
df.head(n=10)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,gender,age,apacheadmissiondx,pasthistorypath,unitadmitsource,admissionweight,dischargeweight,unitdischargeoffset,unitdischargelocation,unitdischargestatus
patientunitstayid,hospitaladmitoffset,pasthistoryoffset,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
141168,0,60,Female,70,"Rhythm disturbance (atrial, supraventricular)",notes/Progress Notes/Past History/Organ System...,Direct Admit,84.3,85.8,3596,Death,Expired
141168,0,60,Female,70,"Rhythm disturbance (atrial, supraventricular)",notes/Progress Notes/Past History/Organ System...,Direct Admit,84.3,85.8,3596,Death,Expired
141168,0,60,Female,70,"Rhythm disturbance (atrial, supraventricular)",notes/Progress Notes/Past History/Organ System...,Direct Admit,84.3,85.8,3596,Death,Expired
141168,0,60,Female,70,"Rhythm disturbance (atrial, supraventricular)",notes/Progress Notes/Past History/Organ System...,Direct Admit,84.3,85.8,3596,Death,Expired
141168,0,60,Female,70,"Rhythm disturbance (atrial, supraventricular)",notes/Progress Notes/Past History/Past History...,Direct Admit,84.3,85.8,3596,Death,Expired
141168,0,60,Female,70,"Rhythm disturbance (atrial, supraventricular)",notes/Progress Notes/Past History/Organ System...,Direct Admit,84.3,85.8,3596,Death,Expired
141168,0,60,Female,70,"Rhythm disturbance (atrial, supraventricular)",notes/Progress Notes/Past History/Organ System...,Direct Admit,84.3,85.8,3596,Death,Expired
141168,0,60,Female,70,"Rhythm disturbance (atrial, supraventricular)",notes/Progress Notes/Past History/Organ System...,Direct Admit,84.3,85.8,3596,Death,Expired
141168,0,60,Female,70,"Rhythm disturbance (atrial, supraventricular)",notes/Progress Notes/Past History/Organ System...,Direct Admit,84.3,85.8,3596,Death,Expired
141168,0,115,Female,70,"Rhythm disturbance (atrial, supraventricular)",notes/Progress Notes/Past History/Organ System...,Direct Admit,84.3,85.8,3596,Death,Expired


In [9]:
df1 = df[df['pasthistorypath'].str.contains('Organ Systems')]
df1 = df1.reset_index()
df1['pasthistorypath'] = df1['pasthistorypath'].str.replace('notes/Progress Notes/Past History/Organ Systems/', ' ')
df1['pasthistorypath'] = df1['pasthistorypath'].str.replace('|', '/')
split = df1['pasthistorypath'].str.split('/')
df1['first'] = split.str[0]
df1['second'] = split.str[1]
df1['third'] = split.str[2]
df1['fourth'] = split.str[3]

df1.head()

Unnamed: 0,patientunitstayid,hospitaladmitoffset,pasthistoryoffset,gender,age,apacheadmissiondx,pasthistorypath,unitadmitsource,admissionweight,dischargeweight,unitdischargeoffset,unitdischargelocation,unitdischargestatus,first,second,third,fourth
0,141168,0,60,Female,70,"Rhythm disturbance (atrial, supraventricular)",Pulmonary/COPD/COPD - no limitations,Direct Admit,84.3,85.8,3596,Death,Expired,Pulmonary,COPD,COPD - no limitations,
1,141168,0,60,Female,70,"Rhythm disturbance (atrial, supraventricular)",Cardiovascular (R)/Valve disease/AS,Direct Admit,84.3,85.8,3596,Death,Expired,Cardiovascular (R),Valve disease,AS,
2,141168,0,60,Female,70,"Rhythm disturbance (atrial, supraventricular)",Cardiovascular (R)/Congestive Heart Failure/C...,Direct Admit,84.3,85.8,3596,Death,Expired,Cardiovascular (R),Congestive Heart Failure,CHF - class II,
3,141168,0,60,Female,70,"Rhythm disturbance (atrial, supraventricular)",Cardiovascular (R)/Hypertension Requiring Tre...,Direct Admit,84.3,85.8,3596,Death,Expired,Cardiovascular (R),Hypertension Requiring Treatment,hypertension requiring treatment,
4,141168,0,60,Female,70,"Rhythm disturbance (atrial, supraventricular)",Cardiovascular (R)/AICD/AICD,Direct Admit,84.3,85.8,3596,Death,Expired,Cardiovascular (R),AICD,AICD,


In [10]:
columns = df1.columns.tolist()
index = columns.index('pasthistorypath')
new_order = columns[:index + 1] + ['first', 'second', 'third', 'fourth'] + columns[index + 1:-4]
df1 = df1.reindex(columns=new_order)
df1 = df1.set_index(['patientunitstayid', 'hospitaladmitoffset','pasthistoryoffset'])
# df1.sort_index(level = ['patientunitstayid','hospitaladmitoffset', 'pasthistoryoffset'], inplace = True)
df1.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,gender,age,apacheadmissiondx,pasthistorypath,first,second,third,fourth,unitadmitsource,admissionweight,dischargeweight,unitdischargeoffset,unitdischargelocation,unitdischargestatus
patientunitstayid,hospitaladmitoffset,pasthistoryoffset,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
141168,0,60,Female,70,"Rhythm disturbance (atrial, supraventricular)",Pulmonary/COPD/COPD - no limitations,Pulmonary,COPD,COPD - no limitations,,Direct Admit,84.3,85.8,3596,Death,Expired
141168,0,60,Female,70,"Rhythm disturbance (atrial, supraventricular)",Cardiovascular (R)/Valve disease/AS,Cardiovascular (R),Valve disease,AS,,Direct Admit,84.3,85.8,3596,Death,Expired
141168,0,60,Female,70,"Rhythm disturbance (atrial, supraventricular)",Cardiovascular (R)/Congestive Heart Failure/C...,Cardiovascular (R),Congestive Heart Failure,CHF - class II,,Direct Admit,84.3,85.8,3596,Death,Expired
141168,0,60,Female,70,"Rhythm disturbance (atrial, supraventricular)",Cardiovascular (R)/Hypertension Requiring Tre...,Cardiovascular (R),Hypertension Requiring Treatment,hypertension requiring treatment,,Direct Admit,84.3,85.8,3596,Death,Expired
141168,0,60,Female,70,"Rhythm disturbance (atrial, supraventricular)",Cardiovascular (R)/AICD/AICD,Cardiovascular (R),AICD,AICD,,Direct Admit,84.3,85.8,3596,Death,Expired


In [11]:
# df1.to_csv(porcesseddir2 + 'selected_pastHistory.csv',  index=True)

In [12]:
n=len(list(df1.index.get_level_values('patientunitstayid').unique()))
r=len(df1)
print("There are {} unique patientunitstayid in the selected patients with the total {} records.".format(n,r))

There are 53612 unique patientunitstayid in the selected patients with the total 312103 records.


In [5]:
df1= pd.read_csv(porcesseddir2 + 'selected_pastHistory.csv')
df1 = df1.set_index(['patientunitstayid', 'hospitaladmitoffset','pasthistoryoffset'])
df1.sort_index(level = ['patientunitstayid','hospitaladmitoffset', 'pasthistoryoffset'], inplace = True)
df1.head(n=20)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,gender,age,apacheadmissiondx,pasthistorypath,first,second,third,fourth,unitadmitsource,admissionweight,dischargeweight,unitdischargeoffset,unitdischargelocation,unitdischargestatus
patientunitstayid,hospitaladmitoffset,pasthistoryoffset,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
141168,0,60,Female,70,"Rhythm disturbance (atrial, supraventricular)",Pulmonary/COPD/COPD - no limitations,Pulmonary,COPD,COPD - no limitations,,Direct Admit,84.3,85.8,3596,Death,Expired
141168,0,60,Female,70,"Rhythm disturbance (atrial, supraventricular)",Cardiovascular (R)/Valve disease/AS,Cardiovascular (R),Valve disease,AS,,Direct Admit,84.3,85.8,3596,Death,Expired
141168,0,60,Female,70,"Rhythm disturbance (atrial, supraventricular)",Cardiovascular (R)/Congestive Heart Failure/C...,Cardiovascular (R),Congestive Heart Failure,CHF - class II,,Direct Admit,84.3,85.8,3596,Death,Expired
141168,0,60,Female,70,"Rhythm disturbance (atrial, supraventricular)",Cardiovascular (R)/Hypertension Requiring Tre...,Cardiovascular (R),Hypertension Requiring Treatment,hypertension requiring treatment,,Direct Admit,84.3,85.8,3596,Death,Expired
141168,0,60,Female,70,"Rhythm disturbance (atrial, supraventricular)",Cardiovascular (R)/AICD/AICD,Cardiovascular (R),AICD,AICD,,Direct Admit,84.3,85.8,3596,Death,Expired
141168,0,60,Female,70,"Rhythm disturbance (atrial, supraventricular)",Renal (R)/Renal Failure/renal failure- not c...,Renal (R),Renal Failure,renal failure- not currently dialyzed,,Direct Admit,84.3,85.8,3596,Death,Expired
141168,0,60,Female,70,"Rhythm disturbance (atrial, supraventricular)",Rheumatic/Rheumatoid Arthritis/rheumatoid art...,Rheumatic,Rheumatoid Arthritis,rheumatoid arthritis,,Direct Admit,84.3,85.8,3596,Death,Expired
141168,0,60,Female,70,"Rhythm disturbance (atrial, supraventricular)",Cardiovascular (R)/Arrhythmias/atrial fibrill...,Cardiovascular (R),Arrhythmias,atrial fibrillation - chronic,,Direct Admit,84.3,85.8,3596,Death,Expired
141168,0,115,Female,70,"Rhythm disturbance (atrial, supraventricular)",Cardiovascular (R)/Valve disease/AS,Cardiovascular (R),Valve disease,AS,,Direct Admit,84.3,85.8,3596,Death,Expired
141168,0,115,Female,70,"Rhythm disturbance (atrial, supraventricular)",Cardiovascular (R)/Hypertension Requiring Tre...,Cardiovascular (R),Hypertension Requiring Treatment,hypertension requiring treatment,,Direct Admit,84.3,85.8,3596,Death,Expired


In [11]:
df1_reset = df1.reset_index()
df_unique = df1_reset.drop_duplicates(subset=['patientunitstayid'])
freq = pd.DataFrame()
freq['noAnnotations'] = df_unique['apacheadmissiondx'].value_counts()
freq['%'] = (df_unique['apacheadmissiondx'].value_counts() / len(df_unique)) * 100
freq.head(n=10)

Unnamed: 0_level_0,noAnnotations,%
apacheadmissiondx,Unnamed: 1_level_1,Unnamed: 2_level_1
"Sepsis, pulmonary",3972,7.408789
"CVA, cerebrovascular accident/stroke",2923,5.452138
"Infarction, acute myocardial (MI)",2867,5.347683
"CHF, congestive heart failure",2773,5.172349
"Sepsis, renal/UTI (including bladder)",2552,4.760128
Diabetic ketoacidosis,2316,4.319928
Emphysema/bronchitis,1944,3.626054
Cardiac arrest (with or without respiratory arrest; for respiratory arrest see Respiratory System),1599,2.982541
"Pneumonia, bacterial",1576,2.93964
"Rhythm disturbance (atrial, supraventricular)",1560,2.909796


In [14]:
# freq.to_csv(porcesseddir2 + 'selected_pastHistory_apacheaddx_freq.csv',  index=True)

## nan 值统计

In [15]:
missing_ratio = pd.DataFrame(df1.isna().sum() / len(df1.index), columns = ['missing ratio %']) * 100
missing_ratio.sort_values('missing ratio %', inplace = True, ascending = False) 
missing_ratio

Unnamed: 0,missing ratio %
fourth,89.413431
dischargeweight,34.968264
admissionweight,1.186788
gender,0.0
age,0.0
apacheadmissiondx,0.0
pasthistorypath,0.0
first,0.0
second,0.0
third,0.0


## frequency of past dx history 

### first

In [16]:
frequency1 = pd.DataFrame()
frequency1['noAnnotations'] = df1['first'].value_counts() 
frequency1['%'] = df1['first'].value_counts() / len(df1.index) * 100
frequency1

Unnamed: 0_level_0,noAnnotations,%
first,Unnamed: 1_level_1,Unnamed: 2_level_1
Cardiovascular (R),144104,46.171937
Endocrine (R),45774,14.666312
Pulmonary,39744,12.734258
Neurologic,27196,8.71379
Hematology,22345,7.159495
Renal (R),17722,5.678254
Gastrointestinal (R),10621,3.403043
Rheumatic,2666,0.854205
Infectious Disease (R),1931,0.618706


In [17]:
frequency2 = pd.DataFrame()
frequency2['noAnnotations'] = df1['second'].value_counts() 
frequency2['%'] = df1['second'].value_counts() / len(df1.index) * 100
frequency2

Unnamed: 0_level_0,noAnnotations,%
second,Unnamed: 1_level_1,Unnamed: 2_level_1
Hypertension Requiring Treatment,56983,18.257755
Oncology (R),22345,7.159495
COPD,19067,6.109201
Congestive Heart Failure,18473,5.918879
Non-Insulin Dependent Diabetes,17760,5.690429
Insulin Dependent Diabetes,16268,5.212382
Arrhythmias,15676,5.022701
Myocardial Infarction,10655,3.413937
Hypothyroidism,10649,3.412015
Strokes,10374,3.323903


In [12]:
frequency3 = pd.DataFrame()
frequency3['noAnnotations'] = df1['third'].value_counts() 
frequency3['%'] = df1['third'].value_counts() / len(df1.index) * 100
frequency3.head(n=10)

Unnamed: 0_level_0,noAnnotations,%
third,Unnamed: 1_level_1,Unnamed: 2_level_1
hypertension requiring treatment,56983,18.257755
Cancer,17532,5.617376
insulin dependent diabetes,16268,5.212382
CHF,14251,4.566121
medication dependent,13678,4.382528
atrial fibrillation - chronic,10881,3.486349
hypothyroidism,10649,3.412015
COPD - moderate,9063,2.903849
asthma,8242,2.640795
stroke - date unknown,6399,2.050285


In [19]:
frequency4 = pd.DataFrame()
frequency4['noAnnotations'] = df1['fourth'].value_counts() 
frequency4['%'] = df1['fourth'].value_counts() / len(df1.index) * 100
frequency4

Unnamed: 0_level_0,noAnnotations,%
fourth,Unnamed: 1_level_1,Unnamed: 2_level_1
Cancer-Primary Site,12246,3.923705
Metastases,3389,1.085859
multiple,3035,0.972435
Chemotherapy,2274,0.728606
Hematologic Malignancy,1897,0.607812
p AVR,1286,0.412043
Radiation Therapy within past 6 months,1113,0.356613
s,999,0.320087
o GI bleeding,967,0.309834
V paced,796,0.255044


In [14]:
df_past_dx=df1.copy()
frequency_table_1 =df_past_dx.groupby(['apacheadmissiondx','first']).size().reset_index(name='count')
frequency_table_1 = frequency_table_1.sort_values(by='count', ascending=False)
frequency_table_1.head(n=20)

Unnamed: 0,apacheadmissiondx,first,count
309,"CHF, congestive heart failure",Cardiovascular (R),11868
1603,"Sepsis, pulmonary",Cardiovascular (R),10688
318,"CVA, cerebrovascular accident/stroke",Cardiovascular (R),7596
1110,"Infarction, acute myocardial (MI)",Cardiovascular (R),7308
1612,"Sepsis, renal/UTI (including bladder)",Cardiovascular (R),6442
677,Emphysema/bronchitis,Pulmonary,5130
389,Cardiac arrest (with or without respiratory ar...,Cardiovascular (R),4985
671,Emphysema/bronchitis,Cardiovascular (R),4823
1413,"Pneumonia, bacterial",Cardiovascular (R),4509
1534,"Rhythm disturbance (atrial, supraventricular)",Cardiovascular (R),4405


In [15]:
frequency_table_2 =df_past_dx.groupby(['apacheadmissiondx','second']).size().reset_index(name='count')
frequency_table_2 = frequency_table_2.sort_values(by='count', ascending=False)
frequency_table_2.head(n=20)

Unnamed: 0,apacheadmissiondx,second,count
5294,"Sepsis, pulmonary",Hypertension Requiring Treatment,3941
1023,"CVA, cerebrovascular accident/stroke",Hypertension Requiring Treatment,3595
974,"CHF, congestive heart failure",Hypertension Requiring Treatment,3101
965,"CHF, congestive heart failure",Congestive Heart Failure,2856
3564,"Infarction, acute myocardial (MI)",Hypertension Requiring Treatment,2841
2135,Emphysema/bronchitis,COPD,2599
5344,"Sepsis, renal/UTI (including bladder)",Hypertension Requiring Treatment,2585
1904,Diabetic ketoacidosis,Insulin Dependent Diabetes,2573
5304,"Sepsis, pulmonary",Oncology (R),2436
5283,"Sepsis, pulmonary",COPD,2203


In [29]:
df1.head(n=10)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,gender,age,apacheadmissiondx,pasthistorypath,first,second,third,fourth,unitadmitsource,admissionweight,dischargeweight,unitdischargeoffset,unitdischargelocation,unitdischargestatus
patientunitstayid,hospitaladmitoffset,pasthistoryoffset,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
141168,0,60,Female,70,"Rhythm disturbance (atrial, supraventricular)",Pulmonary/COPD/COPD - no limitations,Pulmonary,COPD,COPD - no limitations,,Direct Admit,84.3,85.8,3596,Death,Expired
141168,0,60,Female,70,"Rhythm disturbance (atrial, supraventricular)",Cardiovascular (R)/Valve disease/AS,Cardiovascular (R),Valve disease,AS,,Direct Admit,84.3,85.8,3596,Death,Expired
141168,0,60,Female,70,"Rhythm disturbance (atrial, supraventricular)",Cardiovascular (R)/Congestive Heart Failure/C...,Cardiovascular (R),Congestive Heart Failure,CHF - class II,,Direct Admit,84.3,85.8,3596,Death,Expired
141168,0,60,Female,70,"Rhythm disturbance (atrial, supraventricular)",Cardiovascular (R)/Hypertension Requiring Tre...,Cardiovascular (R),Hypertension Requiring Treatment,hypertension requiring treatment,,Direct Admit,84.3,85.8,3596,Death,Expired
141168,0,60,Female,70,"Rhythm disturbance (atrial, supraventricular)",Cardiovascular (R)/AICD/AICD,Cardiovascular (R),AICD,AICD,,Direct Admit,84.3,85.8,3596,Death,Expired
141168,0,60,Female,70,"Rhythm disturbance (atrial, supraventricular)",Renal (R)/Renal Failure/renal failure- not c...,Renal (R),Renal Failure,renal failure- not currently dialyzed,,Direct Admit,84.3,85.8,3596,Death,Expired
141168,0,60,Female,70,"Rhythm disturbance (atrial, supraventricular)",Rheumatic/Rheumatoid Arthritis/rheumatoid art...,Rheumatic,Rheumatoid Arthritis,rheumatoid arthritis,,Direct Admit,84.3,85.8,3596,Death,Expired
141168,0,60,Female,70,"Rhythm disturbance (atrial, supraventricular)",Cardiovascular (R)/Arrhythmias/atrial fibrill...,Cardiovascular (R),Arrhythmias,atrial fibrillation - chronic,,Direct Admit,84.3,85.8,3596,Death,Expired
141168,0,115,Female,70,"Rhythm disturbance (atrial, supraventricular)",Cardiovascular (R)/Valve disease/AS,Cardiovascular (R),Valve disease,AS,,Direct Admit,84.3,85.8,3596,Death,Expired
141168,0,115,Female,70,"Rhythm disturbance (atrial, supraventricular)",Cardiovascular (R)/Hypertension Requiring Tre...,Cardiovascular (R),Hypertension Requiring Treatment,hypertension requiring treatment,,Direct Admit,84.3,85.8,3596,Death,Expired


In [43]:
df1['first'].unique()

array(['Pulmonary', 'Cardiovascular (R)', 'Renal  (R)', 'Rheumatic',
       'Endocrine (R)', 'Gastrointestinal (R)', 'Infectious Disease (R)',
       'Neurologic', 'Hematology'], dtype=object)

In [44]:
# 去除 first 列中的多余空格
df1['first'] = df1['first'].str.strip()

# 然后运行你的代码
from tabulate import tabulate

diseases = ['Cardiovascular (R)', 'Endocrine (R)', 'Pulmonary', 'Neurologic', 'Hematology', 'Renal  (R)', 'Gastrointestinal (R)', 'Rheumatic', 'Infectious Disease (R)']

for disease in diseases:
    # 筛选出特定疾病的数据
    df_disease = df1[df1['first'] == disease].copy()
    # 统计疾病的频率
    freq = pd.DataFrame()
    freq['noAnnotations'] = df_disease['second'].value_counts()
    freq['%'] = round(df_disease['second'].value_counts() / len(df_disease.index) * 100, 2)

    # 格式化输出
    table = tabulate(freq, headers="keys", tablefmt="psql", showindex=True)
    print(f"The frequency of {disease}:")
    print(table)
    print("------------------------------")


The frequency of Cardiovascular (R):
+----------------------------------+-----------------+-------+
| second                           |   noAnnotations |     % |
|----------------------------------+-----------------+-------|
| Hypertension Requiring Treatment |           56983 | 39.54 |
| Congestive Heart Failure         |           18473 | 12.82 |
| Arrhythmias                      |           15676 | 10.88 |
| Myocardial Infarction            |           10655 |  7.39 |
| Procedural Coronary Intervention |            7897 |  5.48 |
| Coronary Artery Bypass           |            7155 |  4.97 |
| Valve disease                    |            5973 |  4.14 |
| Peripheral Vascular Disease      |            5149 |  3.57 |
| Venous Thrombosis                |            4292 |  2.98 |
| Pacemaker                        |            4019 |  2.79 |
| Angina                           |            2533 |  1.76 |
| Pulmonary Embolism               |            2389 |  1.66 |
| AICD            

In [25]:
from bigtree import Node, list_to_tree, tree_to_dot, dataframe_to_tree, tree_to_pillow

for disease in frequency1.index.get_level_values('first'):
    df_ = df1[df1['first'] == disease]
    # df_.set_index(['patientunitstayid', 'pasthistoryoffset'], inplace = True)
    
    paths_mult_ = df_.groupby('pasthistorypath', dropna = True)['pasthistorypath'].size().to_frame('noAnnotations')
    paths_mult_.reset_index(inplace=True)
    paths_mult_.sort_values('noAnnotations', ascending = False, inplace = True)

    paths_ = list(pd.unique(df_['pasthistorypath']))
    root_ = dataframe_to_tree(paths_mult_, sep = '/')
    root_.show(attr_list=["noAnnotations"])
    pillow_image_ = tree_to_pillow(root_)
    pillow_image_.save(porcesseddir2  + str(disease) + "selected_pastHistoryTree.jpg")


 Cardiovascular (R)
├── Hypertension Requiring Treatment
│   └── hypertension requiring treatment [noAnnotations=56983]
├── Congestive Heart Failure
│   ├── CHF [noAnnotations=14251]
│   ├── CHF - severity unknown [noAnnotations=2524]
│   ├── CHF - class III [noAnnotations=570]
│   ├── CHF - class IV [noAnnotations=452]
│   ├── CHF - class II [noAnnotations=443]
│   └── CHF - class I [noAnnotations=233]
├── Arrhythmias
│   ├── atrial fibrillation - chronic [noAnnotations=10881]
│   ├── atrial fibrillation - intermittent [noAnnotations=3017]
│   ├── ventricular tachycardia [noAnnotations=602]
│   ├── SVT- other [noAnnotations=464]
│   ├── sick sinus syndrome [noAnnotations=438]
│   ├── ventricular fibrillation [noAnnotations=128]
│   ├── ventricular ectopy [noAnnotations=118]
│   └── MAT [noAnnotations=28]
├── Myocardial Infarction
│   ├── MI - date unknown [noAnnotations=5826]
│   ├── MI - remote [noAnnotations=1931]
│   ├── MI - within 6 months [noAnnotations=1029]
│   ├── MI - within

## graph structure to encode past history

In [30]:
df1= pd.read_csv(porcesseddir2 + 'selected_pastHistory.csv')
df1 = df1.set_index(['patientunitstayid', 'hospitaladmitoffset','pasthistoryoffset'])
df1.sort_index(level = ['patientunitstayid','hospitaladmitoffset', 'pasthistoryoffset'], inplace = True)

df=df1.copy()

In [31]:
import networkx as nx
from node2vec import Node2Vec  

# Create the graph
G = nx.DiGraph()

# Adding edges based on 'first', 'second', and 'third' relationships
for _, row in df.iterrows():
    if pd.notna(row['first']) and pd.notna(row['second']):
        G.add_edge(row['first'], row['second'])
    if pd.notna(row['second']) and pd.notna(row['third']):
        G.add_edge(row['second'], row['third'])

# Generate embeddings using node2vec
node2vec = Node2Vec(G, dimensions=16, walk_length=10, num_walks=100,workers=4)
model = node2vec.fit(window=5,min_count=1, batch_words=4)

# Obtain node embeddings
node_embeddings = {node: model.wv[node] for node in G.nodes()}

# Map embeddings back to the dataset
def get_embedding(row):
    first_embed = node_embeddings.get(row['first'], [0]*16)
    second_embed = node_embeddings.get(row['second'], [0]*16)
    third_embed = node_embeddings.get(row['third'], [0]*16)
    # Combine embeddings, here by concatenation
    return first_embed + second_embed + third_embed

df['past_history_embedding'] = df.apply(get_embedding, axis=1)



Computing transition probabilities: 100%|██████████| 209/209 [00:00<00:00, 46076.72it/s]
Generating walks (CPU: 1): 100%|██████████| 25/25 [00:00<00:00, 317.32it/s]
Generating walks (CPU: 2): 100%|██████████| 25/25 [00:00<00:00, 321.77it/s]
Generating walks (CPU: 3): 100%|██████████| 25/25 [00:00<00:00, 325.22it/s]
Generating walks (CPU: 4): 100%|██████████| 25/25 [00:00<00:00, 321.56it/s]


In [32]:
node_embeddings


{' Pulmonary': array([ 0.03676878, -0.01850841,  0.01976099,  0.03122324,  0.05292767,
         0.03513385,  0.05942842, -0.06029176, -0.04976672, -0.04223847,
        -0.04667318, -0.04977261, -0.04867606, -0.01839142,  0.008726  ,
        -0.01796743], dtype=float32),
 'COPD': array([ 0.29238734, -0.4019603 , -0.12774564,  0.17460066,  0.25233665,
        -0.1640202 ,  0.73615444,  0.12936051, -0.07475804, -0.00099248,
        -0.08477809,  0.08813667, -0.00833007, -0.37394023, -0.42503622,
         0.1399634 ], dtype=float32),
 'COPD  - no limitations': array([ 1.0659351e-01, -1.8591116e-01, -1.1954455e-04,  8.3978586e-02,
         6.8768203e-02, -5.2483819e-02,  3.3102617e-01, -2.2693705e-02,
         6.7277737e-02,  7.3387071e-02, -8.5583389e-02,  1.4631544e-02,
         4.3209422e-02, -1.8520169e-01, -2.5717363e-01,  3.5334253e-04],
       dtype=float32),
 ' Cardiovascular (R)': array([ 9.0889633e-05,  2.1456778e-03, -6.1658926e-02,  6.2104687e-02,
        -1.2508377e-02, -3.0545

In [34]:
df['past_history_embedding'].head(n=10)

patientunitstayid  hospitaladmitoffset  pasthistoryoffset
141168             0                    60                   [0.43574962, -0.60637987, -0.1081042, 0.289802...
                                        60                   [0.32679254, -0.37862754, -0.14824907, 0.21111...
                                        60                   [0.35420743, -0.42040563, -0.10733586, 0.16626...
                                        60                   [0.4642007, -0.4940417, 0.042913333, 0.3671931...
                                        60                   [3.6817322, -3.4920173, -0.90537065, -0.259674...
                                        60                   [0.37495118, -0.5624727, -0.12942764, 0.005626...
                                        60                   [0.33978766, -0.50038135, -0.017052568, 0.2514...
                                        60                   [0.31318283, -0.3843747, -0.14824475, 0.066163...
                                        115           

In [35]:
df.head(n=10)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,gender,age,apacheadmissiondx,pasthistorypath,first,second,third,fourth,unitadmitsource,admissionweight,dischargeweight,unitdischargeoffset,unitdischargelocation,unitdischargestatus,past_history_embedding
patientunitstayid,hospitaladmitoffset,pasthistoryoffset,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
141168,0,60,Female,70,"Rhythm disturbance (atrial, supraventricular)",Pulmonary/COPD/COPD - no limitations,Pulmonary,COPD,COPD - no limitations,,Direct Admit,84.3,85.8,3596,Death,Expired,"[0.43574962, -0.60637987, -0.1081042, 0.289802..."
141168,0,60,Female,70,"Rhythm disturbance (atrial, supraventricular)",Cardiovascular (R)/Valve disease/AS,Cardiovascular (R),Valve disease,AS,,Direct Admit,84.3,85.8,3596,Death,Expired,"[0.32679254, -0.37862754, -0.14824907, 0.21111..."
141168,0,60,Female,70,"Rhythm disturbance (atrial, supraventricular)",Cardiovascular (R)/Congestive Heart Failure/C...,Cardiovascular (R),Congestive Heart Failure,CHF - class II,,Direct Admit,84.3,85.8,3596,Death,Expired,"[0.35420743, -0.42040563, -0.10733586, 0.16626..."
141168,0,60,Female,70,"Rhythm disturbance (atrial, supraventricular)",Cardiovascular (R)/Hypertension Requiring Tre...,Cardiovascular (R),Hypertension Requiring Treatment,hypertension requiring treatment,,Direct Admit,84.3,85.8,3596,Death,Expired,"[0.4642007, -0.4940417, 0.042913333, 0.3671931..."
141168,0,60,Female,70,"Rhythm disturbance (atrial, supraventricular)",Cardiovascular (R)/AICD/AICD,Cardiovascular (R),AICD,AICD,,Direct Admit,84.3,85.8,3596,Death,Expired,"[3.6817322, -3.4920173, -0.90537065, -0.259674..."
141168,0,60,Female,70,"Rhythm disturbance (atrial, supraventricular)",Renal (R)/Renal Failure/renal failure- not c...,Renal (R),Renal Failure,renal failure- not currently dialyzed,,Direct Admit,84.3,85.8,3596,Death,Expired,"[0.37495118, -0.5624727, -0.12942764, 0.005626..."
141168,0,60,Female,70,"Rhythm disturbance (atrial, supraventricular)",Rheumatic/Rheumatoid Arthritis/rheumatoid art...,Rheumatic,Rheumatoid Arthritis,rheumatoid arthritis,,Direct Admit,84.3,85.8,3596,Death,Expired,"[0.33978766, -0.50038135, -0.017052568, 0.2514..."
141168,0,60,Female,70,"Rhythm disturbance (atrial, supraventricular)",Cardiovascular (R)/Arrhythmias/atrial fibrill...,Cardiovascular (R),Arrhythmias,atrial fibrillation - chronic,,Direct Admit,84.3,85.8,3596,Death,Expired,"[0.31318283, -0.3843747, -0.14824475, 0.066163..."
141168,0,115,Female,70,"Rhythm disturbance (atrial, supraventricular)",Cardiovascular (R)/Valve disease/AS,Cardiovascular (R),Valve disease,AS,,Direct Admit,84.3,85.8,3596,Death,Expired,"[0.32679254, -0.37862754, -0.14824907, 0.21111..."
141168,0,115,Female,70,"Rhythm disturbance (atrial, supraventricular)",Cardiovascular (R)/Hypertension Requiring Tre...,Cardiovascular (R),Hypertension Requiring Treatment,hypertension requiring treatment,,Direct Admit,84.3,85.8,3596,Death,Expired,"[0.4642007, -0.4940417, 0.042913333, 0.3671931..."


In [None]:
# df.to_csv(porcesseddir2 + 'embedded_pastHistory.csv')

df=pd.read_csv(porcesseddir2 + 'embedded_pastHistory.csv')


In [None]:
df2=df.copy()

In [39]:
df2['past_history_embedding']=np.array( df2['past_history_embedding'],dtype=object)

In [40]:
df2.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,gender,age,apacheadmissiondx,pasthistorypath,first,second,third,fourth,unitadmitsource,admissionweight,dischargeweight,unitdischargeoffset,unitdischargelocation,unitdischargestatus,past_history_embedding
patientunitstayid,hospitaladmitoffset,pasthistoryoffset,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
141168,0,60,Female,70,"Rhythm disturbance (atrial, supraventricular)",Pulmonary/COPD/COPD - no limitations,Pulmonary,COPD,COPD - no limitations,,Direct Admit,84.3,85.8,3596,Death,Expired,"[0.43574962, -0.60637987, -0.1081042, 0.289802..."
141168,0,60,Female,70,"Rhythm disturbance (atrial, supraventricular)",Cardiovascular (R)/Valve disease/AS,Cardiovascular (R),Valve disease,AS,,Direct Admit,84.3,85.8,3596,Death,Expired,"[0.32679254, -0.37862754, -0.14824907, 0.21111..."
141168,0,60,Female,70,"Rhythm disturbance (atrial, supraventricular)",Cardiovascular (R)/Congestive Heart Failure/C...,Cardiovascular (R),Congestive Heart Failure,CHF - class II,,Direct Admit,84.3,85.8,3596,Death,Expired,"[0.35420743, -0.42040563, -0.10733586, 0.16626..."
141168,0,60,Female,70,"Rhythm disturbance (atrial, supraventricular)",Cardiovascular (R)/Hypertension Requiring Tre...,Cardiovascular (R),Hypertension Requiring Treatment,hypertension requiring treatment,,Direct Admit,84.3,85.8,3596,Death,Expired,"[0.4642007, -0.4940417, 0.042913333, 0.3671931..."
141168,0,60,Female,70,"Rhythm disturbance (atrial, supraventricular)",Cardiovascular (R)/AICD/AICD,Cardiovascular (R),AICD,AICD,,Direct Admit,84.3,85.8,3596,Death,Expired,"[3.6817322, -3.4920173, -0.90537065, -0.259674..."


## max pooling Aggregation

In [41]:
# df1['past_history_embedding'] = df1['past_history_embedding'].apply(np.array)

def max_pooling(embeddings):
    # Stack embeddings and take the maximum across each dimension
    return np.max(np.vstack(embeddings), axis=0)

df_max_pooled = df2.groupby('patientunitstayid')['past_history_embedding'].apply(max_pooling).reset_index()

df_max_pooled.rename(columns={'past_history_embedding': 'max_pooled_embedding'}, inplace=True)

print(df_max_pooled.head())

   patientunitstayid                               max_pooled_embedding
0             141168  [3.6817322, -0.33900663, 0.042913333, 0.367193...
1             141265  [0.33518007, -0.50759095, 0.050242223, 0.20850...
2             141266  [0.4642007, -0.33900663, 0.100118384, 0.367193...
3             141276  [0.33518007, -0.50759095, 0.050242223, 0.20850...
4             141284  [0.4848393, -0.49136096, 0.042913333, 0.367193...


In [46]:
df_max = df2.drop(columns=['past_history_embedding','pasthistorypath','first','second','third','fourth']).merge(df_max_pooled, on='patientunitstayid', how='left')
df_max = df_max.groupby('patientunitstayid').first().reset_index()
df_max.head()

Unnamed: 0,patientunitstayid,gender,age,apacheadmissiondx,unitadmitsource,admissionweight,dischargeweight,unitdischargeoffset,unitdischargelocation,unitdischargestatus,max_pooled_embedding
0,141168,Female,70,"Rhythm disturbance (atrial, supraventricular)",Direct Admit,84.3,85.8,3596,Death,Expired,"[3.6817322, -0.33900663, 0.042913333, 0.367193..."
1,141265,Male,67,"CVA, cerebrovascular accident/stroke",Direct Admit,100.0,91.8,6068,Floor,Alive,"[0.33518007, -0.50759095, 0.050242223, 0.20850..."
2,141266,Male,73,"Sepsis, renal/UTI (including bladder)",Emergency Department,120.4,112.9,1501,Floor,Alive,"[0.4642007, -0.33900663, 0.100118384, 0.367193..."
3,141276,Female,59,"Arrest, respiratory (without cardiac arrest)",Direct Admit,156.6,156.6,1684,Home,Alive,"[0.33518007, -0.50759095, 0.050242223, 0.20850..."
4,141284,Male,63,Anemia,Direct Admit,,88.5,2076,Floor,Alive,"[0.4848393, -0.49136096, 0.042913333, 0.367193..."


## sum aggregation

In [53]:
def sum_aggregation(embeddings):
    return np.sum(np.vstack(embeddings), axis=0)

# Group by 'patientunitstayid' and apply sum aggregation
df_sum_aggregated = df.groupby('patientunitstayid')['past_history_embedding'].apply(sum_aggregation).reset_index()

# Rename the column to indicate sum-aggregated embeddings
df_sum_aggregated.rename(columns={'past_history_embedding': 'sum_aggregated_embedding'}, inplace=True)

# Display the result
print(df_sum_aggregated.head())

   patientunitstayid                           sum_aggregated_embedding
0             141168  [7.9882274, -7.4432325, -1.9430639, 2.9261658,...
1             141265  [0.14762807, -0.4765364, -0.08805785, 0.215569...
2             141266  [4.5776143, -4.4374404, -1.2511735, 2.3237233,...
3             141276  [0.14762807, -0.4765364, -0.08805785, 0.215569...
4             141284  [1.8346597, -1.7714701, -0.44224504, 0.8185866...


In [47]:
df_sum = df2.drop(columns=['past_history_embedding','pasthistorypath','first','second','third','fourth']).merge(df_max_pooled, on='patientunitstayid', how='left')
df_sum = df_sum.groupby('patientunitstayid').first().reset_index()
df_sum.head()

Unnamed: 0,patientunitstayid,gender,age,apacheadmissiondx,unitadmitsource,admissionweight,dischargeweight,unitdischargeoffset,unitdischargelocation,unitdischargestatus,max_pooled_embedding
0,141168,Female,70,"Rhythm disturbance (atrial, supraventricular)",Direct Admit,84.3,85.8,3596,Death,Expired,"[3.6817322, -0.33900663, 0.042913333, 0.367193..."
1,141265,Male,67,"CVA, cerebrovascular accident/stroke",Direct Admit,100.0,91.8,6068,Floor,Alive,"[0.33518007, -0.50759095, 0.050242223, 0.20850..."
2,141266,Male,73,"Sepsis, renal/UTI (including bladder)",Emergency Department,120.4,112.9,1501,Floor,Alive,"[0.4642007, -0.33900663, 0.100118384, 0.367193..."
3,141276,Female,59,"Arrest, respiratory (without cardiac arrest)",Direct Admit,156.6,156.6,1684,Home,Alive,"[0.33518007, -0.50759095, 0.050242223, 0.20850..."
4,141284,Male,63,Anemia,Direct Admit,,88.5,2076,Floor,Alive,"[0.4848393, -0.49136096, 0.042913333, 0.367193..."


## avg aggregation

In [54]:
def average_aggregation(embeddings):
    return np.mean(np.vstack(embeddings), axis=0)

# Group by 'patientunitstayid' and apply average aggregation
df_avg_aggregated = df.groupby('patientunitstayid')['past_history_embedding'].apply(average_aggregation).reset_index()

# Rename the column to indicate average-aggregated embeddings
df_avg_aggregated.rename(columns={'past_history_embedding': 'avg_aggregated_embedding'}, inplace=True)

# Display the result
print(df_avg_aggregated.head())

   patientunitstayid                           avg_aggregated_embedding
0             141168  [0.5705877, -0.5316595, -0.13879028, 0.2090118...
1             141265  [0.14762807, -0.4765364, -0.08805785, 0.215569...
2             141266  [0.3051743, -0.29582936, -0.08341157, 0.154914...
3             141276  [0.14762807, -0.4765364, -0.08805785, 0.215569...
4             141284  [0.45866492, -0.44286752, -0.11056126, 0.20464...


In [48]:
df_avg = df2.drop(columns=['past_history_embedding','pasthistorypath','first','second','third','fourth']).merge(df_max_pooled, on='patientunitstayid', how='left')
df_avg = df_avg.groupby('patientunitstayid').first().reset_index()
df_avg.head()

Unnamed: 0,patientunitstayid,gender,age,apacheadmissiondx,unitadmitsource,admissionweight,dischargeweight,unitdischargeoffset,unitdischargelocation,unitdischargestatus,max_pooled_embedding
0,141168,Female,70,"Rhythm disturbance (atrial, supraventricular)",Direct Admit,84.3,85.8,3596,Death,Expired,"[3.6817322, -0.33900663, 0.042913333, 0.367193..."
1,141265,Male,67,"CVA, cerebrovascular accident/stroke",Direct Admit,100.0,91.8,6068,Floor,Alive,"[0.33518007, -0.50759095, 0.050242223, 0.20850..."
2,141266,Male,73,"Sepsis, renal/UTI (including bladder)",Emergency Department,120.4,112.9,1501,Floor,Alive,"[0.4642007, -0.33900663, 0.100118384, 0.367193..."
3,141276,Female,59,"Arrest, respiratory (without cardiac arrest)",Direct Admit,156.6,156.6,1684,Home,Alive,"[0.33518007, -0.50759095, 0.050242223, 0.20850..."
4,141284,Male,63,Anemia,Direct Admit,,88.5,2076,Floor,Alive,"[0.4848393, -0.49136096, 0.042913333, 0.367193..."
