In [2]:
# Import libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import psycopg2
import getpass
import pdvega
import seaborn as sns
# for configuring connection 
from configobj import ConfigObj
import os

%matplotlib inline

In [3]:
# Create a database connection using settings from the config file, falling
# back to the built-in defaults below when the file does not exist.
config='../db/config.ini'

# connection info
conn_info = dict()
if os.path.isfile(config):
    # NOTE: `config` is rebound from the path string to the parsed ConfigObj.
    config = ConfigObj(config)
    conn_info["sqluser"] = config['username']
    conn_info["sqlpass"] = config['password']
    conn_info["sqlhost"] = config['host']
    conn_info["sqlport"] = config['port']
    conn_info["dbname"] = config['dbname']
    conn_info["schema_name"] = config['schema_name']
else:
    conn_info["sqluser"] = 'postgres'
    conn_info["sqlpass"] = ''
    conn_info["sqlhost"] = '192.168.60.144'
    conn_info["sqlport"] = 6432
    conn_info["dbname"] = 'eicu'
    conn_info["schema_name"] = 'public,eicu_crd'

# Connect to the eICU database
print('Database: {}'.format(conn_info['dbname']))
print('Username: {}'.format(conn_info["sqluser"]))

# Arguments shared by every connection attempt. Host and port are always
# passed explicitly: the previous special case that dropped them compared the
# port against the string '6432' (the default is the int 6432) and, when it
# did match, silently targeted the local default socket instead of the
# configured server.
connect_kwargs = dict(dbname=conn_info["dbname"],
                      host=conn_info["sqlhost"],
                      port=conn_info["sqlport"],
                      user=conn_info["sqluser"])

if conn_info["sqlpass"] == '':
    # No password configured: first try to connect without one, which works
    # when the server does not require a password for this user.
    try:
        con = psycopg2.connect(**connect_kwargs)
    except psycopg2.OperationalError:
        # Connection refused or authentication failed: ask for a password
        # interactively instead of storing it in the notebook.
        conn_info["sqlpass"] = getpass.getpass('Password: ')
        con = psycopg2.connect(password=conn_info["sqlpass"], **connect_kwargs)
else:
    # A password was supplied by the config file. Previously this case never
    # opened a connection at all, leaving `con` undefined.
    con = psycopg2.connect(password=conn_info["sqlpass"], **connect_kwargs)

# Statement prepended to every query so unqualified table names resolve
# against the configured schema(s).
query_schema = 'set search_path to ' + conn_info['schema_name'] + ';'

Database: eicu
Username: postgres


In [4]:
from sqlalchemy import create_engine

# Build a SQLAlchemy engine for the eICU database and bind it to `con`.
# NOTE(review): this rebinds `con`, replacing the psycopg2 connection created
# in the previous cell, and uses a hardcoded user/host/port rather than
# `conn_info` -- confirm which connection is intended.
engine_url = 'postgresql://eicu@192.168.60.144:6432/eicu'
con = create_engine(engine_url)

## 筛选来自 Direct Admit 和 Emergency Department、且 ICU 住院时长为 1 到 14 天的病人

In [5]:
# Cohort selection: unit stays admitted from the Emergency Department or as a
# Direct Admit whose unit discharge offset lies between 1440 and 20160
# (1 to 14 days, presuming the offset is recorded in minutes).
cohort_sql = """
SELECT DISTINCT patientunitstayid, hospitaladmitOffset, gender, age, apacheadmissiondx, unitadmitsource, admissionweight,	dischargeweight, unitdischargeoffset, unitdischargelocation,	unitdischargestatus
FROM patient
WHERE unitadmitsource IN ('Emergency Department', 'Direct Admit') 
    AND unitdischargeoffset >=1440 
    AND unitdischargeoffset <=20160
      
"""
# Prefix the search_path statement so `patient` resolves in the right schema.
query = query_schema + cohort_sql
df_p = pd.read_sql_query(sql=query, con=con)

In [6]:
# Preview the first 20 rows of the selected cohort.
df_p.head(20)

Unnamed: 0,patientunitstayid,hospitaladmitoffset,gender,age,apacheadmissiondx,unitadmitsource,admissionweight,dischargeweight,unitdischargeoffset,unitdischargelocation,unitdischargestatus
0,1201807,-69,Male,66,"Sepsis, GI",Emergency Department,99.7,,12151,Floor,Alive
1,777747,-165,Female,62,Emphysema/bronchitis,Emergency Department,60.0,,5232,Step-Down Unit (SDU),Alive
2,1805725,-149,Male,> 89,"Sepsis, renal/UTI (including bladder)",Emergency Department,83.0,83.0,3780,Floor,Alive
3,1361952,-14,Male,53,"Sepsis, unknown",Emergency Department,66.8,58.4,7386,Floor,Alive
4,324737,0,Female,47,"Sepsis, unknown",Direct Admit,147.8,,2911,Floor,Alive
5,226752,-11,Male,33,"Neoplasm, neurologic",Direct Admit,83.9,78.7,4145,Floor,Alive
6,242583,0,Female,89,"Bleeding, lower GI",Direct Admit,117.7,121.4,3206,Floor,Alive
7,1060556,-384,Male,47,Pneumothorax,Emergency Department,75.0,,1601,Floor,Alive
8,2850780,-99,Female,63,"Sepsis, pulmonary",Emergency Department,46.4,,1796,Telemetry,Alive
9,1159611,-80,Male,> 89,Cardiac arrest (with or without respiratory ar...,Emergency Department,58.9,,6072,Step-Down Unit (SDU),Alive


In [7]:
# Count unit stays for every (discharge location, discharge status) pair.
cols = ['patientunitstayid', 'unitdischargelocation', 'unitdischargestatus']
df_p1 = df_p[cols]

# Group the narrowed frame (it was previously built but never used); the
# counts are the same as grouping the full frame because no rows are dropped.
frequency_table = (
    df_p1
    .groupby(['unitdischargelocation', 'unitdischargestatus'])
    .size()
    .reset_index(name='count')
)

# Leave the frame as the cell's last expression so it renders as a table
# rather than as print() text.
frequency_table


       unitdischargelocation unitdischargestatus  count
0                                                     4
1                                          Alive     66
2           Acute Care/Floor               Alive   6725
3                      Death             Expired   3452
4                      Floor               Alive  33902
5                       Home               Alive   6909
6                        ICU               Alive    216
7               Nursing Home               Alive    135
8             Operating Room               Alive      4
9                      Other               Alive    530
10            Other External               Alive   1122
11            Other Hospital               Alive   1250
12                 Other ICU               Alive   1045
13          Other ICU (CABG)               Alive      3
14            Other Internal               Alive     98
15            Rehabilitation               Alive    219
16  Skilled Nursing Facility               Alive

- Home：恢复最好。
- Floor、Rehabilitation、Acute Care/Floor：病情有所好转。
- Step-Down Unit (SDU)、Skilled Nursing Facility、Other：患者仍需监护，情况相对中等。
- ICU、Operating Room、Other ICU、Other ICU (CABG)：病情较为严重。
- Death：最严重的情况。

## 统计 nan 值

In [14]:
# Percentage of missing (NaN) values per column, highest first.
nan_fraction = df_p.isna().sum() / len(df_p.index)
missing_ratio = (
    nan_fraction
    .to_frame(name='missing ratio %')
    .mul(100)
    .sort_values(by='missing ratio %', ascending=False)
)
missing_ratio

Unnamed: 0,missing ratio %
dischargeweight,38.985478
admissionweight,2.023493
patientunitstayid,0.0
hospitaladmitoffset,0.0
gender,0.0
age,0.0
apacheadmissiondx,0.0
unitadmitsource,0.0
unitdischargeoffset,0.0
unitdischargelocation,0.0


In [18]:
# Absolute and relative frequency of each unit discharge location.
# The percentage is taken over all rows of df_p.
location_counts = df_p['unitdischargelocation'].value_counts()
freq_l = pd.DataFrame({
    'noAnnotations': location_counts,
    '%': location_counts / len(df_p.index) * 100,
})
freq_l

Unnamed: 0_level_0,noAnnotations,%
unitdischargelocation,Unnamed: 1_level_1,Unnamed: 2_level_1
Floor,33902,49.531017
Step-Down Unit (SDU),7617,11.128481
Home,6909,10.094089
Acute Care/Floor,6725,9.825264
Telemetry,4325,6.31885
Death,3452,5.043392
Other Hospital,1250,1.826257
Other External,1122,1.639248
Other ICU,1045,1.526751
Skilled Nursing Facility,824,1.203869


In [19]:
# Absolute and relative frequency of each unit discharge status.
# The percentage is taken over all rows of df_p.
status_counts = df_p['unitdischargestatus'].value_counts()
freq_s = pd.DataFrame({
    'noAnnotations': status_counts,
    '%': status_counts / len(df_p.index) * 100,
})
freq_s

Unnamed: 0_level_0,noAnnotations,%
unitdischargestatus,Unnamed: 1_level_1,Unnamed: 2_level_1
Alive,64990,94.950764
Expired,3452,5.043392
,4,0.005844
