## Explore MIMIC-III: Overview of the MIMIC-III Data

[Tutorial is available online](https://mimic.physionet.org/tutorials/intro-to-mimic-iii/)

Dr. Maria P. Frushicheva @ MIT

In [1]:
# Import libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import psycopg2

# used to calculate AUROC/accuracy
from sklearn import metrics

%matplotlib inline

In [2]:
# Connection settings for the local PostgreSQL copy of MIMIC-III
sqluser = 'mimic'
dbname = 'mimic'
schema_name = 'mimiciii'

# Open the connection and a cursor on it; the query cells below pass `con`
# to pandas and use `cur` to set the schema search path
con = psycopg2.connect(user=sqluser, dbname=dbname)
cur = con.cursor()

## Define and Track Patient Stays

### Admissions Table (defines HADM_ID)

In [3]:
# Resolve unqualified table names against the MIMIC-III schema.
cur.execute('SET search_path to ' + schema_name)

# Load the whole ADMISSIONS table into a DataFrame.
query = \
"""
SELECT *
FROM admissions;
"""
data = pd.read_sql_query(query, con)

# Single-argument print() calls give the same output on Python 2 and 3;
# the original print statements were a SyntaxError on Python 3.
print("Table Dimensions: {0} rows and  {1} columns".format(data.shape[0], data.shape[1]))

print("\nColumn Names:")
for col in data.columns:
    print(col)

# Preview the first rows as the cell's rich output.
data.head()

Table Dimensions: 58976 rows and  20 columns

Column Names:
row_id
subject_id
hadm_id
admittime
dischtime
deathtime
admission_type
admission_location
discharge_location
insurance
language
religion
marital_status
ethnicity
edregtime
edouttime
diagnosis
hospital_expire_flag
has_ioevents_data
has_chartevents_data


Unnamed: 0,row_id,subject_id,hadm_id,admittime,dischtime,deathtime,admission_type,admission_location,discharge_location,insurance,language,religion,marital_status,ethnicity,edregtime,edouttime,diagnosis,hospital_expire_flag,has_ioevents_data,has_chartevents_data
0,90,87,190659,2191-02-25 20:30:00,2191-04-25 15:18:00,,NEWBORN,PHYS REFERRAL/NORMAL DELI,SHORT TERM HOSPITAL,Private,,UNOBTAINABLE,,UNKNOWN/NOT SPECIFIED,NaT,NaT,NEWBORN,0,1,1
1,91,88,123010,2111-08-29 03:03:00,2111-09-03 14:24:00,,EMERGENCY,EMERGENCY ROOM ADMIT,HOME,Private,,,,BLACK/AFRICAN AMERICAN,2111-08-29 01:44:00,2111-08-29 02:28:00,S/P MOTOR VEHICLE ACCIDENT-STABBING,0,1,1
2,92,89,188646,2185-06-17 05:22:00,2185-06-21 11:15:00,,NEWBORN,PHYS REFERRAL/NORMAL DELI,SHORT TERM HOSPITAL,Medicaid,,UNOBTAINABLE,,UNKNOWN/NOT SPECIFIED,NaT,NaT,NEWBORN,0,1,1
3,93,91,121205,2177-04-23 00:08:00,2177-05-10 15:16:00,2177-05-10 15:16:00,EMERGENCY,EMERGENCY ROOM ADMIT,DEAD/EXPIRED,Medicare,,JEWISH,MARRIED,WHITE,2177-04-22 21:02:00,2177-04-23 04:03:00,FEVER,1,1,1
4,94,92,142807,2122-12-13 19:30:00,2123-03-04 13:47:00,,NEWBORN,PHYS REFERRAL/NORMAL DELI,SHORT TERM HOSPITAL,Medicaid,,UNOBTAINABLE,,WHITE,NaT,NaT,NEWBORN,0,1,1


### Callout Table

In [4]:
# Resolve unqualified table names against the MIMIC-III schema.
cur.execute('SET search_path to ' + schema_name)

# Load the whole CALLOUT table into a DataFrame.
query = \
"""
SELECT *
FROM callout;
"""
data = pd.read_sql_query(query, con)

# Single-argument print() calls give the same output on Python 2 and 3;
# the original print statements were a SyntaxError on Python 3.
print("Table Dimensions: {0} rows and  {1} columns".format(data.shape[0], data.shape[1]))

print("\nColumn Names:")
for col in data.columns:
    print(col)

# Preview the first rows as the cell's rich output.
data.head()

Table Dimensions: 34499 rows and  24 columns

Column Names:
row_id
subject_id
hadm_id
submit_wardid
submit_careunit
curr_wardid
curr_careunit
callout_wardid
callout_service
request_tele
request_resp
request_cdiff
request_mrsa
request_vre
callout_status
callout_outcome
discharge_wardid
acknowledge_status
createtime
updatetime
acknowledgetime
outcometime
firstreservationtime
currentreservationtime


Unnamed: 0,row_id,subject_id,hadm_id,submit_wardid,submit_careunit,curr_wardid,curr_careunit,callout_wardid,callout_service,request_tele,...,callout_status,callout_outcome,discharge_wardid,acknowledge_status,createtime,updatetime,acknowledgetime,outcometime,firstreservationtime,currentreservationtime
0,64,109,137510,23.0,,45.0,MICU,1,MED,0,...,Inactive,Discharged,45.0,Acknowledged,2142-04-21 08:39:33,2142-04-21 08:39:33,2142-04-21 08:43:50,2142-04-21 22:25:07,2142-04-21 18:25:10,
1,65,109,151240,14.0,,45.0,TSICU,45,MED,0,...,Inactive,Discharged,45.0,Acknowledged,2142-05-15 19:08:17,2142-05-16 08:18:02,2142-05-16 08:21:34,2142-05-16 10:55:09,2142-05-16 06:55:09,
2,66,109,102024,50.0,,45.0,MICU,1,MED,1,...,Inactive,Discharged,45.0,Acknowledged,2142-05-22 07:19:54,2142-05-22 07:19:54,2142-05-22 08:01:04,2142-05-22 22:25:08,2142-05-22 19:40:10,
3,67,109,102024,23.0,MICU,23.0,MICU,1,MED,0,...,Inactive,Cancelled,,Acknowledged,2142-06-02 18:52:11,2142-06-03 07:53:38,2142-06-02 19:10:40,2142-06-03 07:53:38,2142-06-02 19:25:14,2142-06-03 07:40:08
4,68,109,102024,23.0,,45.0,MICU,1,MED,0,...,Inactive,Discharged,45.0,Acknowledged,2142-06-03 10:57:20,2142-06-03 10:57:20,2142-06-03 11:30:03,2142-06-03 14:55:08,2142-06-03 11:55:08,


### ICUSTAYS Table (defines ICUSTAY_ID)

In [5]:
# Resolve unqualified table names against the MIMIC-III schema.
cur.execute('SET search_path to ' + schema_name)

# Load the whole ICUSTAYS table into a DataFrame.
query = \
"""
SELECT *
FROM icustays;
"""
data = pd.read_sql_query(query, con)

# Single-argument print() calls give the same output on Python 2 and 3;
# the original print statements were a SyntaxError on Python 3.
print("Table Dimensions: {0} rows and  {1} columns".format(data.shape[0], data.shape[1]))

print("\nColumn Names:")
for col in data.columns:
    print(col)

# Preview the first rows as the cell's rich output.
data.head()

Table Dimensions: 61532 rows and  12 columns

Column Names:
row_id
subject_id
hadm_id
icustay_id
dbsource
first_careunit
last_careunit
first_wardid
last_wardid
intime
outtime
los


Unnamed: 0,row_id,subject_id,hadm_id,icustay_id,dbsource,first_careunit,last_careunit,first_wardid,last_wardid,intime,outtime,los
0,132,109,139061,257358,metavision,MICU,MICU,52,52,2141-09-11 10:13:28,2141-09-12 16:53:07,1.2775
1,133,109,172335,262652,metavision,MICU,MICU,23,23,2141-09-20 20:44:36,2141-09-22 21:44:50,2.0418
2,134,109,126055,236124,metavision,MICU,SICU,23,57,2141-10-13 23:11:01,2141-10-25 20:49:04,11.9014
3,135,109,125288,257134,metavision,SICU,MICU,57,50,2141-11-18 14:01:37,2141-11-19 21:35:18,1.3151
4,136,109,161950,237552,metavision,MICU,MICU,23,23,2141-11-24 16:12:07,2141-11-26 21:51:50,2.2359


### Patients Table (defines SUBJECT_ID)

In [6]:
# Resolve unqualified table names against the MIMIC-III schema.
cur.execute('SET search_path to ' + schema_name)

# Load the whole PATIENTS table into a DataFrame.
query = \
"""
SELECT *
FROM patients;
"""
data = pd.read_sql_query(query, con)

# Single-argument print() calls give the same output on Python 2 and 3;
# the original print statements were a SyntaxError on Python 3.
print("Table Dimensions: {0} rows and  {1} columns".format(data.shape[0], data.shape[1]))

print("\nColumn Names:")
for col in data.columns:
    print(col)

# Preview the first rows as the cell's rich output.
data.head()

Table Dimensions: 46520 rows and  8 columns

Column Names:
row_id
subject_id
gender
dob
dod
dod_hosp
dod_ssn
expire_flag


Unnamed: 0,row_id,subject_id,gender,dob,dod,dod_hosp,dod_ssn,expire_flag
0,612,646,M,2128-01-05,,,,0
1,613,647,M,2106-03-24,,,,0
2,614,648,M,2139-07-13,,,,0
3,615,649,M,2177-06-23,,,,0
4,616,650,M,2051-04-15,2111-12-28 00:00:00,,2111-12-28 00:00:00,1


### Services Table

In [7]:
# Resolve unqualified table names against the MIMIC-III schema.
cur.execute('SET search_path to ' + schema_name)

# Load the whole SERVICES table into a DataFrame.
query = \
"""
SELECT *
FROM services;
"""
data = pd.read_sql_query(query, con)

# Single-argument print() calls give the same output on Python 2 and 3;
# the original print statements were a SyntaxError on Python 3.
print("Table Dimensions: {0} rows and  {1} columns".format(data.shape[0], data.shape[1]))

print("\nColumn Names:")
for col in data.columns:
    print(col)

# Preview the first rows as the cell's rich output.
data.head()

Table Dimensions: 73343 rows and  6 columns

Column Names:
row_id
subject_id
hadm_id
transfertime
prev_service
curr_service


Unnamed: 0,row_id,subject_id,hadm_id,transfertime,prev_service,curr_service
0,312,188,132401,2162-01-10 18:27:52,MED,SURG
1,313,189,119333,2132-09-08 00:17:12,,CSURG
2,314,190,177065,2138-08-01 13:21:55,,NB
3,315,191,142081,2191-12-30 16:34:56,,MED
4,316,191,136614,2196-04-09 17:43:40,,CMED


### Transfers Table

In [8]:
# Resolve unqualified table names against the MIMIC-III schema.
cur.execute('SET search_path to ' + schema_name)

# Load the whole TRANSFERS table into a DataFrame.
query = \
"""
SELECT *
FROM transfers;
"""
data = pd.read_sql_query(query, con)

# Single-argument print() calls give the same output on Python 2 and 3;
# the original print statements were a SyntaxError on Python 3.
print("Table Dimensions: {0} rows and  {1} columns".format(data.shape[0], data.shape[1]))

print("\nColumn Names:")
for col in data.columns:
    print(col)

# Preview the first rows as the cell's rich output.
data.head()

Table Dimensions: 261897 rows and  13 columns

Column Names:
row_id
subject_id
hadm_id
icustay_id
dbsource
eventtype
prev_careunit
curr_careunit
prev_wardid
curr_wardid
intime
outtime
los


Unnamed: 0,row_id,subject_id,hadm_id,icustay_id,dbsource,eventtype,prev_careunit,curr_careunit,prev_wardid,curr_wardid,intime,outtime,los
0,141,32,175413,,carevue,transfer,,,7.0,2.0,2170-04-15 20:21:00,2170-04-23 12:45:00,184.4
1,142,32,175413,,carevue,discharge,,,2.0,,2170-04-23 12:45:00,NaT,
2,143,33,176176,296681.0,carevue,admit,,MICU,,12.0,2116-12-23 22:31:53,2116-12-25 11:49:55,37.3
3,144,33,176176,,carevue,transfer,MICU,,12.0,45.0,2116-12-25 11:49:55,2116-12-27 12:05:48,48.26
4,145,33,176176,,carevue,discharge,,,45.0,,2116-12-27 12:05:48,NaT,


## Data from Critical Care Unit

### Caregivers Table (defines CGID)

In [9]:
# Resolve unqualified table names against the MIMIC-III schema.
cur.execute('SET search_path to ' + schema_name)

# Load the whole CAREGIVERS table into a DataFrame.
query = \
"""
SELECT *
FROM caregivers;
"""
data = pd.read_sql_query(query, con)

# Single-argument print() calls give the same output on Python 2 and 3;
# the original print statements were a SyntaxError on Python 3.
print("Table Dimensions: {0} rows and  {1} columns".format(data.shape[0], data.shape[1]))

print("\nColumn Names:")
for col in data.columns:
    print(col)

# Preview the first rows as the cell's rich output.
data.head()

Table Dimensions: 7567 rows and  4 columns

Column Names:
row_id
cgid
label
description


Unnamed: 0,row_id,cgid,label,description
0,135,14197,SW,Social Worker
1,136,14198,Res,Resident/Fellow/PA/NP
2,137,14199,RO,Read Only
3,138,14200,MD,Read Only
4,139,14201,CRT,


### Chartevents Table

In [None]:
# Resolve unqualified table names against the MIMIC-III schema.
cur.execute('SET search_path to ' + schema_name)

# NOTE(review): this cell has no recorded output in the notebook, presumably
# because an unbounded `SELECT *` pulls every CHARTEVENTS row into memory.
# Let the database count the rows and fetch only the rows that get displayed.
count_query = \
"""
SELECT COUNT(*) AS n_rows
FROM chartevents;
"""
n_rows = int(pd.read_sql_query(count_query, con)['n_rows'].iloc[0])

# `data` now holds just the preview rows (head() shows 5 by default).
query = \
"""
SELECT *
FROM chartevents
LIMIT 5;
"""
data = pd.read_sql_query(query, con)

# The dimensions line was commented out in the original cell; it is restored
# here because the row count no longer requires loading the table.
# Single-argument print() calls work on both Python 2 and Python 3.
print("Table Dimensions: {0} rows and  {1} columns".format(n_rows, data.shape[1]))

print("\nColumn Names:")
for col in data.columns:
    print(col)

# Preview the first rows as the cell's rich output.
data.head()

### DateTimeEvents Table

In [10]:
# Resolve unqualified table names against the MIMIC-III schema.
cur.execute('SET search_path to ' + schema_name)

# Load the whole DATETIMEEVENTS table into a DataFrame.
query = \
"""
SELECT *
FROM datetimeevents;
"""
data = pd.read_sql_query(query, con)

# Single-argument print() calls give the same output on Python 2 and 3;
# the original print statements were a SyntaxError on Python 3.
print("Table Dimensions: {0} rows and  {1} columns".format(data.shape[0], data.shape[1]))

print("\nColumn Names:")
for col in data.columns:
    print(col)

# Preview the first rows as the cell's rich output.
data.head()

Table Dimensions: 4486049 rows and  14 columns

Column Names:
row_id
subject_id
hadm_id
icustay_id
itemid
charttime
storetime
cgid
value
valueuom
error
resultstatus
stopped


Unnamed: 0,row_id,subject_id,hadm_id,icustay_id,itemid,charttime,storetime,cgid,value,valueuom,warning,error,resultstatus,stopped
0,1,6,107064.0,228232.0,5684,2175-05-31 08:00:00,2175-05-31 12:33:00,18765,2175-05-31 00:00:00,Date,,,,NotStopd
1,2,6,107064.0,228232.0,5685,2175-05-31 08:00:00,2175-05-31 12:33:00,18765,2175-05-31 00:00:00,Date,,,,NotStopd
2,3,6,107064.0,228232.0,6703,2175-05-31 08:00:00,2175-05-31 12:33:00,18765,2175-05-30 00:00:00,Date,,,,NotStopd
3,4,6,107064.0,228232.0,6705,2175-05-31 08:00:00,2175-05-31 12:33:00,18765,2175-05-30 00:00:00,Date,,,,NotStopd
4,5,6,107064.0,228232.0,5684,2175-05-31 12:00:00,2175-05-31 12:33:00,18765,2175-05-31 00:00:00,Date,,,,NotStopd


### InputEvents_CV Table

In [None]:
# Resolve unqualified table names against the MIMIC-III schema.
cur.execute('SET search_path to ' + schema_name)

# NOTE(review): this cell has no recorded output in the notebook, presumably
# because an unbounded `SELECT *` pulls every INPUTEVENTS_CV row into memory.
# Let the database count the rows and fetch only the rows that get displayed.
count_query = \
"""
SELECT COUNT(*) AS n_rows
FROM inputevents_cv;
"""
n_rows = int(pd.read_sql_query(count_query, con)['n_rows'].iloc[0])

# `data` now holds just the preview rows (head() shows 5 by default).
query = \
"""
SELECT *
FROM inputevents_cv
LIMIT 5;
"""
data = pd.read_sql_query(query, con)

# Single-argument print() calls work on both Python 2 and Python 3;
# the original print statements were a SyntaxError on Python 3.
print("Table Dimensions: {0} rows and  {1} columns".format(n_rows, data.shape[1]))

print("\nColumn Names:")
for col in data.columns:
    print(col)

# Preview the first rows as the cell's rich output.
data.head()

### InputEvents_MV Table

In [None]:
# Resolve unqualified table names against the MIMIC-III schema.
cur.execute('SET search_path to ' + schema_name)

# NOTE(review): this cell has no recorded output in the notebook, presumably
# because an unbounded `SELECT *` pulls every INPUTEVENTS_MV row into memory.
# Let the database count the rows and fetch only the rows that get displayed.
count_query = \
"""
SELECT COUNT(*) AS n_rows
FROM inputevents_mv;
"""
n_rows = int(pd.read_sql_query(count_query, con)['n_rows'].iloc[0])

# `data` now holds just the preview rows (head() shows 5 by default).
query = \
"""
SELECT *
FROM inputevents_mv
LIMIT 5;
"""
data = pd.read_sql_query(query, con)

# Single-argument print() calls work on both Python 2 and Python 3;
# the original print statements were a SyntaxError on Python 3.
print("Table Dimensions: {0} rows and  {1} columns".format(n_rows, data.shape[1]))

print("\nColumn Names:")
for col in data.columns:
    print(col)

# Preview the first rows as the cell's rich output.
data.head()

### NoteEvents Table

In [None]:
# Resolve unqualified table names against the MIMIC-III schema.
cur.execute('SET search_path to ' + schema_name)

# NOTE(review): this cell has no recorded output in the notebook, presumably
# because an unbounded `SELECT *` pulls every NOTEEVENTS row into memory.
# Let the database count the rows and fetch only the rows that get displayed.
count_query = \
"""
SELECT COUNT(*) AS n_rows
FROM noteevents;
"""
n_rows = int(pd.read_sql_query(count_query, con)['n_rows'].iloc[0])

# `data` now holds just the preview rows (head() shows 5 by default).
query = \
"""
SELECT *
FROM noteevents
LIMIT 5;
"""
data = pd.read_sql_query(query, con)

# Single-argument print() calls work on both Python 2 and Python 3;
# the original print statements were a SyntaxError on Python 3.
print("Table Dimensions: {0} rows and  {1} columns".format(n_rows, data.shape[1]))

print("\nColumn Names:")
for col in data.columns:
    print(col)

# Preview the first rows as the cell's rich output.
data.head()

### OutputEvents Table

In [11]:
# Resolve unqualified table names against the MIMIC-III schema.
cur.execute('SET search_path to ' + schema_name)

# Load the whole OUTPUTEVENTS table into a DataFrame.
query = \
"""
SELECT *
FROM outputevents;
"""
data = pd.read_sql_query(query, con)

# Single-argument print() calls give the same output on Python 2 and 3;
# the original print statements were a SyntaxError on Python 3.
print("Table Dimensions: {0} rows and  {1} columns".format(data.shape[0], data.shape[1]))

print("\nColumn Names:")
for col in data.columns:
    print(col)

# Preview the first rows as the cell's rich output.
data.head()

Table Dimensions: 4349339 rows and  13 columns

Column Names:
row_id
subject_id
hadm_id
icustay_id
charttime
itemid
value
valueuom
storetime
cgid
stopped
newbottle
iserror


Unnamed: 0,row_id,subject_id,hadm_id,icustay_id,charttime,itemid,value,valueuom,storetime,cgid,stopped,newbottle,iserror
0,252,21219,177991.0,225765.0,2142-09-02 16:00:00,40055,180.0,ml,2142-09-02 16:13:00,18367,,,
1,253,21219,177991.0,225765.0,2142-09-02 16:30:00,40055,210.0,ml,2142-09-02 16:42:00,18367,,,
2,254,21219,177991.0,225765.0,2142-09-02 18:00:00,40055,180.0,ml,2142-09-02 18:34:00,18367,,,
3,255,21219,177991.0,225765.0,2142-09-02 20:00:00,40055,350.0,ml,2142-09-02 20:13:00,14431,,,
4,256,21219,177991.0,225765.0,2142-09-02 21:00:00,40055,110.0,ml,2142-09-02 21:19:00,14431,,,


### ProcedureEvents_MV Table

In [12]:
# Resolve unqualified table names against the MIMIC-III schema.
cur.execute('SET search_path to ' + schema_name)

# Load the whole PROCEDUREEVENTS_MV table into a DataFrame.
query = \
"""
SELECT *
FROM procedureevents_mv;
"""
data = pd.read_sql_query(query, con)

# Single-argument print() calls give the same output on Python 2 and 3;
# the original print statements were a SyntaxError on Python 3.
print("Table Dimensions: {0} rows and  {1} columns".format(data.shape[0], data.shape[1]))

print("\nColumn Names:")
for col in data.columns:
    print(col)

# Preview the first rows as the cell's rich output.
data.head()

Table Dimensions: 258066 rows and  25 columns

Column Names:
row_id
subject_id
hadm_id
icustay_id
starttime
endtime
itemid
value
valueuom
location
locationcategory
storetime
cgid
orderid
linkorderid
ordercategoryname
secondaryordercategoryname
ordercategorydescription
isopenbag
continueinnextdept
cancelreason
statusdescription
comments_editedby
comments_canceledby
comments_date


Unnamed: 0,row_id,subject_id,hadm_id,icustay_id,starttime,endtime,itemid,value,valueuom,location,...,ordercategoryname,secondaryordercategoryname,ordercategorydescription,isopenbag,continueinnextdept,cancelreason,statusdescription,comments_editedby,comments_canceledby,comments_date
0,1,30354,120396,,2154-12-24 12:00:00,2154-12-25 17:02:00,224274,1742.0,min,L Hand,...,Peripheral Lines,,Task,1,0,0,FinishedRunning,,,
1,2,30354,120396,,2154-12-24 12:00:00,2154-12-26 13:00:00,224275,2940.0,min,LL Ant Forearm,...,Peripheral Lines,,Task,1,0,0,FinishedRunning,,,
2,3,30354,120396,,2154-12-24 12:03:00,2154-12-24 12:04:00,224385,1.0,,,...,Intubation/Extubation,,Electrolytes,0,0,0,FinishedRunning,,,
3,4,30354,120396,,2154-12-24 12:06:00,2154-12-26 17:00:00,225792,3174.0,min,,...,Ventilation,,Task,1,0,0,FinishedRunning,,,
4,5,30354,120396,,2154-12-24 15:49:00,2154-12-24 15:50:00,226237,1.0,,,...,Procedures,,Electrolytes,0,0,0,FinishedRunning,,,


## Hospital Record Tables

### CPTEvents Table

In [18]:
# Resolve unqualified table names against the MIMIC-III schema.
cur.execute('SET search_path to ' + schema_name)

# Load the whole CPTEVENTS table into a DataFrame.
query = \
"""
SELECT *
FROM cptevents;
"""
data = pd.read_sql_query(query, con)

# Single-argument print() calls give the same output on Python 2 and 3;
# the original print statements were a SyntaxError on Python 3.
print("Table Dimensions: {0} rows and  {1} columns".format(data.shape[0], data.shape[1]))

print("\nColumn Names:")
for col in data.columns:
    print(col)

# Preview the first rows as the cell's rich output.
data.head()

Table Dimensions: 573146 rows and  12 columns

Column Names:
row_id
subject_id
hadm_id
costcenter
chartdate
cpt_cd
cpt_number
cpt_suffix
ticket_id_seq
sectionheader
subsectionheader
description


Unnamed: 0,row_id,subject_id,hadm_id,costcenter,chartdate,cpt_cd,cpt_number,cpt_suffix,ticket_id_seq,sectionheader,subsectionheader,description
0,160,482,145066,ICU,,99233,99233.0,,1.0,Evaluation and management,Hospital inpatient services,
1,161,482,145066,ICU,,99232,99232.0,,2.0,Evaluation and management,Hospital inpatient services,
2,162,482,145066,ICU,,99232,99232.0,,3.0,Evaluation and management,Hospital inpatient services,
3,163,482,145066,ICU,,99232,99232.0,,4.0,Evaluation and management,Hospital inpatient services,
4,164,482,145066,ICU,,99232,99232.0,,5.0,Evaluation and management,Hospital inpatient services,


### Diagnoses_ICD Table

In [19]:
# Resolve unqualified table names against the MIMIC-III schema.
cur.execute('SET search_path to ' + schema_name)

# Load the whole DIAGNOSES_ICD table into a DataFrame.
query = \
"""
SELECT *
FROM diagnoses_icd;
"""
data = pd.read_sql_query(query, con)

# Single-argument print() calls give the same output on Python 2 and 3;
# the original print statements were a SyntaxError on Python 3.
print("Table Dimensions: {0} rows and  {1} columns".format(data.shape[0], data.shape[1]))

print("\nColumn Names:")
for col in data.columns:
    print(col)

# Preview the first rows as the cell's rich output.
data.head()

Table Dimensions: 651047 rows and  5 columns

Column Names:
row_id
subject_id
hadm_id
seq_num
icd9_code


Unnamed: 0,row_id,subject_id,hadm_id,seq_num,icd9_code
0,243,34,115799,8.0,E8790
1,244,34,144319,1.0,42789
2,245,34,144319,2.0,42822
3,246,34,144319,3.0,4263
4,247,34,144319,4.0,41401


### DRGCodes Table

In [20]:
# Resolve unqualified table names against the MIMIC-III schema.
cur.execute('SET search_path to ' + schema_name)

# Load the whole DRGCODES table into a DataFrame.
query = \
"""
SELECT *
FROM drgcodes;
"""
data = pd.read_sql_query(query, con)

# Single-argument print() calls give the same output on Python 2 and 3;
# the original print statements were a SyntaxError on Python 3.
print("Table Dimensions: {0} rows and  {1} columns".format(data.shape[0], data.shape[1]))

print("\nColumn Names:")
for col in data.columns:
    print(col)

# Preview the first rows as the cell's rich output.
data.head()

Table Dimensions: 125557 rows and  8 columns

Column Names:
row_id
subject_id
hadm_id
drg_type
drg_code
description
drg_severity
drg_mortality


Unnamed: 0,row_id,subject_id,hadm_id,drg_type,drg_code,description,drg_severity,drg_mortality
0,1,78996,138854,HCFA,107,CORONARY BYPASS WITH CARDIAC CATHETER,,
1,2,25080,185945,HCFA,104,CARDIAC VALVE & OTHER MAJOR CARDIOTHORACIC PRO...,,
2,3,11677,137776,HCFA,202,CIRRHOSIS & ALCOHOLIC HEPATITIS,,
3,4,20409,102314,HCFA,483,"TRACHEOSTOMY EXCEPT FOR FACE, MOUTH, & NECK DI...",,
4,5,11705,165730,HCFA,390,NEONATE WITH OTHER SIGNIFICANT PROBLEMS,,


### LabEvents Table

In [None]:
# Resolve unqualified table names against the MIMIC-III schema.
cur.execute('SET search_path to ' + schema_name)

# NOTE(review): this cell has no recorded output in the notebook, presumably
# because an unbounded `SELECT *` pulls every LABEVENTS row into memory.
# Let the database count the rows and fetch only the rows that get displayed.
count_query = \
"""
SELECT COUNT(*) AS n_rows
FROM labevents;
"""
n_rows = int(pd.read_sql_query(count_query, con)['n_rows'].iloc[0])

# `data` now holds just the preview rows (head() shows 5 by default).
query = \
"""
SELECT *
FROM labevents
LIMIT 5;
"""
data = pd.read_sql_query(query, con)

# Single-argument print() calls work on both Python 2 and Python 3;
# the original print statements were a SyntaxError on Python 3.
print("Table Dimensions: {0} rows and  {1} columns".format(n_rows, data.shape[1]))

print("\nColumn Names:")
for col in data.columns:
    print(col)

# Preview the first rows as the cell's rich output.
data.head()

### MicroBiologyEvents Table

In [21]:
# Resolve unqualified table names against the MIMIC-III schema.
cur.execute('SET search_path to ' + schema_name)

# Load the whole MICROBIOLOGYEVENTS table into a DataFrame.
query = \
"""
SELECT *
FROM microbiologyevents;
"""
data = pd.read_sql_query(query, con)

# Single-argument print() calls give the same output on Python 2 and 3;
# the original print statements were a SyntaxError on Python 3.
print("Table Dimensions: {0} rows and  {1} columns".format(data.shape[0], data.shape[1]))

print("\nColumn Names:")
for col in data.columns:
    print(col)

# Preview the first rows as the cell's rich output.
data.head()

Table Dimensions: 328446 rows and  16 columns

Column Names:
row_id
subject_id
hadm_id
chartdate
charttime
spec_itemid
spec_type_desc
org_itemid
org_name
isolate_num
ab_itemid
ab_name
dilution_text
dilution_comparison
dilution_value
interpretation


Unnamed: 0,row_id,subject_id,hadm_id,chartdate,charttime,spec_itemid,spec_type_desc,org_itemid,org_name,isolate_num,ab_itemid,ab_name,dilution_text,dilution_comparison,dilution_value,interpretation
0,130,5282,132174.0,2109-08-05,,70079.0,URINE,80019.0,PROVIDENCIA STUARTII,1,90026.0,PIPERACILLIN/TAZO,64,=,64.0,I
1,131,5282,132174.0,2109-08-05,,70079.0,URINE,80019.0,PROVIDENCIA STUARTII,1,90013.0,TOBRAMYCIN,=>16,=>,16.0,R
2,132,5282,132174.0,2109-08-05,,70079.0,URINE,80019.0,PROVIDENCIA STUARTII,1,90029.0,MEROPENEM,<=0.25,<=,0.0,S
3,133,5282,132174.0,2109-08-05,,70079.0,URINE,80019.0,PROVIDENCIA STUARTII,1,90028.0,CEFEPIME,2,=,2.0,S
4,134,5282,132174.0,2109-08-05,,70079.0,URINE,80019.0,PROVIDENCIA STUARTII,1,90019.0,CIPROFLOXACIN,=>4,=>,4.0,R


### Prescriptions Table

In [None]:
# Resolve unqualified table names against the MIMIC-III schema.
cur.execute('SET search_path to ' + schema_name)

# NOTE(review): this cell has no recorded output in the notebook, presumably
# because an unbounded `SELECT *` pulls every PRESCRIPTIONS row into memory.
# Let the database count the rows and fetch only the rows that get displayed.
count_query = \
"""
SELECT COUNT(*) AS n_rows
FROM prescriptions;
"""
n_rows = int(pd.read_sql_query(count_query, con)['n_rows'].iloc[0])

# `data` now holds just the preview rows (head() shows 5 by default).
query = \
"""
SELECT *
FROM prescriptions
LIMIT 5;
"""
data = pd.read_sql_query(query, con)

# Single-argument print() calls work on both Python 2 and Python 3;
# the original print statements were a SyntaxError on Python 3.
print("Table Dimensions: {0} rows and  {1} columns".format(n_rows, data.shape[1]))

print("\nColumn Names:")
for col in data.columns:
    print(col)

# Preview the first rows as the cell's rich output.
data.head()

### Procedures_ICD Table

In [22]:
# Resolve unqualified table names against the MIMIC-III schema.
cur.execute('SET search_path to ' + schema_name)

# Load the whole PROCEDURES_ICD table into a DataFrame.
query = \
"""
SELECT *
FROM procedures_icd;
"""
data = pd.read_sql_query(query, con)

# Single-argument print() calls give the same output on Python 2 and 3;
# the original print statements were a SyntaxError on Python 3.
print("Table Dimensions: {0} rows and  {1} columns".format(data.shape[0], data.shape[1]))

print("\nColumn Names:")
for col in data.columns:
    print(col)

# Preview the first rows as the cell's rich output.
data.head()

Table Dimensions: 240095 rows and  5 columns

Column Names:
row_id
subject_id
hadm_id
seq_num
icd9_code


Unnamed: 0,row_id,subject_id,hadm_id,seq_num,icd9_code
0,397,5259,195496,7,966
1,398,89817,145376,1,3812
2,399,5617,164900,1,8005
3,400,5617,164900,2,8191
4,401,5617,164900,3,3893


## Dictionaries

### D_CPT Table

In [13]:
# Resolve unqualified table names against the MIMIC-III schema.
cur.execute('SET search_path to ' + schema_name)

# Load the whole D_CPT dictionary table into a DataFrame.
query = \
"""
SELECT *
FROM d_cpt;
"""
data = pd.read_sql_query(query, con)

# Single-argument print() calls give the same output on Python 2 and 3;
# the original print statements were a SyntaxError on Python 3.
print("Table Dimensions: {0} rows and  {1} columns".format(data.shape[0], data.shape[1]))

print("\nColumn Names:")
for col in data.columns:
    print(col)

# Preview the first rows as the cell's rich output.
data.head()

Table Dimensions: 134 rows and  9 columns

Column Names:
row_id
category
sectionrange
sectionheader
subsectionrange
subsectionheader
codesuffix
mincodeinsubsection
maxcodeinsubsection


Unnamed: 0,row_id,category,sectionrange,sectionheader,subsectionrange,subsectionheader,codesuffix,mincodeinsubsection,maxcodeinsubsection
0,1,1,99201-99499,Evaluation and management,99201-99216,Office/other outpatient services,,99201,99216
1,2,1,99201-99499,Evaluation and management,99217-99220,Hospital observation services,,99217,99220
2,3,1,99201-99499,Evaluation and management,99221-99239,Hospital inpatient services,,99221,99239
3,4,1,99201-99499,Evaluation and management,99241-99255,Consultations,,99241,99255
4,5,1,99201-99499,Evaluation and management,99261-99263,Follow-up inpatient consultations (deleted codes),,99261,99263


### D_ICD_Diagnoses Table

In [14]:
# Resolve unqualified table names against the MIMIC-III schema.
cur.execute('SET search_path to ' + schema_name)

# Load the whole D_ICD_DIAGNOSES dictionary table into a DataFrame.
query = \
"""
SELECT *
FROM d_icd_diagnoses;
"""
data = pd.read_sql_query(query, con)

# Single-argument print() calls give the same output on Python 2 and 3;
# the original print statements were a SyntaxError on Python 3.
print("Table Dimensions: {0} rows and  {1} columns".format(data.shape[0], data.shape[1]))

print("\nColumn Names:")
for col in data.columns:
    print(col)

# Preview the first rows as the cell's rich output.
data.head()

Table Dimensions: 14567 rows and  4 columns

Column Names:
row_id
icd9_code
short_title
long_title


Unnamed: 0,row_id,icd9_code,short_title,long_title
0,55,65,Amebic brain abscess,Amebic brain abscess
1,56,66,Amebic skin ulceration,Amebic skin ulceration
2,57,68,Amebic infection NEC,Amebic infection of other sites
3,58,69,Amebiasis NOS,"Amebiasis, unspecified"
4,59,70,Balantidiasis,Balantidiasis


### D_ICD_Procedures Table

In [15]:
# Resolve unqualified table names against the MIMIC-III schema.
cur.execute('SET search_path to ' + schema_name)

# Load the whole D_ICD_PROCEDURES dictionary table into a DataFrame.
query = \
"""
SELECT *
FROM d_icd_procedures;
"""
data = pd.read_sql_query(query, con)

# Single-argument print() calls give the same output on Python 2 and 3;
# the original print statements were a SyntaxError on Python 3.
print("Table Dimensions: {0} rows and  {1} columns".format(data.shape[0], data.shape[1]))

print("\nColumn Names:")
for col in data.columns:
    print(col)

# Preview the first rows as the cell's rich output.
data.head()

Table Dimensions: 3882 rows and  4 columns

Column Names:
row_id
icd9_code
short_title
long_title


Unnamed: 0,row_id,icd9_code,short_title,long_title
0,86,64,Perc ins extracran stent,Percutaneous insertion of other extracranial a...
1,87,65,Perc ins intracran stent,Percutaneous insertion of intracranial vascula...
2,88,66,PTCA,Percutaneous transluminal coronary angioplasty...
3,89,67,Intravas msmnt thorc art,Intravascular pressure measurement of intratho...
4,90,68,Intravas msmt periph art,Intravascular pressure measurement of peripher...


### D_Items Table

In [16]:
# Resolve unqualified table names against the MIMIC-III schema.
cur.execute('SET search_path to ' + schema_name)

# Load the whole D_ITEMS dictionary table into a DataFrame.
query = \
"""
SELECT *
FROM d_items;
"""
data = pd.read_sql_query(query, con)

# Single-argument print() calls give the same output on Python 2 and 3;
# the original print statements were a SyntaxError on Python 3.
print("Table Dimensions: {0} rows and  {1} columns".format(data.shape[0], data.shape[1]))

print("\nColumn Names:")
for col in data.columns:
    print(col)

# Preview the first rows as the cell's rich output.
data.head()

Table Dimensions: 12478 rows and  10 columns

Column Names:
row_id
itemid
label
abbreviation
dbsource
linksto
category
unitname
param_type
conceptid


Unnamed: 0,row_id,itemid,label,abbreviation,dbsource,linksto,category,unitname,param_type,conceptid
0,262,264,INV Line#7 Site Date,,carevue,chartevents,,,,
1,263,265,INV Line#7 [Type],,carevue,chartevents,,,,
2,264,266,INV Line#7 Zero/Cal,,carevue,chartevents,,,,
3,265,267,INV Line#7InsertDate,,carevue,chartevents,,,,
4,266,268,INV Line#7SiteAppear,,carevue,chartevents,,,,


### D_LabItems Table

In [17]:
# Resolve unqualified table names against the MIMIC-III schema.
cur.execute('SET search_path to ' + schema_name)

# Load the whole D_LABITEMS dictionary table into a DataFrame.
query = \
"""
SELECT *
FROM d_labitems;
"""
data = pd.read_sql_query(query, con)

# Single-argument print() calls give the same output on Python 2 and 3;
# the original print statements were a SyntaxError on Python 3.
print("Table Dimensions: {0} rows and  {1} columns".format(data.shape[0], data.shape[1]))

print("\nColumn Names:")
for col in data.columns:
    print(col)

# Preview the first rows as the cell's rich output.
data.head()

Table Dimensions: 755 rows and  6 columns

Column Names:
row_id
itemid
label
fluid
category
loinc_code


Unnamed: 0,row_id,itemid,label,fluid,category,loinc_code
0,262,51062,"CHLORIDE, STOOL",STOOL,CHEMISTRY,15158-9
1,263,51063,"OSMOLALITY, STOOL",STOOL,CHEMISTRY,2693-0
2,264,51064,"POTASSIUM, STOOL",STOOL,CHEMISTRY,15202-5
3,265,51065,"SODIUM, STOOL",STOOL,CHEMISTRY,15207-4
4,266,51066,24 HR CALCIUM,URINE,CHEMISTRY,6874-2
