# Loading MIMIC III Dataset 

In [58]:
import pandas as pd
pd.options.display.float_format = '{:,}'.format

## Not included in the Working Data Table 
 
We decided to not include the data into the working frame because the information was not relevant to answer the prediction problem. 

* CALLOUT
* CAREGIVERS
* NOTEEVENTS

# Tables containing Patient Data

As all dates in MIMIC-III are anonymized to protect patient confidentiality, all dates in this table have been shifted. Note that the chronology for an individual patient has been unaffected however, and quantities such as the difference between two dates remain true to reality.

## Admissions
The ADMISSIONS table gives information regarding a patient’s admission to the hospital. Since each unique hospital visit for a patient is assigned a unique HADM_ID.
Information available includes timing information for admission and discharge, demographic information, the source of the admission, whether the patient died within the given hospitalization. In addition the table contains a preliminary, free text diagnosis for the patient on hospital admission.

In [59]:
df_admissions = pd.read_csv('data/ADMISSIONS.csv.gz', compression='gzip')

In [60]:
df_admissions.head()

Unnamed: 0,ROW_ID,SUBJECT_ID,HADM_ID,ADMITTIME,DISCHTIME,DEATHTIME,ADMISSION_TYPE,ADMISSION_LOCATION,DISCHARGE_LOCATION,INSURANCE,LANGUAGE,RELIGION,MARITAL_STATUS,ETHNICITY,EDREGTIME,EDOUTTIME,DIAGNOSIS,HOSPITAL_EXPIRE_FLAG,HAS_CHARTEVENTS_DATA
0,21,22,165315,2196-04-09 12:26:00,2196-04-10 15:54:00,,EMERGENCY,EMERGENCY ROOM ADMIT,DISC-TRAN CANCER/CHLDRN H,Private,,UNOBTAINABLE,MARRIED,WHITE,2196-04-09 10:06:00,2196-04-09 13:24:00,BENZODIAZEPINE OVERDOSE,0,1
1,22,23,152223,2153-09-03 07:15:00,2153-09-08 19:10:00,,ELECTIVE,PHYS REFERRAL/NORMAL DELI,HOME HEALTH CARE,Medicare,,CATHOLIC,MARRIED,WHITE,,,CORONARY ARTERY DISEASE\CORONARY ARTERY BYPASS...,0,1
2,23,23,124321,2157-10-18 19:34:00,2157-10-25 14:00:00,,EMERGENCY,TRANSFER FROM HOSP/EXTRAM,HOME HEALTH CARE,Medicare,ENGL,CATHOLIC,MARRIED,WHITE,,,BRAIN MASS,0,1
3,24,24,161859,2139-06-06 16:14:00,2139-06-09 12:48:00,,EMERGENCY,TRANSFER FROM HOSP/EXTRAM,HOME,Private,,PROTESTANT QUAKER,SINGLE,WHITE,,,INTERIOR MYOCARDIAL INFARCTION,0,1
4,25,25,129635,2160-11-02 02:06:00,2160-11-05 14:55:00,,EMERGENCY,EMERGENCY ROOM ADMIT,HOME,Private,,UNOBTAINABLE,MARRIED,WHITE,2160-11-02 01:01:00,2160-11-02 04:27:00,ACUTE CORONARY SYNDROME,0,1


In [61]:
df_admissions_newborn = df_admissions[df_admissions.ADMISSION_TYPE == "NEWBORN"]
df_admissions_newborn

Unnamed: 0,ROW_ID,SUBJECT_ID,HADM_ID,ADMITTIME,DISCHTIME,DEATHTIME,ADMISSION_TYPE,ADMISSION_LOCATION,DISCHARGE_LOCATION,INSURANCE,LANGUAGE,RELIGION,MARITAL_STATUS,ETHNICITY,EDREGTIME,EDOUTTIME,DIAGNOSIS,HOSPITAL_EXPIRE_FLAG,HAS_CHARTEVENTS_DATA
6,27,27,134931,2191-11-30 22:16:00,2191-12-03 14:45:00,,NEWBORN,PHYS REFERRAL/NORMAL DELI,HOME,Private,,CATHOLIC,,WHITE,,,NEWBORN,0,1
20,41,39,106266,2114-11-29 21:04:00,2114-12-09 15:10:00,,NEWBORN,PHYS REFERRAL/NORMAL DELI,HOME,Private,,NOT SPECIFIED,,UNKNOWN/NOT SPECIFIED,,,NEWBORN,0,1
27,461,358,110872,2168-10-24 23:48:00,2168-10-29 03:23:00,,NEWBORN,PHYS REFERRAL/NORMAL DELI,HOME,Private,,CATHOLIC,,WHITE,,,NEWBORN,0,1
34,468,363,196503,2176-03-01 15:26:00,2176-03-03 14:04:00,,NEWBORN,CLINIC REFERRAL/PREMATURE,HOME,Government,,PROTESTANT QUAKER,,BLACK/AFRICAN AMERICAN,,,NEWBORN,0,1
51,49,50,132761,2112-06-23 19:40:00,2112-06-26 10:15:00,,NEWBORN,PHYS REFERRAL/NORMAL DELI,HOME,Private,,OTHER,,WHITE,,,NEWBORN,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
43861,39394,32472,190292,2196-02-01 09:13:00,2196-02-03 11:00:00,,NEWBORN,PHYS REFERRAL/NORMAL DELI,HOME,Private,,CATHOLIC,,WHITE,,,NEWBORN,0,1
43868,39401,32479,118898,2143-02-25 23:58:00,2143-04-21 17:35:00,,NEWBORN,CLINIC REFERRAL/PREMATURE,HOME,Private,,UNOBTAINABLE,,WHITE,,,NEWBORN,0,1
43879,39412,32490,144531,2131-09-20 15:29:00,2131-09-24 13:00:00,,NEWBORN,PHYS REFERRAL/NORMAL DELI,HOME,Private,,CATHOLIC,,WHITE,,,NEWBORN,0,1
43880,39413,32491,146840,2161-10-08 23:32:00,2161-10-11 12:30:00,,NEWBORN,PHYS REFERRAL/NORMAL DELI,HOME,Private,,HINDU,,ASIAN - ASIAN INDIAN,,,NEWBORN,0,1


In [62]:
df_admissions_adults = df_admissions[df_admissions.ADMISSION_TYPE != "NEWBORN"]

In [63]:
df_admissions_adults.shape

(51113, 19)

In [64]:
liste_patienten_adult = list(df_admissions_adults.HADM_ID.unique())
liste_patienten_adult

[165315,
 152223,
 124321,
 161859,
 129635,
 197661,
 162569,
 104557,
 128652,
 175413,
 176176,
 115799,
 144319,
 166707,
 182104,
 122659,
 165660,
 188670,
 185910,
 101757,
 174486,
 145674,
 122609,
 101651,
 117876,
 144265,
 154871,
 108205,
 148959,
 142749,
 150873,
 136153,
 139873,
 134462,
 119940,
 105889,
 138061,
 145787,
 123421,
 113500,
 144383,
 119203,
 146828,
 181750,
 157907,
 144073,
 190539,
 190797,
 155385,
 190665,
 181711,
 104130,
 176332,
 189535,
 116009,
 195961,
 172056,
 143430,
 186474,
 155252,
 170467,
 108329,
 111944,
 194730,
 112086,
 142768,
 100536,
 181542,
 115385,
 175016,
 158569,
 120969,
 166401,
 116630,
 112077,
 190243,
 123010,
 121205,
 183686,
 140037,
 160891,
 170324,
 127870,
 188606,
 187373,
 153952,
 175533,
 130744,
 133550,
 161160,
 128744,
 145167,
 191941,
 182383,
 174162,
 123552,
 183350,
 128755,
 164029,
 108375,
 193281,
 175347,
 110668,
 166018,
 170149,
 147469,
 112906,
 134369,
 138376,
 157348,
 141647,
 

In [65]:
df_admissions.shape

(58976, 19)

58976 Admissions and 19 columns

In [66]:
df_admissions.HADM_ID.nunique()

58976

genauso viele verschiedene Admission-IDs wie Zeilen. Also keine Duplikate.

In [67]:
df_admissions.SUBJECT_ID.nunique()

46520

Insgesamt 46520 Patienten, d.h. einige Patienten wurden mehrmals ins Krankenhaus eingeliefert.

In [68]:
len(df_admissions[df_admissions.DIAGNOSIS =='ARDS'])

12

In [69]:
len(df_admissions[df_admissions.DIAGNOSIS =='RDS'])

0

In [70]:
len(df_admissions[df_admissions.DIAGNOSIS =='ACUTE RESPIRATORY DISTRESS SYNDROME'])

9

In [71]:
df_admissions_del=df_admissions.dropna(subset=["DIAGNOSIS"])

In [72]:
df_admissions_del[df_admissions_del.DIAGNOSIS.str.contains("RESPIRATORY")]

Unnamed: 0,ROW_ID,SUBJECT_ID,HADM_ID,ADMITTIME,DISCHTIME,DEATHTIME,ADMISSION_TYPE,ADMISSION_LOCATION,DISCHARGE_LOCATION,INSURANCE,LANGUAGE,RELIGION,MARITAL_STATUS,ETHNICITY,EDREGTIME,EDOUTTIME,DIAGNOSIS,HOSPITAL_EXPIRE_FLAG,HAS_CHARTEVENTS_DATA
106,104,101,175533,2196-09-26 18:36:00,2196-10-12 13:17:00,2196-10-12 13:17:00,EMERGENCY,EMERGENCY ROOM ADMIT,DEAD/EXPIRED,Medicare,,,MARRIED,ASIAN,2196-09-26 12:50:00,2196-09-26 18:37:00,RESPIRATORY FAILURE,1,1
360,630,505,116719,2154-03-30 14:59:00,2154-04-01 16:00:00,,EMERGENCY,TRANSFER FROM HOSP/EXTRAM,HOME HEALTH CARE,Private,ENGL,CATHOLIC,MARRIED,WHITE,,,ACUTE MENTAL STATUS CHANGES/RESPIRATORY DISTRESS,0,1
613,1100,886,130937,2139-03-08 10:47:00,2139-03-09 17:05:00,,EMERGENCY,TRANSFER FROM HOSP/EXTRAM,REHAB/DISTINCT PART HOSP,Medicare,,,,UNKNOWN/NOT SPECIFIED,,,RESPIRATORY FAILURE,0,1
650,1137,914,124723,2178-02-26 12:14:00,2178-03-13 15:30:00,,EMERGENCY,CLINIC REFERRAL/PREMATURE,LEFT AGAINST MEDICAL ADVI,Medicare,,CATHOLIC,SINGLE,WHITE,,,HYPOTENSION;RESPIRATORY DECOMPRESSION;S/P LIVE...,0,1
682,835,679,156345,2141-10-19 14:58:00,2141-10-24 11:30:00,,EMERGENCY,EMERGENCY ROOM ADMIT,REHAB/DISTINCT PART HOSP,Medicare,,JEWISH,MARRIED,WHITE,2141-10-19 10:21:00,2141-10-19 15:08:00,ACUTE RESPIRATORY FAILURE,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
58428,58861,99613,177517,2153-02-10 01:00:00,2153-02-26 17:00:00,,EMERGENCY,EMERGENCY ROOM ADMIT,LONG TERM CARE HOSPITAL,Medicare,ENGL,CATHOLIC,DIVORCED,WHITE,2153-02-09 23:59:00,2153-02-10 02:56:00,RESPIRATORY FAILURE;PNEUMONIA;CHRONIC OBST PUL...,0,1
58623,58278,97772,119334,2124-07-18 23:39:00,2124-07-19 12:30:00,,EMERGENCY,TRANSFER FROM HOSP/EXTRAM,SHORT TERM HOSPITAL,Medicare,PTUN,PROTESTANT QUAKER,,UNKNOWN/NOT SPECIFIED,,,RESPIRATORY FAILURE,0,1
58814,56986,93814,134288,2194-07-22 14:42:00,2194-07-24 17:30:00,,EMERGENCY,CLINIC REFERRAL/PREMATURE,LONG TERM CARE HOSPITAL,Private,ENGL,UNOBTAINABLE,,UNABLE TO OBTAIN,,,CHRONIC RESPIRATORY FAILURE; TRAC OBSTRUCTED A...,0,1
58829,57758,96229,195294,2143-02-09 13:24:00,2143-02-10 07:52:00,,EMERGENCY,CLINIC REFERRAL/PREMATURE,LEFT AGAINST MEDICAL ADVI,Private,ENGL,CATHOLIC,SINGLE,WHITE,2143-02-09 12:41:00,2143-02-09 15:00:00,RESPIRATORY ARREST,0,1


Future Work:
* Duplicates 
* ADMITTIME / DISCHTIME - calculate LOS 
* Ectract all ADMISSION TYPE expect NEWBORN 
* Drop: Admission Location, Discharge Location, Insurance, Language, Religion, Marietal Status, Ethnicity, EDREGTIME, EDOUTTIME, DIAGNOSIS

## CPTevents
Procedures recorded as Current Procedural Terminology (CPT) codes.

In [73]:
df_cpt = pd.read_csv('data/CPTEVENTS.csv.gz', compression='gzip')

  interactivity=interactivity, compiler=compiler, result=result)


In [74]:
df_cpt.head()

Unnamed: 0,ROW_ID,SUBJECT_ID,HADM_ID,COSTCENTER,CHARTDATE,CPT_CD,CPT_NUMBER,CPT_SUFFIX,TICKET_ID_SEQ,SECTIONHEADER,SUBSECTIONHEADER,DESCRIPTION
0,317,11743,129545,ICU,,99232,99232.0,,6.0,Evaluation and management,Hospital inpatient services,
1,318,11743,129545,ICU,,99232,99232.0,,7.0,Evaluation and management,Hospital inpatient services,
2,319,11743,129545,ICU,,99232,99232.0,,8.0,Evaluation and management,Hospital inpatient services,
3,320,11743,129545,ICU,,99232,99232.0,,9.0,Evaluation and management,Hospital inpatient services,
4,321,6185,183725,ICU,,99223,99223.0,,1.0,Evaluation and management,Hospital inpatient services,


In [75]:
df_cpt.shape

(573146, 12)

In [76]:
df_cpt.HADM_ID.isnull().sum()

0

## DATETIMEEVENTS
DATETIMEEVENTS contains all date measurements about a patient in the ICU. For example, the date of last dialysis would be in the DATETIMEEVENTS table, but the systolic blood pressure would be in the table CHARTEVENTS.   
Wir hoffen, in dieser Tabelle die Information darüber, wann ein Patient intubiert und extubiert wurde, zu erhalten.


In [77]:
df_datetimeevents = df.read_csv('data/DATETIMEEVENTS.csv.gz', 
                                compression='gzip', dtype={'HADM_ID': 'float64','ICUSTAY_ID': 'float64'}, blocksize=None)

NameError: name 'df' is not defined

In [15]:
df_datetimeevents.head(10)

In [27]:
df_datetimeevents.shape

In [41]:
df_datetimeevents.HADM_ID.isnull().sum()

Auf diese Datetimeevents verzichten wir.

In [16]:
df_DATETIMEEVENTS.ITEMID.unique()

## Diagnosis_ICD
SEQ_NUM provides the order in which the ICD diagnoses relate to the patient. ICD diagnoses are ordered by priority - and the order does have an impact on the reimbursement for treatment.

In [7]:
df_diagnosis = pd.read_csv('data/DIAGNOSES_ICD.csv.gz', 
                                compression='gzip')

In [20]:
df_diagnosis.head()

In [28]:
df_diagnosis.shape

In [42]:
df_diagnosis.HADM_ID.isnull().sum()

In [30]:
df_diagnosis[df_diagnosis.ICD9_CODE == '51881']

In [51]:
df_diagnosis[df_diagnosis.ICD9_CODE == '51881'].SUBJECT_ID.nunique()

In [33]:
df_diagnosis[df_diagnosis.ICD9_CODE == '51881'].HADM_ID.nunique()

Es gibt Patienten, die mehrmals mit der Diagnose "ARDS" im Krankenhaus waren.

In [8]:
df_patienten = df_diagnosis[df_diagnosis.ICD9_CODE == '51881']

In [9]:
liste_patienten = list(df_patienten.HADM_ID.unique())
liste_patienten


In [64]:
import csv
with open('data/liste_patienten.csv', 'w', encoding='UTF-8') as myfile:
    wr = csv.writer(myfile)
    wr.writerow(liste_patienten)

In [57]:
k = 0 
for i in liste_patienten_adult:
    if i in liste_patienten:
        k += 1
print(k)

In [31]:
# Liste mit Patienten mit ARDS anhand von SUBJECT_ID
liste_subject = list(df_patienten.SUBJECT_ID.unique())

In [48]:
len(df_diagnosis[df_diagnosis.ICD9_CODE == '769'])

Neugeborene mit ARDS haben eine andere Diagnose (769). Deshalb ist liste_patienten nur mit erwachsenen ARDS-Patienten gefüllt.

## drgcodes
Diagnosis Related Groups (DRG), which are used by the hospital for billing purposes. 
Bisher wissen wir nicht, wofür wir diese Informationen nutzen können.

In [46]:
df_drgcodes = pd.read_csv('data/DRGCODES.csv.gz', 
                                compression='gzip')

In [57]:
df_drgcodes.head()

In [58]:
df_drgcodes.shape

In [47]:
df_drgcodes.HADM_ID.isnull().sum()

In [59]:
df_drgcodes[df_drgcodes.HADM_ID == 110404]

## icustays
Every unique ICU stay in the database.  
CCU - Coronary Care Unit   
CSRU - Cardiac Surgery Recovery Unit   
MICU - Medical Intensive Care Unit  
NICU - Neonatal Intensive Care Unit   
SICU - Surgical Intensive Care Unit   
TSICU - Trauma Surgical Intensive Care Unit

In [45]:
df_icustays = pd.read_csv('data/ICUSTAYS.csv.gz', 
                                compression='gzip')

In [62]:
df_icustays.head()

In [48]:
df_icustays.HADM_ID.isnull().sum()

In [65]:
df_icustays.FIRST_CAREUNIT.unique()

In [66]:
df_icustays.FIRST_WARDID.unique()

## Inputevent 
Inputs and outputs are extremely useful when studying intensive care unit patients. Inputs are any fluids which have been administered to the patient: such as oral or tube feedings or intravenous solutions containing medications. 
### CV 
Inputs exist in two separate tables: INPUTEVENTS_CV and INPUTEVENTS_MV. INPUTEVENTS_CV contains CareVue inputs, while INPUTEVENTS_MV contains Metavision inputs. Results from these tables can be unioned as observations are not duplicated across tables.
For CareVue data, the rate and volume will be asynchronous, and only the CHARTTIME will be available. For rates, the CHARTTIME will correspond to a start time (when the drug was set to that rate). For volumes, the CHARTTIME will correspond to an end time.


In [29]:
df_input_cv = pd.read_csv('data/INPUTEVENTS_CV.csv.gz', 
                                compression='gzip')

In [69]:
df_input_cv.head()

In [70]:
df_input_cv.shape

In [30]:
df_input_cv.HADM_ID.isnull().sum()

Wir suchen von den Zeilen mit HADM_ID = NaN diejenigen heraus, wo die Subject_ID in der Liste der ARDS_Patienten ist.

In [49]:
df_input_null= df_input_cv.loc[df_input_cv['HADM_ID'].isnull()]

In [52]:
df_input_null_ards= df_input_null.loc[df_input_null['SUBJECT_ID'].isin(liste_subject)]

In [53]:
df_input_null_ards.shape

We reduce the dataframe of patients with the diagnoses ARDS, based on HADM_ID.

In [71]:
df_input_cv= df_input_cv.loc[df_input_cv['HADM_ID'].isin(liste_patienten)]

In [72]:
df_input_cv.shape

### Inputevent MV

In [10]:
df_input_mv = pd.read_csv('data/INPUTEVENTS_MV.csv.gz', 
                                compression='gzip')

In [11]:
df_input_mv.head()

In [12]:
df_input_mv.shape

In [27]:
df_input_mv.HADM_ID.isnull().sum()

We reduce the dataframe of patients with the diagnoses ARDS, based on HADM_ID.

In [15]:
df_input_mv= df_input_mv.loc[df_input_mv['HADM_ID'].isin(liste_patienten)]

In [16]:
df_input_mv.shape

## Labevents
Contains all laboratory measurements for a given patient, including out patient data.
Note that the time associated with this result is the time of the fluid acquisition, not the time that the values were made available to the clinical staff.  
Some items are duplicated between the labevents and chartevents tables. In cases where there is disagreement between measurements, labevents should be taken as the ground truth.  
FLAG indicates whether the laboratory value is considered abnormal or not, using pre-defined thresholds.


In [17]:
df_labevents = pd.read_csv('data/LABEVENTS.csv.gz', 
                                compression='gzip')

In [18]:
df_labevents.head()

In [19]:
df_labevents.shape

In [21]:
# HADM_ID have NaN. Howmany?
df_labevents.HADM_ID.isnull().sum()

**MIST** Was machen wir nun?  
Wir holen die Zeilen des dataframes raus mit HADM_ID = NaN und schauen, ob Patienten (SUCJECT_ID) mit ARDS dabei sind.

In [34]:
df_lab_null= df_labevents.loc[df_labevents['HADM_ID'].isnull()]

In [35]:
df_lab_null.head()

In [38]:
df_lab_null_ards= df_lab_null.loc[df_lab_null['SUBJECT_ID'].isin(liste_subject)]

In [39]:
df_lab_null_ards.shape

1,3 Mio Labormessungen von Patienten mit ARDS sind nicht mit HADM_ID versehen.

In [24]:
df_labevents.HADM_ID.nunique()

In [22]:
df_labevents.SUBJECT_ID.isnull().sum()

In [26]:
df_labevents.SUBJECT_ID.nunique()

We reduce the dataframe of patients with the diagnoses ARDS, based on HADM_ID.

## microbiologyevents
Contains microbiology information, including cultures acquired and associated sensitivities. 


In [55]:
df_microbiology = pd.read_csv('data/MICROBIOLOGYEVENTS.csv.gz', 
                                compression='gzip')

In [56]:
df_microbiology.head()

In [57]:
df_microbiology.shape

In [58]:
df_microbiology.HADM_ID.isnull().sum()

## outputevents
Output information for patients while in the ICU

In [59]:
df_output = pd.read_csv('data/OUTPUTEVENTS.csv.gz', 
                                compression='gzip')

In [60]:
df_output.head()

In [61]:
df_output.shape

In [62]:
df_output.HADM_ID.isnull().sum()

## patients 
Every unique patient in the database.

In [63]:
df_patient = pd.read_csv('data/PATIENTS.csv.gz', 
                                compression='gzip')

In [64]:
df_patient.head()

In [65]:
df_patient.shape

## prescriptions
Medications ordered for a given patient.

In [67]:
df_prescription = pd.read_csv('data/PRESCRIPTIONS.csv.gz', 
                                compression='gzip')

In [68]:
df_prescription.head()

In [69]:
df_prescription.shape

In [71]:
df_prescription.HADM_ID.isnull().sum()

## procedureevents
Patient procedures for the subset of patients who were monitored in the ICU using the iMDSoft MetaVision system.

In [72]:
df_procedure = pd.read_csv('data/PROCEDUREEVENTS_MV.csv.gz', 
                                compression='gzip')

In [73]:
df_procedure.head()

In [74]:
df_procedure.shape

In [75]:
df_procedure.HADM_ID.isnull().sum()

## procedures_icd 
Patient procedures, coded using the International Statistical Classification of Diseases and Related Health Problems (ICD) system.

In [77]:
df_procedure_icd = pd.read_csv('data/PROCEDURES_ICD.csv.gz', 
                                compression='gzip')

In [78]:
df_procedure_icd.head()

In [80]:
df_procedure_icd.shape

In [81]:
df_procedure_icd.HADM_ID.isnull().sum()

In [83]:
#How many times patients were intubated
len(df_procedure_icd[df_procedure_icd.ICD9_CODE == 9604])

## Services
Lists services that a patient was admitted/transferred under.
The services table describes the service that a patient was admitted under. While a patient can be physicially located at a given ICU type (say MICU), they are not necessarily being cared for by the team which staffs the MICU. This can happen due to a number of reasons, including bed shortage. The SERVICES table should be used if interested in identifying the type of service a patient is receiving in the hospital. For example, if interested in identifying surgical patients, the recommended method is searching for patients admitted under a surgical service.  
CMED - Cardiac Medical - for non-surgical cardiac related admissions  
CSURG - Cardiac Surgery - for surgical cardiac admissions  
DENT - Dental - for dental/jaw related admissions  
ENT - Ear, nose, and throat - conditions primarily affecting these areas  
GU - Genitourinary - reproductive organs/urinary system  
GYN - Gynecological - female reproductive systems and breasts  
MED - Medical - general service for internal medicine  
NB - Newborn - infants born at the hospital  
NBB - Newborn baby - infants born at the hospital  
NMED - Neurologic Medical - non-surgical, relating to the brain  
NSURG - Neurologic Surgical - surgical, relating to the brain  
OBS - Obstetrics - conerned with childbirth and the care of women giving birth  
ORTHO - Orthopaedic - surgical, relating to the musculoskeletal system  
OMED - Orthopaedic medicine - non-surgical, relating to musculoskeletal system  
PSURG - Plastic - restortation/reconstruction of the human body (including cosmetic or aesthetic)  
PSYCH - Psychiatric - mental disorders relating to mood, behaviour, cognition, or perceptions  
SURG - Surgical - general surgical service not classified elsewhere  
TRAUM - Trauma - injury or damage caused by physical harm from an external source  
TSURG - Thoracic Surgical - surgery on the thorax, located between the neck and the abdomen  
VSURG - Vascular Surgical - surgery relating to the circulatory system  


In [85]:
df_services = pd.read_csv('data/SERVICES.csv.gz', 
                                compression='gzip')

In [86]:
df_services.head()

In [88]:
df_services.shape

In [89]:
df_services.HADM_ID.isnull().sum()

## transfers
Patient movement from bed to bed within the hospital, including ICU admission and discharge.

In [90]:
df_transfers = pd.read_csv('data/TRANSFERS.csv.gz', 
                                compression='gzip')

In [91]:
df_transfers.head()

In [92]:
df_transfers.shape

In [93]:
df_transfers.HADM_ID.isnull().sum()

# Dictionaries

## D_CPT
 
High-level definitions for current procedural terminology (CPT) codes. Remains in the working data table because it might be useful for later explanation. 

In [12]:
df_dcpt = pd.read_csv('data/D_CPT.csv.gz', compression='gzip')


In [93]:
df_dcpt.head(10)

In [14]:
df_dcpt.SECTIONHEADER.unique()

In [15]:
df_dcpt.shape

In [19]:
df_dcpt.SECTIONHEADER.unique()

In [20]:
df_dcpt.SUBSECTIONHEADER.unique()

## D_ICD Diagnosis 
 
Select patients that suffer from 'acute respiratory failure' based in ICD9 Code (51881). 

In [32]:
df_icd_diagnosis = df.read_csv('data/D_ICD_DIAGNOSES.csv.gz', 
                               compression='gzip', blocksize=None, 
                              dtype={'ICD9_CODE': 'object'})

# WATCH OUT - ICD9 is now an OBJECT

In [33]:
df_icd_diagnosis.head()

In [34]:
df_icd_diagnosis[dd_icd_diagnosis.ICD9_CODE == '51881']

Der IDC-9 Code für ARDS ist 51881.

In [35]:
df_icd_diagnosis[df_icd_diagnosis.ICD9_CODE == '99591']

## D_ICD Procedures 
 
Extubation failure is defined as a re-intubation within XX time. 


In [36]:
d_icd_procedures = df.read_csv('data/D_ICD_PROCEDURES.csv.gz', 
                               compression='gzip', blocksize=None)

In [37]:
df_icd_procedures.head()

In [38]:
df_icd_procedures[df_icd_procedures.SHORT_TITLE == 'Insert endotracheal tube']

## D_Items Table 
 
The D_ITEMS table does not link to the LABEVENTS table, as this data was acquired separately from the hospital database. The D_ITEMS table was acquired from the ICU databases.


##### Problem
Cant identify label for intubation

In [39]:
df_items = df.read_csv('data/D_ITEMS.csv.gz', 
                               compression='gzip', blocksize=None, 
                       dtype={'ABBREVIATION': 'object',
                              'PARAM_TYPE': 'object',
                              'UNITNAME': 'object'})

In [40]:
df_items.head(3)

In [65]:
df_items[df_items.LABEL == 'endotracheal tube']

In [76]:
df_items[df_items.ITEMID == 225792]

In [90]:
df_items[dd_items.ITEMID == 225368] 

In [75]:
df_items= df_items.dropna(subset=["LABEL"])

In [95]:
df_items[df_items.LABEL.str.contains("trach")]

In [91]:
len(df_items)

## D_Lab Items

In [44]:
df_labitems= df.read_csv('data/D_LABITEMS.csv.gz', 
                               compression='gzip', blocksize=None)

In [47]:
df_labitems.head()

In [None]:
df_labitems= df.read_csv('data/D_LABITEMS.csv.gz', 
                               compression='gzip', blocksize=None)