In [12]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import scipy.stats as stat
import statsmodels.api as sm
import scikit_posthocs as sp

# Notebook display limits: show (effectively) every column, but cap long frames at 64 rows.
pd.options.display.max_columns = 9999
pd.options.display.max_rows = 64

%matplotlib inline

In [2]:
# Read the patients CSV (path is relative to the notebook's working directory) into `pts`.
patients_csv = 'final_patients_full.csv'
pts = pd.read_csv(patients_csv)

In [5]:
# Quick look at the first five patient rows and the available columns.
pts.head(n=5)

Unnamed: 0,patientunitstayid,patienthealthsystemstayid,gender,age,ethnicity,hospitalid,wardid,apacheadmissiondx,admissionheight,hospitaladmittime24,hospitaladmitoffset,hospitaladmitsource,hospitaldischargeyear,hospitaldischargetime24,hospitaldischargeoffset,hospitaldischargelocation,hospitaldischargestatus,unittype,unitadmittime24,unitadmitsource,unitvisitnumber,unitstaytype,admissionweight,dischargeweight,unitdischargetime24,unitdischargeoffset,unitdischargelocation,unitdischargestatus,uniquepid,opioid,nsaid,numbedscategory,teachingstatus,region,painmeds,apachescore,actualventdays,gcs_score,from_OR,heartrate,hr_over100,ICU_duration,orderedprotocols,BMI
0,154752,139450.0,Male,53.0,Caucasian,73.0,100.0,"CVA, cerebrovascular accident/stroke",182.9,18:22:13,-32.0,Emergency Department,2015.0,16:58:00,9964.0,Rehabilitation,Alive,Neuro ICU,18:54:00,Emergency Department,1.0,admit,113.4,115.6,21:30:00,1596.0,Floor,Alive,002-57840,True,True,>= 500,t,Midwest,True,86.0,0.0,3.0,False,70.0,False,1.130556,['Tissue Plasminogen Activator (tPA)'],33.898903
1,156771,140989.0,Female,88.0,Caucasian,63.0,95.0,"CVA, cerebrovascular accident/stroke",152.4,17:15:00,-14.0,Direct Admit,2014.0,18:26:00,14457.0,Skilled Nursing Facility,Alive,Med-Surg ICU,17:29:00,Direct Admit,1.0,admit,64.6,74.9,05:33:00,724.0,Floor,Alive,002-27764,True,True,100 - 249,f,Midwest,True,42.0,0.0,14.0,False,60.0,False,0.5125,['Tissue Plasminogen Activator (tPA)'],27.813945
2,173247,153714.0,Female,81.0,Caucasian,73.0,100.0,"CVA, cerebrovascular accident/stroke",154.9,16:32:17,-12.0,Emergency Department,2015.0,17:32:00,8688.0,Rehabilitation,Alive,Neuro ICU,16:44:00,Emergency Department,1.0,admit,90.2,86.4,23:06:00,4702.0,Floor,Alive,002-6009,False,True,>= 500,t,Midwest,True,42.0,0.0,15.0,False,72.0,False,3.273611,['Tissue Plasminogen Activator (tPA)'],37.592716
3,196674,172026.0,Male,80.0,,73.0,100.0,"CVA, cerebrovascular accident/stroke",170.2,20:47:00,-1.0,Other Hospital,2015.0,22:41:00,4433.0,Other Hospital,Alive,Neuro ICU,20:48:00,Other Hospital,1.0,admit,86.1,86.2,18:41:00,2753.0,Floor,Alive,002-36867,False,True,>= 500,t,Midwest,True,44.0,0.0,15.0,False,56.0,False,1.9125,['Tissue Plasminogen Activator (tPA)'],29.722411
4,196675,172026.0,Male,80.0,,73.0,100.0,"Pneumonia, other",170.2,20:47:00,-3186.0,Other Hospital,2015.0,22:41:00,1248.0,Other Hospital,Alive,Neuro ICU,01:53:00,Floor,2.0,readmit,86.2,87.5,22:41:00,1248.0,Other Hospital,Alive,002-36867,False,False,>= 500,t,Midwest,False,46.0,1.0,12.0,False,87.0,False,3.079167,['Tissue Plasminogen Activator (tPA)'],29.756932


In [4]:
# 'Unnamed: 0' only repeats the row number (0, 1, 2, ... in the preview above);
# presumably it is the index written out when the CSV was saved.
# Rebinding with errors='ignore' (instead of an in-place drop) keeps this cell
# idempotent: running it a second time no longer raises KeyError because the
# column is already gone.
pts = pts.drop(columns=['Unnamed: 0'], errors='ignore')

In [7]:
# Read the nurse charting table; pandas infers gzip compression from the .gz suffix.
nurse_charting_csv = 'eicu-collaborative-research-database-2.0/nurseCharting.csv.gz'
nurse = pd.read_csv(nurse_charting_csv)

In [34]:
# Peek at the first five charting rows.
nurse.head(n=5)

Unnamed: 0,patientunitstayid,nursingchartoffset,nursingchartvalue
7,141924,2508,2
16,141924,634,2
40,141924,6664,5
71,141924,12491,3
105,141924,5542,3


In [16]:
# Keep only the charting rows whose cell value name is exactly 'Pain Score'.
is_pain_score = nurse['nursingchartcelltypevalname'].eq('Pain Score')
nurse = nurse.loc[is_pain_score]
nurse.head()

Unnamed: 0,nursingchartid,patientunitstayid,nursingchartoffset,nursingchartentryoffset,nursingchartcelltypecat,nursingchartcelltypevallabel,nursingchartcelltypevalname,nursingchartvalue
7,257177082,141924,2508,2508,Scores,Pain Score/Goal,Pain Score,2
16,221064311,141924,634,634,Scores,Pain Score/Goal,Pain Score,2
40,94517471,141924,6664,6664,Scores,Pain Score/Goal,Pain Score,5
71,257096927,141924,12491,12491,Scores,Pain Score/Goal,Pain Score,3
105,184777069,141924,5542,5542,Scores,Pain Score/Goal,Pain Score,3


In [17]:
# Drop the bookkeeping/label columns; the rest of the notebook only uses
# patientunitstayid, nursingchartoffset and nursingchartvalue.
# The result is rebound (no inplace=True) and missing columns are ignored so the
# cell is idempotent: re-running it after the columns are already gone no longer
# raises KeyError.
nurse = nurse.drop(
    columns=[
        'nursingchartid',
        'nursingchartentryoffset',
        'nursingchartcelltypecat',
        'nursingchartcelltypevallabel',
        'nursingchartcelltypevalname',
    ],
    errors='ignore',
)

In [35]:
# Confirm only the stay id, chart offset and charted value remain.
nurse.head(n=5)

Unnamed: 0,patientunitstayid,nursingchartoffset,nursingchartvalue
7,141924,2508,2
16,141924,634,2
40,141924,6664,5
71,141924,12491,3
105,141924,5542,3


In [36]:
# Only whole-number scores from 0 through 10 are accepted as valid pain scores.
valid = [float(score) for score in range(11)]

# Parse the charted value as a number. errors='coerce' turns any entry that is
# not numeric into NaN, which the isin() filter below then discards; the previous
# float(x) call raised ValueError on the first such entry.
pain_score = pd.to_numeric(nurse['nursingchartvalue'], errors='coerce')

# assign/astype build new frames instead of writing a column into a filtered
# slice, which avoids pandas' SettingWithCopyWarning.
nurse = (
    nurse
    .assign(nursingchartvalue=pain_score)
    .loc[pain_score.isin(valid)]
    .astype({'nursingchartvalue': 'int64'})
)

In [37]:
# Order each stay's pain scores by chart offset.
# - kind='mergesort' is a stable sort: rows that share the same offset keep their
#   existing relative order, so which of them ends up first/last within a stay is
#   reproducible (the default sort algorithm gives no such guarantee).
# - group_keys=True is spelled out so the stay id is added as the outer index
#   level; later cells join on that index level.
grouped = (
    nurse
    .groupby('patientunitstayid', group_keys=True)
    .apply(lambda stay: stay.sort_values('nursingchartoffset', kind='mergesort'))
)

In [38]:
# Inspect the first five rows of the per-stay, offset-ordered frame.
grouped.head(n=5)

Unnamed: 0_level_0,Unnamed: 1_level_0,patientunitstayid,nursingchartoffset,nursingchartvalue
patientunitstayid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
141168,151470604,141168,21,0
141168,151470668,141168,396,0
141168,151470621,141168,561,0
141168,151470689,141168,809,0
141168,151470605,141168,1066,0


In [39]:
# Rows are ordered by offset within each stay, so the first occurrence of a stay id
# is its lowest-offset score and the last occurrence its highest-offset score.
stay_id_column = 'patientunitstayid'
is_first_row = ~grouped.duplicated(subset=stay_id_column, keep='first')
is_last_row = ~grouped.duplicated(subset=stay_id_column, keep='last')
first_values = grouped[is_first_row]
last_values = grouped[is_last_row]

In [40]:
# Reduce both frames to the score column; the stay id remains available via the index.
score_only = ['nursingchartvalue']
first_values = first_values.loc[:, score_only]
last_values = last_values.loc[:, score_only]

In [41]:
# Display the lowest-offset pain score kept for each patientunitstayid.
first_values

Unnamed: 0_level_0,Unnamed: 1_level_0,nursingchartvalue
patientunitstayid,Unnamed: 1_level_1,Unnamed: 2_level_1
141168,151470604,0
141178,151470843,0
141179,151471017,0
141194,151473088,0
141196,151473543,0
...,...,...
3352758,151378827,8
3352781,151381527,1
3352837,151390797,2
3353077,151432702,4


In [42]:
# Display the highest-offset pain score kept for each patientunitstayid.
last_values

Unnamed: 0_level_0,Unnamed: 1_level_0,nursingchartvalue
patientunitstayid,Unnamed: 1_level_1,Unnamed: 2_level_1
141168,151470691,0
141178,151470901,0
141179,151471111,0
141194,151472300,0
141196,151473438,0
...,...,...
3352758,151378827,8
3352781,151380827,1
3352837,151390857,1
3353077,151432702,4


In [43]:
# Give the score column a distinct name in each frame (rename returns a new frame by default).
first_tidy = first_values.rename(columns={'nursingchartvalue': 'initialPain'})
last_tidy = last_values.rename(columns={'nursingchartvalue': 'finalPain'})

In [44]:
# Inner-join the initial and final scores on the stay id so each stay gets one row with both values.
pain = first_tidy.merge(last_tidy, how='inner', on=['patientunitstayid'])

In [45]:
# Display the merged table: initialPain and finalPain per patientunitstayid.
pain

Unnamed: 0_level_0,initialPain,finalPain
patientunitstayid,Unnamed: 1_level_1,Unnamed: 2_level_1
141168,0,0
141178,0,0
141179,0,0
141194,0,0
141196,0,0
...,...,...
3352758,8,8
3352781,1,1
3352837,2,1
3353077,4,4


In [46]:
# Write the initial/final pain table to disk; the index is written too (to_csv default).
pain.to_csv(path_or_buf='pain.csv')