# Exploration of a single ICU sepsis data log

In [1]:
# Imports
import pandas

In [2]:
path = '../training/'

In [3]:
# Choose patient sample
sample_num = 1

In [4]:
# Import and print dataframe layout
def loadICUdata(sample_num, path = './', printdf = False):
    samplestr = 'p' + (str(sample_num) + '.psv').rjust(9, '0')
    fname = path + samplestr
    df = pandas.read_csv(fname, sep = '|')
    print(fname, ' file:')
    if printdf:
        print(df)
    return df

In [5]:
df = loadICUdata(sample_num, path, True)

../training/p00001.psv  file:
       HR  O2Sat  Temp    SBP   MAP   DBP  Resp  EtCO2  BaseExcess  HCO3  \
0     NaN    NaN   NaN    NaN   NaN   NaN   NaN    NaN         NaN   NaN   
1    93.0   92.5   NaN  110.0  76.0  56.0  22.0    NaN         NaN   NaN   
2    91.0   96.0   NaN  108.0  84.5  72.0  23.5    NaN         NaN   NaN   
3    93.0   98.0   NaN  123.0  87.0  61.0  21.0    NaN         NaN   NaN   
4    93.0   95.0   NaN  110.0  81.0  70.0  20.0    NaN         NaN   NaN   
5     NaN    NaN   NaN    NaN   NaN   NaN   NaN    NaN         NaN   NaN   
6    91.5   97.0  36.5  104.0  75.0  60.0  20.0    NaN         NaN   NaN   
7    94.0   95.0   NaN  114.0  85.0  66.0  20.0    NaN         NaN   NaN   
8    94.0   95.0   NaN  121.0  88.0  69.0  20.0    NaN         NaN   NaN   
9   102.0   95.0   NaN  117.0  89.0  70.0  20.0    NaN         NaN   NaN   
10  106.0   97.0  36.8  107.0  82.0  68.0  20.0    NaN         NaN   NaN   
11  107.0   95.0   NaN  112.0  86.0  67.0  20.0    NaN    

In [6]:
df

Unnamed: 0,HR,O2Sat,Temp,SBP,MAP,DBP,Resp,EtCO2,BaseExcess,HCO3,...,WBC,Fibrinogen,Platelets,Age,Gender,Unit1,Unit2,HospAdmTime,ICULOS,SepsisLabel
0,,,,,,,,,,,...,,,,73,1,1,0,-214.64,1,0
1,93.0,92.5,,110.0,76.0,56.0,22.0,,,,...,,,,73,1,1,0,-214.64,2,0
2,91.0,96.0,,108.0,84.5,72.0,23.5,,,,...,,,,73,1,1,0,-214.64,3,0
3,93.0,98.0,,123.0,87.0,61.0,21.0,,,,...,,,,73,1,1,0,-214.64,4,0
4,93.0,95.0,,110.0,81.0,70.0,20.0,,,,...,,,,73,1,1,0,-214.64,5,0
5,,,,,,,,,,,...,,,,73,1,1,0,-214.64,6,0
6,91.5,97.0,36.5,104.0,75.0,60.0,20.0,,,,...,,,,73,1,1,0,-214.64,7,0
7,94.0,95.0,,114.0,85.0,66.0,20.0,,,,...,,,,73,1,1,0,-214.64,8,0
8,94.0,95.0,,121.0,88.0,69.0,20.0,,,,...,,,,73,1,1,0,-214.64,9,0
9,102.0,95.0,,117.0,89.0,70.0,20.0,,,,...,,,,73,1,1,0,-214.64,10,0


In [7]:
# get the sepsis label value
def get_sepsislabel(df):
    sl = df[["SepsisLabel"]]
    count = df['SepsisLabel'].value_counts()
    count_healthy = -1
    count_sep = -1
    
    # An assumption is made that NO sepsis-only records [1,1,1,1,1...] are present --> check 55
    if len(count) == 2:
        count_sep = count[1]
    elif len(count) == 1:
        if sl.values[0] == 0:
            count_healthy = count.values[0]
        elif sl.values[0] == 1:
            count_sep = count.values[0]
    return sl, count_healthy, count_sep


In [8]:
count = df['SepsisLabel'].value_counts()
count.values
len(count)
sl, ch, cs = get_sepsislabel(df)
len(sl)

24

In [9]:
# Print result
SL, ch, cs = get_sepsislabel(df) 
print(ch, cs, cs/len(sl))

24 -1 -0.041666666666666664


# Loop study

In [10]:
# Run in a loop and get a sepsis presence report
SL = []
CH = []
CS = []
CSCH = []
for i in range(1,100):
    df = loadICUdata(i, path, False)
    sl, ch, cs = get_sepsislabel(df) 
    SL.append(sl)
    CH.append(ch)
    CS.append(cs)
    if cs and ch != -1:
        csch = cs/(cs + ch)
    elif cs == -1:
        csch = 0
    elif ch == -1:
        csch = 1
    CSCH.append(csch)

../training/p00001.psv  file:
../training/p00002.psv  file:
../training/p00003.psv  file:
../training/p00004.psv  file:
../training/p00005.psv  file:
../training/p00006.psv  file:
../training/p00007.psv  file:
../training/p00008.psv  file:
../training/p00009.psv  file:
../training/p00010.psv  file:
../training/p00011.psv  file:
../training/p00012.psv  file:
../training/p00013.psv  file:
../training/p00014.psv  file:
../training/p00015.psv  file:
../training/p00016.psv  file:
../training/p00017.psv  file:
../training/p00018.psv  file:
../training/p00019.psv  file:
../training/p00020.psv  file:
../training/p00021.psv  file:
../training/p00022.psv  file:
../training/p00023.psv  file:
../training/p00024.psv  file:
../training/p00025.psv  file:
../training/p00026.psv  file:
../training/p00027.psv  file:
../training/p00028.psv  file:
../training/p00029.psv  file:
../training/p00030.psv  file:
../training/p00031.psv  file:
../training/p00032.psv  file:
../training/p00033.psv  file:
../trainin

In [11]:
print(CSCH)

[-0.043478260869565216, -0.041666666666666664, -0.023809523809523808, -0.017241379310344827, -0.019607843137254902, -0.021739130434782608, -0.027777777777777776, -0.020408163265306121, -0.034482758620689655, -0.066666666666666666, -0.020408163265306121, -0.023809523809523808, 1, -0.023809523809523808, -0.066666666666666666, 1, -0.050000000000000003, -0.023809523809523808, -0.022222222222222223, -0.024390243902439025, -0.027027027027027029, -0.020408163265306121, -0.02564102564102564, -0.052631578947368418, -0.021739130434782608, -0.083333333333333329, -0.052631578947368418, -0.041666666666666664, -0.022222222222222223, -0.020408163265306121, -0.027777777777777776, -0.037037037037037035, -0.050000000000000003, -0.045454545454545456, -0.020408163265306121, -0.027027027027027029, -0.027777777777777776, -0.033333333333333333, -0.021739130434782608, -0.050000000000000003, -0.025000000000000001, -0.033333333333333333, -0.043478260869565216, -0.052631578947368418, -0.026315789473684209, -0.01