### Import Libraries

In [40]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from statistics import mean
from pandas.io import gbq
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, roc_auc_score, roc_curve, auc

# Disable pandas' chained-assignment (SettingWithCopy) warning for the whole notebook.
# NOTE(review): this only silences the warning; it does not make chained
# assignments such as df[col][i] = value write through to the frame.
pd.options.mode.chained_assignment = None  # default='warn'

### Logistic Regression

### BINARY CLASSIFICATION, SINGLE FEATURE: AUC

In [49]:
# Load Data: pull the full cohort table from BigQuery.
query = """
        SELECT *
        FROM `bachelorarbeit-heparin.mimic_data.cohort1_final`
        """
df = gbq.read_gbq(query, project_id = "bachelorarbeit-heparin")

# Binning PTT Values: 0 when PTT < 60 or PTT > 100, otherwise 1.
# One whole-column assignment replaces the per-row `df['PTT'][i] = ...` loop:
# that form is chained assignment, which does not write back to `df` when
# pandas Copy-on-Write is active, and it was also a 15k-iteration Python loop.
# A missing PTT compares False on both sides and is therefore labelled 1,
# which is what the original if/else produced as well.
ptt = df['PTT']
df['PTT'] = np.where((ptt < 60) | (ptt > 100), 0, 1)

# Define x & y: single predictor (cumulative heparin) and the binary PTT label.
x = df[['kum_hep_t']].copy()
y = df[['PTT']].copy()

# Logistic Regression: refit on 100 different 70/30 splits (seeds 0..99)
# and collect the held-out ROC AUC of each fit.
auc_score = []

for seed in range(100):
    x_train, x_test, y_train, y_test = train_test_split(
        x, y, test_size=0.3, random_state=seed
    )
    log_reg = LogisticRegression(max_iter=1000).fit(x_train, y_train.values.ravel())
    # Column 1 of predict_proba is the probability of label 1.
    auc_score.append(roc_auc_score(y_test, log_reg.predict_proba(x_test)[:, 1]))

# Evaluate the Model: mean AUC over all splits.
print("AVG (n=100) AUC-Score: ", round(mean(auc_score),4))


Downloading:   0%|                                                                         | 0/15376 [00:00<?, ?rows/s][A
Downloading: 100%|███████████████████████████████████████████████████████████| 15376/15376 [00:04<00:00, 3228.86rows/s][A


AVG (n=100) AUC-Score:  0.6188


### BINARY CLASSIFICATION, MULTIPLE FEATURES: AUC

In [52]:
# Load Data: pull the full cohort table from BigQuery.
query = """
        SELECT *
        FROM `bachelorarbeit-heparin.mimic_data.cohort1_final`
        """
df = gbq.read_gbq(query, project_id = "bachelorarbeit-heparin")

# All column transformations below are whole-column assignments. The previous
# per-row `df[col][i] = ...` writes were chained assignment, which does not
# update `df` when pandas Copy-on-Write is active.

# Convert gender to binary variables: 'M' -> 0, 'F' -> 1.
# Any other value is passed through unchanged, as before.
gender_codes = {'M': 0, 'F': 1}
df['gender'] = df['gender'].map(lambda value: gender_codes.get(value, value))

# Binning PTT Values: 0 when PTT < 60 or PTT > 100, otherwise 1.
# A missing PTT compares False on both sides and is therefore labelled 1,
# which is what the original if/else produced as well.
ptt = df['PTT']
df['PTT'] = np.where((ptt < 60) | (ptt > 100), 0, 1)

# Convert ethnicities to binary variables: one indicator per named group,
# plus a catch-all MISC indicator for every value not in the named groups.
ethnicity = df['ethnicity']
ethnicity_labels = {
    'ethnicity_WHITE':                  'WHITE',
    'ethnicity_BLACK_AFRICAN_AMERICAN': 'BLACK/AFRICAN AMERICAN',
    'ethnicity_HISPANIC_LATINO':        'HISPANIC/LATINO',
    'ethnicity_ASIAN':                  'ASIAN',
}
for column, label in ethnicity_labels.items():
    df[column] = (ethnicity == label).astype(int)
df['ethnicity_MISC'] = (~ethnicity.isin(list(ethnicity_labels.values()))).astype(int)

# Convert ICU_Types to binary variables: one indicator per named unit, a
# combined NEURO indicator for the three neuro units, and a catch-all MISC
# indicator for every value not covered by any of the above.
icu_type = df['ICU_Type']
icu_labels = {
    'ICU_Type_CCU':   'Coronary Care Unit (CCU)',
    'ICU_Type_CVICU': 'Cardiac Vascular Intensive Care Unit (CVICU)',
    'ICU_Type_TSICU': 'Trauma SICU (TSICU)',
    'ICU_Type_MICU':  'Medical Intensive Care Unit (MICU)',
    'ICU_Type_SICU':  'Surgical Intensive Care Unit (SICU)',
}
neuro_units = [
    'Neuro Intermediate',
    'Neuro Surgical Intensive Care Unit (Neuro SICU)',
    'Neuro Stepdown',
]
for column, label in icu_labels.items():
    df[column] = (icu_type == label).astype(int)
df['ICU_Type_NEURO'] = icu_type.isin(neuro_units).astype(int)
df['ICU_Type_MISC'] = (~icu_type.isin(neuro_units + list(icu_labels.values()))).astype(int)

# Define x & y: the column order here is the feature order seen by the model.
feature_columns = [
    'kum_hep_t',
    'sofa_resp',
    'sofa_coag',
    'sofa_liver',
    'sofa_cardio',
    'sofa_cns',
    'sofa_renal',
    'age',
    'rel_charttime',
    'gender',
    'ethnicity_WHITE',
    'ethnicity_BLACK_AFRICAN_AMERICAN',
    'ethnicity_HISPANIC_LATINO',
    'ethnicity_ASIAN',
    'ethnicity_MISC',
    'ICU_Type_CCU',
    'ICU_Type_CVICU',
    'ICU_Type_TSICU',
    'ICU_Type_MICU',
    'ICU_Type_SICU',
    'ICU_Type_NEURO',
    'ICU_Type_MISC',
]
x = df[feature_columns].copy()
y = df[['PTT']].copy()

# Logistic Regression: refit on 10 different 70/30 splits (seeds 0..9) and
# collect the held-out ROC AUC of each fit.
# NOTE: despite its name, VUS_scores holds binary ROC AUC values in this cell;
# the name is kept so anything that reads it keeps working.
VUS_scores = []

for seed in range(10):
    x_train, x_test, y_train, y_test = train_test_split(
        x, y, test_size=0.3, random_state=seed
    )
    log_reg = LogisticRegression(max_iter=5000).fit(x_train, y_train.values.ravel())
    # Column 1 of predict_proba is the probability of label 1.
    VUS_scores.append(roc_auc_score(y_test, log_reg.predict_proba(x_test)[:, 1]))

# Evaluate the Model: mean AUC over all splits.
print("AVG (n=10) AUC-Score: ", round(mean(VUS_scores),4))


Downloading:   0%|                                                                         | 0/15376 [00:00<?, ?rows/s][A
Downloading: 100%|███████████████████████████████████████████████████████████| 15376/15376 [00:04<00:00, 3658.78rows/s][A


AVG (n=10) AUC-Score:  0.6402


### MULTICLASS, SINGLE FEATURE: VUS

In [51]:
# Load Data: pull the full cohort table from BigQuery.
query = """
        SELECT *
        FROM `bachelorarbeit-heparin.mimic_data.cohort1_final`
        """
df = gbq.read_gbq(query, project_id = "bachelorarbeit-heparin")

# Binning PTT Values into three classes:
#   0: PTT < 60,   1: 60 <= PTT <= 100,   2: PTT > 100
# np.select takes the first matching condition, so `ptt <= 100` only applies
# to rows that were not already < 60. Rows matching no condition (missing PTT)
# stay NaN, as they did in the original loop.
# One whole-column assignment replaces the per-row `df['PTT'][i] = ...` writes:
# that form is chained assignment, which does not write back to `df` when
# pandas Copy-on-Write is active.
ptt = df['PTT']
df['PTT'] = np.select(
    [ptt < 60, ptt <= 100, ptt > 100],
    [0, 1, 2],
    default=np.nan,
)

# Define x & y: single predictor (cumulative heparin) and the 3-class PTT label.
x = df[['kum_hep_t']].copy()
y = df[['PTT']].copy()

# Logistic Regression: refit on 10 different 70/30 splits (seeds 0..9).
# The score is roc_auc_score with multi_class='ovr': each class is scored
# against the rest and the per-class AUCs are averaged. That averaged value is
# what this cell reports as "VUS".
log_reg_score = []

for seed in range(10):
    x_train, x_test, y_train, y_test = train_test_split(
        x, y, test_size=0.3, random_state=seed
    )
    log_reg = LogisticRegression(max_iter=5000).fit(x_train, y_train.values.ravel())
    class_probabilities = log_reg.predict_proba(x_test)
    log_reg_score.append(roc_auc_score(y_test, class_probabilities, multi_class='ovr'))

# Evaluate the Model: mean score over all splits.
print("AVG (n=10) VUS-Score: ", round(mean(log_reg_score),4))


Downloading:   0%|                                                                         | 0/15376 [00:00<?, ?rows/s][A
Downloading: 100%|███████████████████████████████████████████████████████████| 15376/15376 [00:04<00:00, 3450.40rows/s][A


AVG (n=10) VUS-Score:  0.6885


### MULTICLASS, MULTIPLE FEATURES: VUS

In [53]:
# Load Data: pull the full cohort table from BigQuery.
query = """
        SELECT *
        FROM `bachelorarbeit-heparin.mimic_data.cohort1_final`
        """
df = gbq.read_gbq(query, project_id = "bachelorarbeit-heparin")

# All column transformations below are whole-column assignments. The previous
# per-row `df[col][i] = ...` writes were chained assignment, which does not
# update `df` when pandas Copy-on-Write is active.

# Convert gender to binary variables: 'M' -> 0, 'F' -> 1.
# Any other value is passed through unchanged, as before.
gender_codes = {'M': 0, 'F': 1}
df['gender'] = df['gender'].map(lambda value: gender_codes.get(value, value))

# Binning PTT Values into three classes:
#   0: PTT < 60,   1: 60 <= PTT <= 100,   2: PTT > 100
# np.select takes the first matching condition, so `ptt <= 100` only applies
# to rows that were not already < 60. Rows matching no condition (missing PTT)
# stay NaN, as they did in the original loop.
ptt = df['PTT']
df['PTT'] = np.select(
    [ptt < 60, ptt <= 100, ptt > 100],
    [0, 1, 2],
    default=np.nan,
)

# Convert ethnicities to binary variables: one indicator per named group,
# plus a catch-all MISC indicator for every value not in the named groups.
ethnicity = df['ethnicity']
ethnicity_labels = {
    'ethnicity_WHITE':                  'WHITE',
    'ethnicity_BLACK_AFRICAN_AMERICAN': 'BLACK/AFRICAN AMERICAN',
    'ethnicity_HISPANIC_LATINO':        'HISPANIC/LATINO',
    'ethnicity_ASIAN':                  'ASIAN',
}
for column, label in ethnicity_labels.items():
    df[column] = (ethnicity == label).astype(int)
df['ethnicity_MISC'] = (~ethnicity.isin(list(ethnicity_labels.values()))).astype(int)

# Convert ICU_Types to binary variables: one indicator per named unit, a
# combined NEURO indicator for the three neuro units, and a catch-all MISC
# indicator for every value not covered by any of the above.
icu_type = df['ICU_Type']
icu_labels = {
    'ICU_Type_CCU':   'Coronary Care Unit (CCU)',
    'ICU_Type_CVICU': 'Cardiac Vascular Intensive Care Unit (CVICU)',
    'ICU_Type_TSICU': 'Trauma SICU (TSICU)',
    'ICU_Type_MICU':  'Medical Intensive Care Unit (MICU)',
    'ICU_Type_SICU':  'Surgical Intensive Care Unit (SICU)',
}
neuro_units = [
    'Neuro Intermediate',
    'Neuro Surgical Intensive Care Unit (Neuro SICU)',
    'Neuro Stepdown',
]
for column, label in icu_labels.items():
    df[column] = (icu_type == label).astype(int)
df['ICU_Type_NEURO'] = icu_type.isin(neuro_units).astype(int)
df['ICU_Type_MISC'] = (~icu_type.isin(neuro_units + list(icu_labels.values()))).astype(int)

# Define x & y: the column order here is the feature order seen by the model.
feature_columns = [
    'kum_hep_t',
    'sofa_resp',
    'sofa_coag',
    'sofa_liver',
    'sofa_cardio',
    'sofa_cns',
    'sofa_renal',
    'age',
    'rel_charttime',
    'gender',
    'ethnicity_WHITE',
    'ethnicity_BLACK_AFRICAN_AMERICAN',
    'ethnicity_HISPANIC_LATINO',
    'ethnicity_ASIAN',
    'ethnicity_MISC',
    'ICU_Type_CCU',
    'ICU_Type_CVICU',
    'ICU_Type_TSICU',
    'ICU_Type_MICU',
    'ICU_Type_SICU',
    'ICU_Type_NEURO',
    'ICU_Type_MISC',
]
x = df[feature_columns].copy()
y = df[['PTT']].copy()

# Logistic Regression: refit on 10 different 70/30 splits (seeds 0..9).
# The score is roc_auc_score with multi_class='ovr': each class is scored
# against the rest and the per-class AUCs are averaged. That averaged value is
# what this cell reports as "VUS".
log_reg_score = []

for seed in range(10):
    x_train, x_test, y_train, y_test = train_test_split(
        x, y, test_size=0.3, random_state=seed
    )
    log_reg = LogisticRegression(max_iter=5000).fit(x_train, y_train.values.ravel())
    class_probabilities = log_reg.predict_proba(x_test)
    log_reg_score.append(roc_auc_score(y_test, class_probabilities, multi_class='ovr'))

# Evaluate the Model: mean score over all splits.
print("AVG (n=10) VUS-Score: ", round(mean(log_reg_score),4))


Downloading:   0%|                                                                         | 0/15376 [00:00<?, ?rows/s][A
Downloading: 100%|███████████████████████████████████████████████████████████| 15376/15376 [00:04<00:00, 3513.98rows/s][A


AVG (n=10) VUS-Score:  0.7352
