In [5]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.svm import SVC
from sklearn.linear_model import SGDClassifier, SGDRegressor, RidgeCV, LassoCV, LogisticRegression
from sklearn.preprocessing import OrdinalEncoder, LabelEncoder, StandardScaler, RobustScaler
from sklearn.model_selection import train_test_split, KFold
from sklearn.impute import KNNImputer, SimpleImputer
from sklearn.pipeline import make_pipeline, Pipeline
from sklearn.metrics import accuracy_score, classification_report, roc_auc_score
from imblearn.over_sampling import SMOTEN




smoten_knn_contin = pd.read_csv('smoten_knn_contin.csv', index_col=False)
smoten_median_imputed_contin = pd.read_csv('smoten_median_imputed_contin.csv', index_col=False)

pd.set_option("display.max_columns", None)


def classifyScale_coef(scaleFunc1, estimatorFunc2, k_fold_int, x_array, y_array):
    k = k_fold_int
    kf = KFold(n_splits=k, shuffle=True, random_state=None)    
    clf = make_pipeline(scaleFunc1, estimatorFunc2)

    acc_score = []
    for train_index , test_index in kf.split(X):
        X_train , X_test = X.iloc[train_index,:],X.iloc[test_index,:]
        y_train , y_test = y[train_index] , y[test_index]
    
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
     
    acc = accuracy_score(y_pred , y_test)
    acc_score.append(acc)
     
    avg_acc_score = sum(acc_score)/k
    print('accuracy of each fold - {}'.format(acc_score))
    print('Avg accuracy : {}'.format(avg_acc_score))

    outcome_labels = ['Intubation False', 'Intubation True']

    #print('parameters \n', clf.get_params())
    print('score \n', clf.score(X_train, y_train))
    print('coef_')
    print(clf[1].coef_)
    print('intercept_')
    print(clf[1].intercept_)
    print('fit')
    print(clf.fit(X_train, y_train).n_features_in_, clf.fit(X_train, y_train).feature_names_in_)
    print('decision function \n', clf[1].decision_function(X_test))
    print('classification report \n', classification_report(y_test, y_pred, target_names=outcome_labels))
    micro_roc_auc_ovr = roc_auc_score(y_test, y_pred, multi_class="ovr", average="micro")
    print(f"Micro-averaged One-vs-Rest ROC AUC score:\n{micro_roc_auc_ovr:.2f}")
    print('\n')
    
    
print('### SMOTEN knn contin ###')
print()

X = smoten_knn_contin.drop('outcome',axis= 1)
y = smoten_knn_contin['outcome']


print('Logistic Regression lbfgs')

logRegress = LogisticRegression(penalty='l2', dual=False, tol=0.0001, C=1.0, 
                   fit_intercept=True, intercept_scaling=1, class_weight=None, 
                   random_state=None, solver='lbfgs', max_iter=100, multi_class='auto', 
                   verbose=0, warm_start=False, n_jobs=None, l1_ratio=None)

print('lbfgs StandardScaler')
print()
classifyScale_coef(StandardScaler(), logRegress, 5, X, y)

print('lbfgs RobustScaler')
classifyScale_coef(RobustScaler(), logRegress, 5, X, y)


print('### SMOTEN median impute ###')
print()

X = smoten_median_imputed_contin.drop('outcome',axis= 1)
y = smoten_median_imputed_contin['outcome']

print('lbfgs StandardScaler')
print()
classifyScale_coef(StandardScaler(), logRegress, 5, X, y)

print('lbfgs RobustScaler')
classifyScale_coef(RobustScaler(), logRegress, 5, X, y)




### SMOTEN knn contin ###

Logistic Regression lbfgs
lbfgs StandardScaler

accuracy of each fold - [0.9610352673492605]
Avg accuracy : 0.1922070534698521
score 
 0.961532992036405
coef_
[[-1.4725266   0.30498635  0.13098023 -0.88503291 -0.29751792 -0.15564673
   1.28748485 -0.28269362  0.18360289 -0.75373038 -0.03309507  0.06893561
  -1.51724931  0.16593666  0.22794028 -0.86882459 -0.28700513 -0.25242182
   0.17454962 -0.76070377 -0.27015943 -0.39503703 -0.42527939 -0.4974183
  -0.53387942  0.01821861 -1.2901192   0.26215143]]
intercept_
[-0.16693365]
fit
28 ['age' 'heart_rate_min' 'heart_rate_max' 'heart_rate_mean' 'mbp_min'
 'mbp_max' 'mbp_mean' 'sbp_min' 'sbp_max' 'sbp_mean' 'dbp_min' 'dbp_max'
 'dbp_mean' 'temperature_min' 'temperature_max' 'temperature_mean'
 'glucose_min' 'glucose_max' 'wbc_min' 'wbc_max' 'creatinine_min'
 'creatinine_max' 'hemoglobin_min' 'hemoglobin_max' 'pt_min' 'pt_max'
 'urineoutput' 'gender']
decision function 
 [-3028.00936552 -4284.28720327 -3469.34209463



score 
 0.9615685437997725
coef_
[[-2.05822281  0.39745304  0.20571767 -1.12574154 -0.3155026  -0.16555331
   1.55806797 -0.34873817  0.23235936 -0.86765987 -0.06340923  0.06433591
  -1.87308859  0.18499491  0.19533835 -0.97331957 -0.24910621 -0.14147963
   0.13977018 -0.53481854 -0.09734753 -0.13477129 -0.59140945 -0.73330301
  -0.32100527 -0.00591869 -1.45965793  0.48962474]]
intercept_
[1.47114752]
fit
28 ['age' 'heart_rate_min' 'heart_rate_max' 'heart_rate_mean' 'mbp_min'
 'mbp_max' 'mbp_mean' 'sbp_min' 'sbp_max' 'sbp_mean' 'dbp_min' 'dbp_max'
 'dbp_mean' 'temperature_min' 'temperature_max' 'temperature_mean'
 'glucose_min' 'glucose_max' 'wbc_min' 'wbc_max' 'creatinine_min'
 'creatinine_max' 'hemoglobin_min' 'hemoglobin_max' 'pt_min' 'pt_max'
 'urineoutput' 'gender']
decision function 
 [-4840.06513174 -2289.78527281 -2227.95643843 ... -1664.69294578
  -578.6920425   -893.05473424]
classification report 
                   precision    recall  f1-score   support

Intubation False  



28 ['age' 'heart_rate_min' 'heart_rate_max' 'heart_rate_mean' 'mbp_min'
 'mbp_max' 'mbp_mean' 'sbp_min' 'sbp_max' 'sbp_mean' 'dbp_min' 'dbp_max'
 'dbp_mean' 'temperature_min' 'temperature_max' 'temperature_mean'
 'glucose_min' 'glucose_max' 'wbc_min' 'wbc_max' 'creatinine_min'
 'creatinine_max' 'hemoglobin_min' 'hemoglobin_max' 'pt_min' 'pt_max'
 'urineoutput' 'gender']
decision function 
 [-3561.58838835 -3088.9251992  -1989.42896069 ... -1673.6249065
  -739.75624459 -1364.58350227]
classification report 
                   precision    recall  f1-score   support

Intubation False       0.96      0.96      0.96      7139
 Intubation True       0.96      0.96      0.96      6925

        accuracy                           0.96     14064
       macro avg       0.96      0.96      0.96     14064
    weighted avg       0.96      0.96      0.96     14064

Micro-averaged One-vs-Rest ROC AUC score:
0.96


lbfgs RobustScaler




accuracy of each fold - [0.9606797497155859]
Avg accuracy : 0.19213594994311717
score 
 0.9607686291240045
coef_
[[-2.07061448e+00  4.17157816e-01  2.23713702e-01 -1.19050986e+00
  -3.30479173e-01 -1.44019696e-01  1.53363121e+00 -3.38815134e-01
   2.67313468e-01 -8.50557475e-01 -1.48987291e-03  4.82858651e-02
  -1.85639170e+00  1.41043936e-01  2.16640259e-01 -9.71784303e-01
  -2.65038786e-01 -1.29763808e-01  1.11301868e-01 -5.09532987e-01
  -1.12841358e-01 -1.33576779e-01 -5.77549946e-01 -7.60554891e-01
  -1.99036245e-01 -1.07763345e-02 -1.27549050e+00  4.94886902e-01]]
intercept_
[1.45774674]
fit
28 ['age' 'heart_rate_min' 'heart_rate_max' 'heart_rate_mean' 'mbp_min'
 'mbp_max' 'mbp_mean' 'sbp_min' 'sbp_max' 'sbp_mean' 'dbp_min' 'dbp_max'
 'dbp_mean' 'temperature_min' 'temperature_max' 'temperature_mean'
 'glucose_min' 'glucose_max' 'wbc_min' 'wbc_max' 'creatinine_min'
 'creatinine_max' 'hemoglobin_min' 'hemoglobin_max' 'pt_min' 'pt_max'
 'urineoutput' 'gender']
decision function 
 [-



In [6]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.svm import SVC
from sklearn.linear_model import SGDClassifier, SGDRegressor, RidgeCV, LassoCV, LogisticRegression
from sklearn.preprocessing import OrdinalEncoder, LabelEncoder, StandardScaler, RobustScaler
from sklearn.model_selection import train_test_split, KFold
from sklearn.impute import KNNImputer, SimpleImputer
from sklearn.pipeline import make_pipeline, Pipeline
from sklearn.metrics import accuracy_score, classification_report, roc_auc_score
from imblearn.over_sampling import SMOTEN




smoten_knn_contin = pd.read_csv('smoten_knn_contin.csv', index_col=False)
smoten_median_imputed_contin = pd.read_csv('smoten_median_imputed_contin.csv', index_col=False)

pd.set_option("display.max_columns", None)


def classifyScale_coef(scaleFunc1, estimatorFunc2, k_fold_int, x_array, y_array):
    k = k_fold_int
    kf = KFold(n_splits=k, shuffle=True, random_state=None)    
    clf = make_pipeline(scaleFunc1, estimatorFunc2)

    acc_score = []
    for train_index , test_index in kf.split(X):
        X_train , X_test = X.iloc[train_index,:],X.iloc[test_index,:]
        y_train , y_test = y[train_index] , y[test_index]
    
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
     
    acc = accuracy_score(y_pred , y_test)
    acc_score.append(acc)
     
    avg_acc_score = sum(acc_score)/k
    print('accuracy of each fold - {}'.format(acc_score))
    print('Avg accuracy : {}'.format(avg_acc_score))

    outcome_labels = ['Intubation False', 'Intubation True']

    #print('parameters \n', clf.get_params())
    print('score \n', clf.score(X_train, y_train))
    print('coef_')
    print(clf[1].coef_)
    print('intercept_')
    print(clf[1].intercept_)
    print('fit')
    print(clf.fit(X_train, y_train).n_features_in_, clf.fit(X_train, y_train).feature_names_in_)
    print('decision function \n', clf[1].decision_function(X_test))
    print('classification report \n', classification_report(y_test, y_pred, target_names=outcome_labels))
    micro_roc_auc_ovr = roc_auc_score(y_test, y_pred, multi_class="ovr", average="micro")
    print(f"Micro-averaged One-vs-Rest ROC AUC score:\n{micro_roc_auc_ovr:.2f}")
    print('\n')
    
    
print('### SMOTEN knn contin ###')
print()

X = smoten_knn_contin.drop('outcome',axis= 1)
y = smoten_knn_contin['outcome']


print('Logistic Regression lbfgs')

logRegress = LogisticRegression(penalty='l2', dual=False, tol=0.0001, C=1.0, 
                   fit_intercept=True, intercept_scaling=1, class_weight=None, 
                   random_state=None, solver='lbfgs', max_iter=100, multi_class='auto', 
                   verbose=0, warm_start=False, n_jobs=None, l1_ratio=None)

print('lbfgs StandardScaler')
print()
classifyScale_coef(StandardScaler(), logRegress, 5, X, y)

print('lbfgs RobustScaler')
classifyScale_coef(RobustScaler(), logRegress, 5, X, y)


print('### SMOTEN median impute ###')
print()

X = smoten_median_imputed_contin.drop('outcome',axis= 1)
y = smoten_median_imputed_contin['outcome']

print('lbfgs StandardScaler')
print()
classifyScale_coef(StandardScaler(), logRegress, 5, X, y)

print('lbfgs RobustScaler')
classifyScale_coef(RobustScaler(), logRegress, 5, X, y)




### SMOTEN knn contin ###

Logistic Regression lbfgs
lbfgs StandardScaler

accuracy of each fold - [0.9608930602957907]
Avg accuracy : 0.19217861205915815
score 
 0.9611063708759955
coef_
[[-1.47014894  0.34897317  0.17594181 -0.93685175 -0.29864747 -0.16045933
   1.27626699 -0.29355034  0.2194751  -0.75168236 -0.02669067  0.04561225
  -1.50473654  0.15510825  0.20961361 -0.89618157 -0.26909461 -0.27549912
   0.12612695 -0.7270956  -0.27719584 -0.41587238 -0.38608793 -0.51184283
  -0.4444669  -0.06108515 -1.25720324  0.25297211]]
intercept_
[-0.16644661]
fit
28 ['age' 'heart_rate_min' 'heart_rate_max' 'heart_rate_mean' 'mbp_min'
 'mbp_max' 'mbp_mean' 'sbp_min' 'sbp_max' 'sbp_mean' 'dbp_min' 'dbp_max'
 'dbp_mean' 'temperature_min' 'temperature_max' 'temperature_mean'
 'glucose_min' 'glucose_max' 'wbc_min' 'wbc_max' 'creatinine_min'
 'creatinine_max' 'hemoglobin_min' 'hemoglobin_max' 'pt_min' 'pt_max'
 'urineoutput' 'gender']
decision function 
 [-3570.24889516 -3396.07166271 -4181.93827



score 
 0.9618529579067122
coef_
[[-2.07091124  0.40929864  0.20702457 -1.16942671 -0.3038702  -0.14808426
   1.55707143 -0.38065403  0.3019258  -0.89340932 -0.04393553  0.03273517
  -1.83999211  0.15597012  0.21046381 -1.00094289 -0.25339399 -0.14136181
   0.16074518 -0.55849374 -0.09460002 -0.14443665 -0.6492421  -0.71712324
  -0.3004406  -0.03531873 -1.44796035  0.49446949]]
intercept_
[1.48559037]
fit
28 ['age' 'heart_rate_min' 'heart_rate_max' 'heart_rate_mean' 'mbp_min'
 'mbp_max' 'mbp_mean' 'sbp_min' 'sbp_max' 'sbp_mean' 'dbp_min' 'dbp_max'
 'dbp_mean' 'temperature_min' 'temperature_max' 'temperature_mean'
 'glucose_min' 'glucose_max' 'wbc_min' 'wbc_max' 'creatinine_min'
 'creatinine_max' 'hemoglobin_min' 'hemoglobin_max' 'pt_min' 'pt_max'
 'urineoutput' 'gender']
decision function 
 [-5387.88292621 -9099.98075243 -2988.8974879  ... -1300.16812066
  -790.9070362  -1763.44332754]
classification report 
                   precision    recall  f1-score   support

Intubation False  



28 ['age' 'heart_rate_min' 'heart_rate_max' 'heart_rate_mean' 'mbp_min'
 'mbp_max' 'mbp_mean' 'sbp_min' 'sbp_max' 'sbp_mean' 'dbp_min' 'dbp_max'
 'dbp_mean' 'temperature_min' 'temperature_max' 'temperature_mean'
 'glucose_min' 'glucose_max' 'wbc_min' 'wbc_max' 'creatinine_min'
 'creatinine_max' 'hemoglobin_min' 'hemoglobin_max' 'pt_min' 'pt_max'
 'urineoutput' 'gender']
decision function 
 [-3057.81954709 -2132.47474207 -1882.50737388 ... -2064.58163014
 -1350.93572214  -681.16027862]
classification report 
                   precision    recall  f1-score   support

Intubation False       0.96      0.97      0.96      6993
 Intubation True       0.97      0.96      0.96      7071

        accuracy                           0.96     14064
       macro avg       0.96      0.96      0.96     14064
    weighted avg       0.96      0.96      0.96     14064

Micro-averaged One-vs-Rest ROC AUC score:
0.96


lbfgs RobustScaler




accuracy of each fold - [0.9599687144482366]
Avg accuracy : 0.19199374288964732
score 
 0.960146473265074
coef_
[[-2.03692186  0.40593405  0.18089003 -1.15909488 -0.34514005 -0.1814366
   1.48276564 -0.32023317  0.26933607 -0.83818196  0.01863958  0.07443421
  -1.78197494  0.16052562  0.23505302 -0.96850041 -0.27151747 -0.13596647
   0.11838848 -0.50580194 -0.10803577 -0.13874946 -0.55333465 -0.7500299
  -0.20503865  0.00317344 -1.31203227  0.50106403]]
intercept_
[1.46322355]
fit
28 ['age' 'heart_rate_min' 'heart_rate_max' 'heart_rate_mean' 'mbp_min'
 'mbp_max' 'mbp_mean' 'sbp_min' 'sbp_max' 'sbp_mean' 'dbp_min' 'dbp_max'
 'dbp_mean' 'temperature_min' 'temperature_max' 'temperature_mean'
 'glucose_min' 'glucose_max' 'wbc_min' 'wbc_max' 'creatinine_min'
 'creatinine_max' 'hemoglobin_min' 'hemoglobin_max' 'pt_min' 'pt_max'
 'urineoutput' 'gender']
decision function 
 [ -799.39752049 -1277.03296636 -4347.71082608 ...  -616.96425455
 -1433.44699251  -733.24712378]
classification report 
 

