In [2]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.svm import SVC
from sklearn.linear_model import SGDClassifier, SGDRegressor, RidgeCV, LassoCV, LogisticRegression
from sklearn.preprocessing import OrdinalEncoder, LabelEncoder, StandardScaler, RobustScaler
from sklearn.model_selection import train_test_split, KFold
from sklearn.impute import KNNImputer, SimpleImputer
from sklearn.pipeline import make_pipeline, Pipeline
from sklearn.metrics import accuracy_score, classification_report, roc_auc_score
from imblearn.over_sampling import SMOTEN




smoten_knn_contin = pd.read_csv('smoten_knn_contin.csv', index_col=False)
smoten_median_imputed_contin = pd.read_csv('smoten_median_imputed_contin.csv', index_col=False)

pd.set_option("display.max_columns", None)


def classifyScale_coef(scaleFunc1, estimatorFunc2, k_fold_int, x_array, y_array):
    k = k_fold_int
    kf = KFold(n_splits=k, shuffle=True, random_state=None)    
    clf = make_pipeline(scaleFunc1, estimatorFunc2)

    acc_score = []
    for train_index , test_index in kf.split(X):
        X_train , X_test = X.iloc[train_index,:],X.iloc[test_index,:]
        y_train , y_test = y[train_index] , y[test_index]
    
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
     
    acc = accuracy_score(y_pred , y_test)
    acc_score.append(acc)
     
    avg_acc_score = sum(acc_score)/k
    print('accuracy of each fold - {}'.format(acc_score))
    print('Avg accuracy : {}'.format(avg_acc_score))

    outcome_labels = ['Intubation False', 'Intubation True']

    #print('parameters \n', clf.get_params())
    print('score \n', clf.score(X_train, y_train))
    print('coef_')
    print(clf[1].coef_)
    print('intercept_')
    print(clf[1].intercept_)
    print('fit')
    print(clf.fit(X_train, y_train).n_features_in_, clf.fit(X_train, y_train).feature_names_in_)
    print('decision function \n', clf[1].decision_function(X_test))
    print('classification report \n', classification_report(y_test, y_pred, target_names=outcome_labels))
    micro_roc_auc_ovr = roc_auc_score(y_test, y_pred, multi_class="ovr", average="micro")
    print(f"Micro-averaged One-vs-Rest ROC AUC score:\n{micro_roc_auc_ovr:.2f}")
    print('\n')
    
    
print('### SMOTEN knn contin ###')
print()

X = smoten_knn_contin.loc[:, ~smoten_knn_contin.columns.isin(['heart_rate_max', 'mbp_max', 'dbp_min', 'temperature_max', 'wbc_min', 'creatinine_min', 'hemoglobin_max', 'pt_max', 'gender','outcome'])]
y = smoten_knn_contin['outcome']


print('Logistic Regression lbfgs')

logRegress = LogisticRegression(penalty='l2', dual=False, tol=0.0001, C=2.0, 
                   fit_intercept=True, intercept_scaling=1, class_weight=None, 
                   random_state=None, solver='lbfgs', max_iter=100, multi_class='auto', 
                   verbose=0, warm_start=False, n_jobs=None, l1_ratio=None)

print('lbfgs StandardScaler')
print()
classifyScale_coef(StandardScaler(), logRegress, 5, X, y)

logRegress = LogisticRegression(penalty='l2', dual=False, tol=0.0001, C=1.0, 
                   fit_intercept=True, intercept_scaling=1, class_weight=None, 
                   random_state=None, solver='lbfgs', max_iter=100, multi_class='auto', 
                   verbose=0, warm_start=False, n_jobs=None, l1_ratio=None)


print('lbfgs RobustScaler')                          
X = smoten_knn_contin.loc[:, ~smoten_knn_contin.columns.isin(['mbp_max', 'dbp_max', 'temperature_max', 'creatinine_max','outcome'])]
y = smoten_knn_contin['outcome']                          
                          
classifyScale_coef(RobustScaler(), logRegress, 5, X, y)

print('### SMOTEN median impute ###')
print()
                          

                                     
print('lbfgs StandardScaler')
print()

X = smoten_median_imputed_contin.loc[:, ~smoten_knn_contin.columns.isin(['heart_rate_max', 'mbp_max', 'dbp_min', 'dbp_max', 'wbc_min', 'hemoglobin_max', 'pt_max', 'outcome'])]
y = smoten_median_imputed_contin['outcome']

logRegress = LogisticRegression(penalty='l2', dual=False, tol=0.0001, C=1.0, 
                   fit_intercept=True, intercept_scaling=1, class_weight=None, 
                   random_state=None, solver='lbfgs', max_iter=100, multi_class='auto', 
                   verbose=0, warm_start=False, n_jobs=None, l1_ratio=None)

                          
classifyScale_coef(StandardScaler(), logRegress, 5, X, y)

                                     
X = smoten_median_imputed_contin.loc[:, ~smoten_knn_contin.columns.isin(['sbp_max', 'dbp_min', 'hemoglobin_max', 'pt_max', 'gender','outcome'])]
y = smoten_median_imputed_contin['outcome']
                          
logRegress = LogisticRegression(penalty='l2', dual=False, tol=0.0001, C=2.0, 
                   fit_intercept=True, intercept_scaling=1, class_weight=None, 
                   random_state=None, solver='lbfgs', max_iter=100, multi_class='auto', 
                   verbose=0, warm_start=False, n_jobs=None, l1_ratio=None)

                                     
print('lbfgs RobustScaler')
classifyScale_coef(RobustScaler(), logRegress, 5, X, y)




### SMOTEN knn contin ###

Logistic Regression lbfgs
lbfgs StandardScaler

accuracy of each fold - [0.960608646188851]
Avg accuracy : 0.1921217292377702
score 
 0.9612308020477816
coef_
[[-1.4833366   0.32366747 -0.76092198 -0.2283931   1.09305142 -0.32500974
   0.16553943 -0.66060473 -0.01134606 -1.46991864  0.11087435 -0.74544058
  -0.24933465 -0.32208389 -0.68334316 -0.6572162  -0.73850856 -0.46948236
  -1.29785662]]
intercept_
[-0.20090412]
fit
19 ['age' 'heart_rate_min' 'heart_rate_mean' 'mbp_min' 'mbp_mean' 'sbp_min'
 'sbp_max' 'sbp_mean' 'dbp_max' 'dbp_mean' 'temperature_min'
 'temperature_mean' 'glucose_min' 'glucose_max' 'wbc_max' 'creatinine_max'
 'hemoglobin_min' 'pt_min' 'urineoutput']
decision function 
 [-3063.53938996 -4277.56481693 -8274.04963826 ... -1902.62571594
  -514.50085303 -1617.66155188]
classification report 
                   precision    recall  f1-score   support

Intubation False       0.96      0.96      0.96      7066
 Intubation True       0.96      0.



accuracy of each fold - [0.9619596131968146]
Avg accuracy : 0.19239192263936292
score 
 0.9601286973833902
coef_
[[-2.08536684  0.40625059  0.24806659 -1.18036334 -0.32604532  1.39582383
  -0.37327694  0.23122867 -0.84688751 -0.0232199  -1.80603205  0.05442014
  -0.7185959  -0.24115141 -0.15693577  0.14930052 -0.5360962  -0.23180063
  -0.58267927 -0.75727309 -0.31215261 -0.01251087 -1.38961319  0.5753426 ]]
intercept_
[1.46473822]
fit
24 ['age' 'heart_rate_min' 'heart_rate_max' 'heart_rate_mean' 'mbp_min'
 'mbp_mean' 'sbp_min' 'sbp_max' 'sbp_mean' 'dbp_min' 'dbp_mean'
 'temperature_min' 'temperature_mean' 'glucose_min' 'glucose_max'
 'wbc_min' 'wbc_max' 'creatinine_min' 'hemoglobin_min' 'hemoglobin_max'
 'pt_min' 'pt_max' 'urineoutput' 'gender']
decision function 
 [-3431.55035052 -1489.49671587 -2930.02733913 ... -1149.52406011
  -366.8462525  -1176.81303583]
classification report 
                   precision    recall  f1-score   support

Intubation False       0.96      0.97      0



21 ['age' 'heart_rate_min' 'heart_rate_mean' 'mbp_min' 'mbp_mean' 'sbp_min'
 'sbp_max' 'sbp_mean' 'dbp_mean' 'temperature_min' 'temperature_max'
 'temperature_mean' 'glucose_min' 'glucose_max' 'wbc_max' 'creatinine_min'
 'creatinine_max' 'hemoglobin_min' 'pt_min' 'urineoutput' 'gender']
decision function 
 [-4999.40468228 -8558.60493561 -3802.77965721 ...  -780.39792657
 -2224.98626213 -1452.5241984 ]
classification report 
                   precision    recall  f1-score   support

Intubation False       0.96      0.96      0.96      7026
 Intubation True       0.96      0.96      0.96      7038

        accuracy                           0.96     14064
       macro avg       0.96      0.96      0.96     14064
    weighted avg       0.96      0.96      0.96     14064

Micro-averaged One-vs-Rest ROC AUC score:
0.96


lbfgs RobustScaler




accuracy of each fold - [0.9613196814562003]
Avg accuracy : 0.19226393629124006
score 
 0.9611596985210467
coef_
[[-2.06513036  0.48267939  0.17241276 -1.27362296 -0.29327336 -0.14219876
   1.48268932 -0.4327054  -0.61733593  0.10271825 -1.86601956  0.16517986
   0.27723013 -1.03632101 -0.23572168 -0.16046957  0.2428768  -0.6470287
  -0.07954751 -0.15999709 -1.11203083 -0.18239433 -1.33241359]]
intercept_
[1.18090228]
fit
23 ['age' 'heart_rate_min' 'heart_rate_max' 'heart_rate_mean' 'mbp_min'
 'mbp_max' 'mbp_mean' 'sbp_min' 'sbp_mean' 'dbp_max' 'dbp_mean'
 'temperature_min' 'temperature_max' 'temperature_mean' 'glucose_min'
 'glucose_max' 'wbc_min' 'wbc_max' 'creatinine_min' 'creatinine_max'
 'hemoglobin_min' 'pt_min' 'urineoutput']
decision function 
 [-5009.87966737 -2571.85192423 -1453.31217689 ...  -764.33919832
  -807.54573611  -858.07823197]
classification report 
                   precision    recall  f1-score   support

Intubation False       0.96      0.96      0.96      7071



In [5]:
print('##SFS otpimised###')


import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.svm import SVC
from sklearn.linear_model import SGDClassifier, SGDRegressor, RidgeCV, LassoCV, LogisticRegression
from sklearn.preprocessing import OrdinalEncoder, LabelEncoder, StandardScaler, RobustScaler
from sklearn.model_selection import train_test_split, KFold
from sklearn.impute import KNNImputer, SimpleImputer
from sklearn.pipeline import make_pipeline, Pipeline
from sklearn.metrics import accuracy_score, classification_report, roc_auc_score
from imblearn.over_sampling import SMOTEN




smoten_knn_contin = pd.read_csv('smoten_knn_contin.csv', index_col=False)
smoten_median_imputed_contin = pd.read_csv('smoten_median_imputed_contin.csv', index_col=False)

pd.set_option("display.max_columns", None)


def classifyScale_coef(scaleFunc1, estimatorFunc2, k_fold_int, x_array, y_array):
    k = k_fold_int
    kf = KFold(n_splits=k, shuffle=True, random_state=None)    
    clf = make_pipeline(scaleFunc1, estimatorFunc2)

    acc_score = []
    for train_index , test_index in kf.split(X):
        X_train , X_test = X.iloc[train_index,:],X.iloc[test_index,:]
        y_train , y_test = y[train_index] , y[test_index]
    
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
     
    acc = accuracy_score(y_pred , y_test)
    acc_score.append(acc)
     
    avg_acc_score = sum(acc_score)/k
    print('accuracy of each fold - {}'.format(acc_score))
    print('Avg accuracy : {}'.format(avg_acc_score))

    outcome_labels = ['Intubation False', 'Intubation True']

    #print('parameters \n', clf.get_params())
    print('score \n', clf.score(X_train, y_train))
    print('coef_')
    print(clf[1].coef_)
    print('intercept_')
    print(str(clf[1].intercept_).strip('[').strip(']'))
    print('fit')
    for i in range(len(clf.feature_names_in_)):
            #print(clf.feature_names_in_[i], ":" ,clf[1].coef_[i])
            print(clf.feature_names_in_[i], str(clf[1].coef_).strip('[').strip(']').split()[i])
    print('decision function \n', clf[1].decision_function(X_test))
    print('classification report \n', classification_report(y_test, y_pred, target_names=outcome_labels))
    micro_roc_auc_ovr = roc_auc_score(y_test, y_pred, multi_class="ovr", average="micro")
    print(f"Micro-averaged One-vs-Rest ROC AUC score:\n{micro_roc_auc_ovr:.2f}")
    print('\n')
    
    
print('### SMOTEN knn contin ###')
print()

X = smoten_knn_contin[['age', 'heart_rate_mean', 'mbp_mean', 'temperature_mean', 'hemoglobin_max']]
y = smoten_knn_contin['outcome']


print('Logistic Regression lbfgs')

logRegress = LogisticRegression(penalty='l2', dual=False, tol=0.0001, C=2.0, 
                   fit_intercept=True, intercept_scaling=1, class_weight=None, 
                   random_state=None, solver='lbfgs', max_iter=100, multi_class='auto', 
                   verbose=0, warm_start=False, n_jobs=None, l1_ratio=None)

print('lbfgs StandardScaler')
print()
classifyScale_coef(StandardScaler(), logRegress, 5, X, y)

logRegress = LogisticRegression(penalty='l2', dual=False, tol=0.0001, C=1.0, 
                   fit_intercept=True, intercept_scaling=1, class_weight=None, 
                   random_state=None, solver='lbfgs', max_iter=100, multi_class='auto', 
                   verbose=0, warm_start=False, n_jobs=None, l1_ratio=None)


print('lbfgs RobustScaler')                          
X = smoten_knn_contin[['age', 'heart_rate_mean', 'mbp_mean', 'temperature_mean', 'hemoglobin_max']]
y = smoten_knn_contin['outcome']                          
                          
classifyScale_coef(RobustScaler(), logRegress, 5, X, y)

print('### SMOTEN median impute ###')
print()
                          

                                     
print('lbfgs StandardScaler')
print()

X = smoten_median_imputed_contin[['age', 'heart_rate_mean', 'mbp_mean', 'temperature_mean', 'hemoglobin_max']]
y = smoten_median_imputed_contin['outcome']

logRegress = LogisticRegression(penalty='l2', dual=False, tol=0.0001, C=1.0, 
                   fit_intercept=True, intercept_scaling=1, class_weight=None, 
                   random_state=None, solver='lbfgs', max_iter=100, multi_class='auto', 
                   verbose=0, warm_start=False, n_jobs=None, l1_ratio=None)

                          
classifyScale_coef(StandardScaler(), logRegress, 5, X, y)

                                     
X = smoten_median_imputed_contin[['age', 'heart_rate_mean', 'mbp_mean', 'temperature_mean', 'hemoglobin_max']]
y = smoten_median_imputed_contin['outcome']
                          
logRegress = LogisticRegression(penalty='l2', dual=False, tol=0.0001, C=2.0, 
                   fit_intercept=True, intercept_scaling=1, class_weight=None, 
                   random_state=None, solver='lbfgs', max_iter=100, multi_class='auto', 
                   verbose=0, warm_start=False, n_jobs=None, l1_ratio=None)

                                     
print('lbfgs RobustScaler')
classifyScale_coef(RobustScaler(), logRegress, 5, X, y)




##SFS otpimised###
### SMOTEN knn contin ###

Logistic Regression lbfgs
lbfgs StandardScaler

accuracy of each fold - [0.9267633674630261]
Avg accuracy : 0.18535267349260523
score 
 0.9248080204778157
coef_
[[-1.7316398  -1.00506646 -1.1622396  -0.94871929 -0.99073708]]
intercept_
-0.15967338
fit
age -1.7316398
heart_rate_mean -1.00506646
mbp_mean -1.1622396
temperature_mean -0.94871929
hemoglobin_max -0.99073708
decision function 
 [-357.52062507 -345.78072066 -354.35049299 ... -276.62300691 -266.70384129
 -252.57938293]
classification report 
                   precision    recall  f1-score   support

Intubation False       0.93      0.93      0.93      7072
 Intubation True       0.93      0.92      0.93      6992

        accuracy                           0.93     14064
       macro avg       0.93      0.93      0.93     14064
    weighted avg       0.93      0.93      0.93     14064

Micro-averaged One-vs-Rest ROC AUC score:
0.93


lbfgs RobustScaler
accuracy of each fold - [0.92



accuracy of each fold - [0.927901023890785]
Avg accuracy : 0.185580204778157
score 
 0.927634385665529
coef_
[[-2.45973335 -1.28577071 -1.44022688 -1.06051177 -1.44432669]]
intercept_
0.49842392
fit
age -2.45973335
heart_rate_mean -1.28577071
mbp_mean -1.44022688
temperature_mean -1.06051177
hemoglobin_max -1.44432669
decision function 
 [-467.2963853  -351.98502094 -379.32865174 ... -327.27456715 -361.90649834
 -341.6279351 ]
classification report 
                   precision    recall  f1-score   support

Intubation False       0.93      0.92      0.93      7019
 Intubation True       0.92      0.94      0.93      7045

        accuracy                           0.93     14064
       macro avg       0.93      0.93      0.93     14064
    weighted avg       0.93      0.93      0.93     14064

Micro-averaged One-vs-Rest ROC AUC score:
0.93




