In [4]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.svm import SVC
from sklearn.linear_model import SGDClassifier, SGDRegressor, RidgeCV, LassoCV
from sklearn.preprocessing import OrdinalEncoder, LabelEncoder, StandardScaler, RobustScaler
from sklearn.model_selection import train_test_split, KFold
from sklearn.impute import KNNImputer, SimpleImputer
from sklearn.pipeline import make_pipeline, Pipeline
from sklearn.metrics import accuracy_score, classification_report, roc_auc_score
from imblearn.over_sampling import SMOTEN

smoten_knn_contin = pd.read_csv('smoten_knn_contin.csv', index_col=False)
smoten_median_imputed_contin = pd.read_csv('smoten_median_imputed_contin.csv', index_col=False)

pd.set_option("display.max_columns", None)


def classifyScale_dual_coef(scaleFunc1, estimatorFunc2, k_fold_int, x_array, y_array):
    k = k_fold_int
    kf = KFold(n_splits=k, shuffle=True, random_state=None)    
    clf = make_pipeline(scaleFunc1, estimatorFunc2)

    acc_score = []
    for train_index , test_index in kf.split(X):
        X_train , X_test = X.iloc[train_index,:],X.iloc[test_index,:]
        y_train , y_test = y[train_index] , y[test_index]
    
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
     
    acc = accuracy_score(y_pred , y_test)
    acc_score.append(acc)
     
    avg_acc_score = sum(acc_score)/k
    print('accuracy of each fold - {}'.format(acc_score))
    print('Avg accuracy : {}'.format(avg_acc_score))

    outcome_labels = ['Intubation False', 'Intubation True']

    #print('parameters \n', clf.get_params())
    print('score \n', clf.score(X_train, y_train))
    print('dual_coef_')
    print(clf[1].dual_coef_)
    print('intercept_')
    print(clf[1].intercept_)
    print('fit')
    print(clf.fit(X_train, y_train).n_features_in_, clf.fit(X_train, y_train).feature_names_in_)
    print('decision function \n', clf[1].decision_function(X_test))
    print('classification report \n', classification_report(y_test, y_pred, target_names=outcome_labels))
    micro_roc_auc_ovr = roc_auc_score(y_test, y_pred, multi_class="ovr", average="micro")
    print(f"Micro-averaged One-vs-Rest ROC AUC score:\n{micro_roc_auc_ovr:.2f}")
    print('\n')
    
    
print('### SMOTEN knn contin ###')
print()

X = smoten_knn_contin.drop('outcome',axis= 1)
y = smoten_knn_contin['outcome']

print('SVC rbf SVM')
mySVC = SVC(kernel='rbf', C=1.0)
print('SVC rbf SVM StandardScaler')
print()
classifyScale_dual_coef(StandardScaler(), mySVC, 5, X, y)

print('SVC rbf SVM RobustScaler')
classifyScale_dual_coef(RobustScaler(), mySVC, 5, X, y)


print('polynomial SVM C')
mySVC = SVC(kernel='poly', C=1.0)
print('polynomial SVM C StandardScaler')
print()
classifyScale_dual_coef(StandardScaler(), mySVC, 5, X, y)

print('polynomial SVM C RobustScaler')
classifyScale_dual_coef(RobustScaler(), mySVC, 5, X, y)


print('sigmoid SVM C')
mySVC = SVC(kernel='sigmoid', C=1.0)
print('sigmoid SVM C StandardScaler')
print()
classifyScale_dual_coef(StandardScaler(), mySVC, 5, X, y)

print('sigmoid SVM C RobustScaler')
classifyScale_dual_coef(RobustScaler(), mySVC, 5, X, y)



print('### SMOTEN median impute ###')
print()

X = smoten_median_imputed_contin.drop('outcome',axis= 1)
y = smoten_median_imputed_contin['outcome']

print('SVC rbf SVM')
mySVC = SVC(kernel='rbf', C=1.0)
print('SVC rbf SVM StandardScaler')
print()
classifyScale_dual_coef(StandardScaler(), mySVC, 5, X, y)

print('SVC rbf SVM RobustScaler')
classifyScale_dual_coef(RobustScaler(), mySVC, 5, X, y)


print('polynomial SVM C')
mySVC = SVC(kernel='poly', C=1.0)
print('polynomial SVM C StandardScaler')
print()
classifyScale_dual_coef(StandardScaler(), mySVC, 5, X, y)

print('polynomial SVM C RobustScaler')
classifyScale_dual_coef(RobustScaler(), mySVC, 5, X, y)


print('sigmoid SVM C')
mySVC = SVC(kernel='sigmoid', C=1.0)
print('sigmoid SVM C StandardScaler')
print()
classifyScale_dual_coef(StandardScaler(), mySVC, 5, X, y)

print('sigmoid SVM C RobustScaler')
classifyScale_dual_coef(RobustScaler(), mySVC, 5, X, y)


### SMOTEN knn contin ###

SVC rbf SVM
SVC rbf SVM StandardScaler

accuracy of each fold - [0.9793799772468714]
Avg accuracy : 0.19587599544937428
score 
 0.9796466154721274
dual_coef_
[[-1.         -0.02974006 -0.50912077 ...  1.          0.7552829
   1.        ]]
intercept_
[-0.83513246]
fit
28 ['age' 'heart_rate_min' 'heart_rate_max' 'heart_rate_mean' 'mbp_min'
 'mbp_max' 'mbp_mean' 'sbp_min' 'sbp_max' 'sbp_mean' 'dbp_min' 'dbp_max'
 'dbp_mean' 'temperature_min' 'temperature_max' 'temperature_mean'
 'glucose_min' 'glucose_max' 'wbc_min' 'wbc_max' 'creatinine_min'
 'creatinine_max' 'hemoglobin_min' 'hemoglobin_max' 'pt_min' 'pt_max'
 'urineoutput' 'gender']




decision function 
 [-0.83513246 -0.83513246 -0.83513246 ... -0.83513246 -0.83513246
 -0.83513246]
classification report 
                   precision    recall  f1-score   support

Intubation False       0.96      1.00      0.98      6929
 Intubation True       1.00      0.96      0.98      7135

        accuracy                           0.98     14064
       macro avg       0.98      0.98      0.98     14064
    weighted avg       0.98      0.98      0.98     14064

Micro-averaged One-vs-Rest ROC AUC score:
0.98


SVC rbf SVM RobustScaler
accuracy of each fold - [0.9766069397042093]
Avg accuracy : 0.19532138794084186
score 
 0.9785800625711035
dual_coef_
[[-0.26002272 -1.         -1.         ...  1.          1.
   0.23119054]]
intercept_
[-0.86164559]
fit
28 ['age' 'heart_rate_min' 'heart_rate_max' 'heart_rate_mean' 'mbp_min'
 'mbp_max' 'mbp_mean' 'sbp_min' 'sbp_max' 'sbp_mean' 'dbp_min' 'dbp_max'
 'dbp_mean' 'temperature_min' 'temperature_max' 'temperature_mean'
 'glucose_min' 'glu



decision function 
 [-0.86164559 -0.86164559 -0.86164559 ... -0.86164559 -0.86164559
 -0.86164559]
classification report 
                   precision    recall  f1-score   support

Intubation False       0.96      0.99      0.98      7058
 Intubation True       0.99      0.96      0.98      7006

        accuracy                           0.98     14064
       macro avg       0.98      0.98      0.98     14064
    weighted avg       0.98      0.98      0.98     14064

Micro-averaged One-vs-Rest ROC AUC score:
0.98


polynomial SVM C
polynomial SVM C StandardScaler

accuracy of each fold - [0.9731939704209329]
Avg accuracy : 0.19463879408418658
score 
 0.9768202502844141
dual_coef_
[[-1.         -1.         -1.         ...  1.          1.
   0.78062641]]
intercept_
[-0.36558]
fit
28 ['age' 'heart_rate_min' 'heart_rate_max' 'heart_rate_mean' 'mbp_min'
 'mbp_max' 'mbp_mean' 'sbp_min' 'sbp_max' 'sbp_mean' 'dbp_min' 'dbp_max'
 'dbp_mean' 'temperature_min' 'temperature_max' 'temperature_mea



decision function 
 [1.95206785e+07 3.24696696e+07 3.16686291e+06 ... 2.43513268e+05
 8.69185818e+03 1.82319785e+06]
classification report 
                   precision    recall  f1-score   support

Intubation False       0.97      0.98      0.97      6984
 Intubation True       0.98      0.97      0.97      7080

        accuracy                           0.97     14064
       macro avg       0.97      0.97      0.97     14064
    weighted avg       0.97      0.97      0.97     14064

Micro-averaged One-vs-Rest ROC AUC score:
0.97


polynomial SVM C RobustScaler
accuracy of each fold - [0.9443970420932878]
Avg accuracy : 0.18887940841865755
score 
 0.9478811149032992
dual_coef_
[[-1. -1. -1. ...  1.  1.  1.]]
intercept_
[0.63552788]
fit
28 ['age' 'heart_rate_min' 'heart_rate_max' 'heart_rate_mean' 'mbp_min'
 'mbp_max' 'mbp_mean' 'sbp_min' 'sbp_max' 'sbp_mean' 'dbp_min' 'dbp_max'
 'dbp_mean' 'temperature_min' 'temperature_max' 'temperature_mean'
 'glucose_min' 'glucose_max' 'wbc_min' 



decision function 
 [-8.63197850e+08 -2.02870614e+08 -2.42784932e+08 ... -1.00330831e+06
 -5.01386452e+05 -4.08180305e+07]
classification report 
                   precision    recall  f1-score   support

Intubation False       0.96      0.93      0.94      7039
 Intubation True       0.93      0.96      0.95      7025

        accuracy                           0.94     14064
       macro avg       0.94      0.94      0.94     14064
    weighted avg       0.94      0.94      0.94     14064

Micro-averaged One-vs-Rest ROC AUC score:
0.94


sigmoid SVM C
sigmoid SVM C StandardScaler

accuracy of each fold - [0.9101251422070534]
Avg accuracy : 0.1820250284414107
score 
 0.9132536973833902
dual_coef_
[[-1. -1. -1. ...  1.  1.  1.]]
intercept_
[-59.510913]
fit
28 ['age' 'heart_rate_min' 'heart_rate_max' 'heart_rate_mean' 'mbp_min'
 'mbp_max' 'mbp_mean' 'sbp_min' 'sbp_max' 'sbp_mean' 'dbp_min' 'dbp_max'
 'dbp_mean' 'temperature_min' 'temperature_max' 'temperature_mean'
 'glucose_min' 'gluc



decision function 
 [-1462.90245154 -1322.92158534  -921.79467249 ...  -587.46770598
  -805.95449777  -410.96605094]
classification report 
                   precision    recall  f1-score   support

Intubation False       0.91      0.91      0.91      6983
 Intubation True       0.91      0.91      0.91      7081

        accuracy                           0.91     14064
       macro avg       0.91      0.91      0.91     14064
    weighted avg       0.91      0.91      0.91     14064

Micro-averaged One-vs-Rest ROC AUC score:
0.91


sigmoid SVM C RobustScaler
accuracy of each fold - [0.824018771331058]
Avg accuracy : 0.1648037542662116
score 
 0.8288360352673493
dual_coef_
[[-1. -1. -1. ...  1.  1.  1.]]
intercept_
[39.18278452]
fit
28 ['age' 'heart_rate_min' 'heart_rate_max' 'heart_rate_mean' 'mbp_min'
 'mbp_max' 'mbp_mean' 'sbp_min' 'sbp_max' 'sbp_mean' 'dbp_min' 'dbp_max'
 'dbp_mean' 'temperature_min' 'temperature_max' 'temperature_mean'
 'glucose_min' 'glucose_max' 'wbc_min' 'wbc



decision function 
 [-5597.65986198 -4819.19873716 -5217.53388246 ... -4086.28950285
 -5374.63299773 -5572.58771844]
classification report 
                   precision    recall  f1-score   support

Intubation False       0.82      0.82      0.82      6972
 Intubation True       0.82      0.83      0.83      7092

        accuracy                           0.82     14064
       macro avg       0.82      0.82      0.82     14064
    weighted avg       0.82      0.82      0.82     14064

Micro-averaged One-vs-Rest ROC AUC score:
0.82


### SMOTEN median impute ###

SVC rbf SVM
SVC rbf SVM StandardScaler

accuracy of each fold - [0.979806598407281]
Avg accuracy : 0.1959613196814562
score 
 0.9795044084186576
dual_coef_
[[-0.69030547 -0.34348538 -1.         ...  1.          1.
   1.        ]]
intercept_
[-0.84091094]
fit
28 ['age' 'heart_rate_min' 'heart_rate_max' 'heart_rate_mean' 'mbp_min'
 'mbp_max' 'mbp_mean' 'sbp_min' 'sbp_max' 'sbp_mean' 'dbp_min' 'dbp_max'
 'dbp_mean' 'temperature_



decision function 
 [-0.84091094 -0.84091094 -0.84091094 ... -0.84091094 -0.84091094
 -0.84091094]
classification report 
                   precision    recall  f1-score   support

Intubation False       0.96      1.00      0.98      7105
 Intubation True       1.00      0.96      0.98      6959

        accuracy                           0.98     14064
       macro avg       0.98      0.98      0.98     14064
    weighted avg       0.98      0.98      0.98     14064

Micro-averaged One-vs-Rest ROC AUC score:
0.98


SVC rbf SVM RobustScaler
accuracy of each fold - [0.9780290102389079]
Avg accuracy : 0.19560580204778158
score 
 0.9776023890784983
dual_coef_
[[-1.         -1.         -1.         ...  1.          0.09899516
   1.        ]]
intercept_
[-0.85669597]
fit
28 ['age' 'heart_rate_min' 'heart_rate_max' 'heart_rate_mean' 'mbp_min'
 'mbp_max' 'mbp_mean' 'sbp_min' 'sbp_max' 'sbp_mean' 'dbp_min' 'dbp_max'
 'dbp_mean' 'temperature_min' 'temperature_max' 'temperature_mean'
 'glucose_m



decision function 
 [-0.85669597 -0.85669597 -0.85669597 ... -0.85669597 -0.85669597
 -0.85669597]
classification report 
                   precision    recall  f1-score   support

Intubation False       0.96      0.99      0.98      7041
 Intubation True       0.99      0.96      0.98      7023

        accuracy                           0.98     14064
       macro avg       0.98      0.98      0.98     14064
    weighted avg       0.98      0.98      0.98     14064

Micro-averaged One-vs-Rest ROC AUC score:
0.98


polynomial SVM C
polynomial SVM C StandardScaler

accuracy of each fold - [0.9728384527872582]
Avg accuracy : 0.19456769055745166
score 
 0.9771046643913538
dual_coef_
[[-1.         -1.         -1.         ...  1.          1.
   0.96187275]]
intercept_
[-0.37255201]
fit
28 ['age' 'heart_rate_min' 'heart_rate_max' 'heart_rate_mean' 'mbp_min'
 'mbp_max' 'mbp_mean' 'sbp_min' 'sbp_max' 'sbp_mean' 'dbp_min' 'dbp_max'
 'dbp_mean' 'temperature_min' 'temperature_max' 'temperature_



decision function 
 [25047038.73048314  9797564.38656458  4622709.65583598 ...
   -32860.9347856   -285441.54199411  -107124.75318177]
classification report 
                   precision    recall  f1-score   support

Intubation False       0.96      0.98      0.97      6935
 Intubation True       0.98      0.96      0.97      7129

        accuracy                           0.97     14064
       macro avg       0.97      0.97      0.97     14064
    weighted avg       0.97      0.97      0.97     14064

Micro-averaged One-vs-Rest ROC AUC score:
0.97


polynomial SVM C RobustScaler
accuracy of each fold - [0.936504550625711]
Avg accuracy : 0.18730091012514222
score 
 0.9370022753128555
dual_coef_
[[-1. -1. -1. ...  1.  1.  1.]]
intercept_
[0.65660173]
fit
28 ['age' 'heart_rate_min' 'heart_rate_max' 'heart_rate_mean' 'mbp_min'
 'mbp_max' 'mbp_mean' 'sbp_min' 'sbp_max' 'sbp_mean' 'dbp_min' 'dbp_max'
 'dbp_mean' 'temperature_min' 'temperature_max' 'temperature_mean'
 'glucose_min' 'glucos



decision function 
 [-5.07086170e+08 -3.22865753e+08 -1.13453508e+07 ... -9.74614346e+06
 -2.53960144e+07 -1.88920435e+07]
classification report 
                   precision    recall  f1-score   support

Intubation False       0.96      0.91      0.93      6948
 Intubation True       0.91      0.97      0.94      7116

        accuracy                           0.94     14064
       macro avg       0.94      0.94      0.94     14064
    weighted avg       0.94      0.94      0.94     14064

Micro-averaged One-vs-Rest ROC AUC score:
0.94


sigmoid SVM C
sigmoid SVM C StandardScaler

accuracy of each fold - [0.9106939704209329]
Avg accuracy : 0.1821387940841866
score 
 0.910160693970421
dual_coef_
[[-1. -1. -1. ...  1.  1.  1.]]
intercept_
[-58.05703406]
fit
28 ['age' 'heart_rate_min' 'heart_rate_max' 'heart_rate_mean' 'mbp_min'
 'mbp_max' 'mbp_mean' 'sbp_min' 'sbp_max' 'sbp_mean' 'dbp_min' 'dbp_max'
 'dbp_mean' 'temperature_min' 'temperature_max' 'temperature_mean'
 'glucose_min' 'glu



decision function 
 [ -730.72202174 -1004.75742786 -1210.52914849 ...   -79.88026755
  -976.45484276  -809.69107749]
classification report 
                   precision    recall  f1-score   support

Intubation False       0.91      0.91      0.91      7110
 Intubation True       0.91      0.91      0.91      6954

        accuracy                           0.91     14064
       macro avg       0.91      0.91      0.91     14064
    weighted avg       0.91      0.91      0.91     14064

Micro-averaged One-vs-Rest ROC AUC score:
0.91


sigmoid SVM C RobustScaler
accuracy of each fold - [0.8312002275312855]
Avg accuracy : 0.1662400455062571
score 
 0.8269162400455062
dual_coef_
[[-1. -1. -1. ...  1.  1.  1.]]
intercept_
[44.82173713]
fit
28 ['age' 'heart_rate_min' 'heart_rate_max' 'heart_rate_mean' 'mbp_min'
 'mbp_max' 'mbp_mean' 'sbp_min' 'sbp_max' 'sbp_mean' 'dbp_min' 'dbp_max'
 'dbp_mean' 'temperature_min' 'temperature_max' 'temperature_mean'
 'glucose_min' 'glucose_max' 'wbc_min' 'wb



decision function 
 [-5518.63340204 -5744.46832909 -5382.63706783 ... -5487.46794159
 -5153.01471629 -5191.36359038]
classification report 
                   precision    recall  f1-score   support

Intubation False       0.83      0.83      0.83      6950
 Intubation True       0.83      0.84      0.83      7114

        accuracy                           0.83     14064
       macro avg       0.83      0.83      0.83     14064
    weighted avg       0.83      0.83      0.83     14064

Micro-averaged One-vs-Rest ROC AUC score:
0.83


