In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.svm import SVC
from sklearn.linear_model import SGDClassifier, SGDRegressor, RidgeCV, LassoCV
from sklearn.preprocessing import OrdinalEncoder, LabelEncoder, StandardScaler, RobustScaler
from sklearn.model_selection import train_test_split, KFold
from sklearn.impute import KNNImputer, SimpleImputer
from sklearn.pipeline import make_pipeline, Pipeline
from sklearn.metrics import accuracy_score, classification_report, roc_auc_score
from imblearn.over_sampling import SMOTEN

smoten_median_imputed_less_40 = pd.read_csv('smoten_median_imputed_less_40.csv', index_col=False)

pd.set_option("display.max_columns", None)


def classifyScale_dual_coef(scaleFunc1, estimatorFunc2, k_fold_int, x_array, y_array):
    k = k_fold_int
    kf = KFold(n_splits=k, shuffle=True, random_state=None)    
    clf = make_pipeline(scaleFunc1, estimatorFunc2)

    acc_score = []
    for train_index , test_index in kf.split(X):
        X_train , X_test = X.iloc[train_index,:],X.iloc[test_index,:]
        y_train , y_test = y[train_index] , y[test_index]
    
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
     
    acc = accuracy_score(y_pred , y_test)
    acc_score.append(acc)
     
    avg_acc_score = sum(acc_score)/k
    print('accuracy of each fold - {}'.format(acc_score))
    print('Avg accuracy : {}'.format(avg_acc_score))

    outcome_labels = ['Intubation False', 'Intubation True']

    #print('parameters \n', clf.get_params())
    print('score \n', clf.score(X_train, y_train))
    print('dual_coef_')
    print(clf[1].dual_coef_)
    print('intercept_')
    print(clf[1].intercept_)
    print('fit')
    print(clf.fit(X_train, y_train).n_features_in_, clf.fit(X_train, y_train).feature_names_in_)
    print('decision function \n', clf[1].decision_function(X_test))
    print('classification report \n', classification_report(y_test, y_pred, target_names=outcome_labels))
    micro_roc_auc_ovr = roc_auc_score(y_test, y_pred, multi_class="ovr", average="micro")
    print(f"Micro-averaged One-vs-Rest ROC AUC score:\n{micro_roc_auc_ovr:.2f}")
    print('\n')
    
    

print('### SMOTEN median impute ###')
print()

X = smoten_median_imputed_less_40.drop('outcome',axis= 1)
y = smoten_median_imputed_less_40['outcome']

print('SVC rbf SVM')
mySVC = SVC(kernel='rbf', C=1.0)
print('SVC rbf SVM StandardScaler')
print()
classifyScale_dual_coef(StandardScaler(), mySVC, 5, X, y)

print('SVC rbf SVM RobustScaler')
classifyScale_dual_coef(RobustScaler(), mySVC, 5, X, y)


print('polynomial SVM C')
mySVC = SVC(kernel='poly', C=1.0)
print('polynomial SVM C StandardScaler')
print()
classifyScale_dual_coef(StandardScaler(), mySVC, 5, X, y)

print('polynomial SVM C RobustScaler')
classifyScale_dual_coef(RobustScaler(), mySVC, 5, X, y)


print('sigmoid SVM C')
mySVC = SVC(kernel='sigmoid', C=1.0)
print('sigmoid SVM C StandardScaler')
print()
classifyScale_dual_coef(StandardScaler(), mySVC, 5, X, y)

print('sigmoid SVM C RobustScaler')
classifyScale_dual_coef(RobustScaler(), mySVC, 5, X, y)


### SMOTEN median impute ###

SVC rbf SVM
SVC rbf SVM StandardScaler

accuracy of each fold - [0.9788822525597269]
Avg accuracy : 0.1957764505119454
score 
 0.9806242889647326
dual_coef_
[[-0.17875815 -0.123554   -0.23404695 ...  1.          1.
   1.        ]]
intercept_
[-0.85158954]
fit
33 ['gender' 'age' 'heart_rate_min' 'heart_rate_max' 'heart_rate_mean'
 'mbp_min' 'mbp_max' 'mbp_mean' 'sbp_min' 'sbp_max' 'sbp_mean' 'dbp_min'
 'dbp_max' 'dbp_mean' 'temperature_min' 'temperature_max'
 'temperature_mean' 'glucose_min' 'glucose_max' 'wbc_min' 'wbc_max'
 'creatinine_min' 'creatinine_max' 'hemoglobin_min' 'hemoglobin_max'
 'pt_min' 'pt_max' 'urineoutput' 'sofa_coagulation' 'sofa_cardiovascular'
 'sofa_cns' 'sofa_renal' 'charlson_comorbidity_index']




decision function 
 [-0.85158954 -0.85158954 -0.85158954 ... -0.85158954 -0.85158954
 -0.85158954]
classification report 
                   precision    recall  f1-score   support

Intubation False       0.96      1.00      0.98      7044
 Intubation True       1.00      0.96      0.98      7020

        accuracy                           0.98     14064
       macro avg       0.98      0.98      0.98     14064
    weighted avg       0.98      0.98      0.98     14064

Micro-averaged One-vs-Rest ROC AUC score:
0.98


SVC rbf SVM RobustScaler
accuracy of each fold - [0.9792377701934016]
Avg accuracy : 0.19584755403868032
score 
 0.9797710466439136
dual_coef_
[[-0.57513245 -1.         -1.         ...  1.          0.72674712
   0.03967764]]
intercept_
[-0.87772388]
fit
33 ['gender' 'age' 'heart_rate_min' 'heart_rate_max' 'heart_rate_mean'
 'mbp_min' 'mbp_max' 'mbp_mean' 'sbp_min' 'sbp_max' 'sbp_mean' 'dbp_min'
 'dbp_max' 'dbp_mean' 'temperature_min' 'temperature_max'
 'temperature_mean' '



decision function 
 [-0.87772388 -0.87772388 -0.87772388 ... -0.87772388 -0.87772388
 -0.87772388]
classification report 
                   precision    recall  f1-score   support

Intubation False       0.96      1.00      0.98      7019
 Intubation True       1.00      0.96      0.98      7045

        accuracy                           0.98     14064
       macro avg       0.98      0.98      0.98     14064
    weighted avg       0.98      0.98      0.98     14064

Micro-averaged One-vs-Rest ROC AUC score:
0.98


polynomial SVM C
polynomial SVM C StandardScaler

accuracy of each fold - [0.9755403868031854]
Avg accuracy : 0.1951080773606371
score 
 0.9799310295790671
dual_coef_
[[-1.         -1.         -1.         ...  1.          0.62289891
   1.        ]]
intercept_
[-0.21009424]
fit
33 ['gender' 'age' 'heart_rate_min' 'heart_rate_max' 'heart_rate_mean'
 'mbp_min' 'mbp_max' 'mbp_mean' 'sbp_min' 'sbp_max' 'sbp_mean' 'dbp_min'
 'dbp_max' 'dbp_mean' 'temperature_min' 'temperature_ma



decision function 
 [ 5.17177222e+06  6.24069288e+06  1.53782682e+08 ... -3.97934592e+02
  7.16121416e+04  6.75394765e+05]
classification report 
                   precision    recall  f1-score   support

Intubation False       0.97      0.99      0.98      7077
 Intubation True       0.99      0.96      0.98      6987

        accuracy                           0.98     14064
       macro avg       0.98      0.98      0.98     14064
    weighted avg       0.98      0.98      0.98     14064

Micro-averaged One-vs-Rest ROC AUC score:
0.98


polynomial SVM C RobustScaler
accuracy of each fold - [0.9473833902161547]
Avg accuracy : 0.18947667804323093
score 
 0.9522717576791809
dual_coef_
[[-1. -1. -1. ...  1.  1.  1.]]
intercept_
[0.67236238]
fit
33 ['gender' 'age' 'heart_rate_min' 'heart_rate_max' 'heart_rate_mean'
 'mbp_min' 'mbp_max' 'mbp_mean' 'sbp_min' 'sbp_max' 'sbp_mean' 'dbp_min'
 'dbp_max' 'dbp_mean' 'temperature_min' 'temperature_max'
 'temperature_mean' 'glucose_min' 'glucose_



decision function 
 [-5.50825670e+07 -1.93233608e+06 -1.19217828e+08 ... -4.27866688e+05
 -5.83852691e+06 -5.60253414e+06]
classification report 
                   precision    recall  f1-score   support

Intubation False       0.97      0.93      0.95      7061
 Intubation True       0.93      0.97      0.95      7003

        accuracy                           0.95     14064
       macro avg       0.95      0.95      0.95     14064
    weighted avg       0.95      0.95      0.95     14064

Micro-averaged One-vs-Rest ROC AUC score:
0.95


sigmoid SVM C
sigmoid SVM C StandardScaler

accuracy of each fold - [0.9242747440273038]
Avg accuracy : 0.18485494880546077
score 
 0.9264078498293515
dual_coef_
[[-1. -1. -1. ...  1.  1.  1.]]
intercept_
[-31.52908839]
fit
33 ['gender' 'age' 'heart_rate_min' 'heart_rate_max' 'heart_rate_mean'
 'mbp_min' 'mbp_max' 'mbp_mean' 'sbp_min' 'sbp_max' 'sbp_mean' 'dbp_min'
 'dbp_max' 'dbp_mean' 'temperature_min' 'temperature_max'
 'temperature_mean' 'glucos



decision function 
 [-421.1908719  -704.41179667 -834.58995542 ... -302.92009436 -729.55930035
 -787.67272381]
classification report 
                   precision    recall  f1-score   support

Intubation False       0.92      0.92      0.92      6872
 Intubation True       0.93      0.92      0.93      7192

        accuracy                           0.92     14064
       macro avg       0.92      0.92      0.92     14064
    weighted avg       0.92      0.92      0.92     14064

Micro-averaged One-vs-Rest ROC AUC score:
0.92


sigmoid SVM C RobustScaler
accuracy of each fold - [0.8312002275312855]
Avg accuracy : 0.1662400455062571
score 
 0.8323912116040956
dual_coef_
[[-1. -1. -1. ...  1.  1.  1.]]
intercept_
[48.851246]
fit
33 ['gender' 'age' 'heart_rate_min' 'heart_rate_max' 'heart_rate_mean'
 'mbp_min' 'mbp_max' 'mbp_mean' 'sbp_min' 'sbp_max' 'sbp_mean' 'dbp_min'
 'dbp_max' 'dbp_mean' 'temperature_min' 'temperature_max'
 'temperature_mean' 'glucose_min' 'glucose_max' 'wbc_min' 'w



decision function 
 [-4821.37358703 -5142.98466537 -5021.89164161 ... -5283.48443577
 -5321.73034794 -4919.93194947]
classification report 
                   precision    recall  f1-score   support

Intubation False       0.83      0.83      0.83      6938
 Intubation True       0.83      0.83      0.83      7126

        accuracy                           0.83     14064
       macro avg       0.83      0.83      0.83     14064
    weighted avg       0.83      0.83      0.83     14064

Micro-averaged One-vs-Rest ROC AUC score:
0.83


