In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.svm import SVC
from sklearn.linear_model import SGDClassifier, SGDRegressor, RidgeCV, LassoCV
from sklearn.preprocessing import OrdinalEncoder, LabelEncoder, StandardScaler, RobustScaler
from sklearn.model_selection import train_test_split, KFold
from sklearn.impute import KNNImputer, SimpleImputer
from sklearn.pipeline import make_pipeline, Pipeline
from sklearn.metrics import accuracy_score, classification_report, roc_auc_score
from imblearn.over_sampling import SMOTEN

smoten_noncontin = pd.read_csv('smoten_noncontin.csv', index_col=False)
smoten_noncontin = pd.read_csv('smoten_noncontin.csv', index_col=False)

pd.set_option("display.max_columns", None)


def classifyScale_dual_coef(scaleFunc1, estimatorFunc2, k_fold_int, x_array, y_array):
    k = k_fold_int
    kf = KFold(n_splits=k, shuffle=True, random_state=None)    
    clf = make_pipeline(scaleFunc1, estimatorFunc2)

    acc_score = []
    for train_index , test_index in kf.split(X):
        X_train , X_test = X.iloc[train_index,:],X.iloc[test_index,:]
        y_train , y_test = y[train_index] , y[test_index]
    
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
     
    acc = accuracy_score(y_pred , y_test)
    acc_score.append(acc)
     
    avg_acc_score = sum(acc_score)/k
    print('accuracy of each fold - {}'.format(acc_score))
    print('Avg accuracy : {}'.format(avg_acc_score))

    outcome_labels = ['Intubation False', 'Intubation True']

    #print('parameters \n', clf.get_params())
    print('score \n', clf.score(X_train, y_train))
    print('dual_coef_')
    print(clf[1].dual_coef_)
    print('intercept_')
    print(clf[1].intercept_)
    print('fit')
    print(clf.fit(X_train, y_train).n_features_in_, clf.fit(X_train, y_train).feature_names_in_)
    print('decision function \n', clf[1].decision_function(X_test))
    print('classification report \n', classification_report(y_test, y_pred, target_names=outcome_labels))
    micro_roc_auc_ovr = roc_auc_score(y_test, y_pred, multi_class="ovr", average="micro")
    print(f"Micro-averaged One-vs-Rest ROC AUC score:\n{micro_roc_auc_ovr:.2f}")
    print('\n')
    
    
print('### SMOTEN median contin ###')
print()

X = smoten_noncontin.drop('outcome',axis= 1)
y = smoten_noncontin['outcome']

print('SVC rbf SVM')
mySVC = SVC(kernel='rbf', C=1.0)
print('SVC rbf SVM StandardScaler')
print()
classifyScale_dual_coef(StandardScaler(), mySVC, 5, X, y)

print('SVC rbf SVM RobustScaler')
classifyScale_dual_coef(RobustScaler(), mySVC, 5, X, y)


print('polynomial SVM C')
mySVC = SVC(kernel='poly', C=1.0)
print('polynomial SVM C StandardScaler')
print()
classifyScale_dual_coef(StandardScaler(), mySVC, 5, X, y)

print('polynomial SVM C RobustScaler')
classifyScale_dual_coef(RobustScaler(), mySVC, 5, X, y)


print('sigmoid SVM C')
mySVC = SVC(kernel='sigmoid', C=1.0)
print('sigmoid SVM C StandardScaler')
print()
classifyScale_dual_coef(StandardScaler(), mySVC, 5, X, y)

print('sigmoid SVM C RobustScaler')
classifyScale_dual_coef(RobustScaler(), mySVC, 5, X, y)



### SMOTEN median contin ###

SVC rbf SVM
SVC rbf SVM StandardScaler

accuracy of each fold - [0.7655005688282139]
Avg accuracy : 0.15310011376564278
score 
 0.7611810295790671
dual_coef_
[[-1. -1. -1. ...  1.  1.  1.]]
intercept_
[-0.99314966]
fit
6 ['sofa_coagulation' 'sofa_cardiovascular' 'sofa_cns' 'sofa_renal' 'gender'
 'charlson_comorbidity_index']




decision function 
 [-1.54121618 -2.08953096 -0.99072934 ...  0.88346498 -1.9798762
  0.2383862 ]
classification report 
                   precision    recall  f1-score   support

Intubation False       0.76      0.77      0.77      7033
 Intubation True       0.77      0.76      0.76      7031

        accuracy                           0.77     14064
       macro avg       0.77      0.77      0.77     14064
    weighted avg       0.77      0.77      0.77     14064

Micro-averaged One-vs-Rest ROC AUC score:
0.77


SVC rbf SVM RobustScaler
accuracy of each fold - [0.7637229806598407]
Avg accuracy : 0.15274459613196814
score 
 0.762709755403868
dual_coef_
[[-1. -1. -1. ...  1.  1.  1.]]
intercept_
[-0.98043084]
fit
6 ['sofa_coagulation' 'sofa_cardiovascular' 'sofa_cns' 'sofa_renal' 'gender'
 'charlson_comorbidity_index']




decision function 
 [-1.02034294 -1.43721661 -0.10401154 ... -0.97906806 -1.21342094
 -0.24520644]
classification report 
                   precision    recall  f1-score   support

Intubation False       0.76      0.77      0.77      7078
 Intubation True       0.76      0.76      0.76      6986

        accuracy                           0.76     14064
       macro avg       0.76      0.76      0.76     14064
    weighted avg       0.76      0.76      0.76     14064

Micro-averaged One-vs-Rest ROC AUC score:
0.76


polynomial SVM C
polynomial SVM C StandardScaler

accuracy of each fold - [0.7100398179749715]
Avg accuracy : 0.1420079635949943
score 
 0.7038715870307167
dual_coef_
[[-1. -1. -1. ...  1.  1.  1.]]
intercept_
[-0.14707672]
fit
6 ['sofa_coagulation' 'sofa_cardiovascular' 'sofa_cns' 'sofa_renal' 'gender'
 'charlson_comorbidity_index']




decision function 
 [-0.94155751 -0.06341998 -5.62118812 ... -3.87417824 -5.38122553
 -2.64519805]
classification report 
                   precision    recall  f1-score   support

Intubation False       0.68      0.80      0.74      7113
 Intubation True       0.75      0.61      0.68      6951

        accuracy                           0.71     14064
       macro avg       0.72      0.71      0.71     14064
    weighted avg       0.72      0.71      0.71     14064

Micro-averaged One-vs-Rest ROC AUC score:
0.71


polynomial SVM C RobustScaler
accuracy of each fold - [0.6926194539249146]
Avg accuracy : 0.13852389078498292
score 
 0.6932060580204779
dual_coef_
[[-1.         -1.         -1.         ...  1.          0.77150268
   1.        ]]
intercept_
[-0.36438751]
fit
6 ['sofa_coagulation' 'sofa_cardiovascular' 'sofa_cns' 'sofa_renal' 'gender'
 'charlson_comorbidity_index']




decision function 
 [ 7.51520054  8.19504784 20.9017487  ... 41.61667492 75.14194025
  9.97242704]
classification report 
                   precision    recall  f1-score   support

Intubation False       0.66      0.82      0.73      7075
 Intubation True       0.75      0.57      0.65      6989

        accuracy                           0.69     14064
       macro avg       0.70      0.69      0.69     14064
    weighted avg       0.70      0.69      0.69     14064

Micro-averaged One-vs-Rest ROC AUC score:
0.69


sigmoid SVM C
sigmoid SVM C StandardScaler

accuracy of each fold - [0.5843287827076223]
Avg accuracy : 0.11686575654152447
score 
 0.5917768771331058
dual_coef_
[[-1. -1. -1. ...  1.  1.  1.]]
intercept_
[23.91218725]
fit
6 ['sofa_coagulation' 'sofa_cardiovascular' 'sofa_cns' 'sofa_renal' 'gender'
 'charlson_comorbidity_index']




decision function 
 [-540.70542523  118.47140332 -405.5521411  ...  -21.78491829 -573.76097934
  -72.20665292]
classification report 
                   precision    recall  f1-score   support

Intubation False       0.59      0.59      0.59      7058
 Intubation True       0.58      0.58      0.58      7006

        accuracy                           0.58     14064
       macro avg       0.58      0.58      0.58     14064
    weighted avg       0.58      0.58      0.58     14064

Micro-averaged One-vs-Rest ROC AUC score:
0.58


sigmoid SVM C RobustScaler
accuracy of each fold - [0.5311433447098977]
Avg accuracy : 0.10622866894197953
score 
 0.5381470420932878
dual_coef_
[[-1. -1. -1. ...  1.  1.  1.]]
intercept_
[-0.99970887]
fit
6 ['sofa_coagulation' 'sofa_cardiovascular' 'sofa_cns' 'sofa_renal' 'gender'
 'charlson_comorbidity_index']




decision function 
 [ 627.14508645 -323.7791666  -112.86716644 ...  994.35473727  424.68229786
 1358.26819574]
classification report 
                   precision    recall  f1-score   support

Intubation False       0.53      0.53      0.53      6993
 Intubation True       0.53      0.53      0.53      7071

        accuracy                           0.53     14064
       macro avg       0.53      0.53      0.53     14064
    weighted avg       0.53      0.53      0.53     14064

Micro-averaged One-vs-Rest ROC AUC score:
0.53


