In [3]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.svm import SVC
from sklearn.linear_model import SGDClassifier, SGDRegressor, RidgeCV, LassoCV
from sklearn.preprocessing import OrdinalEncoder, LabelEncoder, StandardScaler, RobustScaler
from sklearn.model_selection import train_test_split, KFold
from sklearn.impute import KNNImputer, SimpleImputer
from sklearn.pipeline import make_pipeline, Pipeline
from sklearn.metrics import accuracy_score, classification_report, roc_auc_score
from imblearn.over_sampling import SMOTEN

smoten_noncontin = pd.read_csv('smoten_noncontin.csv', index_col=False)

pd.set_option("display.max_columns", None)

def classifyScale_coef(scaleFunc1, estimatorFunc2, k_fold_int, x_array, y_array):
    k = k_fold_int
    kf = KFold(n_splits=k, shuffle=True, random_state=None)    
    clf = make_pipeline(scaleFunc1, estimatorFunc2)

    acc_score = []
    for train_index , test_index in kf.split(X):
        X_train , X_test = X.iloc[train_index,:],X.iloc[test_index,:]
        y_train , y_test = y[train_index] , y[test_index]
    
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
     
    acc = accuracy_score(y_pred , y_test)
    acc_score.append(acc)
     
    avg_acc_score = sum(acc_score)/k
    print('accuracy of each fold - {}'.format(acc_score))
    print('Avg accuracy : {}'.format(avg_acc_score))

    outcome_labels = ['Intubation False', 'Intubation True']

    #print('parameters \n', clf.get_params())
    print('score \n', clf.score(X_train, y_train))
    print('coef_')
    print(clf[1].coef_)
    print('intercept_')
    print(clf[1].intercept_)
    print('fit')
    print(clf.fit(X_train, y_train).n_features_in_, clf.fit(X_train, y_train).feature_names_in_)
    print('decision function \n', clf[1].decision_function(X_test))
    print('classification report \n', classification_report(y_test, y_pred, target_names=outcome_labels))
    micro_roc_auc_ovr = roc_auc_score(y_test, y_pred, multi_class="ovr", average="micro")
    print(f"Micro-averaged One-vs-Rest ROC AUC score:\n{micro_roc_auc_ovr:.2f}")
    print('\n')

    
print('### SMOTEN median noncontin###')
print()
sgdClass = SGDClassifier(loss='hinge', penalty='l2', alpha=0.0001, l1_ratio=0.15, fit_intercept=True, max_iter=1000, tol=0.001, shuffle=True, verbose=0, epsilon=0.1, n_jobs=None, random_state=None, learning_rate='optimal', eta0=0.0, power_t=0.5, early_stopping=False, validation_fraction=0.1, n_iter_no_change=5, class_weight=None, warm_start=False, average=False)
X = smoten_noncontin.drop('outcome',axis= 1)
y = smoten_noncontin['outcome']

print('linear SGDClassifier SVM StandardScaler')
print()
classifyScale_coef(StandardScaler(), sgdClass, 5, X, y)

print('linear SGDClassifier SVM RobustScaler')
classifyScale_coef(RobustScaler(), sgdClass, 5, X, y)




### SMOTEN median noncontin###

linear SGDClassifier SVM StandardScaler

accuracy of each fold - [0.6987343572241184]
Avg accuracy : 0.13974687144482367
score 
 0.6997475824800911
coef_
[[ 0.01766996  0.12610254  1.01912707 -0.04806514  0.12721808 -0.1386083 ]]
intercept_
[-0.06886298]
fit
6 ['sofa_coagulation' 'sofa_cardiovascular' 'sofa_cns' 'sofa_renal' 'gender'
 'charlson_comorbidity_index']
decision function 
 [-0.24144354  0.67816545 -0.52778204 ... -0.39131535  2.82163878
  0.6173966 ]
classification report 
                   precision    recall  f1-score   support

Intubation False       0.66      0.81      0.73      7006
 Intubation True       0.76      0.59      0.66      7058

        accuracy                           0.70     14064
       macro avg       0.71      0.70      0.70     14064
    weighted avg       0.71      0.70      0.69     14064

Micro-averaged One-vs-Rest ROC AUC score:
0.70


linear SGDClassifier SVM RobustScaler
accuracy of each fold - [0.6853668941979



6 ['sofa_coagulation' 'sofa_cardiovascular' 'sofa_cns' 'sofa_renal' 'gender'
 'charlson_comorbidity_index']
decision function 
 [-0.55968235  1.24177658  1.2925219  ...  3.40691031  3.45765563
  3.63526426]
classification report 
                   precision    recall  f1-score   support

Intubation False       0.65      0.81      0.72      6968
 Intubation True       0.75      0.56      0.64      7096

        accuracy                           0.69     14064
       macro avg       0.70      0.69      0.68     14064
    weighted avg       0.70      0.69      0.68     14064

Micro-averaged One-vs-Rest ROC AUC score:
0.69






no smoting only imputed

In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.svm import SVC
from sklearn.linear_model import SGDClassifier, SGDRegressor, RidgeCV, LassoCV
from sklearn.preprocessing import OrdinalEncoder, LabelEncoder, StandardScaler, RobustScaler
from sklearn.model_selection import train_test_split, KFold
from sklearn.impute import KNNImputer, SimpleImputer
from sklearn.pipeline import make_pipeline, Pipeline
from sklearn.metrics import accuracy_score, classification_report, roc_auc_score
from imblearn.over_sampling import SMOTEN

median_imputed_noncontin = pd.read_csv('median_imputed_noncontin.csv', index_col=False)

pd.set_option("display.max_columns", None)

def classifyScale_coef(scaleFunc1, estimatorFunc2, k_fold_int, x_array, y_array):
    k = k_fold_int
    kf = KFold(n_splits=k, shuffle=True, random_state=None)    
    clf = make_pipeline(scaleFunc1, estimatorFunc2)

    acc_score = []
    for train_index , test_index in kf.split(X):
        X_train , X_test = X.iloc[train_index,:],X.iloc[test_index,:]
        y_train , y_test = y[train_index] , y[test_index]
    
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
     
    acc = accuracy_score(y_pred , y_test)
    acc_score.append(acc)
     
    avg_acc_score = sum(acc_score)/k
    print('accuracy of each fold - {}'.format(acc_score))
    print('Avg accuracy : {}'.format(avg_acc_score))

    outcome_labels = ['Intubation False', 'Intubation True']

    #print('parameters \n', clf.get_params())
    print('score \n', clf.score(X_train, y_train))
    print('coef_')
    print(clf[1].coef_)
    print('intercept_')
    print(clf[1].intercept_)
    print('fit')
    print(clf.fit(X_train, y_train).n_features_in_, clf.fit(X_train, y_train).feature_names_in_)
    print('decision function \n', clf[1].decision_function(X_test))
    print('classification report \n', classification_report(y_test, y_pred, target_names=outcome_labels))
    micro_roc_auc_ovr = roc_auc_score(y_test, y_pred, multi_class="ovr", average="micro")
    print(f"Micro-averaged One-vs-Rest ROC AUC score:\n{micro_roc_auc_ovr:.2f}")
    print('\n')

    
print('### imputed only median noncontin ###')
print()
sgdClass = SGDClassifier(loss='hinge', penalty='l2', alpha=0.0001, l1_ratio=0.15, fit_intercept=True, max_iter=1000, tol=0.001, shuffle=True, verbose=0, epsilon=0.1, n_jobs=None, random_state=None, learning_rate='optimal', eta0=0.0, power_t=0.5, early_stopping=False, validation_fraction=0.1, n_iter_no_change=5, class_weight=None, warm_start=False, average=False)
X = median_imputed_noncontin.drop('outcome',axis= 1)
y = median_imputed_noncontin['outcome']

print('linear SGDClassifier SVM StandardScaler')
print()
classifyScale_coef(StandardScaler(), sgdClass, 5, X, y)

print('linear SGDClassifier SVM RobustScaler')
classifyScale_coef(RobustScaler(), sgdClass, 5, X, y)





### imputed only median noncontin ###

linear SGDClassifier SVM StandardScaler

accuracy of each fold - [0.9649170892147458]
Avg accuracy : 0.19298341784294917
score 
 0.9632433543436558
coef_
[[ 0.0020159   0.00640593 -0.03857514 -0.01592275 -0.00113143 -0.0233584 ]]
intercept_
[-1.0772403]
fit
6 ['sofa_coagulation' 'sofa_cardiovascular' 'sofa_cns' 'sofa_renal' 'gender'
 'charlson_comorbidity_index']
decision function 
 [-1.14726011 -1.17070082 -1.23868581 ... -1.30277393 -1.30276609
 -1.25993172]
classification report 
                   precision    recall  f1-score   support

Intubation False       0.96      1.00      0.98      7041
 Intubation True       0.00      0.00      0.00       256

        accuracy                           0.96      7297
       macro avg       0.48      0.50      0.49      7297
    weighted avg       0.93      0.96      0.95      7297

Micro-averaged One-vs-Rest ROC AUC score:
0.50


linear SGDClassifier SVM RobustScaler
accuracy of each fold - [0.9675208

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
