In [4]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.svm import SVC
from sklearn.linear_model import SGDClassifier, SGDRegressor, RidgeCV, LassoCV
from sklearn.preprocessing import OrdinalEncoder, LabelEncoder, StandardScaler, RobustScaler
from sklearn.model_selection import train_test_split, KFold
from sklearn.impute import KNNImputer, SimpleImputer
from sklearn.pipeline import make_pipeline, Pipeline
from sklearn.metrics import accuracy_score, classification_report, roc_auc_score
from imblearn.over_sampling import SMOTEN

smoten_median_imputed_less_40 = pd.read_csv('smoten_median_imputed_less_40.csv', index_col=False)

pd.set_option("display.max_columns", None)

def classifyScale_coef(scaleFunc1, estimatorFunc2, k_fold_int, x_array, y_array):
    k = k_fold_int
    kf = KFold(n_splits=k, shuffle=True, random_state=None)    
    clf = make_pipeline(scaleFunc1, estimatorFunc2)

    acc_score = []
    for train_index , test_index in kf.split(X):
        X_train , X_test = X.iloc[train_index,:],X.iloc[test_index,:]
        y_train , y_test = y[train_index] , y[test_index]
    
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
     
    acc = accuracy_score(y_pred , y_test)
    acc_score.append(acc)
     
    avg_acc_score = sum(acc_score)/k
    print('accuracy of each fold - {}'.format(acc_score))
    print('Avg accuracy : {}'.format(avg_acc_score))

    outcome_labels = ['Intubation False', 'Intubation True']

    #print('parameters \n', clf.get_params())
    print('score \n', clf.score(X_train, y_train))
    print('coef_')
    print(clf[1].coef_)
    print('intercept_')
    print(clf[1].intercept_)
    print('fit')
    print(clf.fit(X_train, y_train).n_features_in_, clf.fit(X_train, y_train).feature_names_in_)
    print('decision function \n', clf[1].decision_function(X_test))
    print('classification report \n', classification_report(y_test, y_pred, target_names=outcome_labels))
    micro_roc_auc_ovr = roc_auc_score(y_test, y_pred, multi_class="ovr", average="micro")
    print(f"Micro-averaged One-vs-Rest ROC AUC score:\n{micro_roc_auc_ovr:.2f}")
    print('\n')

    
print('### SMOTEN median all undr 40 missing ###')
print()




print('linear SGDClassifier SVM StandardScaler')
print()

X = smoten_median_imputed_less_40.loc[:, ~smoten_median_imputed_less_40.columns.isin(['mbp_min', 'mbp_mean', 'sbp_max', 'dbp_min', 'dbp_max', 'temperature_max', 'glucose_min', 'wbc_min', 'creatinine_min', 'hemoglobin_max', 'charlson_comorbidity_index','outcome'])]
y = smoten_median_imputed_less_40['outcome']
sgdClass = SGDClassifier(loss='hinge', penalty='l2', alpha=0.1001, 
                         l1_ratio=0.15, fit_intercept=True, max_iter=1000, tol=0.001, 
                         shuffle=True, verbose=0, epsilon=0.1, n_jobs=None, random_state=None, 
                         learning_rate='optimal', eta0=0.0, power_t=0.5, early_stopping=True, validation_fraction=0.9, 
                         n_iter_no_change=5, class_weight=None, warm_start=False, average=False)

classifyScale_coef(StandardScaler(), sgdClass, 5, X, y)




print('linear SGDClassifier SVM RobustScaler')
print()


X = smoten_median_imputed_less_40.loc[:, ~smoten_median_imputed_less_40.columns.isin(['heart_rate_max', 'mbp_min', 'mbp_max', 'sbp_max', 'dbp_max', 'wbc_max', 'creatinine_max', 'sofa_cardiovascular','outcome'])]
y = smoten_median_imputed_less_40['outcome']


sgdClass = SGDClassifier(loss='hinge', penalty='l2', alpha=0.1001, 
                         l1_ratio=0.15, fit_intercept=True, max_iter=1000, tol=0.001, 
                         shuffle=True, verbose=0, epsilon=0.1, n_jobs=None, random_state=None, 
                         learning_rate='optimal', eta0=0.0, power_t=0.5, early_stopping=True, validation_fraction=0.5, 
                         n_iter_no_change=5, class_weight=None, warm_start=False, average=False)
classifyScale_coef(RobustScaler(), sgdClass, 5, X, y)




### SMOTEN median all undr 40 missing ###

linear SGDClassifier SVM StandardScaler

accuracy of each fold - [0.968287827076223]
Avg accuracy : 0.1936575654152446
score 
 0.9665457906712173
coef_
[[ 0.04498572 -0.44199625 -0.01044735 -0.05735932 -0.20192429 -0.06747567
  -0.17143467 -0.19940387 -0.21918222 -0.01826726 -0.2447108  -0.14826095
  -0.21594507 -0.15061895 -0.292038   -0.12915017 -0.11245803 -0.44179025
  -0.19772046 -0.14235396  0.1814721  -0.12595252]]
intercept_
[-0.17585022]
fit
22 ['gender' 'age' 'heart_rate_min' 'heart_rate_max' 'heart_rate_mean'
 'mbp_max' 'sbp_min' 'sbp_mean' 'dbp_mean' 'temperature_min'
 'temperature_mean' 'glucose_max' 'wbc_max' 'creatinine_max'
 'hemoglobin_min' 'pt_min' 'pt_max' 'urineoutput' 'sofa_coagulation'
 'sofa_cardiovascular' 'sofa_cns' 'sofa_renal']
decision function 
 [-2714.0572348   -465.35191089  -688.08874223 ...  -183.75217585
  -221.63911594  -497.8854091 ]
classification report 
                   precision    recall  f1-score   s



25 ['gender' 'age' 'heart_rate_min' 'heart_rate_mean' 'mbp_mean' 'sbp_min'
 'sbp_mean' 'dbp_min' 'dbp_mean' 'temperature_min' 'temperature_max'
 'temperature_mean' 'glucose_min' 'glucose_max' 'wbc_min' 'creatinine_min'
 'hemoglobin_min' 'hemoglobin_max' 'pt_min' 'pt_max' 'urineoutput'
 'sofa_coagulation' 'sofa_cns' 'sofa_renal' 'charlson_comorbidity_index']
decision function 
 [ -856.61943028  -989.65999722 -1468.30194503 ...  -557.38155356
  -519.38727563  -281.39341753]
classification report 
                   precision    recall  f1-score   support

Intubation False       0.96      0.97      0.96      7043
 Intubation True       0.97      0.96      0.96      7021

        accuracy                           0.96     14064
       macro avg       0.96      0.96      0.96     14064
    weighted avg       0.96      0.96      0.96     14064

Micro-averaged One-vs-Rest ROC AUC score:
0.96






no smoting only imputed

In [2]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.svm import SVC
from sklearn.linear_model import SGDClassifier, SGDRegressor, RidgeCV, LassoCV
from sklearn.preprocessing import OrdinalEncoder, LabelEncoder, StandardScaler, RobustScaler
from sklearn.model_selection import train_test_split, KFold
from sklearn.impute import KNNImputer, SimpleImputer
from sklearn.pipeline import make_pipeline, Pipeline
from sklearn.metrics import accuracy_score, classification_report, roc_auc_score
from imblearn.over_sampling import SMOTEN

median_imputed_less_40 = pd.read_csv('median_imputed_contin.csv', index_col=False)

pd.set_option("display.max_columns", None)

def classifyScale_coef(scaleFunc1, estimatorFunc2, k_fold_int, x_array, y_array):
    k = k_fold_int
    kf = KFold(n_splits=k, shuffle=True, random_state=None)    
    clf = make_pipeline(scaleFunc1, estimatorFunc2)

    acc_score = []
    for train_index , test_index in kf.split(X):
        X_train , X_test = X.iloc[train_index,:],X.iloc[test_index,:]
        y_train , y_test = y[train_index] , y[test_index]
    
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
     
    acc = accuracy_score(y_pred , y_test)
    acc_score.append(acc)
     
    avg_acc_score = sum(acc_score)/k
    print('accuracy of each fold - {}'.format(acc_score))
    print('Avg accuracy : {}'.format(avg_acc_score))

    outcome_labels = ['Intubation False', 'Intubation True']

    #print('parameters \n', clf.get_params())
    print('score \n', clf.score(X_train, y_train))
    print('coef_')
    print(clf[1].coef_)
    print('intercept_')
    print(clf[1].intercept_)
    print('fit')
    print(clf.fit(X_train, y_train).n_features_in_, clf.fit(X_train, y_train).feature_names_in_)
    print('decision function \n', clf[1].decision_function(X_test))
    print('classification report \n', classification_report(y_test, y_pred, target_names=outcome_labels))
    micro_roc_auc_ovr = roc_auc_score(y_test, y_pred, multi_class="ovr", average="micro")
    print(f"Micro-averaged One-vs-Rest ROC AUC score:\n{micro_roc_auc_ovr:.2f}")
    print('\n')

    
print('### imputed only median all undr 40 missing ###')
print()
sgdClass = SGDClassifier(loss='hinge', penalty='l2', alpha=0.0001, l1_ratio=0.15, fit_intercept=True, max_iter=1000, tol=0.001, shuffle=True, verbose=0, epsilon=0.1, n_jobs=None, random_state=None, learning_rate='optimal', eta0=0.0, power_t=0.5, early_stopping=False, validation_fraction=0.1, n_iter_no_change=5, class_weight=None, warm_start=False, average=False)
X = median_imputed_less_40.drop('outcome',axis= 1)
y = median_imputed_less_40['outcome']

print('linear SGDClassifier SVM StandardScaler')
print()
classifyScale_coef(StandardScaler(), sgdClass, 5, X, y)

print('linear SGDClassifier SVM RobustScaler')
classifyScale_coef(RobustScaler(), sgdClass, 5, X, y)





### imputed only median all undr 40 missing ###

linear SGDClassifier SVM StandardScaler

accuracy of each fold - [0.9640948334932163]
Avg accuracy : 0.19281896669864326
score 
 0.9634488901068786
coef_
[[-3.01727296e-02  1.08026472e-02 -5.18495413e-02 -1.98509925e-02
  -6.02323529e-03  1.10841435e-02  8.34716263e-02 -1.95528175e-02
   5.15867605e-02 -5.90524068e-02 -1.85217068e-02  4.31220797e-03
  -9.06205837e-02 -3.29591739e-02 -3.19037403e-02  7.67986990e-03
   2.81084523e-02  6.27100487e-03  1.13607131e-02 -3.81620168e-05
  -1.21270144e-02 -3.68245272e-02 -4.43456381e-02 -2.49484392e-03
   1.33057489e-02  3.04188980e-03 -2.94799725e-02  1.06606312e-02]]
intercept_
[-1.27013026]
fit
28 ['age' 'heart_rate_min' 'heart_rate_max' 'heart_rate_mean' 'mbp_min'
 'mbp_max' 'mbp_mean' 'sbp_min' 'sbp_max' 'sbp_mean' 'dbp_min' 'dbp_max'
 'dbp_mean' 'temperature_min' 'temperature_max' 'temperature_mean'
 'glucose_min' 'glucose_max' 'wbc_min' 'wbc_max' 'creatinine_min'
 'creatinine_max' 'hemoglo

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


decision function 
 [-162.19880051 -101.61703854 -143.81138235 ...  -75.43852117  -45.0652224
 -100.98431477]
classification report 
                   precision    recall  f1-score   support

Intubation False       0.96      1.00      0.98      7035
 Intubation True       0.00      0.00      0.00       262

        accuracy                           0.96      7297
       macro avg       0.48      0.50      0.49      7297
    weighted avg       0.93      0.96      0.95      7297

Micro-averaged One-vs-Rest ROC AUC score:
0.50


linear SGDClassifier SVM RobustScaler
accuracy of each fold - [0.9624503220501576]
Avg accuracy : 0.19249006441003153
score 
 0.9633118662647301
coef_
[[ 0.00229679  0.01130363  0.03509509  0.00199525 -0.01470979  0.003957
   0.09709899  0.00680446  0.01442559 -0.0510327   0.02906648  0.00129184
  -0.10625401  0.08241962  0.0355257   0.00037938 -0.02005757 -0.14099262
   0.02929717  0.02104483 -0.01507151 -0.05328855 -0.02329233  0.0377135
   0.09593135 -0.21647

