In [18]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.svm import SVC
from sklearn.linear_model import SGDClassifier, SGDRegressor, RidgeCV, LassoCV, LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import OrdinalEncoder, LabelEncoder, StandardScaler, RobustScaler
from sklearn.model_selection import train_test_split, KFold
from sklearn.impute import KNNImputer, SimpleImputer
from sklearn.pipeline import make_pipeline, Pipeline
from sklearn.metrics import accuracy_score, classification_report, roc_auc_score
from imblearn.over_sampling import SMOTEN


smoten_knn_contin = pd.read_csv('smoten_knn_contin.csv', index_col=False)
smoten_median_imputed_contin = pd.read_csv('smoten_median_imputed_contin.csv', index_col=False)

pd.set_option("display.max_columns", None)


def treeScale_coef(scaleFunc1, estimatorFunc2, k_fold_int, x_array, y_array):
    k = k_fold_int
    kf = KFold(n_splits=k, shuffle=True, random_state=None)    
    clf = make_pipeline(scaleFunc1, estimatorFunc2)

    acc_score = []
    for train_index , test_index in kf.split(X):
        X_train , X_test = X.iloc[train_index,:],X.iloc[test_index,:]
        y_train , y_test = y[train_index] , y[test_index]
    
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
     
    acc = accuracy_score(y_pred , y_test)
    acc_score.append(acc)
     
    avg_acc_score = sum(acc_score)/k
    print('accuracy of each fold - {}'.format(acc_score))
    print('Avg accuracy : {}'.format(avg_acc_score))

    outcome_labels = ['Intubation False', 'Intubation True']

    #print('parameters \n', clf.get_params())
    print('score \n', clf.score(X_train, y_train))
    print('feature_names_in_')
    print(clf.feature_names_in_)
    print()
    print('feature_importances_')
    print(clf[1].feature_importances_)
    print()
    for i in range(len(clf.feature_names_in_)):
        print(clf.feature_names_in_[i], ":" ,clf[1].feature_importances_[i])
    print()
    print('fit')
    print(clf.fit(X_train, y_train).n_features_in_, clf.fit(X_train, y_train).feature_names_in_)
    print()
    print('classification report \n', classification_report(y_test, y_pred, target_names=outcome_labels))
    micro_roc_auc_ovr = roc_auc_score(y_test, y_pred, multi_class="ovr", average="micro")
    print()
    print(f"Micro-averaged One-vs-Rest ROC AUC score:\n{micro_roc_auc_ovr:.2f}")
    print()
    print('Decision Path')
    print(clf[1].decision_path(X))
    print('\n')
    
    
print('### SMOTEN knn contin ###')
print()

X = smoten_knn_contin.drop('outcome',axis= 1)
y = smoten_knn_contin['outcome']


print('Random Forest')

rfmodel = RandomForestClassifier(
                        n_estimators=100, criterion='entropy', 
                        max_depth=None, min_samples_split=2, 
                        min_samples_leaf=1, min_weight_fraction_leaf=0.0, 
                        max_features='sqrt', max_leaf_nodes=None, 
                        min_impurity_decrease=0.0, bootstrap=True, oob_score=True, 
                        n_jobs=4, 
                        random_state=None, verbose=0, warm_start=False, 
                        class_weight=None, ccp_alpha=0.0, max_samples=None)

print('Random Forest StandardScaler')
print()
treeScale_coef(StandardScaler(), rfmodel, 5, X, y)

print('Random Forest RobustScaler')
treeScale_coef(RobustScaler(), rfmodel, 5, X, y)


print('### SMOTEN median impute ###')
print()

X = smoten_median_imputed_contin.drop('outcome',axis= 1)
y = smoten_median_imputed_contin['outcome']

print('Random Forest StandardScaler')
print()
treeScale_coef(StandardScaler(), rfmodel, 5, X, y)

print('Random Forest RobustScaler')
treeScale_coef(RobustScaler(), rfmodel, 5, X, y)




### SMOTEN knn contin ###

Random Forest
Random Forest StandardScaler

accuracy of each fold - [0.9796643913538111]
Avg accuracy : 0.19593287827076222
score 
 0.9999644482366326
feature_names_in_
['age' 'heart_rate_min' 'heart_rate_max' 'heart_rate_mean' 'mbp_min'
 'mbp_max' 'mbp_mean' 'sbp_min' 'sbp_max' 'sbp_mean' 'dbp_min' 'dbp_max'
 'dbp_mean' 'temperature_min' 'temperature_max' 'temperature_mean'
 'glucose_min' 'glucose_max' 'wbc_min' 'wbc_max' 'creatinine_min'
 'creatinine_max' 'hemoglobin_min' 'hemoglobin_max' 'pt_min' 'pt_max'
 'urineoutput' 'gender']

feature_importances_
[0.13081998 0.00926247 0.01028338 0.03669426 0.00964898 0.00997526
 0.04917568 0.0121793  0.00742012 0.09063954 0.00919246 0.01169962
 0.07442169 0.00700935 0.00798681 0.07988597 0.01769094 0.03503068
 0.03766803 0.03266069 0.01551181 0.01838291 0.02459846 0.04612042
 0.02835606 0.02864408 0.15793993 0.0011011 ]

age : 0.13081998493799185
heart_rate_min : 0.00926247156393673
heart_rate_max : 0.010283381667542



(<70320x248104 sparse matrix of type '<class 'numpy.int64'>'
	with 74100244 stored elements in Compressed Sparse Row format>, array([     0,   2567,   5156,   7507,  10058,  12477,  15100,  17565,
        20044,  22521,  24992,  27429,  29758,  32271,  34700,  37165,
        39726,  42291,  44812,  47385,  49780,  52271,  54792,  57327,
        59824,  62233,  64710,  67173,  69544,  72067,  74552,  77017,
        79516,  81989,  84438,  86991,  89374,  91865,  94370,  96865,
        99454, 101903, 104424, 106989, 109372, 111869, 114302, 116779,
       119220, 121665, 124056, 126479, 129054, 131505, 133966, 136493,
       139040, 141587, 144120, 146669, 149264, 151733, 154090, 156525,
       159082, 161639, 164050, 166497, 168954, 171415, 173894, 176383,
       178896, 181407, 183800, 186303, 188746, 191303, 193798, 196313,
       198848, 201233, 203766, 206257, 208716, 211121, 213552, 216053,
       218574, 221083, 223670, 226219, 228668, 231123, 233578, 235979,
       238386, 240899,



(<70320x250662 sparse matrix of type '<class 'numpy.int64'>'
	with 76190476 stored elements in Compressed Sparse Row format>, array([     0,   2443,   4918,   7483,  10068,  12655,  15128,  17721,
        20258,  22827,  25312,  27821,  30402,  32895,  35332,  37917,
        40502,  43083,  45684,  48271,  50824,  53259,  55884,  58503,
        60904,  63291,  65746,  68301,  70764,  73301,  75858,  78391,
        80872,  83391,  85942,  88463,  90876,  93467,  95994,  98495,
       101008, 103621, 106070, 108767, 111340, 113825, 116398, 118919,
       121442, 123879, 126352, 128857, 131404, 133939, 136378, 138937,
       141518, 143981, 146508, 149101, 151544, 154159, 156676, 159197,
       161740, 164223, 166698, 169217, 171732, 174175, 176662, 179071,
       181488, 183939, 186372, 188849, 191398, 193853, 196326, 198767,
       201182, 203587, 206094, 208555, 211052, 213507, 216052, 218483,
       221054, 223401, 225924, 228413, 230848, 233417, 235846, 238277,
       240858, 243301,



(<70320x246150 sparse matrix of type '<class 'numpy.int64'>'
	with 72739749 stored elements in Compressed Sparse Row format>, array([     0,   2605,   5084,   7649,  10062,  12553,  14954,  17471,
        19992,  22411,  24996,  27537,  29966,  32363,  34960,  37415,
        39866,  42385,  44876,  47339,  49770,  52273,  54674,  57081,
        59456,  61881,  64342,  66745,  69276,  71819,  74208,  76669,
        79140,  81541,  84016,  86571,  88982,  91459,  93936,  96343,
        98862, 101345, 103924, 106505, 108920, 111375, 113698, 116159,
       118562, 120999, 123526, 126109, 128652, 131065, 133480, 135827,
       138216, 140721, 143132, 145539, 148106, 150451, 152898, 155295,
       157730, 160111, 162622, 165059, 167594, 170035, 172472, 174909,
       177334, 179799, 182274, 184797, 187230, 189703, 192212, 194775,
       197218, 199735, 202084, 204525, 206932, 209261, 211756, 214167,
       216570, 219159, 221606, 224009, 226494, 228949, 231406, 233863,
       236288, 238781,



(<70320x247246 sparse matrix of type '<class 'numpy.int64'>'
	with 75176565 stored elements in Compressed Sparse Row format>, array([     0,   2479,   4898,   7275,   9710,  12219,  14690,  17177,
        19818,  22321,  24748,  27221,  29744,  32271,  34736,  37259,
        39750,  42177,  44612,  47091,  49624,  52047,  54622,  57073,
        59554,  61975,  64460,  66971,  69322,  71831,  74292,  76845,
        79314,  81747,  84080,  86613,  89170,  91597,  94262,  96793,
        99192, 101649, 104176, 106477, 108912, 111303, 113668, 116035,
       118584, 121039, 123518, 125933, 128376, 130935, 133332, 135733,
       138260, 140797, 143172, 145651, 148180, 150717, 153048, 155453,
       157964, 160513, 163106, 165609, 168038, 170569, 172976, 175401,
       178000, 180585, 183134, 185585, 188076, 190513, 193022, 195517,
       197992, 200547, 203028, 205481, 207896, 210437, 212872, 215299,
       217832, 220199, 222730, 225129, 227536, 230113, 232742, 235173,
       237640, 240025,