In [1]:
import pandas as pd
import numpy as np

from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import LinearRegression

from sklearn.ensemble import AdaBoostClassifier
from xgboost import XGBClassifier
from catboost import CatBoostClassifier

from baseline_package.base_predictor import BasePredictor

In [2]:
path = "dataset/preprocessed.csv"

In [3]:
features = [
    'Age', 'Sex_Female', 'Diabetes Mellitus_Yes',
    'Hypertension_Yes', 'Hyperlipidemia_Yes',
    'Time since symptoms onset to first medical contact_1-3 days',
    'Time since symptoms onset to first medical contact_1-3 hours',
    'Time since symptoms onset to first medical contact_12-24 hours',
    'Time since symptoms onset to first medical contact_3-12 hours',
    'Time since symptoms onset to first medical contact_<1 hour',
    'Heart Rate>100_2', 'PRESENCE OF MITRAL REGURGITATION_Moderate',
    'PRESENCE OF MITRAL REGURGITATION_Severe', 'VSD_Yes',
    'LV ANEURYSM_Yes', 'KILLIP II-IV_2', 'Time to Treatment >4 Hours_1',
    'TIMI', 'BMI', 'Pack_Year', 'CardiacStatus_Presentation_Cardiogenic Shock',
    'CardiacStatus_Presentation_Heart failure', 'MItype_on_ECG_Anterior Wall MI',
    'MItype_on_ECG_Inferior Wall MI', 'MItype_on_ECG_Lateral Wall MI', 
    'MItype_on_ECG_Posterior Wall MI', 'Systolic BP<100_3',
    'EJECTION FRACTION(%)_20-25', 'EJECTION FRACTION(%)_25-30',
    'EJECTION FRACTION(%)_30-35', 'EJECTION FRACTION(%)_35-40',
    'EJECTION FRACTION(%)_40-45', 'EJECTION FRACTION(%)_>50'
]

In [4]:
rsf = RandomForestClassifier()

param_grid_rsf = {
    'criterion' : ['entropy','log_loss'],
    'n_estimators' : [100,150],
    'max_depth' : [None],
    'min_samples_split' : [3,4,5],
    'min_samples_leaf' : [2,3,4],
    'max_leaf_nodes' : [None],
    'bootstrap' : [False],
    'max_features' : ['sqrt','log2'],
    'min_weight_fraction_leaf' : [0.0],
    'class_weight' : [{0:1,1:1},{0:1, 1:40},'balanced'],
    'warm_start' : [False],
    'ccp_alpha' : [0.02,0.01]
}

bp_rsf = BasePredictor(model = rsf, d_path = path, param_grid = param_grid_rsf, n_splits = 5)
bp_rsf.run(features = features, select_path = None)

In [None]:
bp_rsf.sens

[62.5, 55.00000000000001, 62.5, 67.5, 68.29268292682927]

In [None]:
bp_rsf.specs

[79.37062937062937,
 84.26573426573427,
 78.32167832167832,
 79.72027972027972,
 76.84210526315789]

In [None]:
bp_rsf.best_params

{'bootstrap': False,
 'ccp_alpha': 0.01,
 'class_weight': 'balanced',
 'criterion': 'entropy',
 'max_depth': None,
 'max_features': 'log2',
 'max_leaf_nodes': None,
 'min_samples_leaf': 2,
 'min_samples_split': 2,
 'min_weight_fraction_leaf': 0.0,
 'n_estimators': 50,
 'warm_start': False}

In [None]:
log_r = LogisticRegression()

param_grid_logr = {
    'penalty' : ['l2'],
    'dual' : [False],
    'tol' : [1e-3,1e-4],
    'C' : [0.3,0.29,0.28,0.277,0.275,0.274,0.273,0.27,0.267,0.265,0.263,0.26,0.25],
    'fit_intercept' : [True],
    'class_weight' : [{0:1, 1:15},{0:1,1:20},'balanced'],
    'solver' : ['lbfgs','newton-cg'],
    'max_iter' : [5000,10000],
    'warm_start' : [False],
    'n_jobs' : [None,-1],
    'multi_class' : ['auto'],
    'l1_ratio' : [None],
}

bp_logr = BasePredictor(model = log_r, d_path = path, param_grid = param_grid_logr, n_splits = 5)
bp_logr.run(features = features, select_path = None)

In [None]:
bp_logr.sens

[67.5, 57.49999999999999, 60.0, 70.0, 75.60975609756098]

In [None]:
bp_logr.specs

[68.53146853146853,
 78.67132867132867,
 81.46853146853147,
 75.52447552447552,
 75.78947368421053]

In [None]:
bp_logr.best_params

{'C': 0.275,
 'class_weight': 'balanced',
 'dual': False,
 'fit_intercept': True,
 'l1_ratio': None,
 'max_iter': 5000,
 'multi_class': 'auto',
 'n_jobs': None,
 'penalty': 'l2',
 'solver': 'newton-cg',
 'tol': 0.001,
 'warm_start': False}

In [None]:
bp_logr.confs

[array([[196,  90],
        [ 13,  27]]),
 array([[225,  61],
        [ 17,  23]]),
 array([[233,  53],
        [ 16,  24]]),
 array([[216,  70],
        [ 12,  28]]),
 array([[216,  69],
        [ 10,  31]])]