In [28]:
import math
import numpy as np
import optunity
import optunity.metrics
# from sklearn.preprocessing import MinMaxScaler
# from sklearn.preprocessing import normalize

# short form for now
original_data = np.genfromtxt('../../working_data/updrsii_short_form.csv', delimiter=',', skip_header=True)
n_rows, n_columns = original_data.shape

data = original_data[:,0:(n_columns - 1)]
labels = original_data[:,(n_columns - 1)]

# Common cross validator for all models
cv_decorator = optunity.cross_validated(x=data, y=labels, num_folds=10)

results = []


In [20]:
# Gaussian Naive Bayes
from sklearn.naive_bayes import GaussianNB

def gnb_tuned_auroc(x_train, y_train, x_test, y_test, sigfall_prior):
    no_fall_prior = 1.0 - sigfall_prior
    model = GaussianNB(priors=[no_fall_prior,sigfall_prior]).fit(x_train, y_train)
    decision_values = model.predict(x_test)
    auc = optunity.metrics.roc_auc(y_test, decision_values)
    return auc
    
    
gnb_tuned_auroc = cv_decorator(gnb_tuned_auroc)
gnb_optimal_pars, gnb_info, _ = optunity.maximize(gnb_tuned_auroc, solver_name='grid search', num_evals=100, sigfall_prior=[0.01,0.99])

print("Optimal parameters" + str(gnb_optimal_pars))
print("AUROC of tuned model: %1.3f" % gnb_info.optimum)

results.append({'model': 'Gaussian Naive Bayes',
               'Optimal parameters': gnb_optimal_pars,
               'ROC_AUC': gnb_info.optimum
               })

Optimal parameters{'sigfall_prior': 0.024700000000000024}
AUROC of tuned model: 0.633


In [29]:
# Random Forest
from sklearn.ensemble import RandomForestClassifier

forest_space = { 'criterion': {
        'gini': {'n_estimators': [50,200], 'cwn': [1,3]},
        'entropy': {'n_estimators': [50,200], 'cwn': [1,3]},
        'log_loss': {'n_estimators': [50,200], 'cwn': [1,3]}
    }
}

def forest_tuned_auroc(x_train, y_train, x_test, y_test, criterion, n_estimators, cwn):
    c = int(cwn)
    nest = math.floor(n_estimators)
    if c == 1 :
        class_weight = 'balanced'
    elif c == 2 :
        class_weight = 'balanced_subsample'
    else :
        class_weight = None
        
    model = RandomForestClassifier(criterion=criterion, n_estimators=nest, class_weight = class_weight).fit(x_train, y_train)
    decision_values = model.predict(x_test)
    auc = optunity.metrics.roc_auc(y_test, decision_values)
    return auc

forest_tuned_auroc = cv_decorator(forest_tuned_auroc)
rf_optimal_pairs, rf_info, _ = optunity.maximize_structured(forest_tuned_auroc, search_space=forest_space, num_evals=100)

print("Optimal parameters" + str(rf_optimal_pairs))
print("AUROC of tuned RF: %1.3f" % rf_info.optimum)
    

NameError: name 'rf_optimal_pars' is not defined

In [31]:
# AdaBoost

from sklearn.ensemble import AdaBoostClassifier

def ada_tuned_auroc(x_train, y_train, x_test, y_test, n_estimators, lrate):
    nest = math.floor(n_estimators)
    model = AdaBoostClassifier(n_estimators=nest, learning_rate=lrate).fit(x_train, y_train)
    decision_values = model.predict(x_test)
    auc = optunity.metrics.roc_auc(y_test, decision_values)
    return auc

ada_tuned_auroc = cv_decorator(ada_tuned_auroc)
ada_optimal_pars, ada_info, _ = optunity.maximize(ada_tuned_auroc, solver_name='grid search', num_evals=100, n_estimators=[50,150], lrate=[1, 10])

print("Optimal parameters" + str(ada_optimal_pars))
print("AUROC of tuned model: %1.3f" % ada_info.optimum)

Optimal parameters{'n_estimators': 72.5, 'lrate': 1.045}
AUROC of tuned model: 0.543
