In [1]:
import numpy as np
import optunity
import optunity.metrics

original_data = np.genfromtxt('../../../working_data/normalised_delta_updrs_sigfall.csv', delimiter=',', skip_header=True)
_, n_columns = original_data.shape

data = original_data[:,0:(n_columns - 1)]
labels = original_data[:,(n_columns - 1)]

# Common cross validator for all models
cv_decorator = optunity.cross_validated(x=data, y=labels, num_folds=10)

results = []

In [27]:
# Gaussian Naive Bayes
from sklearn.naive_bayes import GaussianNB

def gnb_tuned_auroc(x_train, y_train, x_test, y_test, sigfall_prior):
    no_fall_prior = 1.0 - sigfall_prior
    model = GaussianNB(priors=[no_fall_prior,sigfall_prior]).fit(x_train, y_train)
    decision_values = model.predict(x_test)
    auc = optunity.metrics.roc_auc(y_test, decision_values)
    return auc
    
    
gnb_tuned_auroc = cv_decorator(gnb_tuned_auroc)
gnb_optimal_pars, gnb_info, _ = optunity.maximize(gnb_tuned_auroc, solver_name='grid search', num_evals=100, sigfall_prior=[0.01,0.99])

print("Optimal parameters" + str(gnb_optimal_pars))
print("AUROC of tuned model: %1.3f" % gnb_info.optimum)

results.append({'model': 'Gaussian Naive Bayes',
               'Optimal parameters': gnb_optimal_pars,
               'ROC_AUC': gnb_info.optimum
               })

Optimal parameters{'sigfall_prior': 0.7695}
AUROC of tuned model: 0.683


In [24]:
# Support Vector Machines
import sklearn.svm

# Finding the optimum SVM by abstracting over possible Kernel functions:
space = {'kernel': {'linear': {'C': [0, 2], 'sigfall_class_weight':[0,25]},
                    'rbf': {'logGamma': [-5, 0], 'C': [0, 10], 'sigfall_class_weight':[0,25]},
                    'poly': {'degree': [2, 5], 'C': [0, 5], 'coef0': [0, 2], 'sigfall_class_weight':[0,25]}
                    }
         }

# Need to use conditional func because sci-kit wont take None argument types
def train_model(x_train, y_train, kernel, C, logGamma, degree, coef0, sigfall_class_weight):
    if kernel == 'linear':
        model = sklearn.svm.SVC(kernel=kernel, C=C, class_weight={1: sigfall_class_weight})
    elif kernel == 'poly':
        model = sklearn.svm.SVC(kernel=kernel, C=C, degree=degree, coef0=coef0, class_weight={1: sigfall_class_weight})
    elif kernel == 'rbf':
        model = sklearn.svm.SVC(kernel=kernel, C=C, gamma=10 ** logGamma, class_weight={1: sigfall_class_weight})
    else:
        raise ArgumentError("Unknown kernel function: %s" % kernel)
    model.fit(x_train, y_train)
    return model

def svm_tuned_auroc(x_train, y_train, x_test, y_test, kernel='linear', C=0, logGamma=0, degree=0, coef0=0, sigfall_class_weight=0):
    model = train_model(x_train, y_train, kernel, C, logGamma, degree, coef0, sigfall_class_weight)
    decision_values = model.decision_function(x_test)
    return optunity.metrics.roc_auc(y_test, decision_values)

svm_tuned_auroc = cv_decorator(svm_tuned_auroc)

svm_optimal_pars, svm_info, _ = optunity.maximize_structured(svm_tuned_auroc, space, num_evals=150)
print("Optimal parameters" + str(svm_optimal_pars))
print("AUROC of tuned SVM: %1.3f" % svm_info.optimum)

results.append({'model': 'Support Vector Machine',
               'Optimal parameters': svm_optimal_pars,
               'ROC_AUC': svm_info.optimum
               })

Optimal parameters{'kernel': 'rbf', 'C': 4.973967190410129, 'sigfall_class_weight': 6.765950520833336, 'coef0': None, 'degree': None, 'logGamma': -3.4251259403278747}
AUROC of tuned SVM: 0.729


In [56]:
# Linear Discriminant Analysis

from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

solvers = {'solver': {'svd': { 'sigfall_prior': [0.01,0.99]}, 
                       'lsqr': { 'sigfall_prior': [0.01,0.99] }, 
                       'eigen': { 'sigfall_prior': [0.01,0.99] } }}

def lda_tuned_auroc(x_train, y_train, x_test, y_test, solver, sigfall_prior=0):
    no_fall_prior = 1.0 - sigfall_prior
    model = LinearDiscriminantAnalysis(solver=solver, priors=[no_fall_prior,sigfall_prior]).fit(x_train, y_train)    
    decision_values = model.predict(x_test)
    return optunity.metrics.roc_auc(y_test, decision_values)

lda_tuned_auroc = cv_decorator(lda_tuned_auroc)

lda_optimal_pars, lda_info, _ = optunity.maximize_structured(lda_tuned_auroc, search_space=solvers, num_evals=100)

print("Optimal parameters" + str(lda_optimal_pars))
print("AUROC of tuned LDA: %1.3f" % lda_info.optimum)

results.append({'model': 'Linear Discriminant Analysis',
               'Optimal parameters': lda_optimal_pars,
               'ROC_AUC': lda_info.optimum
               })

Optimal parameters{'solver': 'lsqr', 'sigfall_prior': 0.6107157879477079}
AUROC of tuned LDA: 0.672


In [58]:
# Quadratic Discriminant Analysis

from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis

def qda_tuned_auroc(x_train, y_train, x_test, y_test, sigfall_prior):
    no_fall_prior = 1.0 - sigfall_prior
    model = QuadraticDiscriminantAnalysis(priors=[no_fall_prior,sigfall_prior]).fit(x_train, y_train)
    decision_values = model.predict(x_test)
    auc = optunity.metrics.roc_auc(y_test, decision_values)
    return auc
    
    
qda_tuned_auroc = cv_decorator(qda_tuned_auroc)
qda_optimal_pars, qda_info, _ = optunity.maximize(qda_tuned_auroc, solver_name='grid search', num_evals=100, sigfall_prior=[0.01,0.99])

print("Optimal parameters" + str(qda_optimal_pars))
print("AUROC of tuned model: %1.3f" % qda_info.optimum)

results.append({'model': 'Quadratic Discriminant Analysis',
               'Optimal parameters': qda_optimal_pars,
               'ROC_AUC': qda_info.optimum
               })

Optimal parameters{'sigfall_prior': 0.8871}
AUROC of tuned model: 0.677


In [2]:
# Random Forest (Decision Tree)

from sklearn.ensemble import RandomForestClassifier



In [75]:
# NN using grid search across alpha & nodes (1 hidden layer pressummed satisfactory)
# NN 1 - ADAM algorithm
from sklearn.neural_network import MLPClassifier


def nn_adam_tuned_auroc(x_train, y_train, x_test, y_test, alpha, n_nodes):
    model = MLPClassifier(solver='adam', alpha=alpha, hidden_layer_sizes=(int(n_nodes)), random_state=123, max_iter=1000).fit(x_train, y_train)
    decision_values = model.predict(x_test)
    auc = optunity.metrics.roc_auc(y_test, decision_values)
    return auc

nn_adam_tuned_auroc = cv_decorator(nn_adam_tuned_auroc)
nn_adam_optimal_pars, nn_adam_info, _ = optunity.maximize(nn_adam_tuned_auroc, solver_name='random search', num_evals=10, alpha=[0.1,1], n_nodes=[20,120])
print("Optimal parameters" + str(nn_adam_optimal_pars))
print("AUROC of tuned model: %1.3f" % nn_adam_info.optimum)

results.append({'model': 'Neural Net - ADAM',
               'Optimal parameters': nn_adam_optimal_pars,
               'ROC_AUC': nn_adam_info.optimum
               })

Optimal parameters{'alpha': 0.7283141958125202, 'n_nodes': 44.2946850286549}
AUROC of tuned model: 0.500


In [76]:
# NN2 - SGD

def nn_sgd_tuned_auroc(x_train, y_train, x_test, y_test, alpha, n_nodes):
    model = MLPClassifier(solver='sgd', alpha=alpha, hidden_layer_sizes=(int(n_nodes)), random_state=123, max_iter=1000).fit(x_train, y_train)
    decision_values = model.predict(x_test)
    auc = optunity.metrics.roc_auc(y_test, decision_values)
    return auc

nn_sgd_tuned_auroc = cv_decorator(nn_sgd_tuned_auroc)
nn_sgd_optimal_pars, nn_sgd_info, _ = optunity.maximize(nn_sgd_tuned_auroc, solver_name='random search', num_evals=10, alpha=[0.1,1], n_nodes=[20,120])
print("Optimal parameters" + str(nn_sgd_optimal_pars))
print("AUROC of tuned model: %1.3f" % nn_sgd_info.optimum)

results.append({'model': 'Neural Net - SGD',
               'Optimal parameters': nn_sgd_optimal_pars,
               'ROC_AUC': nn_sgd_info.optimum
               })

Optimal parameters{'alpha': 0.4405680121268508, 'n_nodes': 62.47042690345936}
AUROC of tuned model: 0.500


In [86]:
# NN3 - lbfgs

def nn_lbfgs_tuned_auroc(x_train, y_train, x_test, y_test, alpha, n_nodes):
    model = MLPClassifier(solver='lbfgs', alpha=alpha, hidden_layer_sizes=(int(n_nodes)), random_state=123, max_iter=5000).fit(x_train, y_train)
    decision_values = model.predict(x_test)
    auc = optunity.metrics.roc_auc(y_test, decision_values)
    return auc

nn_lbfgs_tuned_auroc = cv_decorator(nn_lbfgs_tuned_auroc)
nn_lbfgs_optimal_pars, nn_lbfgs_info, _ = optunity.maximize(nn_lbfgs_tuned_auroc, solver_name='random search', num_evals=10, alpha=[0.1,10], n_nodes=[5,60])
print("Optimal parameters" + str(nn_lbfgs_optimal_pars))
print("AUROC of tuned model: %1.3f" % nn_lbfgs_info.optimum)

results.append({'model': 'Neural Net - L-BFGS',
               'Optimal parameters': nn_lbfgs_optimal_pars,
               'ROC_AUC': nn_lbfgs_info.optimum
               })

Optimal parameters{'alpha': 4.9314823354150645, 'n_nodes': 24.603104980639955}
AUROC of tuned model: 0.500


In [3]:
# NN scratch pad - seems they are a little too complex for automated hyper-parameter optimization
from sklearn.neural_network import MLPClassifier

len(data)
xx_train = data[0:1000]
xx_test  = data[1001:1400]
yy_test  = labels[1001:1400]
yy_train = labels[0:1000]

# print(xx_train)
# print(yy_train)

m = MLPClassifier(solver='lbfgs', max_iter=5000, alpha=0.1, hidden_layer_sizes=(15,2)).fit(xx_train, yy_train)
dv = m.predict(xx_test)
print(optunity.metrics.roc_auc(yy_test, dv))

optunity.metrics.contingency_table(yy_test, dv, positive=True)
# print(optunity.metrics.roc_auc(yy_test, dv))
# print(optunity.metrics.precision(yy_test, dv, positive=True)


0.5


(0, 0, 365, 34)

34