In [53]:
import math
import numpy as np
from sklearn.model_selection import StratifiedKFold

# short form for now
original_data = np.genfromtxt('../working_data/updrsii_short_form.csv', delimiter=',', skip_header=True)
n_rows, n_columns = original_data.shape

data = original_data[:,0:(n_columns - 1)]
labels = original_data[:,(n_columns - 1)]

cross_fold_validations = StratifiedKFold(n_splits=10, shuffle=True)

overall_results = []

In [54]:
from sklearn.metrics import roc_auc_score
from sklearn.metrics import f1_score

def model_runner(model_list):
    for train, test in cross_fold_validations.split(data,labels):
        x_train = data[train]
        x_test = data[test]
        y_train = labels[train]
        y_test = labels[test]

        for model in model_list:
            m = model['model']
            m.fit(x_train,y_train)
            predictions = m.predict(x_test)
            roc_auc = roc_auc_score(y_test, predictions)
            f1 = f1_score(y_test,predictions)
            model['f1_scores'].append(f1)
            model['roc_auc_scores'].append(roc_auc)

In [55]:
# Gaussian Naive Bayes
from sklearn.naive_bayes import GaussianNB
gnb_models = [{'name': 'GNB1', 'model': GaussianNB(priors=[0.1,0.9]), 'f1_scores':[], 'roc_auc_scores':[]},
              {'name': 'GNB2', 'model': GaussianNB(priors=[0.2,0.8]), 'f1_scores':[], 'roc_auc_scores':[]},
              {'name': 'GNB3', 'model': GaussianNB(priors=[0.3,0.7]), 'f1_scores':[], 'roc_auc_scores':[]},
              {'name': 'GNB4', 'model': GaussianNB(priors=[0.4,0.6]), 'f1_scores':[], 'roc_auc_scores':[]},
              {'name': 'GNB5', 'model': GaussianNB(priors=[0.5,0.5]), 'f1_scores':[], 'roc_auc_scores':[]},
              {'name': 'GNB6', 'model': GaussianNB(priors=[0.6,0.4]), 'f1_scores':[], 'roc_auc_scores':[]},
              {'name': 'GNB7', 'model': GaussianNB(priors=[0.7,0.3]), 'f1_scores':[], 'roc_auc_scores':[]},
              {'name': 'GNB8', 'model': GaussianNB(priors=[0.8,0.2]), 'f1_scores':[], 'roc_auc_scores':[]},
              {'name': 'GNB9', 'model': GaussianNB(priors=[0.9,0.1]), 'f1_scores':[], 'roc_auc_scores':[]}]

model_runner(gnb_models)


In [56]:
gnb_models
# try plotting this 

[{'name': 'GNB1',
  'model': GaussianNB(priors=[0.1, 0.9]),
  'f1_scores': [0.23076923076923078,
   0.23076923076923078,
   0.23529411764705882,
   0.23076923076923078,
   0.19607843137254902,
   0.19607843137254902,
   0.19607843137254902,
   0.15999999999999998,
   0.19607843137254902,
   0.19999999999999998],
  'roc_auc_scores': [0.5, 0.5, 0.5125, 0.5, 0.5, 0.5, 0.5, 0.4, 0.5, 0.5]},
 {'name': 'GNB2',
  'model': GaussianNB(priors=[0.2, 0.8]),
  'f1_scores': [0.19607843137254902,
   0.23529411764705882,
   0.24000000000000002,
   0.23529411764705882,
   0.19999999999999998,
   0.19607843137254902,
   0.19999999999999998,
   0.163265306122449,
   0.20408163265306123,
   0.20408163265306123],
  'roc_auc_scores': [0.4166666666666667,
   0.5125,
   0.525,
   0.5125,
   0.5121951219512195,
   0.5,
   0.5121951219512195,
   0.41219512195121955,
   0.524390243902439,
   0.5125]},
 {'name': 'GNB3',
  'model': GaussianNB(priors=[0.3, 0.7]),
  'f1_scores': [0.29411764705882354,
   0.2439024390

In [48]:
# Random Forest
from sklearn.ensemble import RandomForestClassifier
