Other model tests


- from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, GradientBoostingClassifier
- from sklearn.neighbors import KNeighborsClassifier
- from sklearn.linear_model import LogisticRegression
- from xgboost import XGBClassifier
- from sklearn.tree import DecisionTreeClassifier


- Ensemble methods



In [1]:
import numpy as np
import pandas as pd

from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, GradientBoostingClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from xgboost import XGBClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn import svm
from sklearn.metrics import accuracy_score, confusion_matrix, precision_score, recall_score, f1_score, roc_auc_score, cohen_kappa_score
from sklearn.model_selection import GridSearchCV, train_test_split, KFold

import keras
from keras.utils import to_categorical
from keras.models import Sequential
from keras.layers import Dense, Activation, Dropout

from scipy import stats
import glob
import time

Using TensorFlow backend.


In [2]:
# Path lists for the three data splits; they are filled by the file-discovery cell.
training_src, test_src, validation_src = [], [], []

In [3]:
# Start from empty lists so that re-running this cell does not append every
# path a second time.
training_src = []
validation_src = []
test_src = []

# glob returns paths in arbitrary, OS-dependent order. The three lists are
# later paired up by index, so the listing is sorted to give every split the
# same ordering of dataset prefixes.
# NOTE(review): alignment still assumes every dataset prefix has a file for
# each split it is expected to have - confirm against the data directory.
file_list = sorted(glob.glob("351_Data/CIFAR_100_filtered/Intermediate Data from Conv layers/*.txt"))
for file in file_list:
    # The split name is the token that directly follows "_pred_" in the path.
    f_type = file.split("_pred_")[-1].split("_")[0]
    if f_type == "train":
        training_src.append(file)
    elif f_type == "val":
        validation_src.append(file)
    elif f_type == "test":
        test_src.append(file)

In [4]:
# Disabled alternative to the glob-based discovery: it would build the three
# path lists from explicit dataset indices 0..99. The code sits inside a string
# literal, so it is never executed; the cell only echoes the string.
"""
training_src = []
validation_src = []
test_src = []
for x in range(0,100):
    training_src.append("351_Data/CIFAR_100_filtered/Intermediate Data from Conv layers/{}_pred_train_with_target_cut_data.txt".format(x))
    validation_src.append("351_Data/CIFAR_100_filtered/Intermediate Data from Conv layers/{}_pred_val_with_target_cut_data.txt".format(x))
    test_src.append("351_Data/CIFAR_100_filtered/Intermediate Data from Conv layers/{}_pred_test_with_target_cut_data.txt".format(x))

"""

'\ntraining_src = []\nvalidation_src = []\ntest_src = []\nfor x in range(0,100):\n    training_src.append("351_Data/CIFAR_100_filtered/Intermediate Data from Conv layers/{}_pred_train_with_target_cut_data.txt".format(x))\n    validation_src.append("351_Data/CIFAR_100_filtered/Intermediate Data from Conv layers/{}_pred_val_with_target_cut_data.txt".format(x))\n    test_src.append("351_Data/CIFAR_100_filtered/Intermediate Data from Conv layers/{}_pred_test_with_target_cut_data.txt".format(x))\n\n'

In [5]:
def read_data(src):
    """Return the full text of the file at ``src`` with all newline characters removed."""
    with open(src, 'r') as handle:
        contents = handle.read()
    return contents.replace('\n', '')

In [6]:
def treat_data(data_string):
    """Split a bracketed, comma-separated dump into rows of string tokens.

    Closing brackets and spaces are stripped, the text is cut at every "[",
    and each piece is split on ",". Every piece except the last loses its
    final token (the empty string left by the comma that separated it from
    the next row). Rows that end up empty are discarded.

    Args:
        data_string: text such as "[1, 2, 0],[3, 4, 1]".

    Returns:
        A list of rows, each a list of strings.
    """
    stripped = data_string.replace("]", "").replace(" ", "")
    rows = [chunk.split(",") for chunk in stripped.split("[")]
    *leading, final = rows
    cleaned = [row[:-1] for row in leading]
    cleaned.append(final)
    return [row for row in cleaned if row]

In [7]:
def split_data(data):
    """Separate parsed rows into a feature matrix and a target vector.

    Args:
        data: iterable of rows; every value must be convertible with float().
            The last value of each row is the target, the rest are features.

    Returns:
        Tuple ``(inp, target)`` of NumPy arrays built from the float values.
    """
    parsed = [([float(x) for x in row[:-1]], float(row[-1])) for row in data]
    inp = np.array([features for features, _ in parsed])
    target = np.array([label for _, label in parsed])
    return inp, target

In [8]:
def get_data_from_source (src):
    """Read the file at ``src``, parse it into rows and return ``split_data``'s (inputs, targets) result."""
    return split_data(treat_data(read_data(src)))

<font color="red">What does average="macro" do? Would "micro" be better?</font>

In [9]:
def evaluate(model, data):
    """Score ``model`` on ``data`` with several classification metrics.

    Args:
        model: object with a ``predict`` method. It may return either a 1-D
            sequence of class labels or a 2-D array with one row of per-class
            scores per sample.
        data: pair ``(inputs, targets)``.

    Returns:
        Dict with the keys "accuracy", "confusion matrix", "precision",
        "recall", "f1-score" and "cohen's kappa". Precision, recall and
        f1-score are macro-averaged.
    """
    inputs, targets = data[0], data[1]
    y_pred = np.asarray(model.predict(inputs))

    # Decide on the output shape explicitly rather than relying on round()
    # raising for array-valued rows (the previous bare `except:` also hid
    # unrelated errors and interrupts).
    if y_pred.ndim > 1:
        # One row of class scores per sample: take the best-scoring class.
        predictions = np.argmax(y_pred, axis=-1)
    else:
        # Labels come back as numbers (possibly floats): round to integers.
        predictions = np.rint(y_pred).astype(int)

    # evaluate predictions
    d = {}
    d["accuracy"] = accuracy_score(targets, predictions)
    d["confusion matrix"] = confusion_matrix(targets, predictions)
    d["precision"] = precision_score(targets, predictions, average='macro')
    d["recall"] = recall_score(targets, predictions, average='macro')
    d["f1-score"] = f1_score(targets, predictions, average='macro')
    # d["roc-auc"] = roc_auc_score(data[1], predictions, )
    d["cohen's kappa"] = cohen_kappa_score(targets, predictions)

    return d

In [10]:
def get_measures(measure, data_type="data", measure_name="Accuracy"):
    """Format one numeric measure as "<measure_name> in <data_type>: <value>" with two decimals."""
    template = "%s in %s: %.2f"
    return template % (measure_name, data_type, measure)

In [11]:
def print_measures(evaluation, t):
    """Print every entry of an evaluation dict for the data split named ``t``.

    Scalar metrics are printed as one formatted line each; the entry stored
    under "confusion matrix" is printed as its key followed by the raw value.
    """
    for name, value in evaluation.items():
        if name == "confusion matrix":
            print(name)
            print(value)
            continue
        print (get_measures(value, t, name))

In [12]:
def get_data_from_sources (training_src, test_src, validation_src="", random_state=None):
    """Load the train, validation and test splits of one dataset.

    Args:
        training_src: path of the training file.
        test_src: path of the test file.
        validation_src: path of the validation file. When it is empty, or
            the file cannot be loaded, the validation split is carved out
            of the test data with ``train_test_split``.
        random_state: forwarded to ``train_test_split`` so the fallback
            split can be made reproducible. The default ``None`` keeps the
            previous unseeded behaviour.

    Returns:
        Tuple ``(train, val, test)``; each element is an ``(inputs, targets)`` pair.
    """
    train = get_data_from_source(training_src)
    test = get_data_from_source(test_src)

    val = None
    if validation_src:
        try:
            val = get_data_from_source(validation_src)
        except Exception as exc:
            # Keep the best-effort fallback, but say why it is being used
            # instead of swallowing the error (and interrupts) silently.
            print("Could not load validation data from {!r} ({!r}); "
                  "splitting the test data instead.".format(validation_src, exc))

    if val is None:
        inp_test, inp_val, target_test, target_val = train_test_split(*test, random_state=random_state)
        test = (inp_test, target_test)
        val = (inp_val, target_val)
    return train, val, test

In [13]:
# Load every dataset: entry i of each list holds the (inputs, targets) pair
# produced from the i-th training/test/validation source path.
train_data, test_data, val_data = [], [], []

ind = 0
for ind in range(len(training_src)):
    # An empty path tells get_data_from_sources that no validation file exists.
    val_src = validation_src[ind] if ind < len(validation_src) else ""
    train, val, test = get_data_from_sources(training_src[ind], test_src[ind], val_src)
    train_data.append(train)
    test_data.append(test)
    val_data.append(val)

In [14]:
# Number of classes, taken as the largest training target of the first dataset plus one.
# NOTE(review): assumes all datasets share the label range of dataset 0 - confirm.
first_train_targets = train_data[0][1]
num_classes = int(max(first_train_targets) + 1)

In [15]:
# Compiled Keras models are collected here by the following cells.
mlp_models = list()

In [16]:
# MLP variant: 1024-unit ELU hidden layer with dropout 0.5, Adam with lr=0.0001.
model = Sequential([
    Dense(1024, input_shape=(2048,)),
    Activation('elu'),
    Dropout(0.5),
    Dense(num_classes),
    Activation('softmax'),
])

model.compile(
    loss=keras.losses.categorical_crossentropy,
    optimizer=keras.optimizers.Adam(lr=0.0001),
    metrics=['accuracy'],
)

mlp_models.append(model)

In [17]:
# MLP variant: 1024-unit ELU hidden layer with dropout 0.5, Adam with lr=0.01.
model = Sequential([
    Dense(1024, input_shape=(2048,)),
    Activation('elu'),
    Dropout(0.5),
    Dense(num_classes),
    Activation('softmax'),
])

model.compile(
    loss=keras.losses.categorical_crossentropy,
    optimizer=keras.optimizers.Adam(lr=0.01),
    metrics=['accuracy'],
)

mlp_models.append(model)

In [18]:
# MLP variant: 512-unit ReLU hidden layer with dropout 0.5, Adam with lr=0.0001.
model = Sequential([
    Dense(512, input_shape=(2048,)),
    Activation('relu'),
    Dropout(0.5),
    Dense(num_classes),
    Activation('softmax'),
])

model.compile(
    loss=keras.losses.categorical_crossentropy,
    optimizer=keras.optimizers.Adam(lr=0.0001),
    metrics=['accuracy'],
)

mlp_models.append(model)

In [19]:
# Variant without a hidden layer: a single softmax layer on the 2048 inputs, Adam with lr=0.0001.
model = Sequential([
    Dense(num_classes, input_shape=(2048,)),
    Activation('softmax'),
])

model.compile(
    loss=keras.losses.categorical_crossentropy,
    optimizer=keras.optimizers.Adam(lr=0.0001),
    metrics=['accuracy'],
)

mlp_models.append(model)

In [20]:
# Number of epochs used whenever a Keras model is fitted by the functions below.
epochs = 5

Cross-validation of the neural networks is done with `mlp_models`, a list that holds several model architectures.

In [21]:
# Maps a model key (e.g. "svm") to the parameter grid searched for that model.
parameter_candidates = dict()

In [22]:
# Grid for the support vector classifier: linear kernel only.
svm_candidates = [
    {
        'C': [1, 10, 100, 1000],
        'kernel': ['linear'],
    },
    # {'C': [1, 10, 100, 1000], 'gamma': [0.001, 0.0001], 'kernel': ['rbf']},
]

In [23]:
# Grid for the random forest: ensemble size and split criterion.
rfe_candidates = [
    {
        'n_estimators': [10, 100, 1000],
        'criterion': ['gini', "entropy"],
    },
]

In [24]:
# Grid for AdaBoost: ensemble size, learning rate and boosting algorithm.
adb_candidates = [
    {
        'n_estimators': [100, 1000],
        'learning_rate': [1.0, 0.5, 0.1],
        'algorithm': ["SAMME.R", "SAMME"],
    },
]

In [25]:
# Grid for gradient boosting: ensemble size and learning rate, fixed criterion.
gbc_candidates = [
    {
        'n_estimators': [100, 1000],
        'learning_rate': [0.05, 0.1, 0.5],
        'criterion': ["friedman_mse"],
    },
]

In [26]:
# Grid for logistic regression: each penalty is listed with its own set of solvers.
lr_candidates = [
    {
        'penalty': ["l2"],
        'solver': ['newton-cg', 'saga', 'lbfgs'],
    },
    {
        'penalty': ["l1"],
        'solver': ['liblinear', 'saga'],
    },
]

In [27]:
# Grid for the decision tree classifier.
# "l1"/"l2" are not split criteria of DecisionTreeClassifier, so the grid
# search for this model could never fit; the classifier's impurity criteria
# are used instead.
dtc_candidates = [
    {'criterion': ["gini", "entropy"], 'splitter': ['best']}
]

In [28]:
# Grid for XGBoost: a single configuration.
xgbc_candidates = [
    {
        'max_depth': [10],
        'learning_rate': [0.1],
        'n_estimators': [1000],
        # objective, booster
    },
]

In [29]:
# Register every grid under the key that identifies its model in benchmark_models.
parameter_candidates.update({
    "svm": svm_candidates,
    "rfe": rfe_candidates,
    "adb": adb_candidates,
    "gbc": gbc_candidates,
    "lr": lr_candidates,
    "dtc": dtc_candidates,
    "xgbc": xgbc_candidates,
})

In [48]:
# One default-configured instance of every classical estimator.
rfe = RandomForestClassifier()
adb = AdaBoostClassifier()
gbc = GradientBoostingClassifier()
lr = LogisticRegression()
dtc = DecisionTreeClassifier()
xgbc = XGBClassifier()
svc = svm.SVC()
# Genetic Programming-based

# Model key -> model object; the keys match those used in parameter_candidates.
benchmark_models = {
    "rfe": rfe,
    "xgbc": xgbc,
    "adb": adb,
    "gbc": gbc,
    "lr": lr,
    "dtc": dtc,
    "svm": svc,
}
# The Keras models are added under the keys "mlp_0", "mlp_1", ...
for x, mlp_model in enumerate(mlp_models):
    benchmark_models["mlp_{}".format(x)] = mlp_model

In [49]:
def grid_search_model(benchmark_models, parameter_candidates, train, val, test):
    """Fit every benchmark model on one dataset and print its metrics.

    Models whose key has an entry in ``parameter_candidates`` are tuned with
    a 4-fold ``GridSearchCV`` and replaced by the best estimator found. All
    other models are fitted directly, Keras style, on one-hot encoded
    targets for ``epochs`` epochs (``epochs`` is a module-level variable).

    A model that fails to fit is reported together with the error and
    skipped. It stays in the returned mapping as the object that was passed in.

    Args:
        benchmark_models: dict mapping model key to model object. It is not
            modified.
        parameter_candidates: dict mapping model key to a parameter grid.
        train, val, test: ``(inputs, targets)`` pairs.

    Returns:
        Tuple ``(models, cv_results)``. ``models`` is a new dict with the
        same keys as ``benchmark_models``; ``cv_results`` maps the key of
        every grid-searched model to its ``cv_results_``.
    """
    cv_results = {}
    # Work on a copy: writing the fitted models back into the caller's dict
    # made every call share (and overwrite) one mapping.
    fitted_models = dict(benchmark_models)
    for model_key, model in benchmark_models.items():
        print ((" Model: " + str(model_key)+ " ").center(30, '#'))
        if model_key in parameter_candidates:
            try:
                clf = GridSearchCV(estimator=model, param_grid=parameter_candidates[model_key], n_jobs=-1, cv=4)
                clf.fit (*train)
            except Exception as exc:
                # Report the real cause instead of falling through to the
                # Keras branch and blaming the input shape.
                print("Grid search failed for {}: {!r}".format(model_key, exc))
                continue
            model = clf.best_estimator_
            cv_results[model_key] = clf.cv_results_
        else:
            try:
                model.fit(train[0], to_categorical(train[1]), epochs = epochs, validation_data = (val[0], to_categorical(val[1])))
            except Exception as exc:
                print("Problem with input shape")
                print("  ({} was skipped: {!r})".format(model_key, exc))
                continue

        train_eval = evaluate(model, train)
        val_eval = evaluate(model, val)
        test_eval = evaluate(model, test)
        print_measures(train_eval, "Train")
        print_measures(val_eval, "Validation")
        print_measures(test_eval, "Test")

        fitted_models[model_key] = model
    return fitted_models, cv_results


In [None]:
# Run the grid search once per dataset and keep the per-dataset models and CV tables.
final_models, cv_results = [], []
for ind in range(len(train_data)):
    m, cv = grid_search_model(
        benchmark_models,
        parameter_candidates,
        train_data[ind],
        val_data[ind],
        test_data[ind],
    )
    final_models.append(m)
    cv_results.append(cv)

######### Model: rfe #########
accuracy in Train: 1.00
confusion matrix
[[359   0   0   0   0   0   0   0   0   0]
 [  0 367   0   0   0   0   0   0   0   0]
 [  0   0 370   0   0   0   0   0   0   0]
 [  0   0   0 381   0   0   0   0   0   0]
 [  0   0   0   0 365   0   0   0   0   0]
 [  0   0   0   0   0 362   0   0   0   0]
 [  0   0   0   0   0   0 381   0   0   0]
 [  0   0   0   0   0   0   0 386   0   0]
 [  0   0   0   0   0   0   0   0 395   0]
 [  0   0   0   0   0   0   0   0   0 384]]
precision in Train: 1.00
recall in Train: 1.00
f1-score in Train: 1.00
cohen's kappa in Train: 1.00
accuracy in Validation: 0.14
confusion matrix
[[ 0  8 14 10  0 38  4 17 18  4]
 [ 5 26 21  3  0 45  2 20 10  1]
 [ 0 19 17 11  2  8 22 36 19  2]
 [ 6 37  6 11  4 10 20 29  8  2]
 [ 5 28 15 14  4  7 19 29  4  1]
 [ 1 15 11 11  5 20  8 21 24  4]
 [ 1 34 13 10  0  8 19 24 22  1]
 [ 7 18  9  6  1  4  7 54  7  1]
 [ 1  9 19 18  4 11 10 23 19  2]
 [ 1 20  7 11  0 12 26 35 14  1]]
precision in Validat

Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
accuracy in Train: 1.00
confusion matrix
[[359   0   0   0   0   0   0   0   0   0]
 [  0 367   0   0   0   0   0   0   0   0]
 [  0   0 370   0   0   0   0   0   0   0]
 [  0   0   0 381   0   0   0   0   0   0]
 [  0   0   0   0 365   0   0   0   0   0]
 [  0   0   0   0   0 362   0   0   0   0]
 [  0   0   0   0   0   0 381   0   0   0]
 [  0   0   0   0   0   0   0 386   0   0]
 [  0   0   0   0   0   0   0   0 395   0]
 [  0   0   0   0   0   0   0   0   0 384]]
precision in Train: 1.00
recall in Train: 1.00
f1-score in Train: 1.00
cohen's kappa in Train: 1.00
accuracy in Validation: 0.70
confusion matrix
[[104   0   4   0   2   0   1   0   0   2]
 [ 19  96   2   1   3   5   2   4   1   0]
 [  5   4  87   4   7   8   5   2   5   9]
 [  0   2   5  65  33   2   5  14   5   2]
 [  0   2  13  10  82   3   3  12   1   0]
 [  2   2   7   1   3  96   1   0   3   5]
 [  5   1   7   8   1   0  84  17   7   2]
 [  4   1   2   6   1   1  14  76   8   1

  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


accuracy in Train: 0.43
confusion matrix
[[196   0  98   0   0   0  65   0   0   0]
 [  0   0 156   0   0  64 123   0  24   0]
 [  0   0 311   0   0  33  22   0   4   0]
 [  0   0  37   0   0 116 167   0  61   0]
 [  0   0  44   0  21 126  94   0  80   0]
 [  0   0  14   0   0 344   4   0   0   0]
 [  0   0   2   0   0   2 367   0  10   0]
 [  0   0  11   0   0  17 281   0  77   0]
 [  0   0   2   0   0  19   8   0 366   0]
 [  4   0  59   0   0 148 169   0   4   0]]
precision in Train: 0.37
recall in Train: 0.43
f1-score in Train: 0.31
cohen's kappa in Train: 0.36
accuracy in Validation: 0.19
confusion matrix
[[ 96   0   0   0   0   0   8   0   9   0]
 [ 70   0   0   0   0   0  26   0  37   0]
 [ 22   0   0   0   1   0  19   0  94   0]
 [  4   0   0   0   0   0  41   0  88   0]
 [  5   0   0   0   4   2  41   0  74   0]
 [ 33   0   2   0   3   2   5   0  75   0]
 [  5   0   0   0   0   0  19   0 108   0]
 [  7   0   0   0   0   0  22   0  85   0]
 [  0   0   0   0   0   0   0   0 116 

accuracy in Train: 0.64
confusion matrix
[[308   9  14   1   2   5   4   3   0  13]
 [ 49 232  15   8  10  19  10   6  14   4]
 [ 16  13 213  19  14  46  13   4  11  21]
 [  1  12  16 243  50  10   8  15  13  13]
 [  0   6  29  87 180  10  12  13  26   2]
 [ 10  17  46  13   8 241   4   0   6  17]
 [  9   6  13  25  12   0 238  37  32   9]
 [  5  10   4  31  29   2  36 206  54   9]
 [  0   8  13  11  37   2  16  11 297   0]
 [ 21   8  18  23   0  27  17  10   4 256]]
precision in Train: 0.65
recall in Train: 0.64
f1-score in Train: 0.64
cohen's kappa in Train: 0.60
accuracy in Validation: 0.09
confusion matrix
[[ 4 21 23 31  8  3  0  5  2 16]
 [54 12 12  3  8  7  0  8  4 25]
 [14  5 39  9 17 23  1  7  2 19]
 [13  6 41  6  9 16  4  7  4 27]
 [13  1 53 11 11 11  1  3  2 20]
 [ 7  9 30 20 28  4  4  7  1 10]
 [12  9 40  5  4  9  7  2  1 43]
 [ 2  2 36  7  3 12  3  7  0 42]
 [15  0 41  8  9  8  1  0  1 33]
 [ 3 10 36 14 18  9  9  1  3 24]]
precision in Validation: 0.10
recall in Validation:



accuracy in Train: 0.97
confusion matrix
[[358   1   0   0   0   0   0   0   0   0]
 [  0 361   1   2   0   2   1   0   0   0]
 [  0   0 362   1   3   1   1   0   2   0]
 [  0   0   1 364   5   1   1   4   3   2]
 [  0   2   1  12 347   3   0   0   0   0]
 [  1   1   0   1   0 356   0   0   3   0]
 [  0   0   1   4   0   0 372   2   1   1]
 [  0   1   1   3   4   0   2 371   4   0]
 [  0   0   1   2   1   0   0   1 390   0]
 [  0   0   3   1   0   7   0   0   0 373]]
precision in Train: 0.97
recall in Train: 0.97
f1-score in Train: 0.97
cohen's kappa in Train: 0.97
accuracy in Validation: 0.07
confusion matrix
[[ 0  3  5 37  3 12 25  9  8 11]
 [ 1  1  7  5  6 18 23 17 24 31]
 [ 0  1 30 15  2 40 11 12 10 15]
 [ 0  0 61  0  2 15 13 10 16 16]
 [ 0  1 87  0  2 11  7  8  7  3]
 [ 0  0 34 11 10 18 12 16 16  3]
 [ 0  1 54  0  2 15  7  3  6 44]
 [ 0  2 61  6  0  8 10  6  2 19]
 [ 0  0 48  3  0 46  1  4  2 12]
 [ 0  4 25 23  5 14  8 14 18 16]]
precision in Validation: 0.05
recall in Validation:

  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


accuracy in Train: 1.00
confusion matrix
[[359   0   0   0   0   0   0   0   0   0]
 [  0 367   0   0   0   0   0   0   0   0]
 [  0   0 370   0   0   0   0   0   0   0]
 [  0   0   0 381   0   0   0   0   0   0]
 [  0   0   0   0 365   0   0   0   0   0]
 [  0   0   0   0   0 362   0   0   0   0]
 [  0   0   0   0   0   0 381   0   0   0]
 [  0   0   0   0   0   0   0 386   0   0]
 [  0   0   0   0   0   0   0   0 395   0]
 [  0   0   0   0   0   0   0   0   0 384]]
precision in Train: 1.00
recall in Train: 1.00
f1-score in Train: 1.00
cohen's kappa in Train: 1.00
accuracy in Validation: 0.08
confusion matrix
[[ 0  1 11 38  0 10 15 11  3 24]
 [ 0  1 14 12  2 32 13 16 14 29]
 [ 0  2 41 14  1 50  3  6 12  7]
 [ 0  1 63  4  1 23 12  8 14  7]
 [ 0  0 76  2  1 18  3  5  8 13]
 [ 0  3 45 15  4 18 10 13  8  4]
 [ 0  0 56  2  1 32  6  5  1 29]
 [ 0  2 52  5  1 13  6 12  3 20]
 [ 0  0 46  3  1 51  0  3  1 11]
 [ 0  2 27 15  2 24  5 23 17 12]]
precision in Validation: 0.06
recall in Validation:

<font color="red">The fully connected NNs also need k-fold cross-validation, so that they yield the repeated scores required for the statistical tests.</font>

In [None]:
# Show the per-dataset model dicts collected by the grid-search loop.
final_models

In [None]:
# Show the per-dataset cross-validation results collected by the grid-search loop.
cv_results

In [None]:
def train_model (benchmark_models, train, val):
    """Fit every model in ``benchmark_models`` in place.

    Each model is first fitted as ``model.fit(inputs, targets)``. If that
    raises ``ValueError``, it is fitted again Keras style: one-hot encoded
    targets, the module-level ``epochs`` and ``val`` as validation data.
    """
    for model in benchmark_models.values():
        try:
            model.fit(*train)
        except ValueError:
            model.fit(
                train[0],
                to_categorical(train[1]),
                epochs=epochs,
                validation_data=(val[0], to_categorical(val[1])),
            )

In [None]:
def evaluate_model(benchmark_models, train, val, test):
    """Print the train/validation/test metrics of every already-fitted model.

    ``evaluate`` returns a dict of metrics, so the results are printed with
    ``print_measures``. (Passing that dict to ``get_measures`` raised a
    ``TypeError`` on the ``%.2f`` format, so no metric was ever shown.)

    A model whose evaluation fails is reported and the loop continues with
    the next one.

    Args:
        benchmark_models: dict mapping model key to fitted model.
        train, val, test: ``(inputs, targets)`` pairs.
    """
    for model_key, model in benchmark_models.items():
        # Print the header first so that an error message below can be
        # attributed to a model.
        print ((" Model: " + str(model_key)+ " ").center(30, '#'))
        try:
            train_eval = evaluate(model, train)
            val_eval = evaluate(model, val)
            test_eval = evaluate(model, test)
            print_measures(train_eval, "Train")
            print_measures(val_eval, "Validation")
            print_measures(test_eval, "Test")
        except Exception as ex:
            template = "An exception of type {0} occurred. Arguments:\n{1!r}"
            message = template.format(type(ex).__name__, ex.args)
            print (message)

In [None]:
def train_and_evaluate_model (benchchmark_models, train, val, test, model_iterations=10):
    """Fit and evaluate every model ``model_iterations`` times.

    Args:
        benchchmark_models: dict mapping model key to model object. The
            spelling of the parameter name is kept so that existing keyword
            callers keep working.
        train, val, test: ``(inputs, targets)`` pairs.
        model_iterations: number of fit/evaluate rounds per model.

    Returns:
        Dict mapping model key to a list with one entry per round: the
        test-set evaluation dict, or the string "NaN" when the round failed.
    """
    # Use the models that were passed in. The body used to read the global
    # `benchmark_models`, silently ignoring this argument.
    models = benchchmark_models
    model_unseen_measures = {}
    for model_key, model in models.items():
        model_unseen_measures[model_key] = []
        for i in range(model_iterations):
            try:
                # NOTE(review): the same model object is fitted again in every
                # round. Whether a round starts from scratch depends on the
                # model's own fit() - confirm this for the Keras models before
                # treating the rounds as independent samples.
                try:
                    model.fit(*train)
                except ValueError:
                    # Keras-style fit: one-hot targets, module-level `epochs`.
                    model.fit(train[0], to_categorical(train[1]), epochs = epochs, validation_data = (val[0], to_categorical(val[1])))
                train_eval = evaluate(model, train)
                val_eval = evaluate(model, val)
                test_eval = evaluate(model, test)
                model_unseen_measures[model_key].append(test_eval)

                print ((" Model: " + str(model_key)+ " ").center(30, '#'))
                print_measures(train_eval, "Train")
                print_measures(val_eval, "Validation")
                print_measures(test_eval, "Test")
            except Exception as ex:
                template = "An exception of type {0} occurred. Arguments:\n{1!r}"
                message = template.format(type(ex).__name__, ex.args)
                print (message)
                model_unseen_measures[model_key].append("NaN")

    return model_unseen_measures

In [None]:
def test_results (model_results):
    p_values = {}
    for m_key in model_results.keys():
        cp_values = []
        ref_model = model_results[m_key]
        for m2_key in model_results.keys():
            comp_model = model_results[m2_key]
            # pv = test_significance(ref_model, comp_model)
            statistic, p_value = stats.ttest_ind(ref_model, comp_model)
            if statistic >= 0:
                cp_values.append(p_value)
            else:
                cp_values.append(-p_value)
        p_values [m_key] = cp_values
    pd_res = pd.DataFrame(p_values, index=model_results.keys())
    pd_res = pd_res[list(model_results.keys())]
    return pd_res

In [35]:
# For every dataset: fit and evaluate all benchmark models repeatedly, then
# print the table of pairwise significance tests. After the loop,
# `model_results` and `test_pd` hold the values of the last dataset.
for ind, _ in enumerate (train_data):
    print (("").center(60, '_'))
    print (("").center(60, '#'))
    print ((" Dataset Index: " + str(ind)+ " ").center(60, '#'))
    print (("").center(60, '#'))
    # train_model(benchmark_models, train_data[ind], val_data[ind])
    # evaluate_model(benchmark_models, train_data[ind], val_data[ind], test_data[ind])
    model_results = train_and_evaluate_model(benchmark_models, train_data[ind], val_data[ind], test_data[ind])
    test_pd = test_results (model_results)
    # A bare expression inside a loop is not displayed by the notebook, so
    # the table is printed explicitly.
    print (test_pd)

____________________________________________________________
############################################################
##################### Dataset Index: 0 #####################
############################################################
######### Model: rfe #########
Accuracy in Train: 99.84%
Accuracy in Validation: 10.24%
Accuracy in Test: 52.30%
######### Model: rfe #########
Accuracy in Train: 99.81%
Accuracy in Validation: 11.52%
Accuracy in Test: 52.00%
######### Model: rfe #########
Accuracy in Train: 99.79%
Accuracy in Validation: 9.84%
Accuracy in Test: 52.90%
######### Model: rfe #########
Accuracy in Train: 99.68%
Accuracy in Validation: 8.32%
Accuracy in Test: 50.90%
######### Model: rfe #########
Accuracy in Train: 99.81%
Accuracy in Validation: 8.56%
Accuracy in Test: 50.80%
######### Model: rfe #########
Accuracy in Train: 99.84%
Accuracy in Validation: 10.96%
Accuracy in Test: 51.80%
######### Model: rfe #########
Accuracy in Train: 99.71%
Accuracy in Validation: 

Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
######## Model: mlp_0 ########
Accuracy in Train: 100.00%
Accuracy in Validation: 9.36%
Accuracy in Test: 72.20%
Train on 3750 samples, validate on 1250 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
######## Model: mlp_0 ########
Accuracy in Train: 100.00%
Accuracy in Validation: 10.72%
Accuracy in Test: 72.20%
Train on 3750 samples, validate on 1250 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
######## Model: mlp_0 ########
Accuracy in Train: 100.00%
Accuracy in Validation: 10.32%
Accuracy in Test: 73.00%
Train on 3750 samples, validate on 1250 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
######## Model: mlp_0 ########
Accuracy in Train: 100.00%
Accuracy in Validation: 11.04%
Accuracy in Test: 72.90%
Train on 3750 samples, validate on 1250 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
######## Model: mlp_0 ########
Accuracy in Train: 100.00%
Accuracy in Validation: 10.88%
Accuracy in Test: 72.40%

Train on 3750 samples, validate on 1250 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
######## Model: mlp_1 ########
Accuracy in Train: 100.00%
Accuracy in Validation: 12.40%
Accuracy in Test: 74.40%
Train on 3750 samples, validate on 1250 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
######## Model: mlp_1 ########
Accuracy in Train: 100.00%
Accuracy in Validation: 12.00%
Accuracy in Test: 75.20%
Train on 3750 samples, validate on 1250 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
######## Model: mlp_1 ########
Accuracy in Train: 100.00%
Accuracy in Validation: 12.24%
Accuracy in Test: 74.30%
Train on 3750 samples, validate on 1250 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
######## Model: mlp_1 ########
Accuracy in Train: 100.00%
Accuracy in Validation: 13.20%
Accuracy in Test: 74.90%
Train on 3750 samples, validate on 1250 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
######## Model: mlp_1 ########
Accuracy in Train: 100.

Epoch 4/5
Epoch 5/5
######## Model: mlp_2 ########
Accuracy in Train: 100.00%
Accuracy in Validation: 13.20%
Accuracy in Test: 73.90%


NameError: name 'test_significance' is not defined

In [None]:
def test_significance(conf1, conf2, sig_level=0.05):
    """performs a statistical significance test on the passed lists, treating each list as coming from one distribution

        Args:
            conf1: list of first mse
            conf2: list of second mse
            sig_level: significance level to test for

        Returns:
            list of mse values from letting model run

    """
    statistic, p_value = stats.ttest_ind(conf1,conf2)
    if p_value<sig_level:
        if statistic>0:
            return conf2
        else:
            return conf1
    else:
        return False

- Save outputs to file
- Run each model multiple times to get statistical significance
- Automatically test for statistical significances

In [68]:
# Recompute the pairwise significance table from the current `model_results` and display it.
test_pd = test_results (model_results)
test_pd

  return (self.a < x) & (x < self.b)
  return (self.a < x) & (x < self.b)
  cond2 = cond0 & (x <= self.a)


Unnamed: 0,rfe,xgbc,adb,gbc,lr,dtc,svm,mlp_0,mlp_1,mlp_2
rfe,1.0,1.797128e-24,-6.067143e-24,8.260538e-24,1.934962e-25,-1.2654040000000001e-17,1.275433e-26,3.055909e-24,7.245837000000001e-25,4.428093999999999e-19
xgbc,-1.797128e-24,,-0.0,-2.929703e-23,4.285574e-257,-1.1425949999999999e-26,6.140541e-264,2.299097e-12,8.784935000000001e-17,3.869942e-05
adb,6.067143e-24,0.0,,4.68132e-48,1.3669339999999999e-278,1.154758e-12,2.921167e-279,1.818783e-34,8.98548e-35,1.415555e-24
gbc,-8.260538e-24,2.929703e-23,-4.68132e-48,1.0,5.157499000000001e-31,-2.827154e-26,5.919476e-36,4.43105e-16,4.389186e-19,1.099647e-07
lr,-1.934962e-25,-4.285574e-257,-1.3669339999999999e-278,-5.157499000000001e-31,1.0,-2.894083e-27,5.140132e-257,-0.05945087,1.263954e-09,0.8375546
dtc,1.2654040000000001e-17,1.1425949999999999e-26,-1.154758e-12,2.827154e-26,2.894083e-27,1.0,4.968795000000001e-28,1.8052549999999998e-26,7.225133e-27,3.061831e-22
svm,-1.275433e-26,-6.140541e-264,-2.921167e-279,-5.919476e-36,-5.140132e-257,-4.968795000000001e-28,1.0,-2.556793e-16,-2.049406e-11,-1.352906e-06
mlp_0,-3.055909e-24,-2.299097e-12,-1.818783e-34,-4.43105e-16,0.05945087,-1.8052549999999998e-26,2.556793e-16,1.0,2.101365e-08,0.4680721
mlp_1,-7.245837000000001e-25,-8.784935000000001e-17,-8.98548e-35,-4.389186e-19,-1.263954e-09,-7.225133e-27,2.049406e-11,-2.101365e-08,1.0,-0.0103215
mlp_2,-4.428093999999999e-19,-3.869942e-05,-1.415555e-24,-1.099647e-07,-0.8375546,-3.061831e-22,1.352906e-06,-0.4680721,0.0103215,1.0


In [66]:
# Display the collected results as a table: one column per model, one row per run.
pd.DataFrame(model_results)

Unnamed: 0,adb,dtc,gbc,lr,mlp_0,mlp_1,mlp_2,rfe,svm,xgbc
0,0.339,0.386,0.69,0.73,0.722,0.744,0.689,0.523,0.765,0.705
1,0.339,0.397,0.69,0.73,0.722,0.752,0.726,0.52,0.765,0.705
2,0.339,0.383,0.689,0.73,0.73,0.743,0.735,0.529,0.765,0.705
3,0.339,0.405,0.69,0.73,0.729,0.749,0.736,0.509,0.765,0.705
4,0.339,0.404,0.691,0.73,0.724,0.743,0.737,0.508,0.765,0.705
5,0.339,0.38,0.69,0.73,0.725,0.744,0.739,0.518,0.765,0.705
6,0.339,0.389,0.691,0.73,0.73,0.743,0.736,0.512,0.765,0.705
7,0.339,0.378,0.691,0.73,0.734,0.746,0.736,0.521,0.765,0.705
8,0.339,0.388,0.69,0.73,0.732,0.738,0.737,0.507,0.765,0.705
9,0.339,0.391,0.691,0.73,0.725,0.751,0.739,0.511,0.765,0.705
