In [145]:
import numpy as np
import pandas as pd

from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, GradientBoostingClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from xgboost import XGBClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn import svm
from sklearn.metrics import accuracy_score, confusion_matrix, precision_score, recall_score, f1_score, roc_auc_score, cohen_kappa_score
from sklearn.model_selection import GridSearchCV, train_test_split, KFold

import keras
from keras.utils import to_categorical
from keras.models import Sequential
from keras.layers import Dense, Activation, Dropout

from scipy import stats
import glob
import time

In [146]:
# Name of the run sub-folder to load; it is appended to the intermediate-results
# path built in the next cell.
folder_name= "20180617_2043"

In [147]:
# Directory of the selected run; later cells glob it for per-model sub-folders
# and load train.npy / test.npy from each.
# NOTE(review): the hard-coded "\\" separators make this path Windows-specific.
curr_path = "351_Data\\ILSVRC2012_Validation\\Intermediate\\" + folder_name + "\\"

In [148]:
# Every entry found directly under curr_path is treated as a model name:
# keep only the last backslash-separated component of each matched path.
model_names = []
for entry_path in glob.glob(curr_path + "*"):
    model_names.append(entry_path.split("\\")[-1])

In [149]:
# Display the discovered model names as a sanity check.
model_names

['Inception ResNet', 'InceptionV3', 'Xception']

In [150]:
# Start from two fresh, independent lists; the loading loop appends one array per model.
train_data, test_data = [], []

In [151]:
# Load the saved train/test arrays of every model, in model_names order.
# Each sample is indexed below as a (features, label-array) pair, which NumPy stores
# as a pickled object array, so allow_pickle=True is required on NumPy >= 1.16.3
# (the default became False). Pickle can execute arbitrary code on load:
# only point this at files you produced yourself.
for fn in model_names:
    train_data.append(np.load(curr_path + str(fn) + "\\train.npy", allow_pickle=True))
    test_data.append(np.load(curr_path + str(fn) + "\\test.npy", allow_pickle=True))

In [152]:
# Peek at slot 1 of the first sample of the first dataset; split_input_target
# reads the label from element 0 of this slot.
train_data[0][0][1]

array([8], dtype=int64)

In [153]:
def iterate_over_datasets(datasets):
    """Split every dataset into inputs and targets.

    Applies split_input_target to each element of `datasets` and returns the
    results as a new list, in the same order.
    """
    # rem here: this is the call to revert if split_input_target_special was swapped in for testing
    return [split_input_target(dataset) for dataset in datasets]

In [154]:
def split_input_target (data):
    """Separate samples into an input array and a list of labels.

    Each sample `d` contributes `d[0]` as its input and `d[1][0]` as its label.
    Returns a two-element list: [np.array of inputs, list of labels].
    """
    pairs = [(sample[1][0], sample[0]) for sample in data]
    labels = [label for label, _ in pairs]
    features = [feature for _, feature in pairs]
    return [np.array(features), labels]

In [155]:
# temp: test-only variant; remember to remove any use of it from iterate_over_datasets
import random
def split_input_target_special (data):
    """Like split_input_target, but with random binary labels.

    Each sample `d` contributes `d[0]` as its input; its label is ignored and
    replaced by a fresh draw of random.randrange(0, 2).
    Returns a two-element list: [np.array of inputs, list of labels].
    """
    # Draw the label before reading the input so the RNG is consumed once per sample, in order.
    drawn = [(random.randrange(0,2), sample[0]) for sample in data]
    labels = [label for label, _ in drawn]
    features = [feature for _, feature in drawn]
    return [np.array(features), labels]

In [156]:
# Replace each raw training array with its [inputs, targets] pair.
# NOTE(review): this overwrites train_data, so re-running the cell without reloading
# feeds already-split data back into the splitter.
train_data = iterate_over_datasets(train_data)

In [157]:
# Replace each raw test array with its [inputs, targets] pair.
# NOTE(review): this overwrites test_data, so re-running the cell without reloading
# feeds already-split data back into the splitter.
test_data = iterate_over_datasets(test_data)

In [158]:
def reindex_classes(data):
    """Shift the labels of every dataset so that the smallest label becomes 0.

    Parameters
    ----------
    data : sequence of [inputs, labels] lists
        Each element is modified in place: its labels slot (index 1) is replaced
        by a new list of `label - min(labels)` values.

    Returns
    -------
    The same `data` object, for call chaining.

    Notes
    -----
    The offset is computed per dataset, so two datasets with different minimum
    labels are shifted by different amounts.
    """
    for ds in data:
        labels = ds[1]
        if len(labels) > 0:
            # Compute the minimum once instead of once per element.
            offset = min(labels)
            ds[1] = [x - offset for x in labels]
        else:
            # Nothing to shift; min() would raise on an empty sequence.
            ds[1] = []
    return data

In [159]:
# Make the class labels of every dataset start at 0.
# NOTE(review): train and test are shifted by their own minimum label; this assumes
# both splits contain the smallest class - confirm.
train_data = reindex_classes(train_data)
test_data = reindex_classes(test_data)

In [160]:
def evaluate(model, data):
    """Score `model` on an [inputs, targets] pair.

    Parameters
    ----------
    model : object with a `predict(inputs)` method
        Either returns one label per sample (1-D output) or one score per class
        per sample (2-D output, as the softmax networks in this notebook do).
    data : sequence
        `data[0]` holds the inputs, `data[1]` the true integer labels.

    Returns
    -------
    dict with the keys "accuracy", "confusion matrix", "precision", "recall",
    "f1-score" and "cohen's kappa" (precision/recall/F1 are macro-averaged).
    """
    inputs, targets = data[0], data[1]
    y_pred = np.asarray(model.predict(inputs))

    # Decide on the output layout explicitly instead of relying on round() raising:
    # several scores per sample -> take the arg-max class, otherwise round the value.
    if y_pred.ndim > 1 and y_pred.shape[-1] > 1:
        predictions = [int(np.argmax(scores)) for scores in y_pred]
    else:
        predictions = [int(round(float(value))) for value in y_pred.ravel()]

    # evaluate predictions
    d = {}
    d["accuracy"] = accuracy_score(targets, predictions)
    d["confusion matrix"] = confusion_matrix(targets, predictions)
    d["precision"] = precision_score(targets, predictions, average='macro')
    d["recall"] = recall_score(targets, predictions, average='macro')
    d["f1-score"] = f1_score(targets, predictions, average='macro')
    # ROC-AUC is intentionally left out (it was disabled in the original as well).
    d["cohen's kappa"] = cohen_kappa_score(targets, predictions)

    return d

In [161]:
def get_measures(measure, data_type="data", measure_name="Accuracy"):
    """Format one metric as '<measure_name> in <data_type>: <measure>' with two decimals."""
    template = "%s in %s: %.2f"
    fields = (measure_name, data_type, measure)
    return template % fields

In [162]:
def print_measures(evaluation, t):
    """Print every entry of an evaluation dict.

    Scalar metrics go through get_measures (labelled with `t`); the
    "confusion matrix" entry is printed as its name followed by the matrix itself.
    """
    for name, value in evaluation.items():
        if name == "confusion matrix":
            print(name)
            print(value)
            continue
        print (get_measures(value, t, name))

In [163]:
def get_data_from_sources (training_src, test_src, validation_src=""):
    """Load train/validation/test data, carving validation out of test if needed.

    Parameters
    ----------
    training_src, test_src :
        Passed to get_data_from_source (defined elsewhere; presumably returns an
        (inputs, targets) pair - confirm).
    validation_src : optional
        Source of the validation data. When left as "" - or when loading it
        fails - the test data is split with train_test_split instead and both
        `test` and `val` come from that split.

    Returns
    -------
    (train, val, test)
    """
    import warnings  # local import: only needed for the fallback notice

    train = get_data_from_source(training_src)
    test = get_data_from_source(test_src)

    val = None
    val_loaded = False
    try:
        if validation_src !="":
            val = get_data_from_source(validation_src)
            val_loaded = True
    except Exception as exc:
        # Keep the best-effort fallback, but say why it was taken.
        warnings.warn("Could not load validation data (%r); splitting the test data instead." % (exc,))

    if not val_loaded:
        inp_test, inp_val, target_test, target_val = train_test_split(*test)
        test = (inp_test, target_test)
        val = (inp_val, target_val)
    return train, val, test

In [164]:
def create_model_A (input_shape, num_classes):
    """Build and compile MLP variant A.

    Architecture: Dense(1024) -> ELU -> Dropout(0.5) -> Dense(num_classes) -> softmax,
    compiled with categorical cross-entropy, Adam (lr=0.0001) and an accuracy metric.
    """
    layers = [
        Dense(1024, input_shape=input_shape),
        Activation('elu'),
        Dropout(0.5),
        Dense(num_classes),
        Activation('softmax'),
    ]
    model = Sequential(layers)

    adam = keras.optimizers.Adam(lr=0.0001)
    model.compile(loss=keras.losses.categorical_crossentropy,
                  optimizer=adam,
                  metrics=['accuracy'])
    return model

In [165]:
def create_model_B (input_shape, num_classes):
    """Build and compile MLP variant B.

    Architecture: Dense(512) -> ReLU -> Dropout(0.5) -> Dense(num_classes) -> softmax,
    compiled with categorical cross-entropy, Adam (lr=0.0001) and an accuracy metric.
    """
    model = Sequential()
    stack = (
        Dense(512, input_shape=input_shape),
        Activation('relu'),
        Dropout(0.5),
        Dense(num_classes),
        Activation('softmax'),
    )
    for layer in stack:
        model.add(layer)

    model.compile(
        loss=keras.losses.categorical_crossentropy,
        optimizer=keras.optimizers.Adam(lr=0.0001),
        metrics=['accuracy'],
    )
    return model

In [166]:
def create_model_C (input_shape, num_classes):
    """Build and compile MLP variant C: one softmax layer, no hidden layer.

    Architecture: Dense(num_classes) -> softmax, compiled with categorical
    cross-entropy, Adam (lr=0.0001) and an accuracy metric.
    """
    output_layer = Dense(num_classes, input_shape=input_shape)
    model = Sequential([output_layer, Activation('softmax')])

    compile_args = dict(
        loss=keras.losses.categorical_crossentropy,
        optimizer=keras.optimizers.Adam(lr=0.0001),
        metrics=['accuracy'],
    )
    model.compile(**compile_args)
    return model

In [167]:
def create_models(input_shape, num_classes):
    """Return freshly built MLP variants A, B and C (in that order) as a list."""
    builders = (create_model_A, create_model_B, create_model_C)
    return [build(input_shape, num_classes) for build in builders]

In [168]:
# Build one dict per MLP architecture, each mapping dataset/model name -> compiled network.
#
# The dicts must be independent objects: `[{}] * n` repeats a reference to ONE dict,
# so every architecture slot would end up holding the same (last written) network.
# The list length also has to follow the number of architectures returned by
# create_models, not the number of datasets.
num_classes = len(set(train_data[0][1]))  # class count is taken from the first dataset only
mlp_models = []
for tr_ind, tr_data in enumerate(train_data):
    input_shape = tr_data[0][0].shape  # shape of a single input sample
    temp_models = create_models(input_shape, num_classes)
    # Add architecture slots on demand (all of them on the first pass).
    while len(mlp_models) < len(temp_models):
        mlp_models.append({})
    for mi, mm in enumerate(temp_models):
        mlp_models[mi][model_names[tr_ind]] = mm

In [169]:
# Number of epochs for every Keras model.fit call made in grid_search_model (read as a global there).
epochs=20

In [170]:
# Grid for the support vector classifier: linear kernel over four values of C.
svm_candidates = [
    dict(C=[1, 10, 100, 1000], kernel=['linear']),
    # Disabled RBF grid:
    # {'C': [1, 10, 100, 1000], 'gamma': [0.001, 0.0001], 'kernel': ['rbf']},
]

In [171]:
# Grid for the random forest (registered under the "rfe" key).
rfe_candidates = [
    dict(
        n_estimators=[10, 100, 1000],
        criterion=['gini', 'entropy'],
    ),
]

In [172]:
# Grid for AdaBoost: ensemble size x learning rate x boosting algorithm.
adb_candidates = [
    dict(
        n_estimators=[10, 100, 1000],
        learning_rate=[1.0, 0.5, 0.1],
        algorithm=['SAMME.R', 'SAMME'],
    ),
]

In [173]:
# Grid for gradient boosting: ensemble size x learning rate x split criterion.
gbc_candidates = [
    dict(
        n_estimators=[10, 100, 1000],
        learning_rate=[0.05, 0.1, 0.5],
        criterion=['friedman_mse', 'mae'],
    ),
]

In [174]:
# Grid for logistic regression: L1 versus L2 regularisation.
lr_candidates = [dict(penalty=['l1', 'l2'])]

In [175]:
# Grid for the decision tree.
# 'criterion' must name an impurity measure; "l1"/"l2" are regularisation penalties
# (copied from the logistic-regression grid) and made every grid-search fit fail.
dtc_candidates= [
    {'criterion': ["gini", "entropy"], 'splitter':['best']}
]

In [176]:
# Single-point "grid" for XGBoost (objective and booster are left at their defaults).
xgbc_candidates = [
    dict(max_depth=[10], learning_rate=[0.1], n_estimators=[1000]),
]

In [177]:
# Parameter grids keyed by the same short names used in benchmark_models.
parameter_candidates = {
    "svm": svm_candidates,
    "rfe": rfe_candidates,
    "adb": adb_candidates,
    "gbc": gbc_candidates,
    "lr": lr_candidates,
    "dtc": dtc_candidates,
    "xgbc": xgbc_candidates,
}

In [178]:
# Unfitted prototype estimators, all with default settings; the grids in
# parameter_candidates supply the values that are searched.
rfe = RandomForestClassifier()
adb = AdaBoostClassifier()
gbc = GradientBoostingClassifier()
lr = LogisticRegression()
dtc = DecisionTreeClassifier()
xgbc = XGBClassifier()
svc = svm.SVC()
# Genetic Programming-based

# Registry of everything that gets benchmarked, keyed by short name.
# (A smaller subset such as rfe / lr / svm can be used for quick runs.)
benchmark_models = dict(rfe=rfe, xgbc=xgbc, adb=adb, gbc=gbc, lr=lr, dtc=dtc, svm=svc)

# Each MLP architecture is registered as "mlp_<index>" and maps to its
# per-dataset dict of networks.
for x, mlp_group in enumerate(mlp_models):
    benchmark_models["mlp_{}".format(x)] = mlp_group

In [181]:
def grid_search_model(benchmark_models, parameter_candidates, train, test,model_name, folds = 2):
    """Fit and evaluate every benchmark model on one dataset.

    Parameters
    ----------
    benchmark_models : dict
        Maps a model key to either an unfitted scikit-learn style estimator or,
        for the Keras networks, a dict mapping dataset name -> compiled network.
        The dict itself is not modified.
    parameter_candidates : dict
        Maps a model key to the `param_grid` handed to GridSearchCV.
    train, test : sequence
        [inputs, targets] pairs; inputs must support index-array selection.
    model_name : str
        Name of the current dataset; selects the network inside dict entries.
    folds : int
        Number of cross-validation folds for both code paths.

    Returns
    -------
    (models, cv_results)
        `models` is a shallow copy of `benchmark_models` in which every estimator
        that was fitted and evaluated successfully is replaced by the best estimator
        found for this dataset (dict entries are shared with the input and hold the
        trained networks). `cv_results` maps model key -> GridSearchCV.cv_results_,
        or a list of per-fold `model.evaluate` results for the networks.

    Notes
    -----
    A failure in one model is reported and does not stop the remaining models.
    The number of training epochs comes from the module-level `epochs`.
    """
    # Estimators cannot take `estimator[model_name] = fitted`, so results are
    # collected in a copy instead of being written back into the prototypes.
    fitted_models = dict(benchmark_models)
    cv_results = {}
    for model_key, entry in benchmark_models.items():
        print ((" Model: " + str(model_key)+ " ").center(30, '#'))
        try:
            if isinstance(entry, dict):
                # Keras path: manual K-fold training of the network built for this dataset.
                # NOTE(review): the same network instance is fitted on every fold, so
                # training continues from the previous fold rather than restarting.
                model = entry[model_name]
                kf = KFold(n_splits=folds)
                cv_results[model_key] = []
                target_cat = np.array(to_categorical(train[1]))
                for train_index, test_index in kf.split(train[0]):
                    X_train, X_test = train[0][train_index], train[0][test_index]
                    y_train, y_test = target_cat[train_index], target_cat[test_index]
                    model.fit(X_train, y_train, epochs = epochs, validation_data = (X_test, y_test))
                    train_res = model.evaluate(X_train, y=y_train)
                    cv_results[model_key].append(train_res)
            else:
                # scikit-learn path: exhaustive grid search; the search works on
                # clones, so the prototype in benchmark_models stays unfitted.
                clf = GridSearchCV(estimator=entry, param_grid=parameter_candidates[model_key], n_jobs=-1, cv=folds)
                clf.fit (*train)
                model = clf.best_estimator_
                cv_results[model_key] = clf.cv_results_

            train_eval = evaluate(model, train)
            test_eval = evaluate(model, test)
            print_measures(train_eval, "Train")
            print_measures(test_eval, "Test")

            if not isinstance(entry, dict):
                fitted_models[model_key] = model
        except Exception as exc:
            # Best effort: report which model failed and why, then carry on.
            print("%s failed: %r" % (model_key, exc))
    return fitted_models, cv_results

In [None]:
# Run the full benchmark once per dataset and collect the results in dataset order.
final_models = []
cv_results = []
for ind in range(len(train_data)):

    # Banner separating the output of consecutive datasets.
    hash_rule = "#" * 60
    print ("_" * 60)
    print (hash_rule)
    print ((" Dataset Index: " + str(ind)+ " ").center(60, '#'))
    print (hash_rule)

    m,cv = grid_search_model(
        benchmark_models,
        parameter_candidates,
        train_data[ind],
        test_data[ind],
        model_names[ind],
    )
    final_models.append(m)
    cv_results.append(cv)

____________________________________________________________
############################################################
##################### Dataset Index: 0 #####################
############################################################
######### Model: rfe #########
accuracy in Train: 1.00
confusion matrix
[[40  0  0  0  0  0  0  0  0  0]
 [ 0 38  0  0  0  0  0  0  0  0]
 [ 0  0 36  0  0  0  0  0  0  0]
 [ 0  0  0 42  0  0  0  0  0  0]
 [ 0  0  0  0 34  0  0  0  0  0]
 [ 0  0  0  0  0 40  0  0  0  0]
 [ 0  0  0  0  0  0 31  0  0  0]
 [ 0  0  0  0  0  0  0 37  0  0]
 [ 0  0  0  0  0  0  0  0 41  0]
 [ 0  0  0  0  0  0  0  0  0 36]]
precision in Train: 1.00
recall in Train: 1.00
f1-score in Train: 1.00
cohen's kappa in Train: 1.00
accuracy in Test: 0.94
confusion matrix
[[ 9  0  1  0  0  0  0  0  0  0]
 [ 0 12  0  0  0  0  0  0  0  0]
 [ 0  0 13  1  0  0  0  0  0  0]
 [ 0  0  0  8  0  0  0  0  0  0]
 [ 0  3  0  0 13  0  0  0  0  0]
 [ 0  0  0  0  0 10  0  0  0  0]
 [ 2  0  0  0  