In [540]:
import numpy as np
import pandas as pd

from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, GradientBoostingClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from xgboost import XGBClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn import svm
from sklearn.metrics import accuracy_score, confusion_matrix, precision_score, recall_score, f1_score, roc_auc_score, cohen_kappa_score
from sklearn.model_selection import GridSearchCV, train_test_split, KFold

import keras
from keras.utils import to_categorical
from keras.models import Sequential
from keras.layers import Dense, Activation, Dropout

from scipy import stats
import glob
import time

In [457]:
folder_name= "20180616_2158"

In [458]:
curr_path = "351_Data\\ILSVRC2012_Validation\\Intermediate\\" + folder_name + "\\"

In [459]:
model_names = [fn.split("\\")[-1] for fn in glob.glob(curr_path + "*")]

In [460]:
model_names

['Inception ResNet', 'InceptionV3', 'Xception']

In [461]:
train_data = []
test_data = []

In [462]:
for fn in model_names:
    train_data.append(np.load(curr_path + str(fn) + "\\train.npy"))
    test_data.append(np.load(curr_path + str(fn) + "\\test.npy"))

In [463]:
train_data[0][0][1]

array([822], dtype=int64)

In [464]:
def iterate_over_datasets(datasets):
    ret_data = []
    for data in datasets:
        # rem here
        n_data = split_input_target(data)
        ret_data.append(n_data)
    return ret_data

In [465]:
def split_input_target (data):
    target = []
    inp = []
    for d in data:
        target.append(d[1][0])
        inp.append(d[0])
    return [np.array(inp), target]

In [466]:
# temp: only test purposes; remember to remove from iterate_over_datasets
import random
def split_input_target_special (data):
    target = []
    inp = []
    for d in data:
        target.append(random.randrange(0,2))
        inp.append(d[0])
    return [np.array(inp), target]

In [467]:
train_data = iterate_over_datasets(train_data)

In [468]:
test_data = iterate_over_datasets(test_data)

In [469]:
train_data[0][1]

[0, 0, 1, 0, 0, 1, 0]

In [470]:
def evaluate(model, data):
    y_pred = model.predict(data[0])
    try: predictions = [round(value) for value in y_pred]
    except: predictions = [np.argmax(value) for value in y_pred]
    # evaluate predictions
    d = {}
    d["accuracy"] = accuracy_score(data[1], predictions)
    d["confusion matrix"] = confusion_matrix(data[1], predictions)
    d["precision"] = precision_score(data[1], predictions, average='macro')
    d["recall"] = recall_score(data[1], predictions, average='macro')
    d["f1-score"] = f1_score(data[1], predictions, average='macro')
    # d["roc-auc"] = roc_auc_score(data[1], predictions, )
    d["cohen's kappa"] = cohen_kappa_score(data[1], predictions)
    
    return d

In [471]:
def get_measures(measure, data_type="data", measure_name="Accuracy"):
    return "%s in %s: %.2f" % (measure_name, data_type, measure)

In [472]:
def print_measures(evaluation, t):
    for key in evaluation.keys():
        if key!="confusion matrix":
            print (get_measures(evaluation[key], t, key))
        else:
            print(key)
            print(evaluation[key])

In [473]:
def get_data_from_sources (training_src, test_src, validation_src=""):
    train = get_data_from_source(training_src)
    test = get_data_from_source(test_src)
    try:
        if validation_src !="":
            val = get_data_from_source(validation_src)
        else:
            inp_test, inp_val, target_test, target_val = train_test_split(*test)
            test = (inp_test, target_test)
            val = (inp_val, target_val)
    except: 
        inp_test, inp_val, target_test, target_val = train_test_split(*test)
        test = (inp_test, target_test)
        val = (inp_val, target_val)
    return train, val, test

In [474]:
def create_model_A (input_shape, num_classes):
    model = Sequential()
    model.add(Dense(1024, input_shape=input_shape))
    model.add(Activation('elu'))
    model.add(Dropout(0.5))
    model.add(Dense(num_classes))
    model.add(Activation('softmax'))

    model.compile(loss=keras.losses.categorical_crossentropy,
                  optimizer=keras.optimizers.Adam(lr=0.0001),
                  metrics=['accuracy'])
    return model

In [475]:
def create_model_B (input_shape, num_classes):
    model = Sequential()
    model.add(Dense(512, input_shape=input_shape))
    model.add(Activation('relu'))
    model.add(Dropout(0.5))
    model.add(Dense(num_classes))
    model.add(Activation('softmax'))

    model.compile(loss=keras.losses.categorical_crossentropy,
                  optimizer=keras.optimizers.Adam(lr=0.0001),
                  metrics=['accuracy'])
    return model

In [476]:
def create_model_C (input_shape, num_classes):
    model = Sequential()
    model.add(Dense(num_classes, input_shape=input_shape))
    model.add(Activation('softmax'))

    model.compile(loss=keras.losses.categorical_crossentropy,
                  optimizer=keras.optimizers.Adam(lr=0.0001),
                  metrics=['accuracy'])
    return model

In [477]:
def create_models(input_shape, num_classes):
    mlp_models = []
    mlp_models.append(create_model_A(input_shape, num_classes))
    mlp_models.append(create_model_B(input_shape, num_classes))
    mlp_models.append(create_model_C(input_shape, num_classes))
    return mlp_models

In [478]:
mlp_models = [{}]*len(train_data)
num_classes = len(set(train_data[0][1]))
for tr_ind, tr_data in enumerate(train_data):
    input_shape = tr_data[0][0].shape
    temp_models = create_models(input_shape, num_classes)
    for mi, mm in enumerate(mlp_models):
        mm[model_names[tr_ind]] = temp_models[mi]

In [479]:
mlp_models

[{'Inception ResNet': <keras.models.Sequential at 0x16ea138c240>,
  'InceptionV3': <keras.models.Sequential at 0x16e9fe38b38>,
  'Xception': <keras.models.Sequential at 0x16ead32de80>},
 {'Inception ResNet': <keras.models.Sequential at 0x16ea138c240>,
  'InceptionV3': <keras.models.Sequential at 0x16e9fe38b38>,
  'Xception': <keras.models.Sequential at 0x16ead32de80>},
 {'Inception ResNet': <keras.models.Sequential at 0x16ea138c240>,
  'InceptionV3': <keras.models.Sequential at 0x16e9fe38b38>,
  'Xception': <keras.models.Sequential at 0x16ead32de80>}]

In [480]:
epochs=1

In [481]:
svm_candidates = [
  {'C': [1, 10, 100, 1000], 'kernel': ['linear']},
  # {'C': [1, 10, 100, 1000], 'gamma': [0.001, 0.0001], 'kernel': ['rbf']},
]

In [482]:
rfe_candidates = [
  {'n_estimators': [10, 100, 1000], 'criterion': ['gini', "entropy"]}
]

In [483]:
adb_candidates = [
  {'n_estimators': [10, 100, 1000], 'learning_rate': [1.0, 0.5, 0.1], 'algorithm':["SAMME.R", "SAMME"]}
]

In [484]:
gbc_candidates = [
  {'n_estimators': [10, 100, 1000], 'learning_rate': [0.05, 0.1, 0.5], 'criterion':["friedman_mse", "mae"]}
]

In [485]:
lr_candidates= [
    {'penalty': ["l1", "l2"]}
]

In [486]:
dtc_candidates= [
    {'criterion': ["l1", "l2"], 'splitter':['best']}
]

In [487]:
xgbc_candidates = [
    {
        'max_depth':[10], 'learning_rate':[0.1], 'n_estimators':[1000]     # objective, booster
    }
]

In [488]:
parameter_candidates = {}
parameter_candidates ["svm"] = svm_candidates
parameter_candidates["rfe"] = rfe_candidates
parameter_candidates ["adb"] = adb_candidates
parameter_candidates["gbc"] = gbc_candidates
parameter_candidates["lr"] = lr_candidates
parameter_candidates ["dtc"] = dtc_candidates
parameter_candidates["xgbc"] = xgbc_candidates

In [535]:
rfe = RandomForestClassifier()
adb = AdaBoostClassifier()
gbc = GradientBoostingClassifier()
lr = LogisticRegression()
dtc = DecisionTreeClassifier()#
xgbc = XGBClassifier()
svc = svm.SVC()
# Genetic Programming-based

# benchmark_models = {"rfe":rfe, "xgbc":xgbc, "adb":adb, "gbc":gbc, "lr":lr, "dtc":dtc, "svm":svc}
# benchmark_models = {"rfe":rfe, "lr":lr, "svm":svc}
benchmark_models = {}
for x in range(len(mlp_models)):
    benchmark_models["mlp_{}".format(x)] = mlp_models[x]

In [536]:
def grid_search_model(benchmark_models, parameter_candidates, train, test,model_name, folds = 2):
    cv_results = {}
    for model_key in benchmark_models.keys():
        model = benchmark_models[model_key]
        print ((" Model: " + str(model_key)+ " ").center(30, '#'))
        try:
            clf = GridSearchCV(estimator=model, param_grid=parameter_candidates[model_key], n_jobs=-1, cv=folds)
            clf.fit (*train)
            model = clf.best_estimator_
            cv_results[model_key] = clf.cv_results_
        except:
                model = benchmark_models[model_key][model_name]
                kf = KFold(n_splits=folds)
                cv_results[model_key] = []
                target_cat = np.array(to_categorical(train[1]))
                for train_index, test_index in kf.split(train,):
                    X_train, X_test = train[0][train_index], train[0][test_index]
                    y_train, y_test = target_cat[train_index], target_cat[test_index]
                    print(y_train)
                    history = model.fit(X_train, y_train, epochs = epochs, validation_data = (X_test, y_test))
                    train_res = model.evaluate(X_train, y=y_train)
                    cv_results[model_key].append(train_res)
                # benchmark_models[model_key] = model
        train_eval = evaluate(model, train)
        test_eval = evaluate(model, test)
        print_measures(train_eval, "Train")
        print_measures(test_eval, "Test")  
        
        benchmark_models[model_key][model_name] = model
    return benchmark_models, cv_results

In [539]:
final_models = []
cv_results = []
for ind, _ in enumerate(train_data):
    
    print (("").center(60, '_'))
    print (("").center(60, '#'))
    print ((" Dataset Index: " + str(ind)+ " ").center(60, '#'))
    print (("").center(60, '#'))
    
    m,cv = grid_search_model(benchmark_models, parameter_candidates, train_data[ind], test_data[ind], model_names[ind])
    final_models.append(m)
    cv_results.append(cv)

____________________________________________________________
############################################################
##################### Dataset Index: 0 #####################
############################################################
######## Model: mlp_0 ########
{'mlp_0': {'Inception ResNet': <keras.models.Sequential object at 0x0000016EA138C240>, 'InceptionV3': <keras.models.Sequential object at 0x0000016E9FE38B38>, 'Xception': <keras.models.Sequential object at 0x0000016EAD32DE80>}, 'mlp_1': {'Inception ResNet': <keras.models.Sequential object at 0x0000016EA138C240>, 'InceptionV3': <keras.models.Sequential object at 0x0000016E9FE38B38>, 'Xception': <keras.models.Sequential object at 0x0000016EAD32DE80>}, 'mlp_2': {'Inception ResNet': <keras.models.Sequential object at 0x0000016EA138C240>, 'InceptionV3': <keras.models.Sequential object at 0x0000016E9FE38B38>, 'Xception': <keras.models.Sequential object at 0x0000016EAD32DE80>}}
[[ 1.  0.]]
Train on 1 samples, validate on 1 

  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)
  'recall', 'true', average, warn_for)



precision in Train: 0.29
recall in Train: 0.50
f1-score in Train: 0.36
cohen's kappa in Train: 0.00
accuracy in Test: 0.00
confusion matrix
[[0 0]
 [3 0]]
precision in Test: 0.00
recall in Test: 0.00
f1-score in Test: 0.00
cohen's kappa in Test: 0.00
######## Model: mlp_2 ########
{'mlp_0': {'Inception ResNet': <keras.models.Sequential object at 0x0000016EA138C240>, 'InceptionV3': <keras.models.Sequential object at 0x0000016E9FE38B38>, 'Xception': <keras.models.Sequential object at 0x0000016EAD32DE80>}, 'mlp_1': {'Inception ResNet': <keras.models.Sequential object at 0x0000016EA138C240>, 'InceptionV3': <keras.models.Sequential object at 0x0000016E9FE38B38>, 'Xception': <keras.models.Sequential object at 0x0000016EAD32DE80>}, 'mlp_2': {'Inception ResNet': <keras.models.Sequential object at 0x0000016EA138C240>, 'InceptionV3': <keras.models.Sequential object at 0x0000016E9FE38B38>, 'Xception': <keras.models.Sequential object at 0x0000016EAD32DE80>}}
[[ 1.  0.]]
Train on 1 samples, valida