Imports

In [None]:
%matplotlib notebook
import matplotlib as mpl
mpl.rcParams['figure.figsize'] = 9.5, 6
import matplotlib.pyplot as plt

import keras as K
import numpy as np
import tensorflow as tf
import sklearn as skl
import sklearn.model_selection as skl_model_selection
import itertools
import time
from pathlib import Path
import shutil

Obtain and Preprocess Data

In [None]:
%%time
TRUTH_CLASSES = 10
INPUT_DIM = 48

(input_train_orig, truth_train_orig), (input_test_orig, truth_test_orig) = K.datasets.cifar10.load_data()

input_train_big_tensor = tf.image.resize_images(input_train_orig, [INPUT_DIM, INPUT_DIM])
input_test_big_tensor  = tf.image.resize_images(input_test_orig, [INPUT_DIM, INPUT_DIM])

with K.backend.get_session().as_default():
    input_train_big = input_train_big_tensor.eval().astype('uint8')
    input_test_big  = input_test_big_tensor.eval().astype('uint8')
    
input_train_preprocessed = K.applications.vgg19.preprocess_input(input_train_big)
input_test_preprocessed = K.applications.vgg19.preprocess_input(input_test_big)
truth_train = K.utils.to_categorical(truth_train_orig, TRUTH_CLASSES)
truth_test = K.utils.to_categorical(truth_test_orig, TRUTH_CLASSES)

Create Callbacks

In [None]:
def get_model_name(num):
    """Return the identifier of ensemble member ``num`` (e.g. ``"model-7"``).

    The identifier is used as a prefix/sub-directory for everything the
    notebook writes for that member.
    """
    template = "model-{}"
    return template.format(num)

def get_tb_callback(name):
    """Create the TensorBoard callback for the model called ``name``.

    Logs go to ``./exp-graphs/<name>``; writing the graph definition is
    switched off (``write_graph=False``).
    """
    return K.callbacks.TensorBoard(
        write_graph=False,
        log_dir="./exp-graphs/" + name,
    )

def get_checkpointing_callback(name):
    """Create the ModelCheckpoint callback for the model called ``name``.

    Checkpoints are written to ``./exp-models/<name>-epoch-NN.hdf5``; the
    ``{epoch:02d}`` placeholder is left in the path for Keras to fill in.
    ``save_best_only=True`` is passed, so a file is only written for epochs
    where the monitored quantity improved.
    """
    checkpoint_pattern = "".join(("./exp-models/", name, "-epoch-{epoch:02d}.hdf5"))
    return K.callbacks.ModelCheckpoint(
        filepath=checkpoint_pattern,
        verbose=1,
        save_best_only=True,
    )

Encapsulation of training

In [None]:
# The "overtrained" variant of each model is trained up to this multiple of
# the epoch at which the validation loss was lowest.
OVERTRAINING_FACTOR = 2.5


def _build_vgg19_classifier():
    """Return a compiled classifier: frozen ImageNet VGG19 base + trainable dense head."""
    vgg19 = K.applications.vgg19.VGG19(
        include_top=False,
        weights='imagenet',
        input_shape=(INPUT_DIM, INPUT_DIM, 3),
    )

    # Only the newly added dense layers are trained; the VGG19 layers stay fixed.
    for layer in vgg19.layers:
        layer.trainable = False

    x = K.layers.Flatten()(vgg19.output)
    x = K.layers.Dense(261, activation='relu')(x)
    output = K.layers.Dense(TRUTH_CLASSES, activation='softmax')(x)
    model = K.models.Model(vgg19.input, output)

    model.compile(optimizer='sgd', loss='categorical_crossentropy', metrics=['accuracy'])
    return model


def _checkpoint_epoch(checkpoint_path):
    """Return the epoch number encoded at the end of a ``<name>-epoch-NN.hdf5`` file name."""
    return int(checkpoint_path.stem.rsplit("-", 1)[1])


def create_model(model_no, initial_epochs=6, batch=32):
    """Train (or reuse from disk) ensemble member ``model_no``.

    The member is trained on a bootstrap sample (drawn with replacement,
    seeded with ``model_no``) of the preprocessed training set. Two snapshots
    are stored under ``./exp-models``:

    * ``<name>-best.hdf5`` -- the checkpoint with the lowest validation loss
      seen during the first ``initial_epochs`` epochs;
    * ``<name>-overtrained.hdf5`` -- the model after training is continued up
      to epoch ``int(best_epoch * OVERTRAINING_FACTOR)``.

    If both files already exist, training is skipped and the existing paths
    are returned.

    Args:
        model_no: Number of the ensemble member; also used as the NumPy seed.
        initial_epochs: Epochs of the first training phase, during which the
            best checkpoint is selected.
        batch: Mini-batch size for both training phases.

    Returns:
        Tuple ``(best_weights, overtrained_weights)`` of ``pathlib.Path``
        objects pointing at the two saved models.

    Raises:
        RuntimeError: If the first training phase produced no checkpoint
            (e.g. the validation loss never took a comparable value).
    """
    K.backend.clear_session()
    np.random.seed(model_no)

    name = get_model_name(model_no)
    models_dir = Path("./exp-models")
    # Distinct file names: previously both paths pointed at the
    # "-overtrained" file, so the "best" model was never actually available.
    best_weights = models_dir / (name + "-best.hdf5")
    overtrained_weights = models_dir / (name + "-overtrained.hdf5")
    # Must match the directory used by get_tb_callback ("./exp-graphs/<name>").
    graphs_dir = Path("./exp-graphs") / name
    # Must match the file pattern used by get_checkpointing_callback.
    epoch_checkpoint_glob = name + "-epoch-*.hdf5"

    # Retrain unless BOTH artifacts are present; a half-finished earlier run
    # must not be mistaken for a complete one.
    if not (best_weights.exists() and overtrained_weights.exists()):
        print(">>> Begin train for model", model_no)
        models_dir.mkdir(parents=True, exist_ok=True)

        # Bootstrap sample of the training set. randint's upper bound is
        # exclusive, so it has to be the sample count itself for the last
        # sample to be selectable.
        num_samples = len(input_train_preprocessed)
        bag_indexes = np.random.randint(0, num_samples, num_samples)
        x_train_bag = input_train_preprocessed[bag_indexes]
        y_train_bag = truth_train[bag_indexes]

        # Remove leftovers of earlier runs so that the checkpoint picked
        # below is guaranteed to come from this run.
        if best_weights.exists():
            best_weights.unlink()
        for stale_checkpoint in models_dir.glob(epoch_checkpoint_glob):
            stale_checkpoint.unlink()
        if graphs_dir.exists():
            shutil.rmtree(str(graphs_dir))

        tensorboard_callback = get_tb_callback(name)
        checkpoint_callback = get_checkpointing_callback(name)

        model = _build_vgg19_classifier()

        # NOTE(review): the validation split is taken from the bootstrap
        # sample, which was drawn with replacement, so validation images may
        # also occur in the training part.
        h = model.fit(
            x_train_bag,
            y_train_bag,
            epochs=initial_epochs,
            batch_size=batch,
            shuffle=True,
            verbose=2,
            validation_split=0.2,
            callbacks=[tensorboard_callback, checkpoint_callback]
        )
        best_epoch = np.argmin(h.history["val_loss"]) + 1
        overtrained_epoch = int(best_epoch * OVERTRAINING_FACTOR)
        print(">>> Best epoch found:", best_epoch, "Overtraining to:", overtrained_epoch)

        # The checkpoint callback only writes a file when the monitored value
        # improved, so the highest-numbered checkpoint of this run is the
        # best model. Copy it to its stable name.
        saved_checkpoints = list(models_dir.glob(epoch_checkpoint_glob))
        if not saved_checkpoints:
            raise RuntimeError(
                "No checkpoint was written for {name}; cannot determine the best model".format(name=name)
            )
        best_checkpoint = max(saved_checkpoints, key=_checkpoint_epoch)
        shutil.copyfile(str(best_checkpoint), str(best_weights))

        # NOTE(review): if overtrained_epoch <= initial_epochs this call is
        # expected to train no further epochs, i.e. the "overtrained" model
        # is then the state after the first phase -- confirm this is intended.
        model.fit(
            x_train_bag,
            y_train_bag,
            initial_epoch=initial_epochs,
            epochs=overtrained_epoch,
            batch_size=batch,
            shuffle=True,
            verbose=1,
            validation_split=0.2,
            callbacks=[tensorboard_callback]
        )
        # str() keeps this working with Keras/h5py versions that do not accept Path objects.
        model.save(str(overtrained_weights))

    print(">>> Obtained best and overtrained for model", model_no)
    return best_weights, overtrained_weights

Train all the networks and obtain predictions on the test dataset

In [None]:
# Train (or load from disk) every ensemble member, then evaluate it and
# collect its class-probability predictions on the test set.
NUM_MODELS = 100
Path("./exp-models").mkdir(exist_ok=True)
# Per-model evaluate() results ([loss, accuracy], as the models are compiled
# with metrics=['accuracy']).
accuracy_best = []
accuracy_over = []
# Per-model predicted class probabilities for the test set.
predictions_best = []
predictions_over = []
for model_no in range(1, NUM_MODELS+1):

    # TODO: if predictions and accuracy files do not exist - run evaluations
    # and predictions and save results (caching is not implemented yet).
    best_weights_path, overtrained_weights_path = create_model(model_no)

    # str() keeps this working with Keras/h5py versions that do not accept Path objects.
    bm = K.models.load_model(str(best_weights_path))
    om = K.models.load_model(str(overtrained_weights_path))

    # Evaluation metrics belong in the accuracy_* lists; mixing them into
    # predictions_* would make those lists ragged and break the averaging below.
    accuracy_best.append(bm.evaluate(input_test_preprocessed, truth_test, verbose=1))
    accuracy_over.append(om.evaluate(input_test_preprocessed, truth_test, verbose=1))

    predictions_best.append(bm.predict(input_test_preprocessed, verbose=1))
    predictions_over.append(om.predict(input_test_preprocessed, verbose=1))

# Ensemble prediction = mean of the members' predicted probabilities,
# computed separately for the "best" and the "overtrained" ensembles.
ensemble_pred_best = np.mean(np.array(predictions_best), axis=0)
ensemble_pred_over = np.mean(np.array(predictions_over), axis=0)

Calculate Loss and Accuracy for the Ensembles

In [None]:
def _ensemble_metrics(probabilities, one_hot_truth, eps=1e-7):
    """Return ``[loss, accuracy]`` of predicted class probabilities.

    Args:
        probabilities: Array of predicted class probabilities, one row per sample.
        one_hot_truth: One-hot encoded true classes, same layout as ``probabilities``.
        eps: Probabilities are clipped to ``[eps, 1 - eps]`` before taking the
            logarithm so that a predicted probability of 0 yields a finite loss.

    Returns:
        ``[loss, accuracy]`` as Python floats: the mean categorical
        cross-entropy and the fraction of samples whose most probable class
        equals the true class.
    """
    probabilities = np.asarray(probabilities)
    one_hot_truth = np.asarray(one_hot_truth)
    clipped = np.clip(probabilities, eps, 1.0 - eps)
    # Cross-entropy takes the log of the PREDICTION, weighted by the truth
    # (the two must not be swapped).
    loss = float(np.mean(-np.sum(one_hot_truth * np.log(clipped), axis=-1)))
    hits = np.argmax(probabilities, axis=-1) == np.argmax(one_hot_truth, axis=-1)
    accuracy = float(np.mean(hits))
    return [loss, accuracy]


# Computed with NumPy so that concrete numbers (not symbolic tensors) are printed.
metrics_best = _ensemble_metrics(ensemble_pred_best, truth_test)
metrics_over = _ensemble_metrics(ensemble_pred_over, truth_test)
print("Best:", metrics_best)
print("Over:", metrics_over)

Create confusion matrices 