In [None]:
import sys
sys.path.append("../")
    
def moving_average(l, n=10):
    """Return the simple moving average of ``l`` over a sliding window of ``n`` items.

    Args:
        l: iterable of numbers to smooth.
        n: window length; must be a positive integer.

    Returns:
        A list with one average per full window, in input order. The list is
        empty when ``l`` yields fewer than ``n`` items.

    Raises:
        ValueError: if ``n`` is smaller than 1.
    """
    if n < 1:
        # n == 0 used to fail with a ZeroDivisionError and a negative n with an
        # IndexError; reject both up front with an explicit message instead.
        raise ValueError("window size n must be a positive integer, got {!r}".format(n))

    # cumsum[k] is the sum of the first k items, so the sum of any window is
    # the difference between two prefix sums.
    cumsum, moving_aves = [0], []
    for i, x in enumerate(l, 1):
        cumsum.append(cumsum[i-1] + x)
        if i >= n:
            moving_aves.append((cumsum[i] - cumsum[i-n]) / n)
    return moving_aves

In [None]:
# Series parsed from the meta-training history log. Every parsed line has the
# form "<TITLE> <first>,<second>": the first number goes to the *_loss list and
# the second to the *_acc list selected by the title.
meta_train_train_loss, meta_train_valid_loss = [], []
meta_train_train_acc, meta_train_valid_acc = [], []
meta_valid_train_loss, meta_valid_valid_loss = [], []
meta_valid_train_acc, meta_valid_valid_acc = [], []

meta_train_loss_ratio = []
meta_train_acc_ratio = []
meta_valid_loss_ratio = []
meta_valid_acc_ratio = []

# Line title -> (loss list, accuracy list) that receives the two values.
history_series = {
    "META_TRAIN_TRAIN:": (meta_train_train_loss, meta_train_train_acc),
    "META_TRAIN_VALID:": (meta_train_valid_loss, meta_train_valid_acc),
    "META_VALID_TRAIN:": (meta_valid_train_loss, meta_valid_train_acc),
    "META_VALID_VALID:": (meta_valid_valid_loss, meta_valid_valid_acc),
    "META_TRAIN_RATIO:": (meta_train_loss_ratio, meta_train_acc_ratio),
    "META_VALID_RATIO:": (meta_valid_loss_ratio, meta_valid_acc_ratio),
}

# `with` closes the log file as soon as parsing is done instead of leaving the
# handle to the garbage collector.
with open('../log/meta_training_history.txt') as history_file:
    for meta in history_file:
        meta = meta.split()
        if len(meta) < 2:
            # Blank or title-only line: there are no values to parse.
            continue
        title = meta[0]
        if title not in history_series:
            # Lines with an unknown title are ignored.
            continue
        meta_results = meta[1].split(',')
        loss_series, acc_series = history_series[title]
        loss_series.append(float(meta_results[0]))
        acc_series.append(float(meta_results[1]))

avg_n = 16

# Only the meta-train series are smoothed; the meta-valid series stay raw.
meta_train_train_loss = moving_average(meta_train_train_loss, n=avg_n)
meta_train_valid_loss = moving_average(meta_train_valid_loss, n=avg_n)
meta_train_train_acc = moving_average(meta_train_train_acc, n=avg_n)
meta_train_valid_acc = moving_average(meta_train_valid_acc, n=avg_n)
meta_train_loss_ratio = moving_average(meta_train_loss_ratio, n=avg_n)
meta_train_acc_ratio = moving_average(meta_train_acc_ratio, n=avg_n)

In [None]:
import matplotlib.pyplot as plt

# Lines starting with this prefix delimit the configuration dump in
# train-meta-model.log. NOTE(review): presumably the year of the log
# timestamps -- update it for logs written in another year.
LOG_TIMESTAMP_PREFIX = '2018'

# Collect every non-prefixed line that appears before the second prefixed line.
training_conf_log = []
seen_prefixed_line = False
# `with` guarantees the log file is closed once the header has been read.
with open('../log/train-meta-model.log') as log_file:
    for line in log_file:
        if line.startswith(LOG_TIMESTAMP_PREFIX):
            if seen_prefixed_line:
                break  # second prefixed line: the configuration dump is over
            seen_prefixed_line = True
        else:
            training_conf_log.append(line)

print('TRAINING CONFIGURATION:')
print(''.join(training_conf_log))

plt.rcParams["figure.figsize"] = (16,10)


def show_line_plot(title, series, with_legend=True):
    """Plot every ``(label, values)`` pair of ``series`` as a '-o' line and show the figure.

    Args:
        title: text placed above the plot.
        series: sequence of ``(label, values)`` pairs, drawn in the given order.
        with_legend: when true, add a legend listing the drawn lines.
    """
    handles = [plt.plot(values, '-o', label=label)[0] for label, values in series]
    if with_legend:
        plt.legend(handles=handles)
    plt.title(title)
    plt.show()


# Meta-train curves (smoothed with a moving average of avg_n points).
show_line_plot(
    "Average meta-batch loss (meta-train, moving average n = {})".format(avg_n),
    [('train', meta_train_train_loss), ('valid', meta_train_valid_loss)])

show_line_plot(
    "Average meta-batch accuracy (meta-train, moving average n = {})".format(avg_n),
    [('train', meta_train_train_acc), ('valid', meta_train_valid_acc)])

show_line_plot(
    "Average meta-batch ratio of valid loss to train loss (meta-train, moving average n = {})".format(avg_n),
    [('train', meta_train_loss_ratio)], with_legend=False)

# Accuracy-ratio plot is currently disabled:
# show_line_plot(
#     "Average meta-batch ratio of valid accuracy to train accuracy (meta-train, moving average n = {})".format(avg_n),
#     [('train', meta_train_acc_ratio)], with_legend=False)

# Meta-valid curves (raw values).
show_line_plot(
    "Average loss (meta-valid)",
    [('train', meta_valid_train_loss), ('valid', meta_valid_valid_loss)])

show_line_plot(
    "Average accuracy (meta-valid)",
    [('train', meta_valid_train_acc), ('valid', meta_valid_valid_acc)])

show_line_plot(
    "Average ratio of valid loss to train loss (meta-valid)",
    [('train', meta_valid_loss_ratio)], with_legend=False)

# Accuracy-ratio plot is currently disabled:
# show_line_plot(
#     "Average ratio of valid accuracy to train accuracy (meta-valid)",
#     [('train', meta_valid_acc_ratio)], with_legend=False)

In [None]:
# Prepare data for comparison of meta-optimizer with SGD and Adam

import os
from src.datasets.cifar import load_cifar100, cifar_input_shape
from src.datasets.metadataset import load_meta_dataset

from src.training.training_configuration import read_configuration
from src.model.learner.simple_cnn import build_simple_cnn
from keras.optimizers import SGD

# The training configuration is read from $CONF_DIR/training_configuration.yml.
conf_dir = os.environ['CONF_DIR']
train_conf_path = os.path.join(conf_dir, 'training_configuration.yml')
conf = read_configuration(train_conf_path)

# load_cifar100() returns four values; only X_train is used in this cell.
cifar_splits = load_cifar100()
X_train, y_train, X_test, y_test = cifar_splits

meta_dataset_path = '../data/cifar100_64_64_2.h5'
meta_dataset = load_meta_dataset(meta_dataset_path, X_train)

# Build the learner, restore its saved weights from $LOG_DIR and keep a copy of
# them in `initial_learner_weights` for the later search/comparison calls.
learner_classes = conf.classes_per_learner_set
learner = build_simple_cnn(cifar_input_shape, learner_classes)

log_dir = os.environ['LOG_DIR']
learner_weights_path = os.path.join(log_dir, "learner_weights.h5")
learner.load_weights(learner_weights_path)
initial_learner_weights = learner.get_weights()

# Default learning rates; the grid-search cells reassign them when they are run.
best_sgd_lr, best_adam_lr = 0.025, 0.0007

# compile() requires an optimizer; a zero-learning-rate SGD is a placeholder.
dummy_optimizer = SGD(lr=0.0)
learner.compile(loss='categorical_crossentropy', optimizer=dummy_optimizer)

In [None]:
# find best LR for SGD optimizer

from src.utils.comparison import learning_rate_grid_search
from keras.optimizers import SGD

trainings_per_dataset = 4


def sgd_factory(lr):
    """Return a new Keras SGD optimizer configured with learning rate ``lr``."""
    return SGD(lr=lr)


# Arguments of the SGD search, gathered in one place for readability.
sgd_search_kwargs = dict(
    optimizer_factory=sgd_factory,
    meta_dataset=meta_dataset,
    lr_values=[0.02, 0.025, 0.03],
    n_learner_batches=conf.n_learner_batches,
    learner_batch_size=conf.learner_batch_size,
    learner=learner,
    trainings_per_dataset=trainings_per_dataset,
    initial_learner_weights=initial_learner_weights,
)
best_sgd_lr = learning_rate_grid_search(**sgd_search_kwargs)

print("Best SGD LR:", best_sgd_lr)

In [None]:
# find best LR for Adam optimizer

from src.utils.comparison import learning_rate_grid_search
from keras.optimizers import Adam

trainings_per_dataset = 4


def adam_factory(lr):
    """Return a new Keras Adam optimizer configured with learning rate ``lr``."""
    return Adam(lr=lr)


# Arguments of the Adam search, gathered in one place for readability.
adam_search_kwargs = dict(
    optimizer_factory=adam_factory,
    meta_dataset=meta_dataset,
    lr_values=[0.00065, 0.00070, 0.00075],
    n_learner_batches=conf.n_learner_batches,
    learner_batch_size=conf.learner_batch_size,
    learner=learner,
    trainings_per_dataset=trainings_per_dataset,
    initial_learner_weights=initial_learner_weights,
)
best_adam_lr = learning_rate_grid_search(**adam_search_kwargs)

print("Best Adam LR:", best_adam_lr)

In [None]:
from src.utils.comparison import compare_optimizers
from keras.optimizers import SGD, Adam
from src.model.meta_learner.lstm_model import lstm_meta_learner

# Build the LSTM meta-learner around `learner` with the settings stored in `conf`.
meta_learner_settings = dict(
    learner=learner,
    hidden_state_size=conf.hidden_state_size,
    backpropagation_depth=conf.backpropagation_depth,
    initial_learning_rate=conf.initial_lr,
    debug_mode=conf.debug_mode,
)
meta_model = lstm_meta_learner(**meta_learner_settings)

# Both the loss and the zero-learning-rate SGD are placeholders: the loss is
# not used here and the optimizer is a dummy.
meta_model.predict_model.compile(
    loss='mae',
    optimizer=SGD(lr=0.0),
)

# Restore the best meta-learner weights saved under $LOG_DIR.
log_dir = os.environ['LOG_DIR']
best_meta_learner_weights_path = os.path.join(log_dir, "meta_weights_best.h5")
meta_model.load_weights(best_meta_learner_weights_path)

meta_optimizer = meta_model.predict_model

# Order matters: the results cell reads the outcomes back by position as
# meta, sgd, adam.
sgd_optimizer = SGD(lr=best_sgd_lr)
adam_optimizer = Adam(lr=best_adam_lr)
optimizers = [meta_optimizer, sgd_optimizer, adam_optimizer]

trainings_per_dataset = 4

comparison_kwargs = dict(
    meta_dataset=meta_dataset,
    optimizers=optimizers,
    n_learner_batches=conf.n_learner_batches,
    learner_batch_size=conf.learner_batch_size,
    learner=learner,
    trainings_per_dataset=trainings_per_dataset,
    initial_learner_weights=initial_learner_weights,
)
comparison_results = compare_optimizers(**comparison_kwargs)

In [None]:
import matplotlib.pyplot as plt
import numpy as np

comparison_results = [np.array(r) for r in comparison_results]

# Names of the compared optimizers, in the same order as the `optimizers` list
# that was handed to compare_optimizers.
optimizer_names = ['meta', 'sgd', 'adam']

for results, name in zip(comparison_results, optimizer_names):
    print("\n" + "*" * 50)
    print(name + " optimizer:")
    print("average loss:", results.mean())
    print("loss std:", results.std())
    print("min loss:", results.min())
    print("max loss:", results.max())

print("*" * 50)

plt.rcParams["figure.figsize"] = (16,10)


def report_loss_difference(first_name, first_losses, second_name, second_losses):
    """Plot and summarise the element-wise loss difference ``first - second``.

    Draws a 100-bin histogram of the differences, then prints the share of
    positive differences (first optimizer worse) together with the largest
    and smallest difference.

    Args:
        first_name: lower-case name of the first optimizer (e.g. ``'sgd'``).
        first_losses: losses obtained with the first optimizer.
        second_name: lower-case name of the second optimizer.
        second_losses: losses obtained with the second optimizer, paired
            position by position with ``first_losses``.

    Returns:
        The list of differences, one per paired training.
    """
    differences = [first - second for first, second in zip(first_losses, second_losses)]
    first_upper, second_upper = first_name.upper(), second_name.upper()

    plt.hist(differences, bins=100)
    plt.title("{} vs {} (loss with {} - loss with {})".format(
        first_upper, second_upper, first_name, second_name))
    plt.show()

    # A positive difference means the first optimizer finished with the higher loss.
    worse_count = sum(1 for difference in differences if difference > 0)
    worse_percent = worse_count / len(differences) * 100
    print("{} was worse than {} in {}% trainings".format(
        first_upper, second_upper, worse_percent))
    print("{} was worst compared to {} when its loss was greater by {}".format(
        first_upper, second_upper, max(differences)))
    print("{} was best compared to {} when its loss was less by {}".format(
        first_upper, second_upper, min(differences)))
    return differences


losses_by_optimizer = dict(zip(optimizer_names, comparison_results))

sgd_vs_adam = report_loss_difference(
    'sgd', losses_by_optimizer['sgd'], 'adam', losses_by_optimizer['adam'])

meta_vs_sgd = report_loss_difference(
    'meta', losses_by_optimizer['meta'], 'sgd', losses_by_optimizer['sgd'])

meta_vs_adam = report_loss_difference(
    'meta', losses_by_optimizer['meta'], 'adam', losses_by_optimizer['adam'])