### Import and init GPU

In [1]:
from IPython.display import Audio

In [2]:
import sys
sys.path.append('../../../')

In [3]:
import numpy as np
import math

%matplotlib inline
import matplotlib.pyplot as plt
import seaborn

import pickle
import argparse
import time
import itertools
from copy import deepcopy
import tensorflow as tf

from tuner import HyperparameterTuner
from tuner import MyTask

use_tpu = False
use_gpu = True

if use_tpu:
    from tensorflow.contrib import tpu
    from tensorflow.contrib.cluster_resolver import TPUClusterResolver

if use_gpu:
    import os
    
%load_ext autoreload
%autoreload 2

In [4]:
if use_gpu:
    os.environ['CUDA_VISIBLE_DEVICES'] = '0'
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

In [5]:
if use_tpu:
    tpu_cluster = TPUClusterResolver(tpu=[tpu_name]).get_master()
    sess = tf.Session(tpu_cluster)
    sess.run(tpu.initialize_system())
elif use_gpu:
    sess = tf.Session(config=config)
else:
    sess = tf.Session()

In [6]:
from resnet import Network

In [7]:
network = Network()

In [8]:
task_home = ''
if use_tpu:
    pass
#     task_home = 'gs://continual_learning/permMNIST_EWC/'
else:
    task_home = '../../../../'

cur_dir = './'
checkpoint_path = cur_dir + 'checkpoints_99_1_0_distill/'
summaries_path = cur_dir + 'summaries_99_1_0_distill/'
data_path = task_home + 'cifar-100-python/'
split_path = './split.txt' 
if use_tpu:
    tpu_name = 'gectpu'
    
BATCH_SIZE = 128

### Dataset loading functions

In [9]:
label_smooth_param = 0

def splitDataset(dataset, dataset_split, seed):
    np.random.seed(seed)
    task_list = []
    train_labels = np.argmax(dataset.train.labels, axis=1)
    validation_labels = np.argmax(dataset.validation.labels, axis=1)
    test_labels = np.argmax(dataset.test.labels, axis=1)
    for i in range(len(dataset_split)):
        cur_train_indices = [False] * dataset.train.images.shape[0]
        cur_validation_indices = [False] * dataset.validation.images.shape[0]
        cur_test_indices = [False] * dataset.test.images.shape[0]
        for j in range(len(dataset_split[i])):
            cur_train_indices = np.logical_or(cur_train_indices, (train_labels == dataset_split[i][j]))
            cur_validation_indices = np.logical_or(cur_validation_indices, (validation_labels == dataset_split[i][j]))
            cur_test_indices = np.logical_or(cur_test_indices, (test_labels == dataset_split[i][j]))

        task = deepcopy(dataset)
        task.train.images = task.train.images[cur_train_indices]
        task.train.labels = task.train.labels[cur_train_indices]
        task.validation.images = task.validation.images[cur_validation_indices]
        task.validation.labels = task.validation.labels[cur_validation_indices]
        task.test.images = task.test.images[cur_test_indices]
        task.test.labels = task.test.labels[cur_test_indices]
        task = MyTask(task)
        task_list.append(task)

    return task_list
    
def smoothLabels(dataset):
    train_labels = dataset.train.labels
    train_labels_argmax = np.argmax(train_labels, axis=1)
    train_labels = train_labels + label_smooth_param / (train_labels.shape[1] - 1)
    train_labels[range(train_labels.shape[0]), train_labels_argmax] = 1 - label_smooth_param
    dataset.train._labels = train_labels

class TempDataset(object):
    def __init__(self):
        self.images = None
        self.labels = None
    
class TempTask(object):
    def __init__(self):
        self.train = TempDataset()
        self.validation = TempDataset()
        self.test = TempDataset()
    
    
def readDatasets():
    num_class = 100
    class_per_task = 2
    k = 0
    labels_list = list(range(num_class))
    seed = 0
    np.random.seed(seed)
    np.random.shuffle(labels_list)
    split = []
    task_weights = []
    
    split = [range(99), [99]]
    task_weights = [0.99, 0.01]
    num_tasks = len(split)
    
    with open(data_path + 'train', 'rb') as f:
        f_train_data = pickle.load(f, encoding='bytes')
        
    with open(data_path + 'test', 'rb') as f:
        f_test_data = pickle.load(f, encoding='bytes')
        
    cifar_100 = TempTask()
    temp_train_labels = np.array(f_train_data[b'fine_labels'], dtype=np.int32)
    temp_test_labels = np.array(f_test_data[b'fine_labels'], dtype=np.int32)
    f_train_data[b'fine_labels'] = np.zeros((temp_train_labels.shape[0], num_class))
    (f_train_data[b'fine_labels'])[range(temp_train_labels.shape[0]), temp_train_labels] = 1
    f_test_data[b'fine_labels'] = np.zeros((temp_test_labels.shape[0], num_class))
    (f_test_data[b'fine_labels'])[range(temp_test_labels.shape[0]), temp_test_labels] = 1
    f_train_data[b'data'] = np.reshape(f_train_data[b'data'], (-1, 3, 32, 32))
    f_test_data[b'data'] = np.reshape(f_test_data[b'data'], (-1, 3, 32, 32))
    f_train_data[b'data'] = np.transpose(f_train_data[b'data'], (0, 2, 3, 1))
    f_test_data[b'data'] = np.transpose(f_test_data[b'data'], (0, 2, 3, 1))
    
    tr_data = f_train_data[b'data']
    te_data = f_test_data[b'data']
    # normalizing data
    avg = np.mean(tr_data, axis=(0, 1, 2))
    std = np.std(tr_data, axis=(0, 1, 2))
    
    f_train_data[b'data'] = (tr_data - avg) / std
    f_test_data[b'data'] = (te_data - avg) / std
    
    seed = 0
    np.random.seed(0)
    shuffle_train_perm = np.random.permutation(f_train_data[b'data'].shape[0])
    f_train_data[b'data'] = f_train_data[b'data'][shuffle_train_perm]
    f_train_data[b'fine_labels'] = f_train_data[b'fine_labels'][shuffle_train_perm]
    
    num_val_per_class = 20
    
    for i in range(num_class):
        pos = (np.argmax(f_train_data[b'fine_labels'], axis=1) == i)
        
        if (i == 0):
            cifar_100.validation.images = (f_train_data[b'data'][pos])[0 : num_val_per_class]
            cifar_100.validation.labels = (f_train_data[b'fine_labels'][pos])[0 : num_val_per_class]

            cifar_100.train.images = (f_train_data[b'data'][pos])[num_val_per_class : ]
            cifar_100.train.labels = (f_train_data[b'fine_labels'][pos])[num_val_per_class : ]
        else:
            cifar_100.validation.images = np.concatenate((cifar_100.validation.images, (f_train_data[b'data'][pos])[0 : num_val_per_class]))
            cifar_100.validation.labels = np.concatenate((cifar_100.validation.labels, (f_train_data[b'fine_labels'][pos])[0 : num_val_per_class]))

            cifar_100.train.images = np.concatenate((cifar_100.train.images, (f_train_data[b'data'][pos])[num_val_per_class : ]))
            cifar_100.train.labels = np.concatenate((cifar_100.train.labels, (f_train_data[b'fine_labels'][pos])[num_val_per_class : ]))
        
    cifar_100.test.images = f_test_data[b'data']
    cifar_100.test.labels = f_test_data[b'fine_labels']
    
    shuffle_train_perm = np.random.permutation(cifar_100.train.images.shape[0])
    cifar_100.train.images = cifar_100.train.images[shuffle_train_perm]
    cifar_100.train.labels = cifar_100.train.labels[shuffle_train_perm]
    
    if (label_smooth_param != 0):
        smoothLabels(cifar_100)
        
    task_list = splitDataset(cifar_100, split, seed)
    return split, num_tasks, task_weights, task_list

### Display sample images and label names

In [None]:
with open(data_path + 'train', 'rb') as f:
    f_train_data = pickle.load(f, encoding='bytes')
    
with open(data_path + 'meta', 'rb') as f:
    f_meta_data = pickle.load(f, encoding='bytes')

In [None]:
print(f_meta_data[b'fine_label_names'][78])
print(f_meta_data[b'fine_label_names'][61])
print(f_meta_data[b'fine_label_names'][99])

### Create tuner object and train!!

In [10]:
input_shape = (32, 32, 3)
output_shape = (100, )

In [11]:
tuner = HyperparameterTuner(sess=sess, network=network, 
                            input_shape=input_shape, output_shape=output_shape,
                            checkpoint_path=checkpoint_path, summaries_path=summaries_path, 
                            readDatasets=readDatasets, load_best_hparams=False, 
                            reweigh_points_loss=False)

Instructions for updating:

Future major versions of TensorFlow will allow gradients to flow
into the labels input on backprop by default.

See @{tf.nn.softmax_cross_entropy_with_logits_v2}.



In [12]:
tuner.setPerExampleAppend(int(1))
tuner.updateTunerHparams({'mask_softmax' : True})
tuner.updateTunerHparams({'bf_num_images' : 2000})
tuner.setPerExampleAppend(1.0)

### Training each task separately

In [None]:
t = 0
learning_rates = [1e-1]
momentums = [0.9]
regs = [0.0001]
dropout_input_probs = [1.0]
dropout_hidden_probs = [0.9]
prod = list(itertools.product(regs, dropout_input_probs, dropout_hidden_probs, momentums, learning_rates))
hparams = []
for hparams_tuple in prod:
    cur_dict = {}
    cur_dict['reg'] = hparams_tuple[0]
    cur_dict['dropout_input_prob'] = hparams_tuple[2]
    cur_dict['dropout_hidden_prob'] = hparams_tuple[2]
    cur_dict['momentum'] = hparams_tuple[3]
    cur_dict['learning_rate'] = hparams_tuple[4]
    hparams.append(cur_dict)

tuner.hparams_list[t] = hparams

In [None]:
num_epochs = 160
num_updates = math.ceil(tuner.task_list[t].train.images.shape[0] / BATCH_SIZE) * num_epochs
tuner.print_every = 10

In [None]:
start_time = time.time()
best_avg, best_hparams = tuner.tuneOnTask(t, BATCH_SIZE, 
                                          save_weights=False, 
                                          num_updates=num_updates, verbose=True, 
                                          random_crop_flip=True)
print("time taken : %d" % (time.time() - start_time))
sound_file = '/mnt/a99/d0/shriramsb/code/Alan Walker - Alone.mp3'
Audio(sound_file, autoplay=True)

In [None]:
import math
plt.rcParams['figure.figsize'] = [10, 5]
lr_scatter = ([math.log10(h['learning_rate']) for h in hparams])
dropout_scatter = [h['dropout_hidden_prob'] for h in hparams]
colors = []
for i in range(len(hparams)):
    cur_hparam_tuple = tuner.hparamsDictToTuple(hparams[i], tuner.tuner_hparams)
    colors.append(tuner.results_list[t][cur_hparam_tuple]['best_avg'])
    
marker_size = 100
fig, ax = plt.subplots()
plt.scatter(lr_scatter, dropout_scatter, marker_size, c=colors, edgecolors='black')
plt.colorbar()
for i in range(len(lr_scatter)):
    ax.annotate(str('%0.4f' % (colors[i], )), (lr_scatter[i], dropout_scatter[i]))
plt.show()

In [None]:
plt.rcParams['figure.figsize'] = [10, 10]
best_hparams_tuple = tuner.hparamsDictToTuple(hparams[0], tuner.tuner_hparams)
cur_res = tuner.results_list[t][best_hparams_tuple]
x = np.arange(0, cur_res['total_updates'], cur_res['updates_per_epoch'])
cur_best_avg = cur_res['best_avg']
cur_best_avg_updates = cur_res['best_avg_updates']
updates_per_epoch = cur_res['updates_per_epoch']
# print("dropout: %f, fisher_multiplier: %e, lr: %e" % (k[0], k[1], k[2]))
print("cur_best_avg: %e, num_updates: %d" % (cur_best_avg, cur_best_avg_updates))
print("best val_acc: %s" % (str(np.array(cur_res['val_acc'])[:, (cur_best_avg_updates // updates_per_epoch - 1) // tuner.eval_frequency])))
# plt.plot(cur_res['loss_with_penalty'], color='g')
plt.plot(cur_res['loss'], color='m')
plt.plot(x, cur_res['val_loss'][-1], color='b')
plt.show()
# plt.ylim(ymin=0.9)
plt.plot(cur_res['val_acc'][0], color='b', )
# plt.plot(cur_res['val_acc'][1], color='g')

plt.show()

In [None]:
# print(best_avg, best_hparams)
VALIDATION_BATCH_SIZE = 128
print(tuner.validationAccuracy(t, VALIDATION_BATCH_SIZE, restore_model=False))

In [None]:
t = 1
learning_rates = [1e-1]
momentums = [0.9]
regs = [0.0001]
dropout_input_probs = [1.0]
dropout_hidden_probs = [0.9]
prod = list(itertools.product(regs, dropout_input_probs, dropout_hidden_probs, momentums, learning_rates))
hparams = []
for hparams_tuple in prod:
    cur_dict = {}
    cur_dict['reg'] = hparams_tuple[0]
    cur_dict['dropout_input_prob'] = hparams_tuple[2]
    cur_dict['dropout_hidden_prob'] = hparams_tuple[2]
    cur_dict['momentum'] = hparams_tuple[3]
    cur_dict['learning_rate'] = hparams_tuple[4]
    hparams.append(cur_dict)

tuner.hparams_list[t] = hparams

In [None]:
num_epochs = 160
num_updates = math.ceil(tuner.task_list[t].train.images.shape[0] / BATCH_SIZE) * num_epochs
tuner.print_every = 10

In [None]:
start_time = time.time()
best_avg, best_hparams = tuner.tuneOnTask(t, BATCH_SIZE, 
                                          save_weights=False, 
                                          num_updates=num_updates, verbose=True, 
                                          random_crop_flip=True)
print("time taken : %d" % (time.time() - start_time))
sound_file = '/mnt/a99/d0/shriramsb/code/Alan Walker - Alone.mp3'
Audio(sound_file, autoplay=True)

In [None]:
import math
plt.rcParams['figure.figsize'] = [10, 5]
lr_scatter = ([math.log10(h['learning_rate']) for h in hparams])
dropout_scatter = [h['dropout_hidden_prob'] for h in hparams]
colors = []
for i in range(len(hparams)):
    cur_hparam_tuple = tuner.hparamsDictToTuple(hparams[i], tuner.tuner_hparams)
    colors.append(tuner.results_list[t][cur_hparam_tuple]['best_avg'])
    
marker_size = 100
fig, ax = plt.subplots()
plt.scatter(lr_scatter, dropout_scatter, marker_size, c=colors, edgecolors='black')
plt.colorbar()
for i in range(len(lr_scatter)):
    ax.annotate(str('%0.4f' % (colors[i], )), (lr_scatter[i], dropout_scatter[i]))
plt.show()

In [None]:
plt.rcParams['figure.figsize'] = [10, 10]
best_hparams_tuple = tuner.hparamsDictToTuple(hparams[0], tuner.tuner_hparams)
cur_res = tuner.results_list[t][best_hparams_tuple]
x = np.arange(0, cur_res['total_updates'], cur_res['updates_per_epoch'])
cur_best_avg = cur_res['best_avg']
cur_best_avg_updates = cur_res['best_avg_updates']
updates_per_epoch = cur_res['updates_per_epoch']
# print("dropout: %f, fisher_multiplier: %e, lr: %e" % (k[0], k[1], k[2]))
print("cur_best_avg: %e, num_updates: %d" % (cur_best_avg, cur_best_avg_updates))
print("best val_acc: %s" % (str(np.array(cur_res['val_acc'])[:, (cur_best_avg_updates // updates_per_epoch - 1) // tuner.eval_frequency])))
# plt.plot(cur_res['loss_with_penalty'], color='g')
plt.plot(cur_res['loss'], color='m')
plt.plot(x, cur_res['val_loss'][-1], color='b')
plt.show()
# plt.ylim(ymin=0.9)
plt.plot(cur_res['val_acc'][0], color='b', )
plt.plot(cur_res['val_acc'][1], color='g')

plt.show()

### Train tasks together

In [13]:
t = 0
learning_rates = [(((49, 1e-1), (63, 1e-1 / 5), 1e-1 / (5 * 5)), (1e-1, ))]
momentums = [0.9]
regs = [0.00001]
dropout_input_probs = [1.0]
dropout_hidden_probs = [0.9]
prod = list(itertools.product(regs, dropout_input_probs, dropout_hidden_probs, momentums, learning_rates))
hparams = []
for hparams_tuple in prod:
    cur_dict = {}
    cur_dict['reg'] = hparams_tuple[0]
    cur_dict['dropout_input_prob'] = hparams_tuple[2]
    cur_dict['dropout_hidden_prob'] = hparams_tuple[2]
    cur_dict['momentum'] = hparams_tuple[3]
    cur_dict['learning_rate'] = hparams_tuple[4]
    hparams.append(cur_dict)
    
for i in range(0, t + 1):
    tuner.hparams_list[i] = hparams
    

In [14]:
t = 1
learning_rates = [(((20, 1e-1), (30, 1e-1 / 5), 1e-1 / 25), ((20, 1e-2), 1e-2 / 5))]
momentums = [0.9]
regs = [0.00001]
dropout_input_probs = [1.0]
dropout_hidden_probs = [0.9]
# epsilons = [1.0]
T = [5]
alphas = [0.5]
epsilons = [0.0, 0.1, 0.2, 0.4, 0.5, 0.7, 1.0]
prod = list(itertools.product(T, alphas, regs, dropout_input_probs, dropout_hidden_probs, momentums, learning_rates, 
                                epsilons))
hparams = []
for hparams_tuple in prod:
    cur_dict = {}
    cur_dict['T'] = hparams_tuple[0]
    cur_dict['alpha'] = hparams_tuple[1]
    cur_dict['reg'] = hparams_tuple[2]
    cur_dict['dropout_input_prob'] = hparams_tuple[4]
    cur_dict['dropout_hidden_prob'] = hparams_tuple[4]
    cur_dict['momentum'] = hparams_tuple[5]
    cur_dict['learning_rate'] = hparams_tuple[6]
    cur_dict['epsilon'] = hparams_tuple[7]
    hparams.append(cur_dict)
    
for i in range(t, t + 1):
    tuner.hparams_list[i] = hparams

for i in range(0, t):
    for _ in range(len(hparams)):
        tuner.hparams_list[i].append(tuner.hparams_list[i][0])
    

In [27]:
tuner.setPerExampleAppend(5.0)
num_hparams = len(hparams)
num_epochs = 40
num_updates = math.ceil(tuner.task_list[t].train.images.shape[0] / BATCH_SIZE) * num_epochs
num_epochs_bf = 30
num_updates_bf = math.ceil(tuner.task_list[t].train.images.shape[0] / BATCH_SIZE) * num_epochs_bf

In [29]:
best_avg, best_hparams_index = tuner.tuneTasksInRange(1, t, BATCH_SIZE, num_hparams, 
                                                        num_updates=num_updates, verbose=True, 
                                                        random_crop_flip=True, 
                                                        is_sampling_reweighing=True, 
                                                        do_bf_finetuning=True, num_updates_bf=num_updates_bf, 
                                                        bf_only_penultimate_train=False, 
                                                        sigma=1.0)

INFO:tensorflow:Restoring parameters from ./checkpoints_99_1_0_distill/dropout_hidden_prob=0.9,dropout_input_prob=0.9,learning_rate=too_long,momentum=0.9,reg=1e-05,bf_num_images=2000,mask_softmax=True,old:new=1.0,task=0.ckpt-26040
Training with T=5,alpha=0.5,dropout_hidden_prob=0.9,dropout_input_prob=0.9,epsilon=0.0,fisher_multiplier=0.0,learning_rate=too_long,momentum=0.9,reg=1e-05,bf_num_images=2000,mask_softmax=True,old:new=5.0,task=1
Restoring paramters from dropout_hidden_prob=0.9,dropout_input_prob=0.9,learning_rate=too_long,momentum=0.9,reg=1e-05,bf_num_images=2000,mask_softmax=True,old:new=1.0,task=0
INFO:tensorflow:Restoring parameters from ./checkpoints_99_1_0_distill/dropout_hidden_prob=0.9,dropout_input_prob=0.9,learning_rate=too_long,momentum=0.9,reg=1e-05,bf_num_images=2000,mask_softmax=True,old:new=1.0,task=0.ckpt-26040
epoch: 1, iter: 0/4, validation accuracies: [0.63585857 0.        ], average train loss: 42.409630, average train accuracy: 0.744141
epoch: 2, iter: 0/4,

epoch: 14, iter: 0/4, validation accuracies: [0.65707069 0.64999998], average train loss: 0.280765, average train accuracy: 0.904297
epoch: 15, iter: 0/4, validation accuracies: [0.65505052 0.64999998], average train loss: 0.362341, average train accuracy: 0.884766
epoch: 16, iter: 0/4, validation accuracies: [0.65606059 0.64999998], average train loss: 0.278167, average train accuracy: 0.898438
epoch: 17, iter: 0/4, validation accuracies: [0.65101009 0.64999998], average train loss: 0.308777, average train accuracy: 0.902344
epoch: 18, iter: 0/4, validation accuracies: [0.64949494 0.64999998], average train loss: 0.305998, average train accuracy: 0.900391
epoch: 19, iter: 0/4, validation accuracies: [0.65202021 0.64999998], average train loss: 0.325358, average train accuracy: 0.904297
epoch: 20, iter: 0/4, validation accuracies: [0.65202019 0.64999998], average train loss: 0.251395, average train accuracy: 0.925781
epoch: 21, iter: 0/4, validation accuracies: [0.65252526 0.64999998],

epoch: 36, iter: 0/4, validation accuracies: [0.62777779 1.        ], average train loss: 40.491287, average train accuracy: 0.849609
epoch: 37, iter: 0/4, validation accuracies: [0.62828282 1.        ], average train loss: 39.867374, average train accuracy: 0.873047
epoch: 38, iter: 0/4, validation accuracies: [0.63232324 1.        ], average train loss: 40.813599, average train accuracy: 0.867188
epoch: 39, iter: 0/4, validation accuracies: [0.63636363 1.        ], average train loss: 40.638447, average train accuracy: 0.861328
epoch: 40, iter: 0/4, validation accuracies: [0.63030303 1.        ], average train loss: 40.732841, average train accuracy: 0.863281
epochs: 40.000000, final train loss: 39.713844, validation accuracies: [0.63030303 1.        ]
best epochs: 39.000000, best_avg: 0.640000, validation accuracies: [0.63636363 1.        ]
Training with T=5,alpha=0.5,dropout_hidden_prob=0.9,dropout_input_prob=0.9,epsilon=0.1,fisher_multiplier=0.0,learning_rate=too_long,momentum=0.9

epoch: 15, iter: 0/4, validation accuracies: [0.57979797 0.94999999], average train loss: 40.862320, average train accuracy: 0.806641
epoch: 16, iter: 0/4, validation accuracies: [0.59545453 1.        ], average train loss: 40.389744, average train accuracy: 0.802734
epoch: 17, iter: 0/4, validation accuracies: [0.57121211 0.94999999], average train loss: 40.711689, average train accuracy: 0.806641
epoch: 18, iter: 0/4, validation accuracies: [0.58333334 1.        ], average train loss: 41.242264, average train accuracy: 0.771484
epoch: 19, iter: 0/4, validation accuracies: [0.55454547 0.94999999], average train loss: 40.745331, average train accuracy: 0.830078
epoch: 20, iter: 0/4, validation accuracies: [0.58333335 0.94999999], average train loss: 41.656925, average train accuracy: 0.794922
epoch: 21, iter: 0/4, validation accuracies: [0.60656565 0.94999999], average train loss: 41.180450, average train accuracy: 0.791016
epoch: 22, iter: 0/4, validation accuracies: [0.61666668 0.949

time taken: %f 6.845510005950928
saving penultimate output...
INFO:tensorflow:Restoring parameters from ./checkpoints_99_1_0_distill/dropout_hidden_prob=0.9,dropout_input_prob=0.9,learning_rate=too_long,momentum=0.9,reg=1e-05,bf_num_images=2000,mask_softmax=True,old:new=1.0,task=0.ckpt-26040
Training with T=5,alpha=0.5,dropout_hidden_prob=0.9,dropout_input_prob=0.9,epsilon=0.4,fisher_multiplier=0.0,learning_rate=too_long,momentum=0.9,reg=1e-05,bf_num_images=2000,mask_softmax=True,old:new=5.0,task=1
Restoring paramters from dropout_hidden_prob=0.9,dropout_input_prob=0.9,learning_rate=too_long,momentum=0.9,reg=1e-05,bf_num_images=2000,mask_softmax=True,old:new=1.0,task=0
INFO:tensorflow:Restoring parameters from ./checkpoints_99_1_0_distill/dropout_hidden_prob=0.9,dropout_input_prob=0.9,learning_rate=too_long,momentum=0.9,reg=1e-05,bf_num_images=2000,mask_softmax=True,old:new=1.0,task=0.ckpt-26040
epoch: 1, iter: 0/4, validation accuracies: [0.64141414 0.        ], average train loss: 42

epoch: 13, iter: 0/4, validation accuracies: [0.65656565 0.5       ], average train loss: 0.320846, average train accuracy: 0.890625
epoch: 14, iter: 0/4, validation accuracies: [0.65808082 0.5       ], average train loss: 0.287225, average train accuracy: 0.908203
epoch: 15, iter: 0/4, validation accuracies: [0.66161616 0.5       ], average train loss: 0.284668, average train accuracy: 0.912109
epoch: 16, iter: 0/4, validation accuracies: [0.66363637 0.5       ], average train loss: 0.259211, average train accuracy: 0.927734
epoch: 17, iter: 0/4, validation accuracies: [0.65858587 0.5       ], average train loss: 0.286837, average train accuracy: 0.906250
epoch: 18, iter: 0/4, validation accuracies: [0.66212121 0.44999999], average train loss: 0.294483, average train accuracy: 0.900391
epoch: 19, iter: 0/4, validation accuracies: [0.65959596 0.44999999], average train loss: 0.329090, average train accuracy: 0.910156
epoch: 20, iter: 0/4, validation accuracies: [0.65959597 0.5       ],

epoch: 35, iter: 0/4, validation accuracies: [0.63030304 1.        ], average train loss: 40.337463, average train accuracy: 0.843750
epoch: 36, iter: 0/4, validation accuracies: [0.63131314 1.        ], average train loss: 41.145531, average train accuracy: 0.843750
epoch: 37, iter: 0/4, validation accuracies: [0.63131312 1.        ], average train loss: 40.287334, average train accuracy: 0.843750
epoch: 38, iter: 0/4, validation accuracies: [0.63080808 1.        ], average train loss: 41.069248, average train accuracy: 0.835938
epoch: 39, iter: 0/4, validation accuracies: [0.63080808 1.        ], average train loss: 40.554363, average train accuracy: 0.818359
epoch: 40, iter: 0/4, validation accuracies: [0.63030303 1.        ], average train loss: 41.389603, average train accuracy: 0.847656
epochs: 40.000000, final train loss: 40.858635, validation accuracies: [0.63030303 1.        ]
best epochs: 36.000000, best_avg: 0.635000, validation accuracies: [0.63131314 1.        ]
Training w

epoch: 14, iter: 0/4, validation accuracies: [0.57828284 0.69999999], average train loss: 40.598671, average train accuracy: 0.785156
epoch: 15, iter: 0/4, validation accuracies: [0.57828283 0.80000001], average train loss: 40.868412, average train accuracy: 0.785156
epoch: 16, iter: 0/4, validation accuracies: [0.55252524 0.85000002], average train loss: 41.314831, average train accuracy: 0.759766
epoch: 17, iter: 0/4, validation accuracies: [0.57272729 0.94999999], average train loss: 41.534267, average train accuracy: 0.791016
epoch: 18, iter: 0/4, validation accuracies: [0.57828282 0.94999999], average train loss: 40.403973, average train accuracy: 0.806641
epoch: 19, iter: 0/4, validation accuracies: [0.56060607 1.        ], average train loss: 40.946133, average train accuracy: 0.787109
epoch: 20, iter: 0/4, validation accuracies: [0.57323234 1.        ], average train loss: 41.242065, average train accuracy: 0.781250
epoch: 21, iter: 0/4, validation accuracies: [0.58939394 1.   

time taken: %f 6.945967435836792
saving penultimate output...
INFO:tensorflow:Restoring parameters from ./checkpoints_99_1_0_distill/dropout_hidden_prob=0.9,dropout_input_prob=0.9,learning_rate=too_long,momentum=0.9,reg=1e-05,bf_num_images=2000,mask_softmax=True,old:new=1.0,task=0.ckpt-26040
Training with T=5,alpha=0.5,dropout_hidden_prob=0.9,dropout_input_prob=0.9,epsilon=1.0,fisher_multiplier=0.0,learning_rate=too_long,momentum=0.9,reg=1e-05,bf_num_images=2000,mask_softmax=True,old:new=5.0,task=1
Restoring paramters from dropout_hidden_prob=0.9,dropout_input_prob=0.9,learning_rate=too_long,momentum=0.9,reg=1e-05,bf_num_images=2000,mask_softmax=True,old:new=1.0,task=0
INFO:tensorflow:Restoring parameters from ./checkpoints_99_1_0_distill/dropout_hidden_prob=0.9,dropout_input_prob=0.9,learning_rate=too_long,momentum=0.9,reg=1e-05,bf_num_images=2000,mask_softmax=True,old:new=1.0,task=0.ckpt-26040
epoch: 1, iter: 0/4, validation accuracies: [0.64595959 0.        ], average train loss: 40

epoch: 13, iter: 0/4, validation accuracies: [0.6479798  0.64999998], average train loss: 0.294285, average train accuracy: 0.894531
epoch: 14, iter: 0/4, validation accuracies: [0.6540404  0.64999998], average train loss: 0.318168, average train accuracy: 0.902344
epoch: 15, iter: 0/4, validation accuracies: [0.65151516 0.64999998], average train loss: 0.312275, average train accuracy: 0.908203
epoch: 16, iter: 0/4, validation accuracies: [0.64999999 0.64999998], average train loss: 0.314325, average train accuracy: 0.894531
epoch: 17, iter: 0/4, validation accuracies: [0.6449495  0.64999998], average train loss: 0.315121, average train accuracy: 0.898438
epoch: 18, iter: 0/4, validation accuracies: [0.64646464 0.64999998], average train loss: 0.306238, average train accuracy: 0.919922
epoch: 19, iter: 0/4, validation accuracies: [0.65151514 0.64999998], average train loss: 0.314454, average train accuracy: 0.898438
epoch: 20, iter: 0/4, validation accuracies: [0.6540404  0.64999998],

In [None]:
plt.rcParams['figure.figsize'] = [5, 5]
best_hparams_tuple = tuner.hparamsDictToTuple(hparams[best_hparams_index], tuner.tuner_hparams)
cur_res = tuner.results_list[t][best_hparams_tuple]
x = np.arange(0, cur_res[0]['total_updates'] + cur_res[1]['total_updates'], cur_res[0]['updates_per_epoch'])
cur_best_avg = cur_res[1]['best_avg']
cur_best_epoch = cur_res[1]['best_epoch']
updates_per_epoch = cur_res[1]['updates_per_epoch']
# print("dropout: %f, fisher_multiplier: %e, lr: %e" % (k[0], k[1], k[2]))
print("cur_best_avg: %e, num_updates: %d" % (cur_best_avg, cur_best_epoch))
print("best val_acc: %s" % (str(np.array(cur_res[1]['val_acc'])[:, (cur_best_epoch - 1) // tuner.eval_frequency])))
# plt.plot(cur_res['loss_with_penalty'], color='g')
# plt.plot(np.concatenate((cur_res['loss'][0][-1], cur_res['loss'][1][-1]), axis=0), color='b')
# plt.plot(np.concatenate((cur_res[0]['val_loss'][-1], cur_res[1]['val_loss'][-1]), axis=0), color='b')
# plt.show()
# plt.ylim(ymin=0.9)
plt.plot(np.concatenate((cur_res[0]['val_acc'][-1], cur_res[1]['val_acc'][-1]), axis=0), color='b')
plt.plot(np.concatenate((cur_res[0]['val_acc'][0], cur_res[1]['val_acc'][0]), axis=0), color='g')
# plt.plot(cur_res['val_acc'][1], color='g')

plt.show()

In [30]:
for hparam in hparams:
    hparam_tuple = tuner.hparamsDictToTuple(hparam, tuner.tuner_hparams)
    print(hparam['epsilon'], np.array(tuner.results_list[t][hparam_tuple][1]['val_acc'])[:, -1])

0.0 [0.65151514 0.64999998]
0.1 [0.64090909 0.5       ]
0.2 [0.65606062 0.60000002]
0.4 [0.65656566 0.5       ]
0.5 [0.6409091  0.60000002]
0.7 [0.65050504 0.60000002]
1.0 [0.65050505 0.69999999]


In [31]:
# test accuracy
TEST_BATCH_SIZE = 128
test_accuracies = []
for hparam in hparams:
    accuracy = tuner.test(t, BATCH_SIZE, restore_model=True, hparams=hparam)
    test_accuracies.append(accuracy)
    print(hparam['epsilon'], accuracy)

INFO:tensorflow:Restoring parameters from ./checkpoints_99_1_0_distill/T=5,alpha=0.5,dropout_hidden_prob=0.9,dropout_input_prob=0.9,epsilon=0.0,learning_rate=too_long,momentum=0.9,reg=1e-05,bf_num_images=2000,mask_softmax=True,old:new=5.0,task=1.ckpt-280
0.0 [0.6510101011305144, 0.46000000834465027]
INFO:tensorflow:Restoring parameters from ./checkpoints_99_1_0_distill/T=5,alpha=0.5,dropout_hidden_prob=0.9,dropout_input_prob=0.9,epsilon=0.1,learning_rate=too_long,momentum=0.9,reg=1e-05,bf_num_images=2000,mask_softmax=True,old:new=5.0,task=1.ckpt-280
0.1 [0.6477777778259431, 0.38999998569488525]
INFO:tensorflow:Restoring parameters from ./checkpoints_99_1_0_distill/T=5,alpha=0.5,dropout_hidden_prob=0.9,dropout_input_prob=0.9,epsilon=0.2,learning_rate=too_long,momentum=0.9,reg=1e-05,bf_num_images=2000,mask_softmax=True,old:new=5.0,task=1.ckpt-280
0.2 [0.6480808080326427, 0.4099999964237213]
INFO:tensorflow:Restoring parameters from ./checkpoints_99_1_0_distill/T=5,alpha=0.5,dropout_hidde

In [32]:
# Weights given to each class
num_class = 100
for i in range(num_class):
    print(i, np.sum(tuner.appended_task_list[-1].train.weights[(np.argmax(tuner.appended_task_list[-1].train.labels, axis=1) == i)]))

0 0.008537709251005509
1 0.008655093950555082
2 0.008482877204380455
3 0.008224384955454751
4 0.008387565457521656
5 0.008682897477745968
6 0.008466476907745464
7 0.008561524361797947
8 0.008739389021223832
9 0.008341486242799484
10 0.008857883800945952
11 0.008259497093282766
12 0.008424734147594648
13 0.008293588403295506
14 0.0086948261616146
15 0.008473640914175172
16 0.008502468214125319
17 0.007729617306914627
18 0.008741595108047243
19 0.008329476711214524
20 0.008556082821999185
21 0.008201436484924043
22 0.008746339614711718
23 0.007886931102217543
24 0.008501038452007014
25 0.008638246304378046
26 0.008817121517966573
27 0.008609091426248768
28 0.00859612933625254
29 0.008605774009607034
30 0.00838271872273839
31 0.008145779325567298
32 0.008630240567838166
33 0.00792519109412509
34 0.008335150379537958
35 0.008310583340327443
36 0.008435426242390751
37 0.008205269742428557
38 0.008405688521830353
39 0.00828963160457236
40 0.008836938387998332
41 0.008561492092924408
42 0.008

In [None]:
old_weights = tuner.appended_task_list[1].train.weights[np.argmax(tuner.appended_task_list[1].train.labels, axis=1) != 99]
plt.rcParams['figure.figsize'] = [10, 10]
# plt.hist(old_weights)
# seaborn.distplot(old_weights)
print(old_weights.shape)
print(np.sum(old_weights < 1e-5))
print(np.sum((old_weights >= 1e-5) * (old_weights < 1e-2)))
# for i in range(1000):
#     print(w[i])

In [None]:
tuner.saveResultsList()
tuner.saveBestHparams()

In [None]:
if use_tpu:
    sess.run(tpu.shutdown_system())

sess.close()