In [1]:
from IPython.display import Audio

In [2]:
import sys
sys.path.append('../../')

In [3]:
import numpy as np
import math

%matplotlib inline
import matplotlib.pyplot as plt

import argparse
import time
import itertools
from copy import deepcopy
import tensorflow as tf

from tuner import HyperparameterTuner
from tuner import MyTask

use_tpu = False
use_gpu = True

if use_tpu:
    from tensorflow.contrib import tpu
    from tensorflow.contrib.cluster_resolver import TPUClusterResolver

if use_gpu:
    import os
    
%load_ext autoreload
%autoreload 2

In [4]:
if use_gpu:
    os.environ['CUDA_VISIBLE_DEVICES'] = '0'
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

In [5]:
if use_tpu:
    tpu_cluster = TPUClusterResolver(tpu=[tpu_name]).get_master()
    sess = tf.Session(tpu_cluster)
    sess.run(tpu.initialize_system())
elif use_gpu:
    sess = tf.Session(config=config)
else:
    sess = tf.Session()

In [6]:
class Network(object):
    def __init__(self):
        self.layers = None
        self.createLayers()

    def createLayers(self):
        self.layers = []
        self.layers.append(tf.layers.Dense(units=2000, activation=tf.nn.relu))
        self.layers.append(tf.layers.Dense(units=2000, activation=tf.nn.relu))
        self.layers.append(tf.layers.Dense(units=10))

    def forward(self, x, apply_dropout, keep_prob_input=1.0, keep_prob_hidden=1.0):
        layer_output = []
        input_shape = np.prod(x.shape.as_list()[1:])
        x = tf.reshape(x, [-1, input_shape])
        if (apply_dropout):
            x = tf.nn.dropout(x, keep_prob_input)
        y = x
        for i in range(len(self.layers) - 1):
            y = self.layers[i](y)
            if (apply_dropout):
                y = tf.nn.dropout(y, keep_prob_hidden)
            layer_output.append(y)
        y = self.layers[-1](y)
        layer_output.append(y)
        return y, layer_output

    def getLayerVariables(self):
        l = []
        for i in range(len(self.layers)):
            l.extend(self.layers[i].variables)
        return l
    def name(self):
        return 'fc2000_fc2000_fc10'

In [7]:
network = Network()

In [8]:
task_home = ''
if use_tpu:
    task_home = 'gs://continual_learning/permMNIST_EWC/'
else:
    task_home = '../../../'

cur_dir = './'
checkpoint_path = cur_dir + 'checkpoints/'
summaries_path = cur_dir + 'summaries/'
data_path = task_home + 'MNIST_data/'
split_path = './split.txt' 
if use_tpu:
    tpu_name = 'gectpu'
    
BATCH_SIZE = 256

In [9]:
label_smooth_param = 0

def readSplit(split_path):
    split = []
    try:
        f = open(split_path)
        while (True):
            line = f.readline()
            if (line == ""):
                break
            split.append([float(i) for i in line.split()])
    except IOError:
        print("split path file not found")
        exit(-1)
    return split

from tensorflow.contrib.learn.python.learn.datasets.mnist import read_data_sets
    
def permuteDataset(task, seed):
    np.random.seed(seed)
    perm = np.random.permutation(task.train._images.shape[1])
    permuted = deepcopy(task)
    permuted.train._images = permuted.train._images[:, perm]
    permuted.test._images = permuted.test._images[:, perm]
    permuted.validation._images = permuted.validation._images[:, perm]
    return MyTask(permuted)
    
def smoothLabels(dataset):
    train_labels = dataset.train.labels
    train_labels_argmax = np.argmax(train_labels, axis=1)
    train_labels = train_labels + label_smooth_param / (train_labels.shape[1] - 1)
    train_labels[range(train_labels.shape[0]), train_labels_argmax] = 1 - label_smooth_param
    dataset.train._labels = train_labels
    
    
def readDatasets():
    num_tasks = 10
    num_classes = 10
    task_weights = []
    split = [[] for _ in range(num_tasks)]
    for i in range(num_tasks):
        for j in range(num_classes):
            split[i].append(j)
        task_weights.append(1.0)
    
    mnist = read_data_sets(data_path, one_hot=True)
    if (label_smooth_param != 0):
        smoothLabels(mnist)
        
    seed = 1
    np.random.seed(seed)
    shuffle_train_perm = np.random.permutation(mnist.train._images.shape[0])
    mnist.train._images = mnist.train._images[shuffle_train_perm, :]
    mnist.train._labels = mnist.train._labels[shuffle_train_perm, :]
    
    task_list = []
    task_list.append(MyTask(mnist))
    for seed in range(1, num_tasks):
        task_list.append(permuteDataset(mnist, seed))
    return split, num_tasks, task_weights, task_list


In [10]:
input_shape = (28 * 28, )
output_shape = (10, )

In [11]:
tuner = HyperparameterTuner(sess=sess, network=network, 
                            input_shape=input_shape, output_shape=output_shape,
                            checkpoint_path=checkpoint_path, summaries_path=summaries_path, 
                            readDatasets=readDatasets, load_best_hparams=False)

Instructions for updating:
Please use alternatives such as official/mnist/dataset.py from tensorflow/models.
Instructions for updating:
Please write your own downloading logic.
Instructions for updating:
Please use tf.data to implement this functionality.
Extracting ../../../MNIST_data/train-images-idx3-ubyte.gz
Instructions for updating:
Please use tf.data to implement this functionality.
Extracting ../../../MNIST_data/train-labels-idx1-ubyte.gz
Instructions for updating:
Please use tf.one_hot on tensors.
Extracting ../../../MNIST_data/t10k-images-idx3-ubyte.gz
Extracting ../../../MNIST_data/t10k-labels-idx1-ubyte.gz
Instructions for updating:
Please use alternatives such as official/mnist/dataset.py from tensorflow/models.
Instructions for updating:

Future major versions of TensorFlow will allow gradients to flow
into the labels input on backprop by default.

See `tf.nn.softmax_cross_entropy_with_logits_v2`.



In [12]:
tuner.setPerExampleAppend(0.5)

In [13]:
tuner.num_tasks

10

In [15]:
# task 0 - 4
t = 4
learning_rates = [1e-4]
fisher_multipliers = [0.0]
dropout_input_probs = [1.0]
dropout_hidden_probs = [0.75]
prod = list(itertools.product(fisher_multipliers, dropout_input_probs, dropout_hidden_probs, learning_rates))
hparams = []
for hparams_tuple in prod:
    cur_dict = {}
    cur_dict['fisher_multiplier'] = hparams_tuple[0]
    cur_dict['dropout_input_prob'] = hparams_tuple[2]
    cur_dict['dropout_hidden_prob'] = hparams_tuple[2]
    cur_dict['learning_rate'] = hparams_tuple[3]
    hparams.append(cur_dict)


num_hparams = 1
for i in range(t + 1):
    tuner.hparams_list[i] = hparams

In [17]:
num_epochs = 20
num_updates = math.ceil(tuner.task_list[0].train.images.shape[0] / BATCH_SIZE) * num_epochs

In [18]:
tuner.tuneTasksInRange(0, t, BATCH_SIZE, num_hparams, num_updates, verbose=True)

Training with dropout_hidden_prob=0.75,dropout_input_prob=0.75,fisher_multiplier=0.0,learning_rate=0.0001,old:new=0.5,task=0
validation accuracies: [0.204], loss: 2.356296
validation accuracies: [0.9724], loss: 0.102200
validation accuracies: [0.9806], loss: 0.093562
validation accuracies: [0.9826], loss: 0.086754
validation accuracies: [0.986], loss: 0.069538
epochs: 20.000000, final train loss: 0.023734, validation accuracies: [0.9846]
best epochs: 18.604651, best_avg: 0.986000, validation accuracies: [0.986]
saving model dropout_hidden_prob=0.75,dropout_input_prob=0.75,fisher_multiplier=0.0,learning_rate=0.0001,old:new=0.5,task=0 at time step 4299
calculating penultimate output...
time taken: %f 11.987691402435303
saving penultimate output...
INFO:tensorflow:Restoring parameters from ./checkpoints/dropout_hidden_prob=0.75,dropout_input_prob=0.75,fisher_multiplier=0.0,learning_rate=0.0001,old:new=0.5,task=0.ckpt-4299
Training with dropout_hidden_prob=0.75,dropout_input_prob=0.75,fish

OverflowError: cannot serialize a bytes object larger than 4 GiB

In [None]:
best_avg, best_hparams = tuner.tuneOnTask(t, BATCH_SIZE, save_weights=False)

sound_file = '/mnt/a99/d0/shriramsb/code/Alan Walker - Alone.mp3'
Audio(sound_file, autoplay=True)

In [None]:
import math
plt.rcParams['figure.figsize'] = [10, 5]
lr_scatter = ([math.log10(h['learning_rate']) for h in hparams])
dropout_scatter = [h['dropout_hidden_prob'] for h in hparams]
colors = []
for i in range(len(hparams)):
    cur_hparam_tuple = tuple([v for k, v in sorted(hparams[i].items())])
    colors.append(tuner.results_list[t][cur_hparam_tuple]['best_avg'])
    
marker_size = 100
fig, ax = plt.subplots()
plt.scatter(lr_scatter, dropout_scatter, marker_size, c=colors, edgecolors='black')
plt.colorbar()
for i in range(len(lr_scatter)):
    ax.annotate(str('%0.4f' % (colors[i], )), (lr_scatter[i], dropout_scatter[i]))
plt.show()

In [19]:
# print(best_avg, best_hparams)
VALIDATION_BATCH_SIZE = 256
print(tuner.validationAccuracy(t, VALIDATION_BATCH_SIZE, restore_model=False))

[0.9849999994277954, 0.9851999998092651, 0.9847999994277954, 0.9825999994277954, 0.9807999994277954]


In [21]:
tuner.saveResultsList()
tuner.saveBestHparams()

In [None]:
# print("best_avg: %e, best_params: %s" % (best_avg, str(best_params)))
# print("best_params: dropout: %f, fisher_multiplier: %e, lr: %e" % best_params)

# for k, v in tuner.results_list.items():
# best_avg_tuple = tuple([v for k, v in sorted(tuner.best_hparams[t][0].items())])
plt.rcParams['figure.figsize'] = [10, 10]
best_hparams_tuple = tuple([v for k, v in sorted(best_hparams.items())])
cur_res = tuner.results_list[t][best_hparams_tuple]
x = np.arange(0, len(cur_res['loss']), tuner.eval_frequency)
cur_best_avg = cur_res['best_avg']
cur_best_avg_updates = cur_res['best_avg_updates']
# print("dropout: %f, fisher_multiplier: %e, lr: %e" % (k[0], k[1], k[2]))
print("cur_best_avg: %e, num_updates: %d" % (cur_best_avg, cur_best_avg_updates))
print("best val_acc: %s" % (str(np.array(cur_res['val_acc'])[:, cur_best_avg_updates // tuner.eval_frequency])))
plt.plot(cur_res['loss_with_penalty'], color='g')
plt.plot(cur_res['loss'], color='m')
plt.plot(x, cur_res['val_loss'][-1], color='b')
plt.show()
plt.ylim(ymin=0.9)
plt.plot(cur_res['val_acc'][0], color='b', )
plt.plot(cur_res['val_acc'][1], color='g')
plt.plot(cur_res['val_acc'][2], color='r')

plt.show()

In [None]:
for i in range(t + 1):
    print(tuner.best_hparams[i])

In [None]:
def getConfusionMatrix(tuner):
    num_labels = 10
    pred = np.array([])
    actual = np.array([])
    for j in range(t + 1):
        val_data = tuner.task_list[j].validation
        feed_dict = tuner.classifier.createFeedDict(val_data.images, val_data.labels)
        cur_scores, cur_y = tuner.classifier.getPredictions(sess, feed_dict)
        cur_pred = np.argmax(cur_scores, 1)
        cur_actual = np.argmax(cur_y, 1)
        actual = np.concatenate([actual, cur_actual])
        pred = np.concatenate([pred, cur_pred])
    confusion_matrix = np.zeros((num_labels,num_labels), dtype=np.int64)

    for i in range(actual.shape[0]):
        confusion_matrix[int(actual[i]), int(pred[i])] += 1
    return confusion_matrix

def printConfusionMatrix(confusion_matrix):
    print("%3d" % (0, ), end=' ')
    for j in range(confusion_matrix.shape[1]):
        print("%3d" % (j, ), end=' ')
    print("")
    for i in range(confusion_matrix.shape[0]):
        print("%3d" % (i, ), end=' ')
        for j in range(confusion_matrix.shape[1]):
            print("%3d" % (confusion_matrix[i][j], ), end= ' ')
        print("")

In [None]:
confusion_matrix = getConfusionMatrix(tuner)
printConfusionMatrix(confusion_matrix)

In [None]:
plt.imshow(tuner.appended_task_list[1].train.images[0].reshape(28, 28), cmap='gray')
examples_per_class_1 = np.sum(tuner.appended_task_list[1].train.labels, axis=0).astype(np.int64)
examples_per_class_2 = np.sum(tuner.appended_task_list[2].train.labels, axis=0).astype(np.int64)
print(examples_per_class_1)
print(examples_per_class_2)

In [20]:
TEST_BATCH_SIZE = 32
accuracy = tuner.test(t, TEST_BATCH_SIZE, restore_model=False)
print(accuracy)

[0.9849, 0.9842, 0.9837, 0.9808, 0.9768]


In [None]:
# calculating weight for each class
num_classes = 10
class_weights = [0.0 for _ in range(num_classes)]
for i in range(num_classes):
    cur_class_indices = np.argmax(tuner.appended_task_list[t].train.labels, axis=1) == i
    class_weights[i] += np.sum(tuner.appended_task_list[t].train.weights[cur_class_indices])
for i in range(num_classes):
    print("%6d" % (i, ), end='')
print("")
print(" ", end='')
for i in range(num_classes):
    print("%2.3f" % (class_weights[i] * 100, ), end=' ')

In [None]:
if use_tpu:
    sess.run(tpu.shutdown_system())

sess.close()