In [1]:
from IPython.display import Audio

In [2]:
# Extend the module search path to the directory three levels above this notebook.
# NOTE(review): presumably this is where the project-local `tuner` module lives — confirm.
import sys
sys.path.append('../../../')

In [3]:
import numpy as np
import math

%matplotlib inline
import matplotlib.pyplot as plt

import argparse
import time
import itertools
from copy import deepcopy
import tensorflow as tf

from tuner import HyperparameterTuner
from tuner import MyTask

# Hardware selection flags; they gate the conditional imports below and the
# session setup in the following cells.
use_tpu = False
use_gpu = True

if use_tpu:
    # TPU helpers are only imported when a TPU run is requested.
    from tensorflow.contrib import tpu
    from tensorflow.contrib.cluster_resolver import TPUClusterResolver

if use_gpu:
    # `os` is needed by the GPU cell that sets CUDA_VISIBLE_DEVICES.
    import os
    
%load_ext autoreload
%autoreload 2

In [4]:
if use_gpu:
    # Expose only the GPU with index 2 to this process (machine-specific choice).
    os.environ['CUDA_VISIBLE_DEVICES'] = '2'
    config = tf.ConfigProto()
    # Let TensorFlow grow its GPU memory allocation on demand.
    config.gpu_options.allow_growth = True

In [5]:
# Create the TensorFlow session for the selected hardware.
if use_tpu:
    # `tpu_name` is otherwise only assigned in a later cell; define it here so
    # this cell also works on a fresh kernel (Restart & Run All).
    tpu_name = 'gectpu'
    tpu_cluster = TPUClusterResolver(tpu=[tpu_name]).get_master()
    sess = tf.Session(tpu_cluster)
    sess.run(tpu.initialize_system())
elif use_gpu:
    # `config` comes from the GPU configuration cell above.
    sess = tf.Session(config=config)
else:
    sess = tf.Session()

In [6]:
class Network(object):
    """Fully connected classifier: two 256-unit ReLU layers and a 10-unit linear output."""

    def __init__(self):
        self.layers = None
        self.createLayers()

    def createLayers(self):
        """(Re)build `self.layers`: two hidden ReLU layers followed by the output layer."""
        hidden_layers = [tf.layers.Dense(units=256, activation=tf.nn.relu) for _ in range(2)]
        self.layers = hidden_layers + [tf.layers.Dense(units=10)]

    def forward(self, x, apply_dropout, keep_prob_input=1.0, keep_prob_hidden=1.0):
        """Run `x` through all layers.

        Args:
            x: input tensor; every dimension after the first is flattened.
            apply_dropout: when truthy, dropout is applied to the flattened input
                and to the output of every layer except the last.
            keep_prob_input: keep probability for the input dropout.
            keep_prob_hidden: keep probability for the hidden-layer dropout.

        Returns:
            (output, per_layer) where `output` is the last layer's result and
            `per_layer` lists each layer's output in order (hidden outputs are
            recorded after dropout; the final entry is `output`).
        """
        flat_dim = np.prod(x.shape.as_list()[1:])
        current = tf.reshape(x, [-1, flat_dim])
        if apply_dropout:
            current = tf.nn.dropout(current, keep_prob_input)

        per_layer = []
        for hidden in self.layers[:-1]:
            current = hidden(current)
            if apply_dropout:
                current = tf.nn.dropout(current, keep_prob_hidden)
            per_layer.append(current)

        current = self.layers[-1](current)
        per_layer.append(current)
        return current, per_layer

    def getLayerVariables(self):
        """Return the variables of all layers as one flat list, in layer order."""
        return [variable for layer in self.layers for variable in layer.variables]

    def name(self):
        """Return the fixed identifier string of this architecture."""
        return 'fc256_fc256_fc10'

In [7]:
# Instantiate the model; its layer objects are created by the constructor.
network = Network()

In [8]:
# Root directory containing the MNIST archive. The TPU case leaves it empty; a
# GCS location ('gs://continual_learning/permMNIST_EWC/') was used there at some
# point and is currently disabled.
task_home = '' if use_tpu else '../../../../'

# Output and data locations, all built by plain string concatenation.
cur_dir = './'
checkpoint_path = cur_dir + 'checkpoints/'
summaries_path = cur_dir + 'summaries/'
data_path = task_home + 'MNIST_data/'

if use_tpu:
    tpu_name = 'gectpu'

BATCH_SIZE = 128

In [9]:
# Label-smoothing strength; readDatasets() only applies smoothing when this is non-zero.
label_smooth_param = 0

from tensorflow.contrib.learn.python.learn.datasets.mnist import read_data_sets

def split_mnist(mnist, dataset_split, seed):
    """Split a dataset bundle into one task per group of class labels.

    Args:
        mnist: object with `train`, `validation` and `test` members, each exposing
            `labels` plus the `_images` / `_labels` arrays. Class ids are taken as
            the argmax over axis 1 of `labels`.
        dataset_split: sequence of label groups, e.g. [[0, 1], [2]]; one task is
            produced per group, in order.
        seed: value passed to `np.random.seed` before splitting.

    Returns:
        List of `MyTask` objects, each wrapping a deep copy of `mnist` whose three
        subsets keep only the examples whose class id is in the matching group.
    """
    np.random.seed(seed)

    subset_names = ('train', 'validation', 'test')
    class_ids = {
        subset: np.argmax(getattr(mnist, subset).labels, axis=1)
        for subset in subset_names
    }

    tasks = []
    for group in dataset_split:
        task_data = deepcopy(mnist)
        for subset in subset_names:
            part = getattr(task_data, subset)
            keep = np.isin(class_ids[subset], group)
            # NOTE(review): only `_images` / `_labels` are replaced; any cached size
            # field on the subset object is left as-is — confirm MyTask does not rely on it.
            part._images = part._images[keep]
            part._labels = part._labels[keep]
        tasks.append(MyTask(task_data))

    return tasks
    
def smoothLabels(dataset, smooth_param=None):
    """Apply label smoothing to the training labels of `dataset`.

    Every entry is raised by `smooth_param / (num_classes - 1)` and the entry at
    each row's argmax is then set to `1 - smooth_param`, so one-hot rows still sum
    to 1. The result is stored in `dataset.train._labels`; the array returned by
    `dataset.train.labels` is not modified in place.

    Args:
        dataset: object whose `train` member exposes `labels` (2-D array, one row
            per example) and accepts assignment to `_labels`.
        smooth_param: smoothing strength. When omitted, the notebook-level
            `label_smooth_param` is used, matching the previous behaviour.
    """
    if smooth_param is None:
        smooth_param = label_smooth_param

    labels = dataset.train.labels
    true_class = np.argmax(labels, axis=1)
    # `+` allocates a new array, so the caller's original labels stay untouched.
    smoothed = labels + smooth_param / (labels.shape[1] - 1)
    smoothed[np.arange(smoothed.shape[0]), true_class] = 1 - smooth_param
    dataset.train._labels = smoothed

def readDatasets():
    """Load MNIST from `data_path` and split it into three class-based tasks.

    The training set is shuffled once with a fixed seed, label smoothing is applied
    when `label_smooth_param` is non-zero, and the data is then partitioned into
    digits 0-7, digit 8 and digit 9.

    Returns:
        Tuple `(split, num_tasks, task_weights, task_list)`.
    """
    seed = 1
    split = [list(range(8)), [8], [9]]
    num_tasks = 3
    task_weights = [0.8, 0.1, 0.1]

    mnist = read_data_sets(data_path, one_hot=True)

    # Apply one seeded permutation to images and labels so they stay aligned.
    np.random.seed(seed)
    order = np.random.permutation(mnist.train._images.shape[0])
    mnist.train._images = mnist.train._images[order, :]
    mnist.train._labels = mnist.train._labels[order, :]

    if label_smooth_param != 0:
        smoothLabels(mnist)

    return split, num_tasks, task_weights, split_mnist(mnist, split, seed)

In [10]:
# Per-example shapes handed to the tuner: a flat vector of 28 * 28 values in, 10 values out.
input_shape = (28 * 28, )
output_shape = (10, )

In [11]:
# Build the tuner around the shared session, the network and the dataset loader.
# NOTE(review): `load_best_hparams=True` presumably reloads previously selected
# hyperparameters (a later cell reads `tuner.best_hparams`) — confirm in `tuner`.
tuner = HyperparameterTuner(sess=sess, network=network, 
                            input_shape=input_shape, output_shape=output_shape,
                            checkpoint_path=checkpoint_path, summaries_path=summaries_path, 
                            readDatasets=readDatasets, load_best_hparams=True)

Instructions for updating:
Please use alternatives such as official/mnist/dataset.py from tensorflow/models.
Instructions for updating:
Please write your own downloading logic.
Instructions for updating:
Please use tf.data to implement this functionality.
Extracting ../../../../MNIST_data/train-images-idx3-ubyte.gz
Instructions for updating:
Please use tf.data to implement this functionality.
Extracting ../../../../MNIST_data/train-labels-idx1-ubyte.gz
Instructions for updating:
Please use tf.one_hot on tensors.
Extracting ../../../../MNIST_data/t10k-images-idx3-ubyte.gz
Extracting ../../../../MNIST_data/t10k-labels-idx1-ubyte.gz
Instructions for updating:
Please use alternatives such as official/mnist/dataset.py from tensorflow/models.
Instructions for updating:

Future major versions of TensorFlow will allow gradients to flow
into the labels input on backprop by default.

See @{tf.nn.softmax_cross_entropy_with_logits_v2}.



In [12]:
# NOTE(review): the meaning of the per-example-append setting is defined in `tuner`;
# 0 presumably turns it off — confirm.
tuner.setPerExampleAppend(0)

In [13]:
# Index of the task inspected below (used as `tuner.best_hparams[t]` and `tuner.task_list[t]`).
t = 0

In [14]:
# Restore the classifier using the last element of task t's best-hyperparameter entry.
# NOTE(review): presumably that element identifies the checkpoint to load — confirm in `tuner`.
tuner.classifier.restoreModel(tuner.sess, tuner.best_hparams[t][-1])

INFO:tensorflow:Restoring parameters from ./checkpoints/dropout_hidden_prob=0.8,dropout_input_prob=0.8,fisher_multiplier=0.0,learning_rate=0.001,per_example_append=0,task=0.ckpt-6500


In [16]:
# Evaluate the second-to-last entry of the classifier's Fisher diagonal list.
# NOTE(review): despite the name `final_weights`, this holds Fisher values, presumably
# for the output layer's weight matrix (its columns are indexed per class below) — confirm.
final_weights = sess.run(tuner.classifier.fisher_diagonal[-2])

In [17]:
# Per output class: L2 norm and variance of the matching column of `final_weights`.
for class_idx in range(output_shape[0]):
    column = final_weights[:, class_idx]
    print(np.linalg.norm(column), np.var(column))

0.0033404585 3.2578303e-08
0.005159856 7.380013e-08
0.003307979 3.0114474e-08
0.0050348323 6.843579e-08
0.0023066532 1.4250447e-08
0.005910843 9.733458e-08
0.0053031663 8.275407e-08
0.0037771338 4.0740204e-08
1.9614368e-09 9.533352e-21
2.8567997e-09 2.1275817e-20


In [18]:
# Run the classifier on task t's validation split.
val_data = tuner.task_list[t].validation
feed_dict = tuner.classifier.createFeedDict(val_data.images, val_data.labels)
# NOTE(review): `cur_scores` is indexed as [example, class] in the next cell;
# `cur_y` is not used in the cells visible here.
cur_scores, cur_y = tuner.classifier.getPredictions(sess, feed_dict)

In [19]:
# Per output class: L2 norm and variance of that class's scores over the validation examples.
for class_idx in range(output_shape[0]):
    class_scores = cur_scores[:, class_idx]
    print(np.linalg.norm(class_scores), np.var(class_scores))

646.5424 88.330864
523.2776 64.73493
544.473 64.53515
576.9781 81.04634
616.284 89.30173
595.80255 84.68893
708.8196 101.67858
602.1324 87.30986
1074.6989 20.220638
1039.156 18.482508


In [None]:
# Tear down: shut the TPU system down when one was used, then release the session.
try:
    if use_tpu:
        sess.run(tpu.shutdown_system())
finally:
    # Close the session even if the TPU shutdown op raises.
    sess.close()