In [1]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

In [68]:
import sys, os
import numpy as np
import scipy 
import tensorflow as tf

import imageio
import gzip
from PIL import Image

import seaborn as sns
import matplotlib.colors as colors
import matplotlib.cm as cmx
import matplotlib.pyplot as plt
#from tqdm import trange, tqdm

import keras
from keras import backend as K
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.optimizers import SGD, Adam, RMSprop, Optimizer
from keras.callbacks import Callback
from collections import OrderedDict

In [8]:
from helpers import utils
# from helpers import protocols
# from helpers.keras_utils import LossHistory
# from helpers.optimizers import KOOptimizer

In [9]:
tf.logging.set_verbosity(tf.logging.INFO)

In [10]:
# """""""PARAMETERS"""""" 

# Data params
input_dim = 784
output_dim = 10

# Network params
n_hidden_units = 2000
activation_fn = tf.nn.relu

# Optimization params
batch_size = 256
epochs_per_task = 20 
learning_rate=1e-3
xi = 0.1

# Reset optimizer after each age
reset_optimizer = False

In [11]:
# """""""CONSTRUCT DATASETS"""""" 

n_tasks = 10
full_datasets, final_test_datasets = utils.construct_permute_mnist(num_tasks=n_tasks)
# training_datasets, validation_datasets = utils.mk_training_validation_splits(full_datasets, split_fractions=(0.9, 0.1))
training_datasets = full_datasets
validation_datasets = final_test_datasets

Downloading data from https://s3.amazonaws.com/img-datasets/mnist.npz


In [57]:
def quadratic_regularizer(weights, vars, norm=2):
    reg = 0.0
    for w in weights:
        reg += tf.reduce_sum(vars['omega'][w] * (w - vars['cweights'][w])**norm)
    return reg

In [58]:
class KOOptimizer(Optimizer):
    """An optimizer whose loss depends on its own updates."""

    def _allocate_var(self, name=None):
        return {w: K.zeros(w.get_shape(), name=name) for w in self.weights}

    def _allocate_vars(self, names):
        #TODO: add names, better shape/init checking
        self.vars = {name: self._allocate_var(name=name) for name in names}

    def __init__(self, opt, step_updates=[], task_updates=[], init_updates=[], task_metrics = {}, regularizer_fn=quadratic_regularizer,
                lam=1.0, model=None, compute_average_loss=False, compute_average_weights=False, **kwargs):
        super(KOOptimizer, self).__init__(**kwargs)
        if not isinstance(opt, keras.optimizers.Optimizer):
            raise ValueError("opt must be an instance of keras.optimizers.Optimizer but got %s"%type(opt))
        if not isinstance(step_updates, OrderedDict):
            step_updates = OrderedDict(step_updates)
        if not isinstance(task_updates, OrderedDict): task_updates = OrderedDict(task_updates)
        if not isinstance(init_updates, OrderedDict): init_updates = OrderedDict(init_updates)
        # task_metrics
        self.names = set().union(step_updates.keys(), task_updates.keys(), task_metrics.keys())
        if 'grads' in self.names or 'deltas' in self.names:
            raise ValueError("Optimization variables cannot be named 'grads' or 'deltas'")
        self.step_updates = step_updates
        self.task_updates = task_updates
        self.init_updates = init_updates
        self.compute_average_loss = compute_average_loss
        self.regularizer_fn = regularizer_fn
        # Compute loss and gradients
        self.lam = K.variable(value=lam, dtype=tf.float32, name="lam")
        self.nb_data = K.variable(value=1.0, dtype=tf.float32, name="nb_data")
        self.opt = opt
        #self.compute_fisher = compute_fisher
        #if compute_fisher and model is None:
        #    raise ValueError("To compute Fisher information, you need to pass in a Keras model object ")
        self.model = model
        self.task_metrics = task_metrics
        self.compute_average_weights = compute_average_weights

    def set_strength(self, val):
        K.set_value(self.lam, val)

    def set_nb_data(self, nb):
        K.set_value(self.nb_data, nb)

    def get_updates(self, weights, constraints, initial_loss, model=None):
        self.weights = weights
        # Allocate variables
        with tf.variable_scope("KOOptimizer"):
            self._allocate_vars(self.names)

        #grads = self.get_gradients(loss, params)

        # Compute loss and gradients
        self.regularizer = 0.0 if self.regularizer_fn is None else self.regularizer_fn(weights, self.vars)
        self.initial_loss = initial_loss
        self.loss = initial_loss + self.lam * self.regularizer
        with tf.variable_scope("wrapped_optimizer"):
            self._weight_update_op, self._grads, self._deltas = compute_updates(self.opt, self.loss, weights)

        wrapped_opt_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, "wrapped_optimizer")
        self.init_opt_vars = tf.variables_initializer(wrapped_opt_vars)

        self.vars['unreg_grads'] = dict(zip(weights, tf.gradients(self.initial_loss, weights)))
        # Compute updates
        self.vars['grads'] = dict(zip(weights, self._grads))
        self.vars['deltas'] = dict(zip(weights, self._deltas))
        # Keep a pointer to self in vars so we can use it in the updates
        self.vars['oopt'] = self
        # Keep number of data samples handy for normalization purposes
        self.vars['nb_data'] = self.nb_data

        if self.compute_average_weights:
            with tf.variable_scope("weight_emga") as scope:
                weight_ema = tf.train.ExponentialMovingAverage(decay=0.99, zero_debias=True)
                self.maintain_weight_averages_op = weight_ema.apply(self.weights)
                self.vars['average_weights'] = {w: weight_ema.average(w) for w in self.weights}
            self.weight_ema_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope=scope.name)
            self.init_weight_ema_vars = tf.variables_initializer(self.weight_ema_vars)
            print(">>>>>")
            K.get_session().run(self.init_weight_ema_vars)
        if self.compute_average_loss:
            with tf.variable_scope("ema") as scope:
                ema = tf.train.ExponentialMovingAverage(decay=0.99, zero_debias=True)
                self.maintain_averages_op = ema.apply([self.initial_loss])
                self.ema_loss = ema.average(self.initial_loss)
                self.prev_loss = tf.Variable(0.0, trainable=False, name="prev_loss")
                self.delta_loss = tf.Variable(0.0, trainable=False, name="delta_loss")
            self.ema_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope=scope.name)
            self.init_ema_vars = tf.variables_initializer(self.ema_vars)
#        if self.compute_fisher:
#            self._fishers, _, _, _ = compute_fishers(self.model)
#            #fishers = compute_fisher_information(model)
#            self.vars['fishers'] = dict(zip(weights, self._fishers))
#            #fishers, avg_fishers, update_fishers, zero_fishers = compute_fisher_information(model)

        def _var_update(vars, update_fn):
            updates = []
            for w in weights:
                updates.append(tf.assign(vars[w], update_fn(self.vars, w, vars[w])))
            return tf.group(*updates)

        def _compute_vars_update_op(updates):
            # Force task updates to happen sequentially
            update_op = tf.no_op()
            for name, update_fn in updates.items():
                with tf.control_dependencies([update_op]):
                    update_op = _var_update(self.vars[name], update_fn)
            return update_op

        self._vars_step_update_op = _compute_vars_update_op(self.step_updates)
        self._vars_task_update_op = _compute_vars_update_op(self.task_updates)
        self._vars_init_update_op = _compute_vars_update_op(self.init_updates)

        # Create task-relevant update ops
        reset_ops = []
        update_ops = []
        for name, metric_fn in self.task_metrics.items():
            metric = metric_fn(self)
            for w in weights:
                reset_ops.append(tf.assign(self.vars[name][w], 0*self.vars[name][w]))
                update_ops.append(tf.assign_add(self.vars[name][w], metric[w]))
        self._reset_task_metrics_op = tf.group(*reset_ops)
        self._update_task_metrics_op = tf.group(*update_ops)

        # Each step we update the weights using the optimizer as well as the step-specific variables
        self.step_op = tf.group(self._weight_update_op, self._vars_step_update_op)
        self.updates.append(self.step_op)
        # After each task, run task-specific variable updates
        self.task_op = self._vars_task_update_op
        self.init_op = self._vars_init_update_op

        if self.compute_average_weights:
            self.updates.append(self.maintain_weight_averages_op)

        if self.compute_average_loss:
            self.update_loss_op = tf.assign(self.prev_loss, self.ema_loss)
            bupdates = self.updates
            with tf.control_dependencies(bupdates + [self.update_loss_op]):
                self.updates = [tf.group(*[self.maintain_averages_op])]
            self.delta_loss = self.prev_loss - self.ema_loss

        return self.updates#[self._base_updates

    def init_task_vars(self):
        K.get_session().run([self.init_op])

    def init_acc_vars(self):
        K.get_session().run(self.init_ema_vars)

    def init_loss(self, X, y, batch_size):
        pass
        #sess = K.get_session()
        #xi, yi, sample_weights = self.model.model._standardize_user_data(X[:batch_size], y[:batch_size], batch_size=batch_size)
        #sess.run(tf.assign(self.prev_loss, self.initial_loss), {self.model.input:xi[0], self.model.model.targets[0]:yi[0], self.model.model.sample_weights[0]:sample_weights[0], K.learning_phase():1})

    def update_task_vars(self):
        K.get_session().run(self.task_op)

    def update_task_metrics(self, X, y, batch_size):
        # Reset metric accumulators
        n_batch = len(X) // batch_size

        sess = K.get_session()
        sess.run(self._reset_task_metrics_op)
        for i in range(n_batch):
            xi, yi, sample_weights = self.model.model._standardize_user_data(X[i * batch_size:(i+1) * batch_size], y[i*batch_size:(i+1)*batch_size], batch_size=batch_size)
            sess.run(self._update_task_metrics_op, {self.model.input:xi[0], self.model.model.targets[0]:yi[0], self.model.model.sample_weights[0]:sample_weights[0]})


    def reset_optimizer(self):
        """Reset the optimizer variables"""
        K.get_session().run(self.init_opt_vars)

    def get_config(self):
        raise ValueError("Write the get_config bro")

    def get_numvals_list(self, key='omega'):
        """ Returns list of numerical values such as for instance omegas in reproducible order """
        variables = self.vars[key]
        numvals = []
        for p in self.weights:
            numval = K.get_value(tf.reshape(variables[p],(-1,)))
            numvals.append(numval)
        return numvals

    def get_numvals(self, key='omega'):
        """ Returns concatenated list of numerical values such as for instance omegas in reproducible order """
        conc = np.concatenate(self.get_numvals_list(key))
        return conc

    def get_state(self):
        state = []
        vs = self.vars
        for key in vs.keys():
            if key=='oopt': continue
            v = vs[key]
            for p in v.values():
                state.append(K.get_value(p)) # FIXME WhyTF does this not work?
        return state

    def set_state(self, state):
        c = 0
        vs = self.vars
        for key in vs.keys():
            if key=='oopt': continue
            v = vs[key]
            for p in v.values():
                K.set_value(p,state[c])
                c += 1

In [59]:
# """""""CONSTRUCT NETWORK, LOSS, UPDATES"""""" 
tf.reset_default_graph()

config = tf.ConfigProto()
config.gpu_options.allow_growth=True
sess = tf.InteractiveSession(config=config)
sess.run(tf.global_variables_initializer())

model = Sequential()
model.add(Dense(n_hidden_units, activation=activation_fn, input_dim=input_dim))
model.add(Dense(n_hidden_units, activation=activation_fn))
model.add(Dense(output_dim, activation='softmax'))

protocol_name, protocol = PATH_INT_PROTOCOL(omega_decay='sum', xi=xi) # protocol_name, protocol = protocols.FISHER_PROTOCOL(omega_decay='sum')
opt = Adam(lr=learning_rate, beta_1=0.9, beta_2=0.999) # opt = SGD(lr=learning_rate)
opt_name = 'adam' # opt_name = 'sgd'
oopt = KOOptimizer(opt, model=model, **protocol)
model.compile(loss="categorical_crossentropy", optimizer=oopt, metrics=['accuracy'])

history = LossHistory()
callbacks = [history]


file_prefix = "data_%s_opt%s_lr%.2e_bs%i_ep%i_tsks%i"%(protocol_name, opt_name, learning_rate, batch_size, epochs_per_task, n_tasks)
datafile_name = "%s.pkl.gz"%(file_prefix)

In [63]:
# """""""TRAIN"""""" 
diag_vals = dict()
all_evals = dict()

def run_fits(cvals, training_data, valid_data, eval_on_train_set=False):
    for cidx, cval_ in enumerate(cvals):
        fs = []
        evals = []
        sess.run(tf.global_variables_initializer())
        cstuffs = []
        cval = cval_
        print( "setting cval")
        oopt.set_strength(cval)
        print("cval is %e"%sess.run(oopt.lam))
        for age, tidx in enumerate(range(n_tasks)):
            print("Age %i, cval is=%f"%(age,cval))
            oopt.set_nb_data(len(training_data[tidx][0]))
            stuffs = model.fit(training_data[tidx][0], training_data[tidx][1], batch_size, epochs_per_task, callbacks=callbacks,
                              verbose=0)
            oopt.update_task_metrics(training_data[tidx][0], training_data[tidx][1], batch_size)
            oopt.update_task_vars()
            ftask = []
            for j in range(n_tasks):
                if eval_on_train_set:
                    f_ = model.evaluate(training_data[j][0], training_data[j][1], batch_size, verbose=0)
                else:
                    f_ = model.evaluate(valid_data[j][0], valid_data[j][1], batch_size, verbose=0)
                ftask.append(np.mean(f_[1]))
            evals.append(ftask)
            cstuffs.append(stuffs)
            
            # Re-initialize optimizer variables
            if reset_optimizer:
                oopt.reset_optimizer()

        # diag_vals[cval_] = oopt.get_numvals('omega')
        evals = np.array(evals)
        all_evals[cval_] = evals
        
        # backup all_evals to disk
        utils.save_zipped_pickle(all_evals, datafile_name)

In [64]:
cvals = [0, 0.01, 0.1, 1.0]
print(cvals)

[0, 0.01, 0.1, 1.0]


In [65]:
run_fits(cvals, training_datasets, validation_datasets)

setting cval
cval is 0.000000e+00
Age 0, cval is=0.000000


TypeError: get_updates() got an unexpected keyword argument 'params'

In [66]:
# backup all_evals to disk
# all_evals = dict() # uncomment to delete on disk
utils.save_zipped_pickle(all_evals, datafile_name)

In [71]:
cmap = plt.get_cmap('cool') 
cNorm  = colors.Normalize(vmin=-4, vmax=np.log(np.max(list(all_evals.keys()))))
# cNorm  = colors.Normalize()
scalarMap = cmx.ScalarMappable(norm=cNorm, cmap=cmap)
print(scalarMap.get_clim())

figure(figsize=(14, 4))
axs = [subplot(1,n_tasks+1,1)]#, None, None]
for i in range(1, n_tasks + 1):
    axs.append(subplot(1, n_tasks+1, i+1, sharex=axs[0], sharey=axs[0]))
    
keys = list(all_evals.keys())
sorted_keys = np.sort(keys)


for cval in sorted_keys:
    evals = all_evals[cval]
    for j in range(n_tasks):
        colorVal = scalarMap.to_rgba(np.log(cval))
        axs[j].plot(evals[:, j], c=colorVal)#, label="t%d, c%g"%(j, cval))
    label = "c=%g"%cval
    average = evals.mean(1)
    axs[-1].plot(average, c=colorVal, label=label)
    
for i, ax in enumerate(axs):
    ax.legend(bbox_to_anchor=(1.0,1.0))
    ax.set_title((['task %d'%j for j in range(n_tasks)] + ['average'])[i])
gcf().tight_layout()


for cval in sorted_keys:
    stuff = []
    for i in range(len(all_evals[cval])):#n_tasks):
        stuff.append(all_evals[cval][i][:i+1].mean())
    plot(range(1,n_tasks+1), stuff, 'o-', label="c=%g"%cval)
axhline(all_evals[cval][0][0], linestyle='--', color='k')
    
xlabel('Number of tasks')
ylabel('Fraction correct')
legend(loc='best')
ylim(0.9, 1.02)
xlim(0.5, 10.5)
grid('on')

savefig("%s.pdf"%(file_prefix))

ValueError: zero-size array to reduction operation maximum which has no identity

In [None]:
# Extract labels from MNIST labels into vector
def extract_labels(filename, num_images):
  with gzip.open(filename) as bytestream:
    bytestream.read(8)
    buf = bytestream.read(1 * num_images)
    labels = np.frombuffer(buf, dtype=np.uint8).astype(np.int64)
  return labels

train_labels = extract_labels("MNIST-data/train-labels-idx1-ubyte.gz", 60000)
eval_labels = extract_labels("MNIST-data/t10k-labels-idx1-ubyte.gz", 10000)
print(np.shape(train_labels))
print(np.shape(eval_labels))

In [None]:
# original train
train_original = np.zeros((60000,28,28), dtype=np.float32)
images_original = ["MNIST-processed-training/original/original{0}.png".format(k) for k in range(1,60000)]

for i in range(len(images_original)):
    img = np.array(Image.open(images_original[i]))
    train_original[i, :, :] = img
 
print(np.shape(train_original))

# original test
eval_original = np.zeros((10000,28,28), dtype=np.float32)
images2_original = ["MNIST-processed-test/original/test-original{0}.png".format(k) for k in range(1,10001)]

for i in range(len(images2_original)):
    img = np.array(Image.open(images2_original[i]))
    eval_original[i, :, :] = img
 
print(np.shape(eval_original))

In [None]:
# ROTATE 90 train
train_rot90 = np.zeros((60000,28,28), dtype=np.float32)
images_rot90 = ["MNIST-processed-training/rot90/rot90{0}.png".format(k) for k in range(1,60000)]

for i in range(len(images_rot90)):
    img = np.array(Image.open(images_rot90[i]))
    train_rot90[i, :, :] = img
 
print(np.shape(train_rot90))

# ROTATE 90 test
eval_rot90 = np.zeros((10000,28,28), dtype=np.float32)
images2_rot90 = ["MNIST-processed-test/rot90/test-rot90{0}.png".format(k) for k in range(1,10001)]

for i in range(len(images2_rot90)):
    img = np.array(Image.open(images2_rot90[i]))
    eval_rot90[i, :, :] = img
 
print(np.shape(eval_rot90))

In [None]:
# checkerboard train
train_checkerboard = np.zeros((60000,28,28), dtype=np.float32)
images_checkerboard = ["MNIST-processed-training/checkerboard/fullcheck{0}.png".format(k) for k in range(1,60000)]

for i in range(len(images_checkerboard)):
    img = np.array(Image.open(images_checkerboard[i]))
    train_checkerboard[i, :, :] = img
 
print(np.shape(train_checkerboard))

# checkerboard test
eval_checkerboard = np.zeros((10000,28,28), dtype=np.float32)
images2_checkerboard = ["MNIST-processed-test/checkerboard/test-checkerboard{0}.png".format(k) for k in range(1,10001)]

for i in range(len(images2_checkerboard)):
    img = np.array(Image.open(images2_checkerboard[i]))
    eval_checkerboard[i, :, :] = img
 
print(np.shape(eval_checkerboard))

In [None]:
# INV train
train_inv = np.zeros((60000,28,28), dtype=np.float32)
images_inv = ["MNIST-processed-training/Inv/inv{0}.png".format(k) for k in range(1,60000)]

for i in range(len(images_inv)):
    img = np.array(Image.open(images_inv[i]))
    train_inv[i, :, :] = img
 
print(np.shape(train_inv))

# INV test
eval_inv = np.zeros((10000,28,28), dtype=np.float32)
images2_inv = ["MNIST-processed-test/inv/test-inv{0}.png".format(k) for k in range(1,10001)]

for i in range(len(images2_inv)):
    img = np.array(Image.open(images2_inv[i]))
    eval_inv[i, :, :] = img
 
print(np.shape(eval_inv))

In [None]:
# invbot train
train_invbot = np.zeros((60000,28,28), dtype=np.float32)
images_invbot = ["MNIST-processed-training/invbot/invbot{0}.png".format(k) for k in range(1,60000)]

for i in range(len(images_invbot)):
    img = np.array(Image.open(images_invbot[i]))
    train_invbot[i, :, :] = img
 
print(np.shape(train_invbot))

# invbot test
eval_invbot = np.zeros((10000,28,28), dtype=np.float32)
images2_invbot = ["MNIST-processed-test/invbot/test-invbot{0}.png".format(k) for k in range(1,10001)]

for i in range(len(images2_invbot)):
    img = np.array(Image.open(images2_invbot[i]))
    eval_invbot[i, :, :] = img
 
print(np.shape(eval_invbot))

In [None]:
# fliplr train
train_fliplr = np.zeros((60000,28,28), dtype=np.float32)
images_fliplr = ["MNIST-processed-training/fliplr/fliplr{0}.png".format(k) for k in range(1,60000)]

for i in range(len(images_fliplr)):
    img = np.array(Image.open(images_fliplr[i]))
    train_fliplr[i, :, :] = img
 
print(np.shape(train_fliplr))

# fliplr test
eval_fliplr = np.zeros((10000,28,28), dtype=np.float32)
images2_fliplr = ["MNIST-processed-test/fliplr/test-fliplr{0}.png".format(k) for k in range(1,10001)]

for i in range(len(images2_fliplr)):
    img = np.array(Image.open(images2_fliplr[i]))
    eval_fliplr[i, :, :] = img
 
print(np.shape(eval_fliplr))

In [None]:
# cutud train
train_cutud = np.zeros((60000,28,28), dtype=np.float32)
images_cutud = ["MNIST-processed-training/cutud/cutUD{0}.png".format(k) for k in range(1,60000)]

for i in range(len(images_cutud)):
    img = np.array(Image.open(images_cutud[i]))
    train_cutud[i, :, :] = img
 
print(np.shape(train_cutud))

# cutud test
eval_cutud = np.zeros((10000,28,28), dtype=np.float32)
images2_cutud = ["MNIST-processed-test/cutud/test-cutud{0}.png".format(k) for k in range(1,10001)]

for i in range(len(images2_cutud)):
    img = np.array(Image.open(images2_cutud[i]))
    eval_cutud[i, :, :] = img
 
print(np.shape(eval_cutud))

In [None]:
# flipud train
train_flipud = np.zeros((60000,28,28), dtype=np.float32)
images_flipud = ["MNIST-processed-training/flipud/flipud{0}.png".format(k) for k in range(1,60000)]

for i in range(len(images_flipud)):
    img = np.array(Image.open(images_flipud[i]))
    train_flipud[i, :, :] = img
 
print(np.shape(train_data))

# flipud test
eval_flipud = np.zeros((10000,28,28), dtype=np.float32)
images2_flipud = ["MNIST-processed-test/flipud/test-flipud{0}.png".format(k) for k in range(1,10001)]

for i in range(len(images2_flipud)):
    img = np.array(Image.open(images2_flipud[i]))
    eval_flipud[i, :, :] = img
 
print(np.shape(eval_flipud))

In [None]:
def cnn_model_fn(features, labels, mode):

    # Input Layer
    input_layer = tf.reshape(features["x"], [-1, 28, 28, 1])

    # Convolutional layer 1
    conv1 = tf.layers.conv2d(
        inputs = input_layer,
        filters = 32,
        kernel_size=[5,5],
        padding="same",
        activation=tf.nn.relu
    )

    # Pooling 1
    pool1 = tf.layers.max_pooling2d(inputs=conv1, pool_size=[2, 2], strides=2)

    # Convolutional layer 2 and pooling layer
    conv2 = tf.layers.conv2d(
        inputs=pool1,
        filters=64,
        kernel_size=[5,5],
        padding="same",
        activation=tf.nn.relu
    )

    # Pooling 2 with flattening
    pool2 = tf.layers.max_pooling2d(inputs=conv2, pool_size=[2,2], strides=2)
    pool2_flat=tf.reshape(pool2, [-1, 7 * 7 * 64])

    # Dense layer with dropout 
    dense = tf.layers.dense(inputs=pool2_flat, units=1024, activation=tf.nn.relu)
    dropout=tf.layers.dropout(inputs=dense, rate=0.4, training=mode==tf.estimator.ModeKeys.TRAIN)
    logits = tf.layers.dense(inputs=dropout, units=10)

    # Generate predictions
    predictions = {
        "classes": tf.argmax(input=logits, axis=1),
        "probabilities": tf.nn.softmax(logits, name="softmax_tensor")
    }

    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)

    # Caluclate loss
    loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits)

    # Configure training op
    if mode == tf.estimator.ModeKeys.TRAIN:
        optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.001)
        train_op = optimizer.minimize(
        loss=loss,
        global_step=tf.train.get_global_step())
        return tf.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op)

    # Add evaluation metrics
    eval_metric_ops = {
        "accuracy": tf.metrics.accuracy(labels=labels, predictions=predictions["classes"])}

    return tf.estimator.EstimatorSpec(mode=mode, loss=loss, eval_metric_ops=eval_metric_ops)

In [None]:
# Estimator
mnist_classifier = tf.estimator.Estimator(model_fn=cnn_model_fn, model_dir="/tmp/mnist_convnet_model")

# Logging predictions
tensors_to_log = {"probabilities": "softmax_tensor"}
logging_hook = tf.train.LoggingTensorHook(
    tensors=tensors_to_log, 
    every_n_iter=50)

# Our application logic will be added here

In [None]:
# # Load rotated images for training
# images = ["MNIST-processed-training/rot90/rot90{0}.png".format(k) for k in range(1,60001)]

# train_data = [] 
# for image in images: 
#     img = Image.open(image)
#     arr = np.array(img)
#     train_data.append(arr)
# #     train_data.append(imageio.imread(image))
    
# # Flatten images    
# for i in range(len(train_data)):
#     train_data[i] = np.array(train_data[i].flatten())
    
# print(np.shape(train_data))

In [None]:
# mnist = tf.contrib.learn.datasets.load_dataset("mnist")

# train_data_int = mnist.train.images
# train_labels_int = np.asarray(mnist.train.labels, dtype=np.int32)
# eval_data_int = mnist.test.images
# eval_labels_int = np.asarray(mnist.test.labels, dtype=np.int32)

# print(np.shape(train_data_int))
# print(np.shape(train_labels_int))
# print(np.shape(eval_data_int))
# print(np.shape(eval_labels_int))

# def parse_img(filename, label):
#     img_str = tf.read_file(filename)
#     img_dec = tf.image.decode_image(img_str)
# #     img_res = tf.image.resize_images(img_dec, [28, 28])
#     return img_dec, label

# # Load rotated images for training
# filenames = tf.constant(["MNIST-processed-training/rot90/rot90{0}.png".format(k) for k in range(1,60001)])

# labels = train_labels
# dataset = tf.data.Dataset.from_tensor_slices((filenames, labels))
# dataset = dataset.map(parse_img)
    
# print(np.shape(dataset))

In [None]:
def main(unused_argv):
    # Training on original
    train_input_fn = tf.estimator.inputs.numpy_input_fn(
        x={"x":train_original},
        y=train_labels,
        batch_size=1000,
        num_epochs=None,
        shuffle=True)
    
    mnist_classifier.train(
        input_fn=train_input_fn,
        steps=200,
        hooks=[logging_hook])

    # Evaluation on original
    eval_input_fn = tf.estimator.inputs.numpy_input_fn(
        x={"x": eval_original},
        y=eval_labels,
        num_epochs=1,
        shuffle=False)
    
    eval_results=mnist_classifier.evaluate(input_fn=eval_input_fn)
    print(eval_results)

In [None]:
if __name__ == "__main__":
  tf.app.run()

In [None]:
def main(unused_argv):
    # Training on rot90
    train_input_fn = tf.estimator.inputs.numpy_input_fn(
        x={"x":train_rot90},
        y=train_labels,
        batch_size=1000,
        num_epochs=None,
        shuffle=True)
    
    mnist_classifier.train(
        input_fn=train_input_fn,
        steps=200,
        hooks=[logging_hook])

    # Evaluation on rot90
    eval_input_fn = tf.estimator.inputs.numpy_input_fn(
        x={"x": eval_rot90},
        y=eval_labels,
        num_epochs=1,
        shuffle=False)
    
    eval_results=mnist_classifier.evaluate(input_fn=eval_input_fn)
    print(eval_results)

In [None]:
if __name__ == "__main__":
  tf.app.run()

In [None]:
# def main(unused_argv):
#     # Training on rot90
#     train_input_fn = tf.estimator.inputs.numpy_input_fn(
#         x={"x":train_inv},
#         y=train_labels,
#         batch_size=1000,
#         num_epochs=None,
#         shuffle=True)
    
#     mnist_classifier.train(
#         input_fn=train_input_fn,
#         steps=200,
#         hooks=[logging_hook])

#     # Evaluation on rot90
#     eval_input_fn = tf.estimator.inputs.numpy_input_fn(
#         x={"x": eval_inv},
#         y=eval_labels,
#         num_epochs=1,
#         shuffle=False)
    
#     eval_results=mnist_classifier.evaluate(input_fn=eval_input_fn)
#     print(eval_results)

In [None]:
# if __name__ == "__main__":
#   tf.app.run()

In [None]:
#model.layers[-1].get_weights()

In [None]:
# CONSTANTS
batch_size = 128
num_classes = 10
epochs = 20

In [None]:
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.optimizers import SGD, Adam, RMSprop
from keras.callbacks import Callback

In [None]:
#from pathint import protocols
#from pathint.optimizers import KOOptimizer
#from pathint.keras_utils import LossHistory

In [None]:
y_train = keras.utils.to_categorical(train_original, num_classes)
y_test = keras.utils.to_categorical(test_original, num_classes)


In [None]:
x_train_original = train_original.reshape(60000, 784)
x_eval_original = eval_original.reshape(10000, 784)

x_train_original = reshape_train_original.astype('float32')
x_eval_original = reshape_eval_original.astype('float32')

x_train_original /= 255
x_eval_original /= 255

print(reshape_train_original.shape[0], 'train samples')
print(reshape_eval_original.shape[0], 'test samples')

In [None]:
# convert class vectors to binary class matrices
y_train = keras.utils.to_categorical(train_labels, num_classes)
y_eval = keras.utils.to_categorical(eval_labels, num_classes)

In [None]:
model = Sequential()
model.add(Dense(512, activation='relu', input_shape=(784,)))
model.add(Dropout(0.2))
model.add(Dense(512, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(num_classes, activation='softmax'))

model.summary()

model.compile(loss='categorical_crossentropy',
              optimizer=RMSprop(),
              metrics=['accuracy'])

history = model.fit(x_train_original, y_train,
                    batch_size=batch_size,
                    epochs=epochs,
                    verbose=1,
                    validation_data=(x_eval_original, y_eval))
score = model.evaluate(x_eval_original, y_eval, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])

In [None]:
weights = model.layers[-1].get_weights()

In [None]:
print(np.shape(weights[0]))