In [1]:
import tensorflow as tf
import datetime
import shutil
import os
import numpy as np
import matplotlib.pyplot as plt
%load_ext tensorboard

In [2]:
# Fetch MNIST through Keras: images and integer class labels, already split
# into a training and a test set.
mnist = tf.keras.datasets.mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Rescale the pixel values by 1/255.
x_train = x_train / 255.0
x_test = x_test / 255.0

# Turn the integer labels into one-hot rows with 10 entries (float32).
y_train, y_test = (
    np.array(tf.one_hot(labels, depth=10, dtype='float32'))
    for labels in (y_train, y_test)
)

# Flatten every 28x28 image into one row of 784 values.
mtrain, mtest = x_train.shape[0], x_test.shape[0]
x_train = x_train.reshape(mtrain, 28*28)
x_test = x_test.reshape(mtest, 28*28)

In [3]:
class MiniBatches(object):
    """Hands out shuffled mini-batches of a dataset, one epoch after the other.

    The sample indices are shuffled once per epoch and consumed in consecutive
    slices of ``batchsize``. When the next full slice would run past the end of
    the data, the indices are reshuffled and a new epoch starts, so a trailing
    incomplete batch is dropped.

    Parameters
    ----------
    x, y : array-like
        Inputs and targets; both are indexed along their first axis with the
        same integer index array and must expose ``.shape``.
    batchsize : int
        Number of samples per batch.
    """

    def __init__(self, x, y, batchsize):
        self.x = x
        self.y = y
        self.batchsize = batchsize
        self.nsamples = len(self.x)
        self.nbatches = int(self.nsamples / self.batchsize)  # full batches per epoch
        self.initialize()
        print(x.shape, y.shape)

    def initialize(self):
        """Start a new epoch: reset the batch counter and reshuffle the indices."""
        self.counter = 0
        self.idx = np.arange(0, self.nsamples)
        np.random.shuffle(self.idx)

    def next_batch(self):
        """Return the next ``(batchx, batchy)`` pair of the current epoch.

        A new epoch is started first if the next slice would exceed the
        number of samples.
        """
        start = self.counter * self.batchsize
        stop = start + self.batchsize
        # Reshuffle only once the current permutation is used up. The previous
        # '<' comparison reshuffled on every call, so the counter never advanced
        # and the samples of an epoch were never visited systematically.
        if stop > self.nsamples:
            self.initialize()
            start, stop = 0, self.batchsize
        idx = self.idx[start:stop]
        batchx = self.x[idx]
        batchy = self.y[idx]
        self.counter += 1
        return batchx, batchy

In [4]:
def softmax(z):
    """Softmax of ``z`` normalised along axis 0.

    The maximum along axis 0 is subtracted before exponentiating. This leaves
    the result mathematically unchanged but keeps ``np.exp`` from overflowing
    (and the quotient from becoming ``nan``) for large inputs.

    Parameters
    ----------
    z : array-like
        Scores; each slice along axis 0 is normalised to sum to one.

    Returns
    -------
    numpy.ndarray
        Array of the same shape as ``z``.
    """
    z = np.asarray(z)
    if z.size == 0:
        # nothing to normalise; np.max would raise on an empty array
        return np.exp(z)
    shifted = z - np.max(z, axis=0, keepdims=True)
    expz = np.exp(shifted)
    norm = np.sum(expz, axis=0, keepdims=True)
    return expz / norm

class MyModel():
    """Fully connected network with sigmoid hidden layers and a softmax output.

    The parameters are obtained from ``initializer.initialize()``, which has to
    return two lists ``(w, b)`` holding one weight and one bias entry per layer.
    Training is plain gradient descent on the mean categorical cross-entropy.
    """

    def __init__(self, initializer):
        w, b = initializer.initialize()
        self.w = w
        self.b = b
        self.nlayers = len(w)
        print("Number of Layers: %i"%self.nlayers)

    def forward(self, x, y):
        """
        Performs the forward prop (by using the parameters self.w and self.b) and computes the loss.
        It also caches the activations in all the layers (except for the input layer)
        in self.activations; the last cached entry holds the softmax scores.

        Returns the tuple (scores, loss).
        """
        self.activations = [] # remember the activations in the different layers

        ### START YOUR CODE HERE ###

        last = self.nlayers - 1  # index of the output layer

        a = x
        for i in range(last):
            a = tf.sigmoid(tf.matmul(a, self.w[i]) + self.b[i])
            self.activations.append(a)

        # Index the output layer explicitly instead of through the loop variable:
        # with a single layer the loop body never runs and `i` would be undefined.
        logits = tf.matmul(a, self.w[last]) + self.b[last]
        scores = tf.nn.softmax(logits)
        self.activations.append(scores)

        ### END YOUR CODE HERE ###

        loss = self.ce_loss(scores, y)

        return scores, loss

    def mlp_test(self, x, y):
        """Evaluates the model on (x, y) without updating parameters; returns (scores, loss)."""
        return self.forward(x,y)

    def ce_loss(self, scores, target):
        """Mean categorical cross-entropy between `target` and the probabilities `scores`."""
        return tf.reduce_mean(
            tf.keras.backend.categorical_crossentropy(target, scores, from_logits=False))


    def mlp_train(self, x, y, alpha):
        """
        Performs the forward prop and then the backprop followed by a parameter update
        with learning rate alpha.
        It also caches the gradients w.r.t. parameter in all the layers
        (self.gradsw for the weights, self.gradsb for the biases).

        Returns the tuple (scores, loss) computed before the update.
        """

        with tf.GradientTape() as tape:
            scores, loss = self.forward(x,y)

        # the following provides the gradient as a list of tensors, first with respect to the weights in the given layers
        # then w.r.t. to the bias in the given layers
        p = self.w + self.b
        grads = tape.gradient(loss, p)

        ### START YOUR CODE HERE ###

        self.gradsw = grads[:self.nlayers]
        self.gradsb = grads[self.nlayers:]

        # parameter update (in place): p <- p - alpha * dL/dp
        for i in range(self.nlayers):
            self.w[i].assign_sub(alpha * self.gradsw[i])
            self.b[i].assign_sub(alpha * self.gradsb[i])

        ### END YOUR CODE HERE ###

        return scores, loss


In [5]:
class Initializer():
    """Creates the weight and bias variables for a stack of dense layers.

    n0            -- number of input features of the first layer
    layershapes   -- iterable with the number of units of each layer
    stdev_scaling -- callable ``(n, nprev)`` giving the standard deviation of
                     the normal distribution the weights of a layer are drawn from
    """

    def __init__(self, n0, layershapes, stdev_scaling):
        self.n0, self.layershapes, self.scaling = n0, layershapes, stdev_scaling

    def initialize(self):
        """Returns ``(w, b)``: lists of float64 variables, one entry per layer.

        Weights have shape ``(nprev, n)`` and are normally distributed, biases
        have shape ``(n,)`` and start at zero. The shapes of all created
        variables are printed, weights first.
        """
        weights = []
        biases = []
        fan_in = self.n0
        for fan_out in self.layershapes:
            sigma = self.scaling(fan_out, fan_in)
            w_init = tf.random.normal((fan_in, fan_out), stddev = sigma, dtype='float64')
            weights.append(tf.Variable(w_init))
            b_init = tf.zeros(shape=[fan_out], dtype='float64')
            biases.append(tf.Variable(b_init))
            # the units of this layer are the inputs of the next one
            fan_in = fan_out

        for params in (weights, biases):
            tf.print(','.join("%s"%p.shape for p in params))
        return weights, biases
    
def stdev_scale_none(n, nprev):
    """No scaling: the standard deviation is 1.0 whatever the layer sizes are."""
    unit_stdev = 1.0
    return unit_stdev

def stdev_scale_in(n, nprev):
    """Standard deviation 1/sqrt(nprev), i.e. scaled by the number of inputs of the layer."""
    root_fan_in = np.sqrt(nprev)
    return 1.0 / root_fan_in

def stdev_scale_out(n, nprev):
    """Standard deviation 1/sqrt(n), i.e. scaled by the number of units of the layer."""
    root_fan_out = np.sqrt(n)
    return 1.0 / root_fan_out

def stdev_scale_glorot(n, nprev):
    """Standard deviation 1/sqrt(mean of nprev and n), using both sizes of the layer."""
    mean_fan = 0.5*(nprev+n)
    return 1.0 / np.sqrt(mean_fan)
        

In [6]:
def create_writers(log_dir):
    """Creates the two summary writers of a run.

    The training writer logs to ``<log_dir>/train`` and the test writer to
    ``<log_dir>/test``. Returns ``(train_summary_writer, test_summary_writer)``.
    """
    targets = ["%s/train"%log_dir, "%s/test"%log_dir]
    train_writer, test_writer = [tf.summary.create_file_writer(path) for path in targets]
    return train_writer, test_writer

In [12]:
### START YOUR CODE HERE ###

# Settings of this run: units per layer (the last entry is the output layer),
# the rule for the standard deviation of the initial weights, and the name of
# the sub-directory the TensorBoard logs of this run are written to.
layers = [20, 20, 20, 20, 20, 10]
init_scaling = stdev_scale_glorot
run_name = "complex_model"

batchsize = 64
lr = 0.1
epochs = 20

### STOP YOUR CODE HERE ###


tensorboard_folder = "tb_logs" # root directory you pass to start tensorboard with


# Each execution of this cell logs below its own timestamped directory.
current_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
outdir = os.path.join(os.getcwd(), tensorboard_folder, current_time)
log_dir = "%s/%s"%(outdir, run_name)
train_summary_writer, test_summary_writer = create_writers(log_dir)

n0=784
nL = 10
samples = x_train.shape[0]

nlayers = len(layers)
nbatches = int(samples/batchsize)
trainset = MiniBatches(x_train, y_train, batchsize)

initializer = Initializer(n0, layers, init_scaling)
model = MyModel(initializer) # instantiate model object

avg_train_loss = tf.keras.metrics.Mean('train_loss', dtype=tf.float32)
acc_train = tf.keras.metrics.CategoricalAccuracy('train_acc', dtype=tf.float32)
avg_test_loss = tf.keras.metrics.Mean('test_loss', dtype=tf.float32)
acc_test = tf.keras.metrics.CategoricalAccuracy('test_acc', dtype=tf.float32)

for epoch in range(epochs):
    trainset.initialize()

    # Values collected over all batches of this epoch, keyed by layer index;
    # they are written as histograms once the epoch is finished.
    epoch_data = {"loss":[],
                  "activ" : {layer : [] for layer in range(nlayers)},
                  "gradb" : {layer : [] for layer in range(nlayers)},
                  "gradw" : {layer : [] for layer in range(nlayers)}}

    for _ in range(nbatches):

        ### START YOUR CODE HERE ###

        # grab a batch and prop and backprop it

        batchx, batchy = trainset.next_batch()
        scores, loss = model.mlp_train(batchx, batchy, lr)

        # update epoch_data dict (the layer loop uses its own variable name so
        # that it does not shadow the batch loop)
        epoch_data["loss"].append(loss)
        for layer in range(nlayers):
            epoch_data["activ"][layer].append(model.activations[layer])
            epoch_data["gradb"][layer].append(model.gradsb[layer])
            epoch_data["gradw"][layer].append(model.gradsw[layer])

        ### STOP YOUR CODE HERE ###

        avg_train_loss.update_state(loss)
        acc_train.update_state(batchy, scores)

    with train_summary_writer.as_default():
        tf.summary.histogram(name="loss", data=epoch_data["loss"], step=epoch)
        for layer in range(nlayers):
            tf.summary.histogram(name="activ_%i"%layer, data=epoch_data["activ"][layer], step=epoch)
            # weights are logged under "w_<layer>" and biases under "b_<layer>"
            # (the two tags used to be swapped)
            tf.summary.histogram(name="w_%i"%layer, data=model.w[layer], step=epoch)
            tf.summary.histogram(name="b_%i"%layer, data=model.b[layer], step=epoch)
            tf.summary.histogram(name="gradw_%i"%layer, data=epoch_data["gradw"][layer], step=epoch)
            tf.summary.histogram(name="gradb_%i"%layer, data=epoch_data["gradb"][layer], step=epoch)

    # evaluate on the complete test set once per epoch
    scores, loss = model.mlp_test(x_test, y_test)
    avg_test_loss.update_state(loss)
    acc_test.update_state(y_test, scores)

    with train_summary_writer.as_default():
        tf.summary.scalar('avg_train_loss', avg_train_loss.result(), step=epoch)
        tf.summary.scalar('train_accuracy', acc_train.result(), step=epoch)

    with test_summary_writer.as_default():
        tf.summary.scalar('avg_test_loss', avg_test_loss.result(), step=epoch)
        tf.summary.scalar('test_accuracy', acc_test.result(), step=epoch)

    template = 'epoch {}, loss (train): {}, loss (test): {}, acc (train): {}, acc (test): {}'
    print (template.format(epoch+1,avg_train_loss.result(), avg_test_loss.result(), acc_train.result(), acc_test.result()))

    # Reset metrics every epoch
    avg_train_loss.reset_states()
    avg_test_loss.reset_states()
    acc_train.reset_states()
    acc_test.reset_states()


(60000, 784) (60000, 10)
(784, 20),(20, 20),(20, 20),(20, 20),(20, 20),(20, 10)
(20,),(20,),(20,),(20,),(20,),(10,)
Number of Layers: 6
epoch 1, loss (train): 2.305002212524414, loss (test): 2.303969621658325, acc (train): 0.10807430744171143, acc (test): 0.0982000008225441
epoch 2, loss (train): 2.303316354751587, loss (test): 2.3029377460479736, acc (train): 0.10752401500940323, acc (test): 0.11349999904632568
epoch 3, loss (train): 2.3029592037200928, loss (test): 2.3048312664031982, acc (train): 0.10895811021327972, acc (test): 0.11349999904632568
epoch 4, loss (train): 2.3022470474243164, loss (test): 2.302213430404663, acc (train): 0.11210979521274567, acc (test): 0.11349999904632568
epoch 5, loss (train): 2.303105115890503, loss (test): 2.301954507827759, acc (train): 0.10895811021327972, acc (test): 0.10279999673366547
epoch 6, loss (train): 2.3025882244110107, loss (test): 2.3012897968292236, acc (train): 0.10612326860427856, acc (test): 0.11349999904632568
epoch 7, loss (trai

If it says "Reusing instance on port xxxx (pid yyyy), ...", but that process is not running anymore, you have to delete the ".tensorboard-info" directory in your temp-directory.

Alternatively, you can access TensorBoard at http://localhost:6006 (the default) if you are running this notebook on your local system.

There is a bug in tensorboard right now where it has problems if new event-files get added.\
Restarting Tensorboard fixes the problem.\
(see https://github.com/tensorflow/tensorboard/issues/2084)

In [13]:
# Start TensorBoard from the notebook on the relative "tb_logs" directory,
# the folder name the training cell uses as the root of its log directories.
%tensorboard --logdir tb_logs