
<h1 id="CMSC-636,-HW3:-Multilayer-perceptron-on-MNIST-dataset">CMSC 636, HW3: Multilayer perceptron on MNIST dataset<a class="anchor-link" href="#CMSC-636,-HW3:-Multilayer-perceptron-on-MNIST-dataset">¶</a></h1><h3 id="NOTE:-Do-not-use-the-Keras-library">NOTE: Do not use the Keras library<a class="anchor-link" href="#NOTE:-Do-not-use-the-Keras-library">¶</a></h3>


In [1]:

import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from types import SimpleNamespace
%matplotlib inline




<h1 id="1.-Load-MNIST">1. Load MNIST<a class="anchor-link" href="#1.-Load-MNIST">¶</a></h1>


In [2]:

# load mnist dataset with labels encoded as one-hot vectors
class Dataset():
    """In-memory dataset that hands out consecutive mini-batches.

    `data` is a tuple of arrays indexed along their first axis; every array
    is sliced and shuffled with the same indices, so rows stay aligned.
    """

    def __init__(self, data):
        self.data = data
        self.index = 0    # position of the next unread example
        self.epochs = 0   # how many times the cursor has wrapped around

    def shuffle(self):
        """Reorder every array in `data` with one shared random permutation."""
        order = np.arange(self.data[0].shape[0])
        np.random.shuffle(order)
        self.data = tuple(array[order] for array in self.data)

    def next_batch(self, batch_size):
        """Return a tuple with the next `batch_size` rows of every array.

        If fewer than `batch_size` rows remain, the epoch counter is
        incremented, the data is reshuffled and the batch is taken from the
        beginning of the new order (the unread tail is skipped).
        """
        num_examples = self.data[0].shape[0]
        if self.index + batch_size > num_examples:
            # not enough rows left: start a new, reshuffled epoch
            self.epochs += 1
            self.shuffle()
            self.index = 0
        first = self.index
        last = first + batch_size
        self.index = last
        return tuple(array[first:last, ...] for array in self.data)
            
def load_mnist():
    """Load MNIST and wrap the train and test splits in `Dataset` objects.

    Images are flattened to one row per example and divided by 255 as
    float32; labels are one-hot encoded over 10 classes.

    Returns:
        SimpleNamespace with attributes `train` and `test`, each a `Dataset`
        holding the tuple (images, one_hot_labels).
    """
    def preprocess(data, labels, num_classes):
        """Flatten and rescale the images, and one-hot encode the labels."""
        # one row per image, scaled by 1/255
        flat_images = np.reshape(data, [data.shape[0], -1]).astype(np.float32) / 255.0
        # one hot encoding: write a 1 at flat position row * num_classes + label
        num_labels = labels.shape[0]
        one_hot = np.zeros((num_labels, num_classes))
        hot_positions = np.arange(num_labels) * num_classes + labels.ravel()
        one_hot.flat[hot_positions] = 1
        return flat_images, one_hot

    (train_x, train_y), (test_x, test_y) = tf.keras.datasets.mnist.load_data()
    return SimpleNamespace(
        train=Dataset(preprocess(train_x, train_y, 10)),
        test=Dataset(preprocess(test_x, test_y, 10)))
# Module-level dataset handle; later cells draw batches from mnist.train and mnist.test.
mnist = load_mnist()



Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz


Exception: URL fetch failure on https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz: None -- [SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: unable to get local issuer certificate (_ssl.c:1131)


<h1 id="2.-Model-definition">2. Model definition<a class="anchor-link" href="#2.-Model-definition">¶</a></h1>


In [None]:

# Define parameters of the network
input_size = 28*28   # length of one flattened 28x28 image
n_outputs = 10       # one logit per class (labels are one-hot over 10 classes)
n_hidden = 256       # width of the hidden ReLU layer; free hyperparameter, tune as needed
batch_size = 100     # number of examples per training step




<p>Define the model:</p>
<p>h1 = relu(x w1 + b1)<br>
logits = h1 w2 + b2</p>
<p>Here "x w1" denotes the matrix multiplication of the matrices x and w1.
Each row of the matrix x is one training input example.</p>


In [None]:

class DenseLayer(object):
    """Fully connected layer computing afunc(x w + b)."""

    def __init__(self, n_inputs, n_units, afunc=None, w_stddev=0.01):
        '''Create the layer parameters.

        Args:
            n_inputs: number of input features (rows of the weight matrix).
            n_units: number of output units (columns of the weight matrix).
            afunc: optional activation applied to the affine output;
                None leaves the layer linear.
            w_stddev: standard deviation of the truncated-normal weight
                initializer.
        '''
        initial_w = tf.random.truncated_normal(
            [n_inputs, n_units], stddev=w_stddev)
        self.w = tf.Variable(initial_w, name='w')
        initial_b = tf.zeros([n_units])
        self.b = tf.Variable(initial_b, name='b')
        self.afunc = afunc

    def trainable_variables(self):
        '''Return the layer parameters as the list [w, b].'''
        return [self.w, self.b]

    def __call__(self, x):
        '''Apply the affine map and, if one was given, the activation.'''
        affine = tf.matmul(x, self.w) + self.b
        if self.afunc is None:
            return affine
        return self.afunc(affine)

class LogisticReg(object):
    """Sequential stack of layers with an optional softmax on the output."""

    def __init__(self, layers):
        self.layers = layers

    def trainable_variables(self):
        '''Return the trainable variables of every layer, in layer order.'''
        collected = []
        for layer in self.layers:
            collected.extend(layer.trainable_variables())
        return collected

    def __call__(self, x, logits=False):
        '''Feed x through the layers in order.

        With logits=True the raw output of the last layer is returned
        (use this for training); otherwise softmax is applied to it.
        '''
        activations = x
        for layer in self.layers:
            activations = layer(activations)
        if logits:
            return activations
        return tf.nn.softmax(activations)

def loss_fn(logits, labels, weights):
    '''Mean softmax cross entropy between logits and labels.

    Args:
        logits: unnormalized class scores produced by the model.
        labels: target class distribution for each example.
        weights: layer weight tensors; currently unused, kept as the hook
            for an optional regularization term.

    Returns:
        The cross entropy averaged over all examples.
    '''
    per_example = tf.nn.softmax_cross_entropy_with_logits(
        labels=labels,
        logits=logits)
    # weights are not used... a regularization penalty could be added here
    return tf.reduce_mean(per_example)
    
# define model: h1 = relu(x w1 + b1), logits = h1 w2 + b2
model = LogisticReg([
    DenseLayer(input_size, n_hidden, afunc=tf.nn.relu),  # hidden layer
    DenseLayer(n_hidden, n_outputs),                     # linear output layer (logits)
])
# weight matrices of all layers, passed to loss_fn (e.g. for regularization)
weights = [layer.w for layer in model.layers]

# define optimizer.
optimizer = tf.keras.optimizers.Adam(1e-3)




<h1 id="3.-Training">3. Training<a class="anchor-link" href="#3.-Training">¶</a></h1>


In [None]:

def accuracy(predictions, labels):
    """Return the percentage of examples whose prediction matches the label.

    The task type is inferred from the shape of `predictions` instead of a
    module-level variable: a single output column (or a 1-D vector) is
    treated as a binary score thresholded at 0.5; otherwise the predicted
    class is the argmax over axis 1.

    Args:
        predictions: array-like of model outputs, one row per example.
        labels: array-like of targets with the same layout as `predictions`.

    Returns:
        Accuracy as a percentage (100.0 * correct / number of examples).
    """
    predictions = np.asarray(predictions)
    labels = np.asarray(labels)
    is_binary = predictions.ndim == 1 or predictions.shape[1] == 1
    if is_binary:
        # flatten both sides so (N, 1) and (N,) inputs compare element by element
        hits = (np.greater(predictions.reshape(-1), 0.5)
                == np.greater(labels.reshape(-1), 0.5))
    else:
        hits = np.argmax(predictions, 1) == np.argmax(labels, 1)
    return 100.0 * np.sum(hits) / predictions.shape[0]



In [None]:

# Training schedule
num_steps = 10_000   # total number of optimization steps
summary_freq = 200   # print a summary every this many steps
n_test_log = 10      # number of test batches averaged for each summary

def train_step(labels, inputs):
    '''Run a single optimization step on one batch.

    Args:
        labels: target labels for the batch.
        inputs: input examples for the batch.

    Returns:
        Tuple (loss, predictions) as numpy values: the loss measured before
        the update, and the model output (softmax applied) for `inputs`
        computed after the update.
    '''
    params = model.trainable_variables()
    # record the forward pass so the loss can be differentiated
    with tf.GradientTape() as tape:
        batch_loss = loss_fn(model(inputs, logits=True), labels, weights)
    # hand the gradients to the optimizer
    grads = tape.gradient(batch_loss, params)
    optimizer.apply_gradients(zip(grads, params))

    loss_value = batch_loss.numpy()
    predictions = model(inputs).numpy()
    return loss_value, predictions



In [None]:

mean_loss = 0
train_accuracy = 0
# Number of steps accumulated into mean_loss / train_accuracy since the last
# summary. The first summary fires at step 0 after a single step, so dividing
# by summary_freq there would under-report both values by that factor.
steps_since_summary = 0
# Perform num_steps training steps
for step in range(num_steps):
    # Get next batch of batch_size images
    batch_X, batch_y = mnist.train.next_batch(batch_size)
    # Call the optimizer to perform one step of the training
    l, train_pred = train_step(batch_y, batch_X)
    # Accumulate accuracy and loss for the running averages
    train_accuracy += accuracy(train_pred, batch_y)
    mean_loss += l
    steps_since_summary += 1
    if step % summary_freq == 0:
        # Mean train accuracy over the steps actually accumulated
        train_accuracy = train_accuracy / steps_since_summary
        # Evaluate accuracy on n_test_log batches of the test dataset
        test_accuracy = 0
        for i in range(n_test_log):
            batch_X_test, batch_y_test = mnist.test.next_batch(batch_size)
            pred = model(batch_X_test)
            test_accuracy += accuracy(pred, batch_y_test)
        test_accuracy = test_accuracy / n_test_log

        print(step, ', train:',train_accuracy,' | test:', test_accuracy, ' | loss:', mean_loss/steps_since_summary)
        # Reset the running sums for the next summary window
        mean_loss = 0
        train_accuracy = 0
        steps_since_summary = 0




<h1 id="4.-Test-the-model-using-testing-dataset">4. Test the model using testing dataset<a class="anchor-link" href="#4.-Test-the-model-using-testing-dataset">¶</a></h1>


In [None]:

# Acquire one sample from the mnist dataset
test_sample_x, test_sample_y = mnist.test.next_batch(1)

# Get a prediction for this sample (model output with softmax applied)
pred = model(test_sample_x)

# The label is one-hot and the prediction is one score per class,
# so the argmax of each gives the digit.
print('Actual number:', np.argmax(test_sample_y))
print('Prediction by the model:', np.argmax(pred.numpy()))

# plot
plt.imshow(np.reshape(test_sample_x, [28,28]))

