In [None]:
# -*- coding: utf-8 -*-
"""
Name: Shane Quinn
Student Number: R00144107
Email: shane.quinn1@mycit.ie
Course: MSc Artificial Intelligence
Module: Deep Learning
Date: 03/04/2021
"""

import tensorflow as tf
from tensorflow.python.client import device_lib
from keras.utils import np_utils
import matplotlib.pyplot as plt
import functools
import time


def forward_pass(X, w1, w2, b1, b2):
    """
    Push feature data through a two-layer network and return class probabilities.

    The network is a ReLU hidden layer followed by a softmax output layer.
    X is transposed before the first matrix product, so inside the network
    (and in the returned tensor) every column corresponds to one sample.

    Parameters
    ----------
    X : tf.Tensor
        Pre-processed input data, one sample per row.
    w1 : tf.Variable
        Layer 1 learnable weights, shaped (hidden units, input features).
    w2 : tf.Variable
        Layer 2 learnable weights, shaped (classes, hidden units).
    b1 : tf.Variable
        Layer 1 bias; anything that broadcasts against w1 @ X^T.
    b2 : tf.Variable
        Layer 2 bias; anything that broadcasts against w2 @ H1.

    Returns
    -------
    H : tf.Tensor
        Softmax output, shaped (classes, samples); each column sums to 1.

    """

    #Layer 1 - Relu Neurons (width is set by the number of rows in w1)
    A = tf.matmul(w1, tf.transpose(X)) + b1                    #A1 = w1.X^T + b1
    H = tf.keras.activations.relu(A)                           #H1 = act(A1)
    #Layer 2 - Softmax Layer
    A = tf.matmul(w2, H) + b2                                  #A2 = w2.H1 + b2
    #Softmax is unchanged by subtracting a constant from every logit of a sample.
    #Shifting by the per-sample maximum keeps the largest exponent at e^0 = 1,
    #so tf.exp cannot overflow to inf (which would turn the division into NaN).
    A = A - tf.reduce_max(A, axis=0, keepdims=True)
    #Softmax = (e^A2)/sum(e^A2), normalised over the class axis (axis 0)
    exp_A = tf.exp(A)
    H = exp_A / tf.reduce_sum(exp_A, axis=0, keepdims=True)    #H2 = act(A2)

    return H
    

def cross_entropy(pred_y, y):
    """
    Calculate the mean categorical cross entropy loss over all samples.

    Both inputs are expected class-major: axis 0 indexes the classes and each
    column holds one sample, matching the output layout of forward_pass().

    Parameters
    ----------
    pred_y : tf.Tensor
        Predicted class probabilities (output of the softmax layer in
        forward_pass()).
    y : tf.Tensor
        One-hot encoded true class labels, same shape as pred_y.

    Returns
    -------
    cross_ent : tf.Tensor
        Scalar cross entropy loss averaged over the samples.

    """

    #A probability that underflows to exactly 0 would give log(0) = -inf, and
    #0 * -inf (for every non-target class) is NaN, which would poison the loss
    #and every gradient. Clip to a tiny positive floor before taking the log.
    epsilon = 1e-12
    safe_pred_y = tf.clip_by_value(pred_y, epsilon, 1.0)

    #Cross entropy loss per sample = -sum over classes of (one-hot label * log(predicted probability))
    per_sample_loss = -tf.reduce_sum(y * tf.math.log(safe_pred_y), axis=0)
    #Cross entropy loss = mean of all per-sample losses calculated above.
    cross_ent = tf.reduce_mean(per_sample_loss, axis=0)

    return cross_ent



def calculate_accuracy(pred_y, y, datatype=tf.float32):
    """
    Calculate the classification accuracy given predicted probabilities and true class labels.

    A sample counts as correct when the class with the highest predicted
    probability is the class marked in the one-hot label. Both inputs are
    expected class-major (axis 0 indexes classes, one column per sample).

    Rounding the probabilities and comparing them element-wise with the
    one-hot matrix is NOT used: with 10 classes, nine of every ten label
    entries are 0, so a model that outputs no confident prediction at all
    would already score 0.9.

    Parameters
    ----------
    pred_y : tf.Tensor
        Predicted class probabilities, output of forward pass/softmax layer.
    y : tf.Tensor
        One-hot encoded true class values, same shape as pred_y.
    datatype : tf.float32/tf.float64, optional
        Floating point type of the returned accuracy. The default is tf.float32.

    Returns
    -------
    accuracy : tf.Tensor
        Scalar fraction of samples whose predicted class is correct.

    """

    #Index of the most probable class for every sample
    predicted_class = tf.argmax(pred_y, axis=0)
    #Index of the true class for every sample (position of the 1 in the one-hot column)
    true_class = tf.argmax(y, axis=0)
    #1.0 where the prediction is correct, 0.0 otherwise
    correct = tf.cast(tf.equal(predicted_class, true_class), datatype)
    #Mean value of correct predictions
    accuracy = tf.reduce_mean(correct)

    return accuracy


def exec_time(func):
    """
    Decorator that reports how long each call of the wrapped function takes.

    The elapsed wall-clock time (measured with time.perf_counter) is printed
    after the wrapped function returns; the function's own return value is
    handed back to the caller untouched.

    Parameters
    ----------
    func : FUNCTION
        Function whose execution time is measured and printed.

    Returns
    -------
    FUNCTION
        Wrapper with the same call signature and metadata as func.
    """

    @functools.wraps(func)
    def timed_call(*args, **kwargs):
        began = time.perf_counter()
        result = func(*args, **kwargs)
        elapsed = time.perf_counter() - began
        print("Execution Time: ", elapsed)
        return result

    return timed_call

@exec_time
def main():
    """
    Train the two-layer network on the pre-processed data and plot the learning curves.

    Runs full-batch gradient descent with the Adam optimiser: every iteration
    pushes the whole training set through forward_pass(), computes the cross
    entropy loss, applies the gradients, and then evaluates the updated model
    on the validation set. Loss and accuracy for both sets are collected per
    iteration and plotted at the end.
    """

    #Retrieve feature data/class labels
    X, y, X_val, y_val = pre_process()

    #Initialise learning rate and iterations.
    #The rate is passed to Adam explicitly; 0.001 is the Keras default for Adam.
    learning_rate = 0.001
    iterations = 1000
    datatype = tf.float64

    #Initialise lists for saving accuracies/loss
    te_acc = []
    tr_acc = []
    te_loss = []
    tr_loss = []

    # Create tf tensors from data
    X = tf.cast(X, datatype)
    y = tf.cast(y, datatype)
    X_val = tf.cast(X_val, datatype)
    y_val = tf.cast(y_val, datatype)

    #Initialise Adam Optimizer
    adam = tf.keras.optimizers.Adam(learning_rate=learning_rate)

    #Initialise weights and bias
    layer1_weights = tf.Variable(tf.random.normal([200, 784], stddev=0.05, dtype=datatype))
    layer2_weights = tf.Variable(tf.random.normal([10, 200], stddev=0.05, dtype=datatype))
    #One bias per neuron, stored as a column so it broadcasts across the
    #sample axis. A single scalar bias would be shared by every neuron of a
    #layer, and on the softmax layer it would have no effect at all because
    #softmax is invariant to adding the same constant to every logit.
    layer1_bias = tf.Variable(tf.zeros([200, 1], dtype=datatype))
    layer2_bias = tf.Variable(tf.zeros([10, 1], dtype=datatype))
    trainable = [layer1_weights, layer2_weights, layer1_bias, layer2_bias]

    #Repeat gradient descent loop 'iterations' times
    for i in range(iterations):

        #Record the forward pass on a gradient tape so gradients of the loss
        #with respect to the learnable weights and biases can be calculated
        with tf.GradientTape() as tape:
            pred_y = forward_pass(X, layer1_weights, layer2_weights, layer1_bias, layer2_bias)
            loss = cross_entropy(pred_y, y)

        gradients = tape.gradient(loss, trainable)                  #Calculate gradients
        accuracy = calculate_accuracy(pred_y, y, datatype)          #Calculate accuracy of model
        #Store plain NumPy scalars rather than tensors for the plot
        tr_loss.append(loss.numpy())
        tr_acc.append(accuracy.numpy())
        print("Iteration {}: Training Loss = {} Training Accuracy = {}".format(i, loss.numpy(), accuracy.numpy()))

        #Apply gradients using adaptive movement estimation, see accompanied report for more details
        adam.apply_gradients(zip(gradients, trainable))

        #Test the updated model on validation data (outside the tape: no gradients needed)
        test_pred_y = forward_pass(X_val, layer1_weights, layer2_weights, layer1_bias, layer2_bias)
        test_loss = cross_entropy(test_pred_y, y_val)
        te_loss.append(test_loss.numpy())
        te_acc.append(calculate_accuracy(test_pred_y, y_val, datatype).numpy())

    plt.title("Question1_1_1")
    plt.plot(te_loss, label="Validation Loss")
    plt.plot(tr_loss, label="Train Loss")
    plt.plot(te_acc, label="Validation Accuracy")
    plt.plot(tr_acc, label="Train Accuracy")
    #Accuracy lives in [0, 1]; loss values above 1 are cut off by this limit
    plt.ylim((0, 1))
    plt.legend()
    plt.show()
    


def pre_process():
    """
    Load the Fashion MNIST dataset and prepare it for the network.

    Every image is flattened to a row of 784 values and divided by 255.0.
    Labels are one-hot encoded over 10 classes and then transposed, so the
    label arrays are class-major (one column per sample).

    Returns
    -------
    tr_x : NUMPY N-D ARRAY
        Training images, one flattened image per row.
    tr_y : NUMPY N-D ARRAY
        Transposed one-hot training labels.
    te_x : NUMPY N-D ARRAY
        Test images, one flattened image per row.
    te_y : NUMPY N-D ARRAY
        Transposed one-hot test labels.

    """

    dataset = tf.keras.datasets.fashion_mnist
    (train_images, train_labels), (test_images, test_labels) = dataset.load_data()

    # Flatten each image to 784 features and rescale by 255
    tr_x = train_images.reshape(train_images.shape[0], 784) / 255.0
    te_x = test_images.reshape(test_images.shape[0], 784) / 255.0

    # One-hot encode the 10 classes, then transpose to class-major layout
    tr_y = np_utils.to_categorical(train_labels, 10).T
    te_y = np_utils.to_categorical(test_labels, 10).T

    return tr_x, tr_y, te_x, te_y



if __name__ == '__main__':
    # List the devices TensorFlow can see before starting the timed training run.
    local_devices = device_lib.list_local_devices()
    print("Local Devices: \n", local_devices)
    main()
    

Local Devices: 
 [name: "/device:CPU:0"
device_type: "CPU"
memory_limit: 268435456
locality {
}
incarnation: 16563406014209350229
, name: "/device:GPU:0"
device_type: "GPU"
memory_limit: 14674281152
locality {
  bus_id: 1
  links {
  }
}
incarnation: 4312557326448041465
physical_device_desc: "device: 0, name: Tesla T4, pci bus id: 0000:00:04.0, compute capability: 7.5"
]
Iteration 0: Training Loss = 2.3793712373918843 Training Accuracy = 0.9
Iteration 1: Training Loss = 2.1621116195462626 Training Accuracy = 0.9
Iteration 2: Training Loss = 1.9879881180231755 Training Accuracy = 0.9
Iteration 3: Training Loss = 1.8375567253424863 Training Accuracy = 0.9000033333333334
Iteration 4: Training Loss = 1.699264798850866 Training Accuracy = 0.9005933333333334
Iteration 5: Training Loss = 1.5707960580403888 Training Accuracy = 0.9029833333333334
Iteration 6: Training Loss = 1.4539069589961533 Training Accuracy = 0.9056
Iteration 7: Training Loss = 1.3499762986393962 Training Accuracy = 0.90836