## Imports

In [1]:
import numpy as np
import scipy.special

## Neural Network Model

In [2]:
class NeuralNetwork:
    """Fully connected network with one hidden layer and sigmoid activations.

    Weights are trained with mini-batch gradient descent on the squared
    error between the targets and the network outputs.

    Attributes:
        wih: weight matrix input -> hidden, shape (hiddennodes, inputnodes).
        who: weight matrix hidden -> output, shape (outputnodes, hiddennodes).
        lr: learning rate.
        bs: mini-batch size.
        ep: number of training epochs.
        E: one entry per trained epoch holding that epoch's summed
            squared error as a flat float array.
        results: [predicted_label, target_label] pairs from the last
            call to ``test``.
        activations: flattened hidden-layer outputs, one entry per
            training instance seen (accumulated over every epoch).
        encoded_images: output-layer pre-activations, one entry per
            training instance seen.
        decoded_images: output-layer activations, one entry per training
            instance seen.
    """

    def __init__(self, inputnodes=784, hiddennodes=200, outputnodes=10, learningrate=0.1, batch_size=1, epochs=10):
        """Create the network and draw random initial weights."""
        self.inodes = inputnodes
        self.hnodes = hiddennodes
        self.onodes = outputnodes

        # Initial weights are drawn from N(0, 1/sqrt(fan_in)), where fan_in
        # is the number of links feeding each node of the receiving layer:
        # inodes for the hidden layer, hnodes for the output layer.
        self.wih = np.random.normal(
            0.0, pow(self.inodes, -0.5), (self.hnodes, self.inodes))
        self.who = np.random.normal(
            0.0, pow(self.hnodes, -0.5), (self.onodes, self.hnodes))

        # set the learning rate
        self.lr = learningrate

        # set the batch size
        self.bs = batch_size

        # set the number of epochs
        self.ep = epochs

        # per-epoch sum of squared errors
        self.E = []

        # [predicted, target] label pairs filled in by test()
        self.results = []

        # logistic sigmoid; squashes values into the open interval (0, 1)
        self.activation_function = scipy.special.expit

        # per-instance records collected while training
        self.activations = []
        self.encoded_images = []
        self.decoded_images = []

    def _forward(self, inputs):
        """Run a forward pass on column-oriented ``inputs``.

        Returns a tuple ``(hidden_outputs, final_inputs, final_outputs)``.
        """
        hidden_outputs = self.activation_function(np.dot(self.wih, inputs))
        final_inputs = np.dot(self.who, hidden_outputs)
        final_outputs = self.activation_function(final_inputs)
        return hidden_outputs, final_inputs, final_outputs

    def batch_input(self, X, y):
        """Yield ``(X_batch, y_batch)`` slices of at most ``self.bs`` items.

        Raises:
            ValueError: if the batch size is smaller than 1.
        """
        if self.bs < 1:
            # a negative step would silently produce no batches at all
            raise ValueError("batch_size must be a positive integer")
        for i in range(0, len(X), self.bs):
            yield (X[i:i + self.bs], y[i:i + self.bs])

    def train(self, train_inputs, targets_list):
        """Train for ``self.ep`` epochs and record the error of each epoch.

        Args:
            train_inputs: sequence of input vectors.
            targets_list: sequence of target vectors, aligned with
                ``train_inputs``.

        Returns:
            The flattened hidden-layer activations recorded during the last
            epoch (an empty list when nothing was trained).
        """
        # index in self.activations where the most recent epoch begins
        epoch_start = len(self.activations)

        for e in range(self.ep):
            print("Training epoch#: ", e)

            epoch_start = len(self.activations)
            sum_error = 0.0

            for (batchX, batchY) in self.batch_input(train_inputs, targets_list):
                # gradients accumulated over the instances of this batch
                delta_who = 0
                delta_wih = 0
                batch_count = 0

                for inputs, targets in zip(batchX, batchY):
                    # convert the vectors to 2d column arrays
                    inputs = np.array(inputs, ndmin=2).T
                    targets = np.array(targets, ndmin=2).T

                    hidden_outputs, final_inputs, final_outputs = self._forward(inputs)

                    self.activations.append(hidden_outputs.flatten())
                    self.encoded_images.append(final_inputs)
                    self.decoded_images.append(final_outputs)

                    # element-wise difference between target and actual
                    output_errors = targets - final_outputs

                    # hidden layer error is the output error split by the
                    # weights and recombined at the hidden nodes
                    hidden_errors = np.dot(self.who.T, output_errors)

                    # delta_who: gradients for hidden -> output weights
                    # delta_wih: gradients for input -> hidden weights
                    delta_who += np.dot(
                        output_errors * final_outputs * (1.0 - final_outputs),
                        hidden_outputs.T)
                    delta_wih += np.dot(
                        hidden_errors * hidden_outputs * (1.0 - hidden_outputs),
                        inputs.T)

                    # sum of squared errors accumulated over the epoch
                    sum_error += np.dot(output_errors.T, output_errors)
                    batch_count += 1

                # Average over the instances actually processed: the final
                # batch may be shorter than self.bs, and dividing by self.bs
                # there would shrink its update.
                if batch_count:
                    self.who += self.lr * (delta_who / batch_count)
                    self.wih += self.lr * (delta_wih / batch_count)

            # np.asfarray was removed in NumPy 2.0
            self.E.append(np.asarray(sum_error, dtype=float).flatten())
            print("errors (SSE): ", self.E[-1])

        # return the activations from the last epoch
        return self.activations[epoch_start:]

    def encode(self, inputs_list):
        """Return the flattened hidden-layer activations for ``inputs_list``."""
        inputs = np.array(inputs_list, ndmin=2).T

        # forward pass up to the hidden layer only; sigmoid output is in (0, 1)
        hidden_outputs = self.activation_function(np.dot(self.wih, inputs))

        return hidden_outputs.flatten()

    def query(self, inputs_list):
        """Return the output-layer activations for ``inputs_list``.

        The result is column-oriented: one row per output node.
        """
        inputs = np.array(inputs_list, ndmin=2).T
        return self._forward(inputs)[2]

    def test(self, test_inputs, test_targets):
        """Classify every test instance and store the label pairs.

        ``self.results`` is replaced by a float array whose rows are
        ``[predicted_label, target_label]``.
        """
        # the index of the highest output node is the predicted class, and
        # the index of the highest target value is the actual class
        pairs = [
            [np.argmax(self.query(inputs)), np.argmax(target)]
            for inputs, target in zip(test_inputs, test_targets)
        ]

        # np.asfarray was removed in NumPy 2.0
        self.results = np.asarray(pairs, dtype=float)

    def evaluate(self, results):
        """Return the percentage of rows whose first two entries are equal.

        Returns 0.0 when ``results`` is empty.
        """
        total = len(results)
        if total == 0:
            return 0.0
        correct = sum(1 for result in results if result[0] == result[1])
        return 100 * (correct / total)