In [1]:
# Numerical arrays
import numpy as np
# Matplotlib: the 'nbagg' backend is selected here, before pyplot is imported below
import matplotlib as mpl
mpl.use('nbagg')
import matplotlib.pyplot as plt
# Switch pyplot's interactive mode off
plt.ioff()
# TensorFlow is used in this notebook for its bundled Keras data sets
import tensorflow as tf

# IPython autoreload extension, mode 2
# NOTE(review): %reload_ext right after %load_ext looks redundant — confirm before removing
%load_ext autoreload
%autoreload 2
%reload_ext autoreload

In [2]:
# Load the Fashion-MNIST data set, which is readily available through Keras.
# The result is unpacked into an (inputs, labels) pair for the training split
# and another (inputs, labels) pair for the test split.
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.fashion_mnist.load_data()

In [3]:
# Show the dimensions of the freshly loaded train/test arrays, one per line
print(
    f"x_train: {x_train.shape}",
    f"y_train: {y_train.shape}",
    f"x_test: {x_test.shape}",
    f"y_test: {y_test.shape}",
    sep="\n",
)

x_train: (60000, 28, 28)
y_train: (60000,)
x_test: (10000, 28, 28)
y_test: (10000,)


In [4]:
# Dictionary mapping each integer class label (0-9) to its human-readable name.
# The position of a name in the tuple below is its label.
label_dict = dict(enumerate((
    "T-shirt/top",
    "Trouser",
    "Pullover",
    "Dress",
    "Coat",
    "Sandal",
    "Shirt",
    "Sneaker",
    "Bag",
    "Ankle boot",
)))

In [5]:
# Flattening and normalising inputs
def _flatten_and_scale(images):
    """Reshape (n, h, w) images to (n, h * w), cast to float32 and divide by 255."""
    flat = images.reshape(images.shape[0], images.shape[1] * images.shape[2])
    return flat.astype(np.float32) / 255


x_train = _flatten_and_scale(x_train)
x_test = _flatten_and_scale(x_test)



In [6]:
# One-hot encoding outputs
def get_indexed_ndarray(idx, num_classes=10):
    """
    Build a one-hot vector for a single class label.

    Parameters:
        idx: integer index of the entry that is set to 1
        num_classes: length of the returned vector (default is 10,
            which is the length this function always used before
            the parameter was introduced)

    Returns a 1D float64 numpy array of length num_classes that is 0
    everywhere except at position idx.

    Raises IndexError (from numpy) if idx does not fit in the vector.
    """
    out = np.zeros(num_classes)
    out[idx] = 1
    return out

# Replace every integer label by its one-hot row; the rows are stacked into a 2D array
y_test = np.array(list(map(get_indexed_ndarray, y_test)))
y_train = np.array(list(map(get_indexed_ndarray, y_train)))

In [7]:
# Shapes and dtypes of the data sets after preprocessing, one per line
print(
    f"x_train: {x_train.shape} type {x_train.dtype}",
    f"y_train: {y_train.shape} type {y_train.dtype}",
    f"x_test: {x_test.shape} type {x_test.dtype}",
    f"y_test: {y_test.shape} type {y_test.dtype}",
    sep="\n",
)

x_train: (60000, 784) type float32
y_train: (60000, 10) type float64
x_test: (10000, 784) type float32
y_test: (10000, 10) type float64


In [8]:
class NN:
    """
    This class represents a fully-connected neural network
    along with its training, evaluation and prediction functions.

    It also defines the required activation and loss functions.

    Layers are indexed from the output layer (index 0) towards the
    input layer (last index).

    Attributes:
        num_of_layers: number of entries in layer_sizes
        layers: the layer_sizes sequence given to the constructor
        weights: list of arrays, weights[i] has shape
            (layers[i], layers[i + 1])
        biases: list of arrays, biases[i] has shape (layers[i], 1);
            the input layer has no bias
        activations: list of zero-initialised float64 arrays with the
            same shapes as the biases
        err_derivatives: list of zero-initialised float64 arrays with
            the same shapes as the biases
    """
    def __init__(self, layer_sizes, random_seed = 42):
        """
        Parameters:
            layer_sizes - a Tuple of integers
            random_seed - seed for the random initialisation of the
                weights and biases (default is 42). Two networks built
                with the same layer_sizes and seed start from identical
                parameters. Pass None for a fresh, unpredictable seed.

        layer_sizes takes the number of neurons in every layer
        starting from output to input layers. It should contain at
        least 2 integers. Eg. (10, 32, 128, 784)
        """
        self.num_of_layers = len(layer_sizes)
        self.layers = layer_sizes

        # A generator local to this instance honours random_seed without
        # touching numpy's global random state.
        rng = np.random.default_rng(random_seed)

        # Uniform samples from [0, 1). Weights are drawn before biases.
        self.weights = [rng.random((self.layers[i-1], self.layers[i])) for i in range(1, self.num_of_layers)]
        self.biases = [rng.random((self.layers[i], 1)) for i in range(0, self.num_of_layers - 1)]

        self.activations = [np.zeros((self.layers[i], 1), dtype=np.float64) for i in range(0, self.num_of_layers - 1)]
        self.err_derivatives = [np.zeros((self.layers[i], 1), dtype=np.float64) for i in range(0, self.num_of_layers - 1)]

    def softmax(self, x, derivative=False):
        """
        Parameters:
            x: a 2D numpy array with shape (activations, batch_size)
            derivative: A boolean (default is False)

        The softmax function is applied to each column and a float64
        numpy array of shape (activations, batch_size) is returned.

        If derivative is True, the function returns s * (1 - s) where
        s is the softmax of x. These are only the diagonal entries of
        the softmax Jacobian (d s_i / d x_i); the cross terms are not
        included.
        """
        x = np.asarray(x, dtype=np.float64)
        if x.size == 0:
            # Nothing to normalise; also avoids a max() over an empty axis.
            return np.zeros(x.shape, dtype=np.float64)

        # Softmax is invariant to adding a constant to a column, so shifting
        # by the column maximum keeps every exponent <= 0 and prevents
        # np.exp from overflowing to inf (which would yield NaN outputs).
        shifted = x - np.max(x, axis=0, keepdims=True)
        exponentials = np.exp(shifted)
        probabilities = exponentials / np.sum(exponentials, axis=0, keepdims=True)

        if derivative:
            return probabilities * (1 - probabilities)
        return probabilities

    def relu(self, x, derivative=False):
        """
        Parameter:
            x: a 2D numpy array with shape (activations, batch_size)
            derivative: A boolean (default is False)

        Applies ReLU function and returns the values as an array
        of shape (activations, batch_size)

        If derivative is True, the function gives the derivative
        of the ReLU function at the given value. The derivative at
        exactly 0 is taken to be 1.
        """
        if not derivative:
            return np.where(x >= 0, x, 0)
        else:
            return np.where(x >= 0, 1, 0)

    def bin_cross_entropy_loss(self, error, derivative=False):
        """
        Parameter:
            error: An array of shape (errors, 1)
            derivative: A boolean (default is False)

        Returns an array of shape (errors, 1) with the values
        -log(1 - error), computed element-wise.

        If derivative is True, the function gives the derivative
        of the binary cross entropy loss function at the given values,
        i.e. 1 / (1 - error).

        Entries that are exactly 1 are replaced by 0.9999 first so that
        neither log(0) nor a division by zero occurs.
        """
        # NOTE(review): only an exact 1 is replaced; values strictly between
        # 0.9999 and 1 pass through unchanged and therefore produce a larger
        # loss than the value 1 itself. Confirm whether clipping is intended.
        non_one_array = np.where(error == 1, 0.9999, error)
        if not derivative:
            return ((-1) * np.log(1 - non_one_array))
        else:
            return (1 / (1 - non_one_array))

    def forward_propagation(self, input_array):
        """
        Placeholder for the forward pass. It is not implemented yet and
        currently returns None for any input_array.
        """
        pass

In [9]:
# Sanity check: list the shape of every parameter/state array of a fresh
# network, followed by the dtype of the first array in each group
testNN = NN((10, 32, 128, 784))

array_groups = (
    ("Weights:", testNN.weights),
    ("\nBiases:", testNN.biases),
    ("\nActivations:", testNN.activations),
    ("\nerr_derivatives:", testNN.err_derivatives),
)
for heading, arrays in array_groups:
    print(heading)
    for arr in arrays:
        print(arr.shape, end=", ")
    print(arrays[0].dtype)



Weights:
(10, 32), (32, 128), (128, 784), float64

Biases:
(10, 1), (32, 1), (128, 1), float64

Activations:
(10, 1), (32, 1), (128, 1), float64

err_derivatives:
(10, 1), (32, 1), (128, 1), float64


In [10]:
# Sanity check of the activation and loss functions on small hand-made inputs.
# The inputs are printed before and after the calls so that any accidental
# in-place modification would be visible.
testNN = NN((10, 32, 128, 784))

test_arr = np.array([[0, 2, 4], [2, 1, -5], [9, 3, 2], [1, 2, 1]], dtype=np.float64)
test_arr_2 = np.array([0.01, 0.3, 0.999, 1]).reshape(4, 1)

print("Test Array 1:", test_arr, sep="\n")
print("Test Array 2:", test_arr_2, sep="\n")

print("Softmax:", testNN.softmax(test_arr), sep="\n")
print("Softmax Derivative:", testNN.softmax(test_arr, derivative=True), sep="\n")
print("ReLU:", testNN.relu(test_arr), sep="\n")
print("ReLU Derivative:", testNN.relu(test_arr, derivative=True), sep="\n")
print("bin_cross_entropy_loss", testNN.bin_cross_entropy_loss(test_arr_2), sep="\n")
print("bin_cross_entropy_loss Derivative:", testNN.bin_cross_entropy_loss(test_arr_2, derivative=True), sep="\n")

print()

print("Test Array 1:", test_arr, sep="\n")
print("Test Array 2:", test_arr_2, sep="\n")

Test Array 1:
[[ 0.  2.  4.]
 [ 2.  1. -5.]
 [ 9.  3.  2.]
 [ 1.  2.  1.]]
Test Array 2:
[[0.01 ]
 [0.3  ]
 [0.999]
 [1.   ]]
Softmax:
[[1.23240871e-04 1.96611933e-01 8.43706877e-01]
 [9.10633710e-04 7.23294881e-02 1.04121700e-04]
 [9.98631122e-01 5.34446645e-01 1.14183309e-01]
 [3.35003420e-04 1.96611933e-01 4.20056920e-02]]
Softmax Derivative:
[[1.23225683e-04 1.57955681e-01 1.31865583e-01]
 [9.09804457e-04 6.70979333e-02 1.04110859e-04]
 [1.36700418e-03 2.48813429e-01 1.01145481e-01]
 [3.34891193e-04 1.57955681e-01 4.02412138e-02]]
ReLU:
[[0. 2. 4.]
 [2. 1. 0.]
 [9. 3. 2.]
 [1. 2. 1.]]
ReLU Derivative:
[[1 1 1]
 [1 1 0]
 [1 1 1]
 [1 1 1]]
bin_cross_entropy_loss
[[0.01005034]
 [0.35667494]
 [6.90775528]
 [9.21034037]]
bin_cross_entropy_loss Derivative:
[[1.01010101e+00]
 [1.42857143e+00]
 [1.00000000e+03]
 [1.00000000e+04]]

Test Array 1:
[[ 0.  2.  4.]
 [ 2.  1. -5.]
 [ 9.  3.  2.]
 [ 1.  2.  1.]]
Test Array 2:
[[0.01 ]
 [0.3  ]
 [0.999]
 [1.   ]]
