In [1]:
# Library imports
import numpy as np
from scipy.stats import norm

In [2]:
# Load the train / test feature matrices and their label arrays from the assignment folder
train_df, train_label = (
    np.load("./Assignment1-Dataset/train_data.npy"),
    np.load("./Assignment1-Dataset/train_label.npy"),
)
test_df, test_label = (
    np.load("./Assignment1-Dataset/test_data.npy"),
    np.load("./Assignment1-Dataset/test_label.npy"),
)

In [3]:
# Sanity checks: print the shape of every loaded array, one per line, in load order
for loaded_array in (train_df, train_label, test_df, test_label):
    print(loaded_array.shape)

(50000, 128)
(50000, 1)
(10000, 128)
(10000, 1)


In [4]:
def generate_gaussian_weights(num_neurons, num_features, random_state=None):
    '''
    Generate a layer's weights by sampling from the standard Gaussian distribution
    (``norm.rvs`` with its default location 0 and scale 1).

    Parameters
    ----------
    num_neurons : int
        Number of neurons in the layer (rows of the returned matrix).
    num_features : int
        Number of inputs feeding each neuron (columns of the returned matrix).
    random_state : None, int, or NumPy random generator, optional
        Forwarded unchanged to ``norm.rvs``.  Pass a seed or generator to make the draw
        reproducible; the default ``None`` keeps the previous behaviour of relying on
        scipy's default random state.

    Returns
    -------
    numpy.ndarray
        Weights matrix of size (num_neurons, num_features).
    '''

    weights = norm.rvs(size=(num_neurons, num_features), random_state=random_state)

    return weights

def generate_gaussian_bias(num_neurons, random_state=None):
    '''
    Generate a layer's bias vector by sampling from the standard Gaussian distribution
    (``norm.rvs`` with its default location 0 and scale 1).

    Parameters
    ----------
    num_neurons : int
        Number of neurons in the layer; one bias value is drawn per neuron.
    random_state : None, int, or NumPy random generator, optional
        Forwarded unchanged to ``norm.rvs``.  Pass a seed or generator to make the draw
        reproducible; the default ``None`` keeps the previous behaviour of relying on
        scipy's default random state.

    Returns
    -------
    numpy.ndarray
        Bias vector of size (num_neurons).
    '''

    bias = norm.rvs(size=num_neurons, random_state=random_state)

    return bias


In [9]:
# Draw the parameters for an 8-neuron first hidden layer (weights first, then bias) ...
weights_l1 = generate_gaussian_weights(8, train_df.shape[1])
bias_l1 = generate_gaussian_bias(8)

# ... and report their shapes
print(f"Weights matrix shape for hidden layer 1: {weights_l1.shape}")
print(f"Bias vector shape for hidden layer 1: {bias_l1.shape}")

Weights matrix shape for hidden layer 1: (8, 128)
Bias vector shape for hidden layer 1: (8,)


In [5]:
def calc_z(data_vector, weights_matrix, bias_vector):
    '''
    Compute the pre-activation value z for every neuron of one layer.

    The weights matrix is dotted with the data vector and the bias vector is then added
    to that product.  The result is the input that the layer's activation function
    receives.
    '''

    # weighted sum of the inputs for each neuron
    weighted_inputs = weights_matrix.dot(data_vector)

    # shift every neuron's weighted sum by its bias
    z = weighted_inputs + bias_vector

    return z

In [12]:
# Display the shape of the current pre-activation vector `z`.
# NOTE(review): no cell above this one in the visible notebook assigns `z`; this appears to
# rely on kernel state left by another cell - confirm it survives Restart & Run All.
z.shape

(8,)

In [21]:
def run_activation_func(activation_func, z):
    '''
    Apply a layer's non-linear activation function to its pre-activation values z.

    Parameters
    ----------
    activation_func : str
        Name of the activation to apply: 'relu' or 'softmax'.
    z : array-like
        Pre-activation values of the layer.

    Returns
    -------
    numpy.ndarray
        For 'relu', the element-wise maximum of 0 and z.
        For 'softmax', exp(z) normalised by the sum over all elements of exp(z).

    Raises
    ------
    ValueError
        If activation_func is not one of the supported names (previously the function
        silently returned None in that case).
    '''

    if activation_func == 'relu':
        return np.maximum(0, z)

    if activation_func == 'softmax':
        z = np.asarray(z)

        # np.max cannot reduce an empty array; there is nothing to normalise anyway
        if z.size == 0:
            return np.exp(z)

        # Softmax is unchanged when the same constant is subtracted from every element.
        # Subtracting the maximum keeps every exponent <= 0, so np.exp cannot overflow
        # to inf (which would turn the result into NaN for large z).
        exp_shifted = np.exp(z - np.max(z))
        return exp_shifted / np.sum(exp_shifted)

    raise ValueError(
        f"Unsupported activation function {activation_func!r}; expected 'relu' or 'softmax'"
    )

In [27]:
# Let's do a test-run of the functions above in setting up feedforward

hidden_layers = 3 # this should be int
num_neurons = [5,3,6] # this should be a list containing int per hidden layer
num_classes = np.unique(train_label).size

# The loops below index num_neurons by hidden-layer number and the output layer is sized
# from num_neurons[-1], so the two settings above must describe the same number of layers.
assert len(num_neurons) == hidden_layers, 'num_neurons must contain exactly one entry per hidden layer'

weight_matrix = []
bias_vector = []

# initialise the weights
for layer_num in range(hidden_layers):

    # If we are instantiating the details for the first hidden layer, then make the following adjustments
    # which would otherwise be not required for subsequent hidden layers
    if layer_num == 0:
        # The input features would be the shape of our dataset instead of num of features from previous layer
        num_input_features = train_df.shape[1]
    else:
        num_input_features = num_neurons[layer_num - 1]

    # check how many neurons should be in this layer
    neuron_num = num_neurons[layer_num]

    # weights are drawn before the bias for every layer
    layer_weights = generate_gaussian_weights(neuron_num, num_input_features)
    layer_bias = generate_gaussian_bias(neuron_num)

    weight_matrix.append(layer_weights)
    bias_vector.append(layer_bias)

    print(f'Weight and bias generated for hidden layer {layer_num + 1} with weight shape {weight_matrix[layer_num].shape} and bias shape of {bias_vector[layer_num].shape}')


# instantiate the parts for the output layer
# need to be very careful with the use of -1 indices, in the event that we incorporate output layer to our hidden layer variables
weight_matrix.append(generate_gaussian_weights(num_classes, num_neurons[-1]))
bias_vector.append(generate_gaussian_bias(num_classes))


# feedforward part
# layer_output collects the activations of every hidden layer, followed by the output layer
layer_output = []
for layer_num in range(hidden_layers):

    if layer_num == 0:
        input_data = train_df[0] # hard coding this for now, will set batches later
    else:
        # extract the output of the previous layer
        input_data = layer_output[layer_num - 1]

    z = calc_z(input_data, weight_matrix[layer_num], bias_vector[layer_num])
    a = run_activation_func('relu', z)

    layer_output.append(a)


# Calculation for the output layer
z = calc_z(layer_output[-1], weight_matrix[-1], bias_vector[-1])
a = run_activation_func('softmax', z)
layer_output.append(a)

Weight and bias generated for hidden layer 1 with weight shape (5, 128) and bias shape of (5,)
Weight and bias generated for hidden layer 2 with weight shape (3, 5) and bias shape of (3,)
Weight and bias generated for hidden layer 3 with weight shape (6, 3) and bias shape of (6,)


In [11]:
# The feedforward cell appends one activation vector per hidden layer and then one more for
# the output layer, so the expected length is hidden_layers + 1 rather than hidden_layers.
print(f'This should equal the number of hidden layers plus the output layer: {len(layer_output)}')
print(f'Example output for the first layer {layer_output[0].shape}')

This should equal the number of hidden layers: 3
Example output for the first layer (5,)


In [13]:
# Display how many activation vectors were collected in layer_output.
# NOTE(review): the feedforward cell appends one entry per hidden layer plus one for the
# output layer, so a fresh top-to-bottom run would presumably show hidden_layers + 1 here -
# confirm the recorded output is current.
len(layer_output)

3

In [28]:
# Show the output-layer (softmax) activations.  They are always the last entry appended to
# layer_output, so index from the end instead of hard-coding 3, which only works while
# there are exactly three hidden layers.
layer_output[-1]

array([4.59576680e-013, 2.35207458e-059, 4.15921888e-112, 1.31809348e-029,
       2.13782186e-014, 7.34339355e-095, 6.64429374e-054, 2.21452093e-069,
       1.43755567e-086, 1.00000000e+000])