In [None]:
# Mount Google Drive into the Colab runtime so files under /content/drive
# become accessible from this notebook.
# NOTE(review): nothing in the visible part of the notebook reads from the
# mount point -- confirm it is still needed before keeping this cell.
from google.colab import drive
drive.mount('/content/drive')

In [82]:
import numpy as np
class FeedForwardNN:
  '''
  Fully connected feed-forward neural network (forward pass only).

  Hidden layers use the logistic (sigmoid) activation and the output layer
  uses softmax, so every output row is a probability distribution over the
  classes. All weights and biases are drawn from a standard normal
  distribution through NumPy's global random state (np.random.randn).
  '''

  def __init__(self, num_input_features = 100, num_hidden_layers = 3,
               hidden_layer_sizes = 32, num_classes = 10):
    '''
    Initializes the architecture of the Feed-Forward NN. The weights and biases
    are initialized randomly.

    Parameters
    ----------
    num_input_features : int
        Number of neurons in the input layer.
    num_hidden_layers : int
        Number of hidden layers (0 gives a single input -> output layer).
    hidden_layer_sizes : int or sequence of int
        If an integer, all hidden layers have that size. Otherwise a sequence
        (list, tuple, 1D array) of sizes starting with the hidden layer
        adjacent to the input layer; only the first num_hidden_layers entries
        are used to build the network.
    num_classes : int
        Number of neurons in the output layer.

    Raises
    ------
    IndexError
        If hidden_layer_sizes is a sequence with fewer than num_hidden_layers
        entries.
    '''
    self.__num_input_features = num_input_features
    self.__num_hidden_layers = num_hidden_layers
    if isinstance(hidden_layer_sizes, (int, np.integer)):
      sizes = num_hidden_layers*[hidden_layer_sizes]
    else:
      # Copy into a fresh list so tuples/arrays are accepted and later changes
      # to the caller's object cannot alter the stored architecture.
      sizes = list(hidden_layer_sizes)
    self.__hidden_layer_sizes = sizes
    self.__num_classes = num_classes

    # Sizes of all layers actually built: input, hidden layers, output.
    # Explicit indexing (rather than slicing) keeps the IndexError when fewer
    # sizes than hidden layers were supplied.
    layer_dims = [num_input_features]
    layer_dims += [sizes[layer_no] for layer_no in range(num_hidden_layers)]
    layer_dims.append(num_classes)

    # The i th weight matrix holds the weights of the edges from layer i to
    # layer (i+1), where i = 0 is the input layer and the last layer is the
    # output layer. There is one matrix per pair of consecutive layers.
    self.__weights = [np.random.randn(fan_in, fan_out)
                      for fan_in, fan_out in zip(layer_dims[:-1], layer_dims[1:])]

    # The i th bias vector belongs to the layer fed by the i th weight matrix.
    # Deriving the bias sizes from the same layer_dims guarantees that weights
    # and biases always agree. All weights are drawn before all biases, so the
    # consumption order of the global random stream is unchanged.
    self.__biases = [np.random.randn(fan_out) for fan_out in layer_dims[1:]]

  @staticmethod
  def _sigmoid(z):
    '''
    Element-wise logistic function 1/(1+exp(-z)), computed without overflow.
    exp is only ever evaluated at non-positive arguments, so it stays in (0, 1].
    '''
    decay = np.exp(-np.abs(z))
    return np.where(z >= 0, 1.0/(1.0 + decay), decay/(1.0 + decay))

  def evaluate(self, Xinput):
    '''
    Runs a forward pass through the network.

    Parameters
    ----------
    Xinput : 2D array-like of float, shape (num_samples, num_input_features)
        Input samples, one per row. The input is never modified.

    Returns
    -------
    2D numpy array of shape (num_samples, num_classes) where each row sums to 1
    and represents a probability distribution over the classes.

    Logistic activation is used in each of the hidden layers and softmax is
    used in the output layer.
    '''
    # No defensive copy is needed: every step below allocates a new array.
    activations = np.asarray(Xinput, dtype = float)
    for Wmat, bvec in zip(self.__weights[:-1], self.__biases[:-1]):
      activations = self._sigmoid(activations.dot(Wmat) + bvec)

    logits = activations.dot(self.__weights[-1]) + self.__biases[-1]
    # Softmax is invariant to subtracting a per-row constant; subtracting the
    # row maximum keeps every exponent <= 0 so np.exp cannot overflow.
    shifted = logits - np.max(logits, axis = 1, keepdims = True)
    exp_shifted = np.exp(shifted)
    return exp_shifted / np.sum(exp_shifted, axis = 1, keepdims = True)


In [86]:
# Testing the Feed-Forward NN
# Each test builds a network, evaluates it on random inputs, and verifies that
# the output has shape (num_samples, num_classes) and that every row sums to 1.
# The assertions make a failure stop the cell instead of relying on a visual
# check of the printed values.

# Test 1 (hidden layer sizes given as a list)
nn1 = FeedForwardNN(num_input_features = 90, num_hidden_layers = 4, hidden_layer_sizes = [32,64,100,12], num_classes = 35)
Xinput1 = np.random.randn(20, 90)
Ypred1 = nn1.evaluate(Xinput1)
assert Ypred1.shape == (20, 35)
np.testing.assert_allclose(np.sum(Ypred1, axis = 1), 1.0)
print('Shape of Ypred1 = {}'.format(Ypred1.shape))
print(np.sum(Ypred1, axis = 1)) # Checking if all rows sum to 1

# Test 2 (same size for every hidden layer)
nn2 = FeedForwardNN(num_input_features = 200, num_hidden_layers = 3, hidden_layer_sizes = 64, num_classes = 10)
Xinput2 = np.random.randn(30, 200)
Ypred2 = nn2.evaluate(Xinput2)
assert Ypred2.shape == (30, 10)
np.testing.assert_allclose(np.sum(Ypred2, axis = 1), 1.0)
print('Shape of Ypred2 = {}'.format(Ypred2.shape))
print(np.sum(Ypred2, axis = 1)) # Checking if all rows sum to 1

# Test 3 (No hidden layer)
nn3 = FeedForwardNN(num_input_features = 50, num_hidden_layers = 0, hidden_layer_sizes = [], num_classes = 15)
Xinput3 = np.random.randn(40, 50)
Ypred3 = nn3.evaluate(Xinput3)
assert Ypred3.shape == (40, 15)
np.testing.assert_allclose(np.sum(Ypred3, axis = 1), 1.0)
print('Shape of Ypred3 = {}'.format(Ypred3.shape))
print(np.sum(Ypred3, axis = 1)) # Checking if all rows sum to 1

Shape of Ypred1 = (20, 35)
[1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
Shape of Ypred2 = (30, 10)
[1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1.]
Shape of Ypred3 = (40, 15)
[1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
