In [1]:
import numpy as np

In [2]:
def init_nn(X, Y, hidden_sizes, batch_size):
    """
    Initialize the neural network structure.

    Parameters:
    -----------
    X               :   Integer
                        Input shape (number of neurons in the input layer)

    Y               :   Integer
                        Output shape (number of neurons in the output layer)

    hidden_sizes    :   List
                        List of length equal to number of hidden layers and each entry is the number of
                        neurons in that layer. May be empty, in which case the input layer is connected
                        directly to the output layer

    batch_size      :   Integer
                        Number of samples fed in one forward pass

    Returns:
    --------
    layer_states    :   List of Arrays
                        One [batch_size x layer_size] array per layer (input, hidden..., output).
                        Allocated with `np.empty`, so the contents are uninitialized until a forward
                        pass overwrites them

    weights         :   List of Arrays
                        One [layer_size_i x layer_size_(i+1)] array per pair of consecutive layers,
                        drawn uniformly at random between -1 and 1
    """

    # Keep the sizes as plain Python ints. Concatenating with NumPy would promote everything to
    # float when `hidden_sizes` is empty, and float sizes are rejected as array dimensions.
    layer_sizes = [int(X)] + [int(size) for size in hidden_sizes] + [int(Y)]

    # Pre-allocate the neuron outputs (i.e. states) for each layer
    layer_states = [np.empty((batch_size, layer_size)) for layer_size in layer_sizes]

    # One weight matrix per pair of consecutive layers, randomly initialized between -1 and 1
    weights = [
        np.random.uniform(-1, 1, size=(n_in, n_out))
        for n_in, n_out in zip(layer_sizes[:-1], layer_sizes[1:])
    ]

    return layer_states, weights


In [3]:
def sigmoid(x):
    """
    Logistic sigmoid, 1 / (1 + exp(-x)), applied element-wise.

    Parameters
    ----------
    x           :   Float or Array
                    Value(s) to squash

    Returns
    -------
    sigmoid(x)  :   Float or Array
                    Sigmoid of `x`, element by element
    """

    # Denominator of the logistic formula, evaluated element-wise
    denominator = 1. + np.exp(-1 * x)

    return 1. / denominator

def softmax(x):
    """
    Computes the row-wise softmax of input `x`.

    Parameters
    ----------
    x           :   Array
                    (N x dim) array with N samples by dim dimensions. dim=10 for MNIST classification.

    Returns
    -------
    softmax(x)  :   Array
                    (N x dim) array in which every row is the softmax of the matching row of `x`,
                    i.e. normalization runs along axis 1 and each row sums to 1.
    """

    # Shift every row by its maximum before exponentiating. Softmax is invariant to a per-row
    # constant shift, and this keeps np.exp from overflowing to inf (inf/inf would give nan).
    shifted = x - np.max(x, axis=1, keepdims=True)

    # Compute the exponent only once
    exponent = np.exp(shifted)

    # Normalize each row so that it sums to one
    return exponent / exponent.sum(axis=1, keepdims=True)


In [4]:
def feedforward(batch, layer_states, weights):
    """
    Takes the input batch and feeds it through the generated network ONCE.
    Returns the output of last layer fed through softmax function and updated layer outputs

    Possible Modification:
    feeding the last output to softmax can be done outside the function and
    this func can return only updated layer outputs, or the function can return
    only the raw output (without softmax) and not the layer outputs.

    Parameters:
    -----------
    batch           :   Array or None
                        Input batch of dimension [batch_size x Input_dimension].
                        If None, a random batch drawn uniformly between -1 and 1 with the shape of
                        `layer_states[0]` is used instead (testing aid only)

    layer_states    :   List of Arrays
                        Output arrays of each layer. The list is updated in place

    weights         :   List of Arrays
                        Weights array of each layer

    Returns:
    --------
    output          :   Array
                        Softmax of the last layer's (sigmoid) activations

    layer_states    :   List of Arrays
                        UPDATED output arrays of each layer (same list object that was passed in)
    """

    if batch is None:
        # Testing fallback: size the random input from the pre-allocated input layer instead of
        # reading notebook-level globals, so the function works regardless of cell order.
        batch = np.random.uniform(-1, 1, size=layer_states[0].shape)

    # Set the state of input layer to the input batch
    layer_states[0] = np.asarray(batch)

    for i, weight in enumerate(weights):
        # Calculate the dot product of current layer's neuron values and weights and feed it to sigmoid
        layer_states[i+1] = sigmoid(layer_states[i].dot(weight))

    # Feed the output layers neuron values into softmax function
    output = softmax(layer_states[-1])

    # Return the outputs and updated version of neuron values
    return output, layer_states


In [5]:
# Initialize the input and output shape
X = 784
Y = 10

# Initialize the batch size and hidden layers shape
batch_size = 8
hidden_sizes = [4,2,3]

# Seed NumPy's global RNG so the random weights and the random test batch are
# identical on every Restart & Run All
SEED = 0
np.random.seed(SEED)

# Initialize the network structure
layer_states, weights = init_nn(X, Y, hidden_sizes, batch_size)

# Random stand-in for a real input batch (no dataset is loaded in this notebook yet)
test_batch = np.random.uniform(-1, 1, size=(batch_size, X))

# Feed forward the input - once
output, layer_states = feedforward(test_batch, layer_states, weights)

# Sanity checks: one row per sample, one column per output neuron, rows sum to 1
assert output.shape == (batch_size, Y)
assert np.allclose(output.sum(axis=1), 1.0)

output.shape

KeyboardInterrupt: 

In [None]:
def d_sigmoid(x):
    """
    Derivative of the sigmoid, expressed through the sigmoid's own output.

    Parameters
    ----------
    x : float or array_like
        Value(s) that have already been passed through a sigmoid.

    Returns
    -------
    float or array_like
        `x * (1 - x)`, computed element-wise.
    """
    # With s = sigmoid(z), ds/dz = s * (1 - s)
    complement = 1 - x
    return x * complement

In [None]:
def back_prop(output,batch_y,hidden_layers,weights,batch_size,lr):
    """
    Update the weights of the network with one step of back-propagation.

    Parameters
    ----------
    output : array_like
        Forward pass output of the MLP
    batch_y : array_like
        True labels for the samples in the batch
    hidden_layers : list
        List of layer outputs from the forward pass, input layer first and
        output layer last (one more entry than `weights`)
    weights : list
        List of weight matrices, one per pair of consecutive layers.
        The matrices are updated in place
    batch_size : int
        Size of a training mini-batch
    lr : float
        Learning rate for SGD

    Returns
    -------
    weights : list
        The same list of weight matrices, updated from the backpropagation.

    """
    # Error signal at the output layer
    delta_t = (output - batch_y)*d_sigmoid(hidden_layers[-1])

    n_layers = len(weights)
    for i in range(1, n_layers+1):
        # Gradient step for the i-th weight matrix counted from the output side
        step = lr*(hidden_layers[-i-1].T.dot(delta_t))/batch_size

        # Propagate the error through the weights as they were during the forward pass,
        # i.e. BEFORE applying this layer's update. The input layer needs no error signal.
        if i < n_layers:
            delta_t = d_sigmoid(hidden_layers[-i-1])*(delta_t.dot(weights[-i].T))

        weights[-i] -= step
    return weights

In [None]:
def __back_prop(self,batch_y):
    """
    Update `self.weights` in place with one step of back-propagation.

    NOTE(review): this reads like a method lifted out of a class that is not
    part of this notebook. Presumably `self.__out` is the forward-pass output,
    `self.__h` the list of layer outputs and `self.__sigmoid_prime` the sigmoid
    derivative -- confirm against the original class. Defined at module level,
    the double-underscore attribute names are not name-mangled.

    Parameters
    ----------
    batch_y : array_like
        True labels for the samples in the batch
    """
    # Error signal at the output layer
    delta_t = (self.__out - batch_y)*self.__sigmoid_prime(self.__h[-1])
    for i in range(1,len(self.weights)+1):
        # Gradient step for the i-th weight matrix counted from the output side
        step = self.lr*(self.__h[-i-1].T.dot(delta_t))/self.batch_size

        # Propagate the error through the weights BEFORE they are updated, so every
        # layer's gradient is taken at the same (pre-update) parameters
        delta_t = self.__sigmoid_prime(self.__h[-i-1])*(delta_t.dot(self.weights[-i].T))

        self.weights[-i] -= step