In [104]:
import numpy as np

In [105]:
def sigmoid(x):
    '''
        Logistic function 1 / (1 + e^(-x)), applied element-wise.

        Args:
            x(np.ndarray or number) : pre-activation value(s)

        Returns:
            (np.ndarray or number) : sigmoid of x, same shape as x
    '''
    # For large negative x, exp(-x) overflows to inf. 1 / (1 + inf) is still the
    # correct limit (0.0), so the overflow is benign; silence it locally instead
    # of leaking a RuntimeWarning (or an error under strict settings) to callers.
    with np.errstate(over='ignore'):
        return 1 / (1 + np.exp(-x))

def sigmoidPrime(x):
    '''
        Derivative of the sigmoid with respect to its input: s(x) * (1 - s(x)).

        Args:
            x(np.ndarray or number) : pre-activation value(s)

        Returns:
            (np.ndarray or number) : derivative evaluated at x, same shape as x
    '''
    s = sigmoid(x)
    return s * (1 - s)

def getActivationFunctionPrime(activation_fn):
    '''
        Look up the derivative that matches an activation function.

        Args:
            activation_fn(python.function) : activation function (may be None)

        Returns:
            sigmoidPrime when activation_fn is sigmoid, otherwise None.
    '''
    return sigmoidPrime if activation_fn == sigmoid else None


In [106]:
def cross_entropy(y_hat, y, reg=False, thetas=None):
    '''
        Mean cross-entropy cost, optionally with an L2 penalty on thetas.

        Args:
            y_hat(np.ndarray) : predictions (probabilities)
            y(np.ndarray) : actual values, same shape as y_hat
            reg(python.Boolean) : to regularize or not
            thetas(np.ndarray) : thetas to be regularized (required when reg is truthy)

        Returns:
            J(float) : Cost

        Raises:
            ValueError : if reg is truthy but thetas is None
    '''
    y_hat = np.asarray(y_hat, dtype=float)
    y = np.asarray(y, dtype=float)
    m = y_hat.shape[0]

    # A saturated prediction (exactly 0.0 or 1.0) would give log(0) = -inf and
    # then 0 * -inf = nan, poisoning the whole sum. Keep probabilities strictly
    # inside (0, 1) before taking logs.
    eps = np.finfo(float).eps
    p = np.clip(y_hat, eps, 1 - eps)

    J = -np.sum(y * np.log(p) + (1 - y) * np.log(1 - p)) / m

    if reg:
        if thetas is None:
            raise ValueError("thetas must be provided when reg is True")
        J = J + np.sum(np.square(thetas)) / (2 * m)

    return J


In [107]:
class Layer:
    '''
        One fully connected layer: optional weights and activation, plus an
        optional bias column appended to its output.
    '''

    def __init__(self, weights=None, activation_fn=None, add_bias_node=False):
        '''
            Args:
                activation_fn(python.function) : activation function. When None the layer
                                                 passes its input through unchanged.
                weights(np.ndarray) : weights, one row per node in this layer and one
                                      column per input (forward computes input . weights^T).
                add_bias_node(python.Boolean) : if bias node needs to be added. If True, a column of 1s is
                                                added after the activation function is applied.
        '''
        self.activation_fn = activation_fn
        self.activation_fn_prime = getActivationFunctionPrime(self.activation_fn)
        self.add_bias_node = add_bias_node
        self.input = None
        self.z = None
        self.output = None

        # Accept any array-like (lists, ndarray subclasses) rather than silently
        # discarding everything that is not exactly np.ndarray. np.asarray hands
        # back an ndarray argument as-is, without copying.
        self.weights = None if weights is None else np.asarray(weights)


    def forward(self, input):
        '''
            Args:
                input(np.array) : input or activations from previous layer.

            Returns:
                a(np.array) : activation of this layer. [num samples x num of nodes in this layer]
                              If add_bias_node is True, an extra column of 1s is prepended.

            Raises:
                ValueError : if the layer has an activation function but no weights.
        '''
        self.input = input
        print("\tInput : ", input.shape)

        if self.activation_fn is not None:
            if self.weights is None:
                raise ValueError("Layer has an activation function but no weights")

            # NOTE: the labels in this message are swapped relative to the values
            # printed (input shape, then weights.T shape). The text is kept
            # verbatim so previously recorded output stays comparable.
            print("\tweights :", input.shape," X input.T:", self.weights.T.shape)
            z = input.dot(self.weights.T)
            self.z = z
            print("\tZ : ", z.shape)

            a = self.activation_fn(z)

        else:
            a = input

        if self.add_bias_node:
            print("\tAdding a column vector (",a.shape[0],"x1 ) to a", a.shape)
            a = np.c_[np.ones((a.shape[0],1)), a]

        print("\tOuput : ", a.shape)
        self.output = a
        return a


    def backward(self, back_prop, last_layer=False, weights_next=None):
        '''
            Args:
                back_prop(np.array) : info back propagated from the next layer.
                                      Actual output (targets) when last_layer is True,
                                      otherwise the next layer's deltas.
                last_layer(python.Boolean) : True when this is the output layer.
                weights_next(np.ndarray) : weights of the next layer; required unless
                                           last_layer is True.

            Returns:
                delta(np.ndarray) : delta to be backpropagated.
                gradient(np.ndarray) : delta^T . input, summed over samples (not averaged).
                weights(np.ndarray) : this layer's weights, for use by the previous layer.

            Raises:
                ValueError : if weights_next is missing for a layer that is not the last.
        '''
        if last_layer:
            print("y_hat", self.output.shape, " - back_prop", back_prop.shape)
            d = self.output - back_prop
        else:
            if weights_next is None:
                raise ValueError("weights_next is required when last_layer is False")

            # Column 0 of the next layer's weights multiplies the bias column this
            # layer appended in forward(); it carries no delta for a real node.
            # Drop it only when this layer actually appended that column.
            w_next = weights_next[:, 1:] if self.add_bias_node else weights_next
            print("Weights_Next", w_next.shape, "x back_prop", back_prop.shape, " .* prime", self.z.shape)
            d = back_prop.dot(w_next) * self.activation_fn_prime(self.z)

        gradient = d.T.dot(self.input)

        return d, gradient, self.weights
 

In [108]:
class NeuralNetwork:
    '''
        Ordered stack of layers evaluated with a single cost function.
    '''

    def __init__(self, cost_fn):
        '''
            Args:
                cost_fn(python.function) : called as cost_fn(y_hat, output, reg=0).
        '''
        self.cost_fn = cost_fn
        self.layers = []


    def add(self, layer):
        '''Append a layer; layers run in the order they were added.'''
        self.layers.append(layer)


    def train(self, input, output):
        '''
            Run one forward pass, print the cost, then walk the layers backwards
            printing each layer's gradient divided by the number of samples.
            No weights are updated and nothing is returned.

            Args:
                input(np.ndarray) : samples fed to the first layer.
                output(np.ndarray) : expected outputs for those samples.
        '''
        # Forward pass: each layer consumes the previous layer's activations.
        activations = input
        for position, layer in enumerate(self.layers, start=1):
            print("\nLayer ", position)
            activations = layer.forward(activations)

        y_hat = activations
        print("\nPredictions\n", y_hat[0,:])

        cost = self.cost_fn(y_hat, output, reg=0)
        print("\nCost : ", cost)

        # Backward pass: from the last layer down to index 1. Index 0 (the first
        # layer added) is never asked for a gradient.
        last = len(self.layers) - 1
        delta = output
        next_weights = None
        for idx in range(last, 0, -1):
            print("\nLayer ", idx+1)
            delta, gradient, next_weights = self.layers[idx].backward(
                delta, weights_next=next_weights, last_layer=(idx == last)
            )
            print(gradient/y_hat.shape[0])

        # y_hat and cost are only printed for now; train() returns None.
    


In [109]:
from scipy.io import loadmat
import pandas as pd

# Load the data set from the MATLAB file; it provides the 'X' and 'y' arrays.
data = loadmat('data/ex4data1.mat')

# One-hot encode the labels: one indicator column per distinct label value.
# DataFrame.as_matrix() is deprecated and was removed in pandas 1.0; .values
# returns the same underlying array and works on old and new pandas alike.
y = data['y']
y = pd.get_dummies(y.ravel()).values
print('y.shape = ',y.shape)

X = data['X']
print('X.shape : ', X.shape)

y.shape =  (5000, 10)
X.shape :  (5000, 400)


  import sys


In [110]:
# Load the weight matrices stored in the MATLAB file.
weights = loadmat('data/ex4weights.mat')
theta1 = weights['Theta1']
theta2 = weights['Theta2']

# Layer.forward computes input . weights^T, so each matrix is
# (nodes in the layer) x (inputs to the layer, bias column included).
# Recorded shapes: theta1 (25, 401) -> 25 hidden nodes, 400 inputs + bias;
#                  theta2 (10, 26)  -> 10 labels, 25 hidden nodes + bias.
print('theta1 :', theta1.shape)
print('theta2 :', theta2.shape)

# Flatten both matrices into a single vector: theta1's entries, then theta2's.
params = np.concatenate((theta1.ravel(), theta2.ravel()))
print('params :', params.shape)

theta1 : (25, 401)
theta2 : (10, 26)
params : (10285,)


In [111]:
# Assemble the network layer by layer, then run one training pass.
nn = NeuralNetwork(cost_fn=cross_entropy)

# Input layer: no weights or activation; it only prepends the bias column.
input_layer = Layer(add_bias_node=True, activation_fn=None)
nn.add(input_layer)

# Hidden layer: sigmoid units using theta1, followed by a bias column.
hidden_layer = Layer(weights=theta1, activation_fn=sigmoid, add_bias_node=True)
nn.add(hidden_layer)

# Output layer: sigmoid units using theta2, no bias column on the predictions.
output_layer = Layer(weights=theta2, activation_fn=sigmoid, add_bias_node=False)
nn.add(output_layer)

nn.train(X, y)


Layer  1
	Input :  (5000, 400)
	Adding a column vector ( 5000 x1 ) to a (5000, 400)
	Ouput :  (5000, 401)

Layer  2
	Input :  (5000, 401)
	weights : (5000, 401)  X input.T: (401, 25)
	Z :  (5000, 25)
	Adding a column vector ( 5000 x1 ) to a (5000, 25)
	Ouput :  (5000, 26)

Layer  3
	Input :  (5000, 26)
	weights : (5000, 26)  X input.T: (26, 10)
	Z :  (5000, 10)
	Ouput :  (5000, 10)

Predictions
 [1.12661530e-04 1.74127856e-03 2.52696959e-03 1.84032321e-05
 9.36263860e-03 3.99270267e-03 5.51517524e-03 4.01468105e-04
 6.48072305e-03 9.95734012e-01]

Cost :  0.2876291651613189

Layer  3
y_hat (5000, 10)  - back_prop (5000, 10)
[[ 6.28737643e-04  7.50946274e-04  9.87964596e-05  1.48819864e-03
   7.31802078e-04  1.38113760e-03 -1.59325422e-04 -6.68870887e-04
  -1.24979363e-03 -9.66225987e-05  7.19244384e-04 -5.10976177e-04
   1.11120644e-03 -6.43551911e-04 -6.95182470e-04 -9.47091610e-04
   2.00794722e-04  9.50724940e-04 -5.42000276e-04 -5.05540551e-05
   2.22327563e-04  5.06964221e-04  2.