In [8]:
import numpy as np
from read_dataset import read_dataset
import matplotlib.pyplot as plt
import matplotlib.cm as cm
from displayData import displayData
from randInitializeWeights import randInitializeWeights
from unroll_params import unroll_params
from roll_params import roll_params
import scipy.optimize as opt
from predict import predict
from backwards import backwards
from checkNNGradients import checkNNGradients
from sigmoid import sigmoid
from sigmoidGradient import sigmoidGradient
from debugInializeWeights import debugInitializeWeights

In [26]:
def costFunction(nn_weights, layers, X, y, num_labels, lambd):
    # Computes the cost function of the neural network.
    # nn_weights: Neural network parameters (vector)
    # layers: a list with the number of units per layer.
    # X: a matrix where every row is a training example for a handwritten digit image
    # y: a vector with the labels of each instance
    # num_labels: the number of units in the output layer
    # lambd: regularization factor
    
    # Setup some useful variables
    m = X.shape[0]
    num_layers = len(layers)

    # Unroll Params
    Theta = roll_params(nn_weights, layers)

    # ================================ TODO ================================
    # The vector y passed into the function is a vector of labels
    # containing values from 1..K. You need to map this vector into a 
    # binary vector of 1's and 0's to be used with the neural network
    # cost function.
    yv = np.zeros((num_labels, m))
    for i in range(len(y)):
        yv[int(y[i])] = 1  # TODO: the int conversion is maybe not useful
    yv = np.transpose(yv)

    # ================================ TODO ================================
    # In this point calculate the cost of the neural network (feedforward)
    x = np.copy(X)

    for i in range(num_layers - 1):
        s = np.shape(Theta[i])
        theta = Theta[i][:, 1:s[1]]
        x = np.dot(theta, np.transpose(x))
        for j in range(m):
            x[:, j] += Theta[i][:, 0]
        x = np.transpose(sigmoid(x))

    cost = (yv * np.log(x) - (1 - yv) * np.log(1 - x)) / m
    cost = -np.sum(cost)
    
    for i in range(num_layers - 1):
        cost += lambd * np.sum(Theta[i] ** 2) / (2 * m)

    return cost

In [27]:
def checkNNCost(lambd):

    input_layer_size  = 3;
    hidden_layer_size = 5;
    num_labels = 3;
    m          = 5;
    layers     = [3, 5, 3]
    
    Theta = [] 
    Theta.append(debugInitializeWeights(hidden_layer_size, input_layer_size))
    Theta.append(debugInitializeWeights(num_labels, hidden_layer_size))
    nn_params = unroll_params(Theta)
    
    X = debugInitializeWeights(m, input_layer_size - 1)
    y = np.remainder(np.arange(m)+1, num_labels)
 
    cost = costFunction(nn_params, layers, X, y, num_labels, lambd)
    print('Cost: ' + str(cost))

In [31]:
lambd = 0.0
checkNNCost(lambd)

Cost: 2.03232082178


In [32]:
lambd = 3.0
checkNNCost(lambd)

Cost: 2.09153578476
