In [3]:
import numpy as np
from read_dataset import read_dataset
import matplotlib.pyplot as plt
import matplotlib.cm as cm
from displayData import displayData
from randInitializeWeights import randInitializeWeights
from unroll_params import unroll_params
from roll_params import roll_params
from scipy.optimize import *
from predict import predict
from sigmoid import sigmoid
from sigmoidGradient import sigmoidGradient
from debugInializeWeights import debugInitializeWeights
from costFunction import costFunction
from unroll_params import unroll_params
from computeNumericalGradient import computeNumericalGradient

In [4]:
#Reading of the dataset
# You are free to reduce the number of samples retained for training, in order to reduce the computational cost
size_training = 60000     # number of samples retained for training
size_test     = 10000     # number of samples retained for testing
images_training, labels_training, images_test, labels_test = read_dataset(size_training, size_test)

In [11]:
def checkNNGradients(lambd):

    input_layer_size  = 3;
    hidden_layer_size = 5;
    num_labels = 2;
    m          = 10;
    layers     = [3, 5, 2]

    # In this point we generate a number of random data
    Theta = [] 
    Theta.append(debugInitializeWeights(hidden_layer_size, input_layer_size))
    Theta.append(debugInitializeWeights(num_labels, hidden_layer_size))

    X = debugInitializeWeights(m, input_layer_size - 1)
    y = np.remainder(np.arange(m)+1, num_labels)
    
    # Unroll parameters
    nn_params = unroll_params(Theta)

    # Compute Numerical Gradient
    numgrad = computeNumericalGradient(nn_params,layers, X, y, num_labels, lambd)

    # Compute Analytical Gradient (BackPropagation)
    truegrad = backwards(nn_params, layers, X, y, num_labels, lambd)

    
    print(np.concatenate(([numgrad], [truegrad]), axis = 0).transpose())
    print("The above two columns must be very similar.\n(Left-Numerical Gradient, Right-Analytical Gradient (BackPropagation)\n")
    
    diff = np.linalg.norm(numgrad - truegrad) / np.linalg.norm(numgrad + truegrad)
    print("\nNote: If the implementation of the backpropagation is correct, the relative different must be quite small (less that 1e-09).")
    print("Relative difference: " + str(diff) + "\n")

In [74]:
def insertOne(x):
    print("INSERTONE")
    print(x.shape)
    s = x.shape
    a = np.ones((s[0], s[1]+1))
    a[:, 1:] = x
    return a

In [81]:
def backwards(nn_weights, layers, X, y, num_labels, lambd):
    # Computes the gradient fo the neural network.
    # nn_weights: Neural network parameters (vector)
    # layers: a list with the number of units per layer.
    # X: a matrix where every row is a training example for a handwritten digit image
    # y: a vector with the labels of each instance
    # num_labels: the number of units in the output layer
    # lambd: regularization factor
    
    # Setup some useful variables
    m = X.shape[0]
    num_layers = len(layers)

    # Roll Params
    # The parameters for the neural network are "unrolled" into the vector
    # nn_params and need to be converted back into the weight matrices.
    Theta = roll_params(nn_weights, layers)
    
    print("np.shape(Theta[0]):")
    print(np.shape(Theta[0]))
    print("np.shape(Theta[1]):")
    print(np.shape(Theta[1]))
    # ================================ TODO ================================
    # The vector y passed into the function is a vector of labels
    # containing values from 1..K. You need to map this vector into a 
    # binary vector of 1's and 0's to be used with the neural network
    # cost function.
    yv = np.zeros((num_labels, m))
    for i in range(len(y)):
        yv[int(y[i])] = 1  # TODO: the int conversion is maybe not the useful
    yv = np.transpose(yv)

    a = []
    z = []
    x = np.copy(X)
    a.append(insertOne(x))
    z.append(x)


    for i in range(num_layers - 1):
        print("shape of x at stage "+str(i))
        print(np.shape(x))

        s = np.shape(Theta[i])
        theta = Theta[i][:, 1:s[1]]
        x = np.dot(x, np.transpose(theta))
        x = x + Theta[i][:, 0]
        z.append(x)
        x = sigmoid(x)
        a.append(insertOne(x))
        
    print("shape of x at the end ")
    print(np.shape(x))

    # You need to return the following variables correctly 
    delta = [np.zeros(w.shape) for w in z]
    delta[num_layers-1] = (x-yv)
    
    print("np.shape(delta[num_layers-1]: )")
    print(np.shape(delta[num_layers-1]))
    
    for i in range(num_layers-2, 0, -1):
        print("computing delta for i="+str(i))
        s = np.shape(Theta[i])
        theta = Theta[i][:, 1:s[1]]
        temp = np.dot(np.transpose(theta), np.transpose(delta[i+1]))
        delta[i] = np.transpose(temp)*sigmoidGradient(z[i])
        print("np.shape(delta[i]): ")
        print(np.shape(delta[i]))

    Delta = []
    for i in range(num_layers-1):
        temp = np.dot(np.transpose(delta[i+1]), a[i])
        print(temp)
        Delta.append(temp)
    
    for d in Delta:
        print(d)
    
    cost = (yv * np.log(x) - (1 - yv) * np.log(1 - x)) / m
    cost = -np.sum(cost)

    somme = 0

    for i in range(num_layers - 1):
        somme += lambd * np.sum(Theta[i] ** 2) / (2 * m)

    cost += somme

    # ================================ TODO ================================
    # In this point implement the backpropagaition algorithm 
    Theta_grad = [d/m for d in Delta]
    # Unroll Params
    Theta_grad = unroll_params(Theta_grad)
    return Theta_grad

In [82]:
lambd = 0.0
checkNNGradients(lambd)

np.shape(Theta[0]):
(5, 4)
np.shape(Theta[1]):
(2, 6)
INSERTONE
(10, 3)
shape of x at stage 0
(10, 3)
INSERTONE
(10, 5)
shape of x at stage 1
(10, 5)
INSERTONE
(10, 2)
shape of x at the end 
(10, 2)
np.shape(delta[num_layers-1]: )
(10, 2)
computing delta for i=1
np.shape(delta[i]): 
(10, 5)
[[ -2.32773805e-01  -1.41648590e-03  -3.12686920e-04   1.07859497e-03]
 [ -6.78272983e-02  -4.14101838e-04  -9.43728661e-05   3.12122084e-04]
 [  1.59654070e-01   9.74386868e-04   2.18584503e-04  -7.38183446e-04]
 [  2.40476843e-01   1.46302085e-03   3.19326390e-04  -1.11795527e-03]
 [  1.00018542e-01   6.09263602e-04   1.36214867e-04  -4.62069189e-04]]
[[-4.90656507 -2.55757089 -2.33508856 -2.50355949 -2.50582419 -2.33439176]
 [-4.91869395 -2.56389308 -2.34086112 -2.50974787 -2.51201866 -2.34016245]]
[[ -2.32773805e-01  -1.41648590e-03  -3.12686920e-04   1.07859497e-03]
 [ -6.78272983e-02  -4.14101838e-04  -9.43728661e-05   3.12122084e-04]
 [  1.59654070e-01   9.74386868e-04   2.18584503e-04  -7.38

In [62]:
range(10,-1)

range(10, -1)

In [15]:
for a in range(10, 0, -1):
    print(a)

10
9
8
7
6
5
4
3
2
1


In [11]:
print(a[0])
print(a[1])

IndexError: range object index out of range