In [1]:
import numpy as np
np.random.seed(0)

from scipy.io import loadmat
from scipy import optimize

import pandas as pd

%matplotlib inline
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from matplotlib.image import NonUniformImage
from matplotlib import cm

# 0 Data Structure

In [2]:
pathToDataFile = 'ex4data1.mat'
data = loadmat(pathToDataFile)
pathToWeightsFile = 'ex4weights.mat'
weights = loadmat(pathToWeightsFile)
print(weights['Theta1'].shape)
print(weights['Theta2'].shape)
print(type(weights))

(25, 401)
(10, 26)
<class 'dict'>


# 1 Nural Network
## 1.1 Forward Porpagation
<img src="forwardPropagation.png">

In [3]:
def getData(pathToDataFile):
    data = loadmat(pathToDataFile)
    X = data['X']
    y = data['y']
    return X, y

def generateBeta(layer):
    '''Generate beta-matrix for every layer in Neural Network'''
    betaSet = ()
    for i in range(len(layer)-1):
#         recommendation from Andrew Ng window is ±(6/(inLayer + outLayer))**0.5
        low, high = -(6/(layer[i]+layer[i+1]))**0.5, (6/(layer[i]+layer[i+1]))**0.
        betaSet += (np.random.uniform(low,high,(layer[i+1], layer[i]+1)),)
#         betaSet += (np.zeros((outLayer, inLayer+1)),)
    return betaSet

def flattenBeta(betaSet):
    flatBeta = betaSet[0].flatten()
    for beta in betaSet[1:]:
        flatBeta = np.concatenate((flatBeta, beta.flatten()), axis=-1)
    return flatBeta

def reshapeBeta(Beta, layer):
    splitIndex = 0
    splitIndices = []
    for i in range(len(layer)-2):
        splitIndex += (layer[i]+1)*layer[i+1]
        splitIndices += [splitIndex]
    splitBeta = np.split(Beta, splitIndices)
    reshapedBeta = ()
    for i in range(len(splitBeta)):
        reshapedBeta += (splitBeta[i].reshape(layer[i+1],layer[i]+1),)
    return reshapedBeta
    
def sigmoid(z):
    return 1/(1+np.exp(-z))

def forwardPropagation(flatBeta, layer, flatX, sampleSize):
    '''Forward Propagation is the hypothesis function for Neural Networks'''
    betaSet = reshapeBeta(flatBeta, layer)
#     H_0 (5000, 400)
    H = flatX.reshape(sampleSize, -1)
#     Z_H = ()
    H_byLayer = ()
    for beta in betaSet:
#         print(H.shape)
#         Z_l (5000, k_l); l is the number of layers [0, ...,l]; k is the number of neurons in a layer l [1,...,k]
        Z = np.dot(np.insert(H, 0, 1, axis=1), beta.T)
#         H_l (5000, k_l); l is the number of layers [0, ...,l]; k is the number of neurons in a layer l [1,...,k]
        H = sigmoid(Z)
#         Z_H += ((Z, H),)
        H_byLayer += (H,)
#     H_2 (5000, 10)
    return H_byLayer

def sigmoidGradient(Z):
    return sigmoid(Z)*(1-sigmoid(Z))

def costFunction(flatBeta, layer, flatX, sampleSize, y, yUnique, iLambda = 0.):
    X = flatX.reshape(sampleSize, -1)
    Y = np.array([yUnique]* y.shape[0]) == y
    betaSet = reshapeBeta(flatBeta, layer)
    J = 0
    for n in range(sampleSize):
        x_n = X[n:n+1,:]
        y_n = Y[n:n+1,:]
#         hypothesis vector h_n(1, 10)
        h_n = forwardPropagation(flatBeta, layer, x_n, 1)[len(betaSet)-1]
#         cost function scalar j_n(1, 1) = y_n(1, 10)*h_n.T(10, 1)
        j_n = (- np.dot(y_n, np.log(h_n).T) - np.dot((1-y_n), np.log(1-h_n).T))
        J += j_n
#     regularisation term (R)
    cummulativeR = 0
    for beta in betaSet:
        cummulativeR += np.sum(beta*beta) #element-wise multiplication
    cummulativeR *= iLambda/(2*sampleSize)
    return J[0][0]/sampleSize + cummulativeR

### 1.1.1 Neural Network Initialisation
To initialise a simple neural network, one has to do the following:
1. set the number of neurons in every layer (including input and output layers)
2. extract and flatten input matrix X
3. transform output Y
3. initialise Beat matrix

In [4]:
# Set number of neurons in every layer (including input and output layers)
layer = 400, 25, 10
# Extract and flatten input matrix X
X, y = getData(pathToDataFile)
sampleSize, numVariables = X.shape
flatX = X.flatten()
yUnique = np.unique(y)
# Initialise Beat matrix
betaTest = flattenBeta((weights['Theta1'], weights['Theta2']))
betaInitial = flattenBeta(generateBeta(layer))
print(X.shape)
print(y.shape)
for beta in generateBeta(layer): print(beta.shape)

(5000, 400)
(5000, 1)
(25, 401)
(10, 26)


In [5]:
iLambda = 0
Y = np.array([yUnique]* y.shape[0]) == y
betaSet = reshapeBeta(betaTest, layer)
J = 0
for n in range(sampleSize):
    x_n = X[n:n+1,:]
    y_n = Y[n:n+1,:]
#         hypothesis vector h_n(1, 10)
    h_n = forwardPropagation(betaTest, layer, x_n, 1)[len(betaSet)-1]
#         cost function scalar j_n(1, 1) = y_n(1, 10)*h_n.T(10, 1)
    j_n = (- np.dot(y_n, np.log(h_n).T) - np.dot((1-y_n), np.log(1-h_n).T))
    J += j_n
#     regularisation term (R)
cummulativeR = 0
for beta in betaSet:
    cummulativeR += np.sum(beta*beta) #element-wise multiplication
cummulativeR *= iLambda/(2*sampleSize)
J[0][0]/sampleSize + cummulativeR

0.28762916516131876

## 1.1.2 Forward-Propagation Test

In [6]:
Y = np.array([yUnique]* y.shape[0]) == y
print(Y[0:0+1,:].shape)

(1, 10)


In [7]:
print(forwardPropagation(betaTest, layer, flatX, sampleSize)[1].shape)
print(forwardPropagation(betaTest, layer, X[0:0+1,:], 1)[1].shape)

(5000, 10)
(1, 10)


In [8]:
print(X.shape)
print(X[0][None,:].shape)
# costFunction(betaTest, layer, X.flatten(), sampleSize, y, yUnique, iLambda = 0.)
costFunction(betaTest, layer, X[0:100][None,:].flatten(), 100, y, yUnique, iLambda = 0.)

(5000, 400)
(1, 400)


0.11589058107504865

## 1.1.3 Cost-Function Test
The outputs of the costFunction should be as follows:<br\>
betaTest, X, iLambda=0. — 0.287629165161<br\>
betaTest, X, iLambda=1. — 0.384487796243<br\>
betaTest, X, iLambda=0. — 0.0345203898838<br\>
betaInitial, X, iLambda=1. — 65.5961451562

In [10]:
print(costFunction(betaTest, layer, flatX, sampleSize, y, yUnique, iLambda = 0.))
print(costFunction(betaTest, layer, flatX, sampleSize, y, yUnique, iLambda = 1.))
print(costFunction(betaTest, layer, X[0][None,:].flatten(), 1, y, yUnique, iLambda = 0.))
print(costFunction(betaInitial, layer, flatX, sampleSize, y, yUnique, iLambda = 1.))

0.287629165161
0.384487796243
0.0345203898838
65.5961451562


## 1.2 Back Propagation

In [11]:
def backPropagation(flatBeta, layer, flatX, sampleSize, y, yUnique):
    sampleSize, numVariables = X.shape
    Y = np.array([yUnique]* y.shape[0]) == y
    betaSet = reshapeBeta(flatBeta, layer)

    deltaSet = ()
#     hypothesis matrix E(5000, 10)
    H = forwardPropagation(flatBeta, layer, flatX, sampleSize)
#     error matrix E(5000, 10)
    E = H[len(layer)-2] - Y
    for l in reversed(range(len(layer)-1)):
        E = np.dot(E*sigmoidGradient(H[l]), betaSet[l])[:,1:]
        deltaSet = (np.dot(H[l].T, np.insert(E, 0, 1, axis=1)),) + deltaSet
    flatDelta = flattenBeta(deltaSet)
    return flatBeta + flatDelta/sampleSize

In [12]:
betaInitial = flattenBeta(generateBeta(layer))
a = backPropagation(betaInitial, layer, flatX, sampleSize, y, yUnique)

print(np.sum(a))
print(costFunction(a,layer, flatX, sampleSize, y, yUnique, iLambda = 0.))

16253.0530028
199.953706643


In [13]:
def gradientCheck(beta, layer, sampleSize, epsilon):
    for i in np.random.randint(beta.size, size=10):
        epsilonVector = np.zeros(beta.size)
        epsilonVector[i] = epsilon
        betaPlus = betaMinus = beta
        betaPlus = betaPlus + epsilonVector
        costPlus = costFunction(betaMinus,layer, X, sampleSize, y, yUnique, iLambda = 0.)
        betaMinus = betaMinus - epsilonVector
        costMinus = costFunction(betaMinus,layer, X, sampleSize, y, yUnique, iLambda = 0.)
        approximateGradient = (costPlus-costMinus)/(2*epsilon)
        print (approximateGradient)

epsilon = 0.0001
gradientCheck(betaInitial, layer, sampleSize, epsilon)


3.8423308979e-06
8.10018718767e-09
8.52651282912e-10
1.1226575225e-08
0.0
0.0
7.1054273576e-11
0.0
3.86989995604e-06
1.06581410364e-08


http://www.holehouse.org/mlclass/09_Neural_Networks_Learning.html

In [None]:
def betaOptimisation_1(flatBeta, flatX, sampleSize, y, yUnique, iLambda=0.):

    optimisedBeta = optimize.minimize(costFunction, flatBeta, args=(layer, flatX, sampleSize, y, yUnique),
                                      method=None, jac=backPropagation, options={'maxiter':50})

#     optimisedBeta = optimize.fmin_cg(costFunction, fprime=backPropagation, x0=flatBeta,
#                                      args=(layer, flatX, sampleSize, y, yUnique),
#                                      maxiter=50,disp=True,full_output=True)
    return(optimisedBeta)

In [None]:
def betaOptimisation_2(flatBeta, flatX, sampleSize, y, yUnique, iLambda=0.):

#     optimisedBeta = optimize.minimize(costFunction, flatBeta, args=(layer, flatX, sampleSize, y, yUnique),
#                                       method=None, jac=backPropagation, options={'maxiter':50})

    optimisedBeta = optimize.fmin_cg(costFunction, fprime=backPropagation, x0=flatBeta,
                                     args=(layer, flatX, sampleSize, y, yUnique),
                                     maxiter=50,disp=True,full_output=True)
    return(optimisedBeta)

In [None]:
a = betaOptimisation_1(betaInitial, flatX, sampleSize, y, yUnique, iLambda=0.)

In [None]:
b = betaOptimisation_2(betaInitial, flatX, sampleSize, y, yUnique, iLambda=0.)

In [None]:
def qualityControl(optimisedBeta, layer, flatX, sampleSize, y, yUnique, iLambda = 0.):
    X = flatX.reshape(sampleSize,-1)
    yAssignmentVector = []
    misAssignedIndex = []
    for n in range(sampleSize):
        x = X[n]
        yAssignment =  np.argmax(forwardPropagation(optimisedBeta, layer, X[n], 1)[1]) + 1
        if yAssignment == y[n]:
            yAssignmentVector += [True]
        else:
            yAssignmentVector += [False]
            misAssignedIndex += [n]
    return (sum(yAssignmentVector)/sampleSize)

In [None]:
# neuralNetworkClassifier(, flatX, sampleSize, y, yUnique, iLambda=0.)
qualityControl(a['x'], layer, flatX, sampleSize, y, yUnique, iLambda = 0.)

In [None]:
qualityControl(b[0], layer, flatX, sampleSize, y, yUnique, iLambda = 0.)