# Neural Network

In [47]:
import numpy as np
from scipy.io import loadmat

In [48]:
data = loadmat('data/ex4data1.mat')
data

{'X': array([[ 0.,  0.,  0., ...,  0.,  0.,  0.],
        [ 0.,  0.,  0., ...,  0.,  0.,  0.],
        [ 0.,  0.,  0., ...,  0.,  0.,  0.],
        ..., 
        [ 0.,  0.,  0., ...,  0.,  0.,  0.],
        [ 0.,  0.,  0., ...,  0.,  0.,  0.],
        [ 0.,  0.,  0., ...,  0.,  0.,  0.]]),
 '__globals__': [],
 '__header__': b'MATLAB 5.0 MAT-file, Platform: GLNXA64, Created on: Sun Oct 16 13:09:09 2011',
 '__version__': '1.0',
 'y': array([[10],
        [10],
        [10],
        ..., 
        [ 9],
        [ 9],
        [ 9]], dtype=uint8)}

In [49]:
X = data['X']
y_t = data['y']

In [50]:
num_labels = 10
y = np.zeros([y_t.shape[0], num_labels])
for i in range(0, y_t.shape[0]):
    if y_t[i] == 10:
        y_t[i] = 0
    y[i, y_t[i]] = 1

In [51]:
X = np.matrix(X)
y = np.matrix(y)

In [52]:
def sigmoid(z):
    return 1 / (1 + np.exp(-z))

In [53]:
def sigmoid_gradient(z):
    return np.multiply(sigmoid(z), (1 - sigmoid(z)))

In [54]:
def forwardprop(X, theta1, theta2):
    a1 = np.insert(X, 0, 1, axis = 1)
    z2 = np.matmul(a1, theta1.T)
    a2 = sigmoid(z2)
    a2 = np.insert(a2, 0, np.ones(X.shape[0]), axis = 1)
    z3 = np.matmul(a2, theta2.T)
    h = sigmoid(z3)
    
    return a1, z2, a2, z3, h

In [55]:
def backprop(thetas, X, y, learning_rate, hidden_layer_size, input_size, num_labels):
    m = X.shape[0]
    
    theta1 = np.matrix(np.reshape(thetas[:(input_size + 1) * hidden_layer_size], [hidden_layer_size, input_size + 1])) #(401,25)
    theta2 = np.matrix(np.reshape(thetas[(input_size + 1) * hidden_layer_size:], [num_labels, hidden_layer_size+1])) #(26,10)
    
    a1, z2 , a2, z3, h = forwardprop(X, theta1, theta2)
    
    #cost calculation
    term1 = np.multiply(y, np.log(h))
    term2 = np.multiply((1-y), np.log(1 - h))
    J = -1*np.sum((term1 + term2))/m
    
    #adding learing rate
    J += (learning_rate/(2*m)) * (np.sum(np.power(theta1[:,1:], 2)) + np.sum(np.power(theta2[:,1:], 2)))
    
    delta1 = np.zeros(theta1.shape) #(25*401)
    delta2 = np.zeros(theta2.shape) #(10*26)
    
    #back propogation
    for i in range(0, m):
        a_1 = a1[i, :] #(1,401)
        z_2 = z2[i, :] #(1,25)
        a_2 = a2[i, :] #(1,26)
        z_3 = z3[i, :] #(1,10)
        h_ = h[i, :]   #(1,10)
        
        d3 = h_ - y[i, :]  #(1,10)
        
        z_2 = np.insert(z_2, 0, np.ones(1))    #(1,26)
        d2 = np.multiply(np.matmul(theta2.T, d3.T).T, sigmoid_gradient(z_2))  #(26,10) * (10,1) .* (1,26) = (1,26)
        
        
        delta1 = delta1 + np.matmul(d2[0,1:].T, a_1) #(25,1) * (1,401)
        delta2 = delta2 + np.matmul(d3.T, a_2)
    
    delta1 = delta1 / m
    delta2 = delta2 / m
        
    grad = np.concatenate((np.ravel(delta1), np.ravel(delta2)))
    
    return J, grad
    

In [56]:
#parameters
hidden_layer_size = 25
input_size = X.shape[1]
num_labels = 10
learning_rate = 1


#random initialization of theta1 and theta2
thetas = (np.random.random(size=hidden_layer_size * (input_size + 1) + num_labels * (hidden_layer_size + 1)) - 0.5) * 0.25


In [57]:
from scipy.optimize import minimize

optm_result = minimize(fun=backprop, x0=thetas, args=(X, y, learning_rate, hidden_layer_size, input_size, num_labels), \
                       method='TNC', jac=True, options={'maxiter': 250})

In [58]:
optm_result

     fun: 0.44655023087707679
     jac: array([-0.00292974,  0.        ,  0.        , ...,  0.00093989,
       -0.00028272,  0.00015913])
 message: 'Max. number of function evaluations reached'
    nfev: 250
     nit: 17
  status: 3
 success: False
       x: array([ 0.0558283 ,  0.03076222, -0.07714936, ..., -3.55019847,
        0.91989283,  1.17291868])

In [59]:
thetas = optm_result['x']

theta1 = np.matrix(np.reshape(thetas[:(input_size + 1) * hidden_layer_size], [hidden_layer_size, input_size + 1])) #(401,25)
theta2 = np.matrix(np.reshape(thetas[(input_size + 1) * hidden_layer_size:], [num_labels, hidden_layer_size+1])) #(26,10)

a1, z2 , a2, z3, h = forwardprop(X, theta1, theta2)

ans = np.array(range(0, X.shape[0]))
count = 0
for i in range(0, X.shape[0]):
    ans[i] = h[i,:].argmax()
    if y_t[i] == ans[i]:
        count = count + 1
print("Accuracy : ",(count*100/X.shape[0]))

Accuracy :  98.52
