In [1]:
import numpy as np
import math
from scipy.io import loadmat
import matplotlib.pyplot as plt
import pandas as pd
plt.rcParams['figure.figsize'] = (10.0, 10.0)
np.set_printoptions(suppress=True) 

In [2]:
data = loadmat('./ex4data1.mat')
data.keys()

FileNotFoundError: [Errno 2] No such file or directory: './ex4data1.mat'

In [None]:
X = data['X']
y = data['y']
# convert the labels to a 10-d vector
y = pd.get_dummies( y.ravel() ).to_numpy() # 5000x10
print("Shape of X: ", X.shape)
print("Shape of y: ", y.shape)

## Model presentation

In [5]:
weights_data = loadmat('./ex4weights.mat')
weights_data.keys()

dict_keys(['__header__', '__version__', '__globals__', 'Theta1', 'Theta2'])

In [6]:
theta1, theta2 = weights_data['Theta1'], weights_data['Theta2']
print( theta1.shape, theta2.shape)

(25, 401) (10, 26)


## Feedforward and cost functon

In [7]:
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

In [8]:
def randInitializeWeights(L_in, L_out):
    eps = 0.12
    return 2 * eps * np.random.random_sample((L_out, L_in + 1)) - eps

In [9]:
def sigmoi_gradient(z):
    return sigmoid(z)*( 1 - sigmoid(z) )

In [25]:
def nn_cost_function(theta_params, X, y, lamda ):
    
    theta1, theta2 = np.split(theta_params,[25*401])
    theta1 = np.reshape(theta1,(25,401))
    theta2 = np.reshape(theta2,(10,26))
    
    m = X.shape[0]
    a1 = np.c_[np.ones(X.shape[0] ),X]     #5000x401
    
    z2 = theta1.dot(a1.T)
    a2 = np.r_[ np.ones( (1, z2.shape[1]) ), sigmoid(z2) ]
    
    z3 = theta2.dot( a2 )
    a3 = sigmoid(z3) # 10x5000
    
    J = (-1/m) * np.trace( np.log(a3).dot(y) + np.log(1 - a3).dot( ( 1 - y ))) + \
            ( lamda/(2*m) ) * ( np.trace( (theta1[:,1:]).T.dot(theta1[:,1:]) ) + np.trace( (theta2[:,1:]).T.dot(theta2[:,1:])))
    
    #Backpropagation
    delta3 = a3.T - y # (5000x10)
    delta2 = theta2[:, 1:].T.dot(delta3.T) * sigmoi_gradient(z2) # (25x5000)

    D1 = delta2.dot(a1) #(25x401)
    D2 = delta3.T.dot(a2.T) #(10x26)

    theta1_reg = np.c_[np.ones((theta1.shape[0],1)),theta1[:,1:]]
    theta2_reg = np.c_[np.ones((theta2.shape[0],1)),theta2[:,1:]]
    
    theta1_grad = D1/m + (theta1_reg * lamda)/m 
    theta2_grad= D2/m + (theta2_reg * lamda)/m
    
    grad = np.r_[theta1_grad.ravel(), theta2_grad.ravel()]
    return J, grad

In [28]:
theta_params = np.r_[theta1.ravel(), theta2.ravel()]
J, grad = nn_cost_function(theta_params, X, y, 1)
print(J)

0.3837698590909236


## Backpropagation

In [29]:
theta1_0 = randInitializeWeights(400, 25)
theta2_0 = randInitializeWeights(25, 10)
theta_0 = np.r_[theta1_0.ravel(), theta2_0.ravel()]

In [31]:
from scipy.optimize import minimize
lamda = 1
nn = minimize( fun = nn_cost_function, x0 = theta_0, 
              args = (X, y, lamda), 
              method = 'CG', jac = True, options = {'maxiter' : 400} )

In [32]:
nn

     fun: 0.33729713900244873
     jac: array([-0.00048899, -0.00000069, -0.00000061, ..., -0.00004825,
       -0.00001293, -0.00001292])
 message: 'Desired error not necessarily achieved due to precision loss.'
    nfev: 770
     nit: 268
    njev: 758
  status: 2
 success: False
       x: array([-0.57359371, -0.00345938, -0.00307464, ..., -2.66330719,
        0.51222646,  1.51127093])

In [56]:
def predict( theta1, theta2, X ):
    z2 = theta1.dot(np.c_[np.ones(X.shape[0] ),X].T)
    a2 = np.r_[ np.ones( (1, z2.shape[1]) ), sigmoid(z2) ].T
    prob = sigmoid(a2.dot(theta2.T))
    pred =  np.argmax(prob, axis = 1 )+1
    return pred

In [57]:
y

array([[0, 0, 0, ..., 0, 0, 1],
       [0, 0, 0, ..., 0, 0, 1],
       [0, 0, 0, ..., 0, 0, 1],
       ...,
       [0, 0, 0, ..., 0, 1, 0],
       [0, 0, 0, ..., 0, 1, 0],
       [0, 0, 0, ..., 0, 1, 0]], dtype=uint8)

In [58]:
predict( res_layer1, res_layer2, X )

array([10, 10, 10, ...,  9,  9,  9], dtype=int64)

In [59]:
res_layer1 = nn.x[0:25*401].reshape(25, 401)
res_layer2 = nn.x[25*401:].reshape(10, 26)
training_accuracy = np.mean( (data["y"].ravel() == predict( res_layer1, res_layer2, X ))*100 )
print( 'training accuracy = ', training_accuracy )

training accuracy =  99.22
