In [2]:
import numpy as np
import pandas as pd
from scipy.optimize import minimize
import math

In [3]:
import scipy.io as sio
# Load the training set from the MATLAB file; the dict returned by loadmat
# is indexed by variable name ('X' for the pixel data, 'y' for the labels).
mat_data = sio.loadmat('ex3data1.mat')
X = mat_data['X']
# Flatten the labels to 1-D so that `y == label` comparisons later on
# produce a 1-D mask.
y = mat_data['y'].ravel()
# m = number of rows (examples) in X, n = number of columns (features).
m, n = X.shape

The MATLAB file contains 5000 training examples: `X` holds the 400 pixel columns and `y` holds the label column (401 columns in total).
The labels cover the 10 classes of the classification problem and take values from 1 to 10.

Each row of `X` contains the greyscale pixel values of one 20×20 image, flattened into a row vector of 400 values.

Cost Function and Sigmoid Function

In [4]:
def lr_cost_function(theta, X, y, l):
    """Regularized logistic-regression cost and gradient.

    Parameters
    ----------
    theta : ndarray
        Parameter vector. ``theta[0]`` is treated as the intercept and is
        excluded from the regularization term.
    X : ndarray
        Design matrix, one example per row. The callers in this notebook
        prepend a column of ones before passing it in.
    y : ndarray
        Binary (0/1) targets, one per row of ``X``.
    l : float
        Regularization strength (lambda).

    Returns
    -------
    J : float
        Regularized cross-entropy cost.
    grad : ndarray
        Gradient of ``J`` with respect to ``theta`` (same shape as ``theta``).
    """
    m = X.shape[0]

    z = X.dot(theta)

    # Evaluate the log-probabilities directly instead of np.log(sigmoid(z)):
    #   log(sigmoid(z))     = -log(1 + e^-z)
    #   log(1 - sigmoid(z)) = -log(1 + e^z)
    # np.logaddexp keeps both finite when the sigmoid saturates, where the
    # naive form yields log(0) = -inf and a nan cost.
    log_h = -np.logaddexp(0, -z)
    log_one_minus_h = -np.logaddexp(0, z)
    h = np.exp(log_h)

    # Float copy of theta with the intercept zeroed: this is what gets
    # penalised. Copying leaves the caller's theta untouched.
    theta_reg = np.array(theta, dtype=float)
    theta_reg[0] = 0

    J = -(y.T.dot(log_h) + (1 - y).T.dot(log_one_minus_h)) / m \
        + l / (2.0 * m) * np.sum(np.square(theta_reg))

    grad = X.T.dot(h - y) / m + 1.0 * l / m * theta_reg
    return J, grad


def sigmoid(z):
    """Return the logistic function 1 / (1 + e^-z), applied element-wise to ``z``."""
    denominator = 1 + np.exp(-z)
    return 1 / denominator

Verifying the cost function and its gradient against the expected values
for a given test value of THETA

In [5]:
# Small hand-built test case with known expected cost and gradient.
theta_t = np.array([-2, -1, 1, 2])

# Column of ones followed by the values 0.1 .. 1.5 laid out column-major.
X_t = np.hstack((np.ones((5, 1)), np.arange(1, 16).reshape(5, 3, order='F') / 10.0))
print(X_t.shape)
y_t = np.array([1, 0, 1, 0, 1])
lambda_t = 3

cost, grad = lr_cost_function(theta_t, X_t, y_t, lambda_t)

print('Cost:', cost)
print('Expected cost: 2.534819')
print('Gradients: \n', grad)
print('Expected gradients: 0.146561 -0.548558 0.724722 1.398003')

# Make the check executable: a regression in lr_cost_function now fails
# this cell instead of relying on someone comparing the printed numbers.
np.testing.assert_allclose(cost, 2.534819, rtol=0, atol=1e-6)
np.testing.assert_allclose(grad, [0.146561, -0.548558, 0.724722, 1.398003],
                           rtol=0, atol=1e-6)

(5, 4)
Cost: 2.534819396109744
Expected cost: 2.534819
Gradients: 
 [ 0.14656137 -0.54855841  0.72472227  1.39800296]
Expected gradients: 0.146561 -0.548558 0.724722 1.398003


Optimizing Function

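`y == label` yields a Boolean array marking the examples that belong to the current class; it is cast to 0/1 and used as the binary target for that class's classifier.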

In [6]:
import scipy.optimize as opt


def one_vs_all(X, y, num_labels, l):
    """Train one regularized logistic-regression classifier per class.

    Parameters
    ----------
    X : ndarray
        Feature matrix of shape (m, n), without a bias column; a column of
        ones is prepended internally.
    y : ndarray
        Label vector with one entry per row of ``X``. Labels are assumed to
        run from 1 to ``num_labels`` (as in this notebook's data set).
    num_labels : int
        Number of classes, i.e. number of classifiers to train.
    l : float
        Regularization strength passed through to ``lr_cost_function``.

    Returns
    -------
    ndarray
        Array of shape (num_labels, n + 1). Row ``i`` holds the parameters
        of the classifier for label ``i``, except row 0, which holds the
        classifier for label ``num_labels``.
    """
    m, n = X.shape
    all_theta = np.zeros((num_labels, n + 1))
    X = np.hstack((np.ones((m, 1)), X))
    initial_theta = np.zeros(n + 1)

    # Iterate over num_labels rather than a hard-coded 10 so the function
    # honours its own parameter.
    for i in range(num_labels):
        # Row 0 is used for the highest label; rows 1..num_labels-1 map to
        # the label equal to their index.
        label = num_labels if i == 0 else i
        result = opt.minimize(fun=lr_cost_function, x0=initial_theta,
                              args=(X, (y == label).astype(int), l),
                              method='TNC', jac=True)

        all_theta[i, :] = result.x

    return all_theta

Calling optimising function to find the value of THETA

In [7]:
input_layer_size = 400  # 20x20 input images of digits, flattened
num_labels = 10  # 10 labels, from 1 to 10
# Regularization strength handed to one_vs_all / lr_cost_function.
l = 0.1
# One row of fitted parameters per class (see one_vs_all for the row order).
all_theta = one_vs_all(X, y, num_labels, l)

Adding a bias column of ones to matrix X

In [8]:
# Build the augmented matrix from the raw data rather than from X itself,
# so re-running this cell cannot prepend a second column of ones.
M = np.ones((mat_data['X'].shape[0], 1))
X = np.concatenate((M, mat_data['X']), axis=1)

In [9]:
# Turn y into a column vector; -1 lets NumPy infer the row count instead of
# hard-coding the size of this particular data set.
y = np.reshape(y, (-1, 1))

PREDICT FUNCTION

In [10]:
def predict(theta,X,y):
    """Return, for each row of ``X``, the index of the highest-scoring classifier.

    ``theta`` holds one classifier per row, so ``X @ theta.T`` gives one
    column of scores per classifier. ``y`` is accepted but not used.
    """
    class_probabilities = sigmoid(np.dot(X, np.transpose(theta)))
    best_row = np.argmax(class_probabilities, axis=1)
    return best_row

CALCULATING THE ACCURACY PERCENTAGE

In [11]:
pred = predict(all_theta, X, y)
# Row 0 of all_theta is the classifier trained for label 10, so a predicted
# index of 0 is mapped back to label 10 before comparing with y.
pred[pred == 0] = 10
print('Training set accuracy: {} %'.format(100 * np.mean(pred == y.ravel())))

Training set accuracy: 96.46000000000001 %


LOADING PRE-TRAINED (ALREADY OPTIMISED) WEIGHTS FROM THE FILE

In [17]:
# Load the pre-trained network weights; the dict returned by loadmat is
# indexed by variable name.
mat_weightsdata = sio.loadmat('ex3weights.mat')
Theta1 = mat_weightsdata['Theta1']  # weights applied to the (bias-augmented) input
Theta2 = mat_weightsdata['Theta2']  # weights applied to the (bias-augmented) hidden layer
# Print the shapes as a sanity check on the layer sizes.
print(Theta1.shape)
print(Theta2.shape)

(25, 401)
(10, 26)


NEURAL NETWORK

In [18]:
def neuralnetwork(X,Theta1,Theta2):
    """Forward-propagate ``X`` through a two-layer network and return predicted labels.

    Parameters
    ----------
    X : ndarray
        Input matrix, one example per row. It is multiplied by ``Theta1.T``
        directly, so it must already contain the bias column.
    Theta1 : ndarray
        Weights from the input layer to the hidden layer.
    Theta2 : ndarray
        Weights from the bias-augmented hidden layer to the output layer.

    Returns
    -------
    ndarray
        1-based index of the strongest output unit for each row of ``X``.
    """
    activation2 = sigmoid(np.dot(X, Theta1.T))
    # Build the hidden-layer bias column from the activations themselves
    # rather than from a notebook-level global, so the function works for
    # any number of input rows and does not depend on cell execution order.
    hidden_bias = np.ones((activation2.shape[0], 1))
    activation2 = np.concatenate((hidden_bias, activation2), axis=1)
    activation3 = sigmoid(np.dot(activation2, Theta2.T))
    # argmax is 0-based; the labels start at 1.
    return np.argmax(activation3, axis=1) + 1

ACCURACY

In [14]:
# Predict with the pre-trained network and report the fraction of training
# examples whose predicted label matches y, as a percentage.
prednn = neuralnetwork(X, Theta1, Theta2)
print('Training set accuracy: {} %'.format(100 * np.mean(prednn == y.ravel())))

Training set accuracy: 97.52 %
