In [283]:
import scipy.io
import pandas as pd
import numpy as np
from scipy.optimize import fmin_cg

In [284]:
def sigmoid(z):
    '''
        Element-wise logistic function 1 / (1 + exp(-z)).

        Accepts a scalar, vector or matrix (anything np.array can convert)
        and returns the result with the same shape as the input.
    '''
    values = np.array(z)
    return 1 / (1 + np.exp(-values))

In [399]:
def costFunctionLR(theta,X,y,lamda):
    '''
        Regularised logistic-regression cost and gradient.

        Evaluates the cross-entropy cost for the parameters `theta` together with
        its gradient in one vectorised pass. No optimisation happens here; the
        function is meant to be handed to an optimiser.

        Parameters
        ----------
        theta : array-like with n + 1 entries (any shape); flattened to a column.
        X     : (m, n + 1) matrix. Its first column is treated as the bias term,
                because theta[0] is excluded from the regularisation.
        y     : m binary labels (0/1), given either as a vector or as an (m, 1) column.
        lamda : regularisation strength.

        Returns
        -------
        J    : scalar cost.
        grad : 1-D array with n + 1 entries, the gradient of J w.r.t. theta.
    '''
    theta = np.asarray(theta).reshape(-1, 1)
    # Force the labels into a column. A 1-D y would otherwise broadcast against
    # the (m, 1) hypothesis and silently produce (m, m) matrices.
    y = np.asarray(y).reshape(-1, 1)
    m = y.size

    h = sigmoid(X.dot(theta))

    J = np.mean(-y * np.log(h) - (1 - y) * np.log(1 - h))
    # The bias weight theta[0] is not penalised.
    J += np.square(theta[1:]).sum() * lamda / 2 / m

    grad = X.T.dot(h - y) / m
    grad[1:] = grad[1:] + lamda * theta[1:] / m
    return J, grad.ravel()


In [403]:
def oneVsAll(X,y,num_labels,lambda_):
    '''
        Fit one regularised logistic-regression classifier per label (one-vs-all).

        X is the 2-D feature matrix without a bias column; a column of ones is
        prepended here. Labels in y are compared against 1..num_labels, and
        lambda_ is the regularisation strength forwarded to costFunctionLR.

        Returns a (num_labels, n + 1) matrix whose row i - 1 holds the parameters
        of the classifier trained for label i.
    '''
    n_samples, n_features = X.shape[0], X.shape[1]

    all_theta = np.zeros((num_labels, n_features + 1))
    X = np.concatenate([np.ones((n_samples, 1)), X], axis=1)

    # fmin_cg wants the objective and its gradient as separate callables.
    def cost_only(params, targets):
        return costFunctionLR(params, X, targets, lambda_)[0]

    def grad_only(params, targets):
        return costFunctionLR(params, X, targets, lambda_)[1]

    for row in range(num_labels):
        label = row + 1
        # Binary targets: 1 where the sample belongs to `label`, 0 elsewhere.
        targets = np.where((y == label), 1, 0)
        all_theta[row, :] = fmin_cg(
            cost_only,
            np.zeros(n_features + 1),
            fprime=grad_only,
            args=(targets,),
            maxiter=100,
            disp=False,
        )

    return all_theta

In [430]:
def predict(theta1, theta2, X):
    '''
        Forward-propagate X through a trained three-layer network and return the
        predicted label for every row.

        theta1 : weights between input and hidden layer, e.g. (25 X 401)
        theta2 : weights between hidden and output layer, e.g. (10 X 26)
        X      : inputs without a bias column, e.g. (5000 X 400)

        Returns a 1-D array with one label per row of X. Labels are 1-based:
        the index of the strongest output unit plus one.
    '''
    m = X.shape[0]
    bias = np.ones((m, 1))

    # Only the weights passed as arguments are used; nothing is read from
    # notebook-level globals, so the function works on a fresh kernel.
    a1 = np.concatenate([bias, X], axis=1)
    a2 = np.concatenate([bias, sigmoid(a1 @ theta1.T)], axis=1)

    return np.argmax(sigmoid(a2 @ theta2.T), axis=1) + 1

In [435]:
def predictOneVsAll(all_theta, X):
    '''
        Predict a label for each row of X with the one-vs-all classifiers
        (no neural network involved).

        A bias column of ones is prepended to X, every row is scored against
        every row of all_theta, and the 1-based index of the best-scoring
        classifier is returned for each row.
    '''
    n_rows = X.shape[0]
    design = np.concatenate([np.ones((n_rows, 1)), X], axis=1)
    scores = sigmoid(design @ all_theta.T)
    return np.argmax(scores, axis=1) + 1

In [431]:
# Sanity check of costFunctionLR on a small hand-made problem

# Fixed inputs: 4 parameters, 5 samples (bias column + 3 features), binary labels
theta_t = np.array([[-2], [-1], [1], [2]])
X_t = np.hstack((np.ones((5, 1)), np.arange(1, 16).reshape(3, 5).T / 10))
y_t = np.array([[1], [0], [1], [0], [1]])
lambda_t = 3

cost, grad = costFunctionLR(theta_t, X_t, y_t, lambda_t)

In [432]:
# Cost returned by costFunctionLR for the test inputs (compare with the expected value below)
print(cost)

2.5348193961097443


Expected cost: 2.534819

In [433]:
# Gradient returned by costFunctionLR for the test inputs (compare with the expected values below)
print(grad)

[ 0.14656137 -0.54855841  0.72472227  1.39800296]


Expected gradients:<br>
 0.146561<br>
 -0.548558<br>
 0.724722<br>
 1.398003

In [434]:
# Problem dimensions and regularisation strength
input_layer_size = 400  # 20x20 Input Images of Digits
num_labels = 10
reg_param = 0.1

# Training data: features in 'X', labels in 'y'
mat = scipy.io.loadmat('ex3data1.mat')
X = pd.DataFrame(mat['X'])
y = pd.DataFrame(mat['y'])

# Train one regularised classifier per label
all_theta = oneVsAll(X, y, num_labels, reg_param)

In [436]:
# Training-set accuracy of the one-vs-all classifiers
p = predictOneVsAll(all_theta, X).reshape(-1,1)
# Compare as plain NumPy columns: y is a DataFrame, and depending on the pandas
# version np.mean over a DataFrame may yield a per-column Series, not a scalar.
accuracy = np.mean(p == np.asarray(y).reshape(-1, 1))
print('Training Set Accuracy: %.2f %%' % (accuracy * 100))

Training Set Accuracy: 96.20 %


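Expected accuracy: about 94.9%. The exact figure depends on the optimiser and its iteration limit, which is presumably why the run above reports a slightly higher value.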

In [437]:
# Neural network: load the pre-trained weight matrices instead of training them here
print('Loading Saved Neural Network Parameters ...')
params = scipy.io.loadmat('ex3weights.mat')
Theta1, Theta2 = params['Theta1'], params['Theta2']

Loading Saved Neural Network Parameters ...


In [438]:
# Training-set accuracy of the pre-trained neural network
p = predict(Theta1, Theta2, X).reshape(-1,1)
# Compare as plain NumPy columns: y is a DataFrame, and depending on the pandas
# version np.mean over a DataFrame may yield a per-column Series, not a scalar.
accuracy = np.mean(p == np.asarray(y).reshape(-1, 1))
print('Training Set Accuracy: %.2f %%' % (accuracy * 100))

Training Set Accuracy: 97.52 %


Expected Accuracy ~ 97.5%