In [44]:
# Initial setup

%precision 3
%matplotlib inline

import numpy as np
from scipy.io import loadmat
import scipy.optimize as opt


path = 'machine-learning-ex3/ex3/'

## 1. Multi-class Logistic Regression

### 1.1 Examine Data

In [108]:
# Read in data
digits = loadmat(path + 'ex3data1.mat')
X, y = digits['X'], digits['y']

### 1.2 Define functions

In [109]:
def sigmoid(z):
    return (1 + np.exp(-z))**-1

def reg_cost(theta, X, y, L):
    m, n = X.shape
    theta = theta.reshape([1, n])
    pos = y*np.log(sigmoid(X.dot(theta.T)))
    neg = (1 - y)*np.log(1 - sigmoid(X.dot(theta.T)))
    reg = L*np.sum(theta[-1, 1:]**2)/(2*m)
    J = -np.sum(pos + neg)/m + reg
    return J

def reg_gradient(theta, X, y, L):
    m, n = X.shape
    theta = theta.reshape([1, n])
    reg = (L*np.hstack([np.zeros(1), theta[-1, 1:]]))/m
    errs = sigmoid(X.dot(theta.T)) - y
    grad = np.sum(errs*X)/m + reg
    return grad

### 1.3 One-vs-all Classification

In [110]:
def multi_logistic(X, y, L):    
    m, n = X.shape
    labels = np.unique(y)
    params = np.zeros([labels.size, n + 1])
    
    X = np.hstack([np.ones([m, 1]), X])
    theta = np.zeros(n + 1)
    
    for i in labels:
        result = opt.minimize(fun=reg_cost, 
                              x0=theta, 
                              args=(X, y == i, L), 
                              method='CG', 
                              jac=reg_gradient)
        params[i - 1, :] = result.x
        
    return params

In [111]:
# Train classifier
L = 0.1
params = multi_logistic(X, y, L)

  from ipykernel import kernelapp as app


In [112]:
# Define multi-class prediction
def predict_all(theta, X):
    m, n = X.shape
    X = np.hstack([np.ones([m, 1]), X])
    predict = np.argmax(X.dot(theta.T), axis=1) + 1
    return predict

In [118]:
# Predict classification
p = predict_all(params, X).reshape([predict.size, 1]) == y
acc = np.mean(p)*100
print 'Train accuracy: {}%'.format(acc)

Train accuracy: 10.0%


# 2. Neural Networks

## 2.1 Examine Data

In [61]:
# Read in data
weights = loadmat(path + 'ex3weights.mat')
theta1, theta2 = weights['Theta1'], weights['Theta2']

a2, w1 = theta1.shape[0], theta1.shape[1]
a3, w2 = theta2.shape[0], theta2.shape[1]

print 'Layer 2 has {} parameters learnt from {} weights'.format(a2, w1)
print 'Output layer has {} parameters learnt from {} weights'.format(a3, w2)

Layer 2 has 25 parameters learnt from 401 weights
Output layer has 10 parameters learnt from 26 weights


## 2.2 Feedforward Propagation

In [75]:
def neural_networks(theta1, theta2, X):
    m, n = X.shape
    X = np.hstack([np.ones([m, 1]), X])
    a2 = sigmoid(X.dot(theta1.T))
    a2 = np.hstack([np.ones([m, 1]), a2])
    a3 = sigmoid(a2.dot(theta2.T))
    predict = np.argmax(a3, axis=1) + 1
    return predict

### 2.3 Prediction

In [106]:
# Predict classification
predict = neural_networks(theta1, theta2, X)
p = predict.reshape([predict.size, 1]) == y
acc = np.mean(p)*100
print 'Train accuracy: {}%'.format(acc)

Train accuracy: 97.52%
