In [1]:
import numpy as np
import matplotlib.pyplot as plt
from scipy.io import loadmat
from scipy.optimize import fmin_cg, minimize

Load data

In [2]:
data = loadmat("ex3data1.mat")
data.keys()

dict_keys(['__header__', '__version__', '__globals__', 'X', 'y'])

In [3]:
# Get X
X_raw = data['X']
# Add ones to X_raw
X = np.c_[np.ones(X_raw.shape[0]).T, X_raw]
print("Shape of X: ", X.shape)
# Get y
y = data['y']
# m, n
m, n = X.shape

Shape of X:  (5000, 401)


Softmax function

In [4]:
def softmax(z):
    e_z = np.exp(z);
    return e_z / np.sum(e_z);

def softmaxCostFunction(theta, X, y, lamda = 0):
    theta = np.reshape(theta, (theta.size, 1))
    m = X.shape[0];
    z = X.dot(theta);
    a = softmax(z);
    J = np.mean(-y * np.log(a));
    J += (lamda/(2*m))*np.sum(np.square(theta[1:]))
    return J;

def softmaxGradient(theta, X, y, lamda = 0):
    theta = np.reshape(theta, (theta.size, 1))
    m = X.shape[0];
    z = X.dot(theta);
    a = softmax(z);
    grad = np.dot(X.T, a - y) / m;
    grad[1:] = grad[1:] + (lamda/m) * theta[1:]
    return (grad.flatten());

def SoftmaxOneVsAll(X,y,lamda,num_labels):    
    m, n = X.shape;
    all_theta = np.zeros((num_labels, n))
    initTheta = np.zeros(n);
    max_iters = 500;
    for k in np.arange(1, 11):
        Y = (y == (k)).astype(int)
        all_theta[k % 10, :] = fmin_cg(softmaxCostFunction,initTheta,fprime = softmaxGradient,args=(X,Y,lamda), maxiter = max_iters,disp = 1)
        print("Finished checking OneVsAll number: ", k % 10);
    print("Finished checking all number ");
    return all_theta

def softmaxStable(Z):
    e_Z = np.exp(Z - np.max(Z, axis = 0, keepdims = True))
    A = e_Z / e_Z.sum(axis = 0)
    return A

def predictSoftmaxOneVsAll(all_theta, X):
    m, n = X.shape;
    # X : m * n
    # all_theta: n_label * n
    prob = softmaxStable(X.dot(all_theta.T));
    pred = np.argmax(prob, axis=1)
    return pred

def accuracy(pred,y):
    pred.shape = (pred.size,1)
    return np.mean((pred == y % 10))*100

In [5]:
softmax(np.array([[1.2],[0.9],[0.4]]))

array([[ 0.45659032],
       [ 0.33825043],
       [ 0.20515925]])

In [6]:
all_theta = SoftmaxOneVsAll(X, y, 0, 10)

         Current function value: 0.851719
         Iterations: 0
         Function evaluations: 99
         Gradient evaluations: 87
Finished checking OneVsAll number:  1
         Current function value: 0.851719
         Iterations: 0
         Function evaluations: 107
         Gradient evaluations: 95
Finished checking OneVsAll number:  2
         Current function value: 0.851719
         Iterations: 0
         Function evaluations: 109
         Gradient evaluations: 97
Finished checking OneVsAll number:  3
         Current function value: 0.851719
         Iterations: 0
         Function evaluations: 98
         Gradient evaluations: 86
Finished checking OneVsAll number:  4
         Current function value: 0.851719
         Iterations: 0
         Function evaluations: 103
         Gradient evaluations: 91
Finished checking OneVsAll number:  5
         Current function value: 0.851719
         Iterations: 0
         Function evaluations: 101
         Gradient evaluations: 89
Finished