[M] Overview
This notebook is a Python implementation of the MATLAB script `ex3.m`. It covers training and testing logistic regression classifiers for recognizing handwritten digits using a one-vs-all approach. We will use the following functions, implemented in Python:
- `lrCostFunction`
- `oneVsAll`
- `predictOneVsAll`
- `predict`
- `sigmoid`

In [None]:
# [Y] Import Libraries
import numpy as np
from scipy.optimize import minimize
import matplotlib.pyplot as plt
from scipy.io import loadmat

In [None]:
# [Y] Load and Visualize Data
# Load training data from MAT file.
# `X` holds one example per row; `y` is flattened with ravel() so that it can
# be compared element-wise against the 1-D predictions later on.
data = loadmat('ex3data1.mat')
X, y = data['X'], data['y'].ravel()
y[y == 10] = 0  # Map label '10' to '0'

# Randomly select 100 data points to display.
# A seeded generator keeps the displayed sample identical across
# "Restart Kernel -> Run All" executions of the notebook.
m = X.shape[0]
rng = np.random.default_rng(0)
rand_indices = rng.permutation(m)
sel = X[rand_indices[:100], :]

def displayData(X):
    """Displays 2D data in a nice grid.

    Every row of ``X`` is treated as one flattened image. The images are
    normalised by their own largest absolute value, tiled into a roughly
    square grid separated by a 1-pixel border of value -1, and shown with
    matplotlib in grayscale.

    Parameters
    ----------
    X : array_like
        Either a 2-D array with one flattened image per row, or a single
        1-D flattened image. The number of pixels per image must factor into
        ``example_height * example_width`` as computed below (it does for
        square images).

    Returns
    -------
    None
        The figure is shown as a side effect. Nothing is drawn when ``X``
        contains no examples or no pixels.
    """
    # Accept a single 1-D example as a one-row batch.
    X = np.atleast_2d(X)
    num_examples, num_pixels = X.shape
    if num_examples == 0 or num_pixels == 0:
        # Nothing to draw; the grid-size arithmetic below would divide by zero.
        return

    example_width = int(np.round(np.sqrt(num_pixels)))
    example_height = int(num_pixels / example_width)

    display_rows = int(np.sqrt(num_examples))
    display_cols = int(np.ceil(num_examples / display_rows))

    pad = 1
    # Background (and the borders between tiles) is -1.
    display_array = -np.ones((pad + display_rows * (example_height + pad),
                              pad + display_cols * (example_width + pad)))

    for curr_ex in range(num_examples):
        # Tiles are filled row by row, `display_cols` tiles per grid row.
        j, i = divmod(curr_ex, display_cols)

        max_val = np.max(np.abs(X[curr_ex, :]))
        if max_val == 0:
            # An all-zero image would otherwise be turned into NaNs.
            max_val = 1

        rows = pad + j * (example_height + pad) + np.arange(example_height)
        cols = pad + i * (example_width + pad) + np.arange(example_width)
        # order='F' reproduces MATLAB's column-major reshape (this notebook
        # ports ex3.m), so images flattened by MATLAB are not drawn transposed.
        tile = X[curr_ex, :].reshape(example_height, example_width, order='F')
        display_array[np.ix_(rows, cols)] = tile / max_val

    plt.imshow(display_array, cmap='gray')
    plt.axis('off')
    plt.show()

# Show the 100 randomly chosen training examples as one image grid.
displayData(X=sel)

In [None]:
# [Y] Test lrCostFunction
def sigmoid(z):
    """Element-wise logistic function ``1 / (1 + exp(-z))``.

    Evaluated in a form that never exponentiates a positive number, so large
    negative inputs do not trigger floating-point overflow warnings.

    Parameters
    ----------
    z : scalar or array_like
        Input value(s).

    Returns
    -------
    numpy.float64 or numpy.ndarray
        Sigmoid of ``z``; a NumPy scalar for scalar input, otherwise an array
        with the same shape as ``z``.
    """
    z = np.asarray(z, dtype=float)
    # exp(-|z|) always lies in (0, 1], so it cannot overflow.
    e = np.exp(-np.abs(z))
    # z >= 0: 1 / (1 + exp(-z));  z < 0: exp(z) / (1 + exp(z)) -- same value.
    result = np.where(z >= 0, 1 / (1 + e), e / (1 + e))
    # Indexing with () turns a 0-d array back into a NumPy scalar and leaves
    # higher-dimensional arrays as they are.
    return result[()]

def lrCostFunction(theta, X, y, lambda_):
    """Regularised logistic-regression cost and gradient.

    Parameters
    ----------
    theta : numpy.ndarray
        Parameter vector. ``theta[0]`` is excluded from the regularisation
        term (it is treated as the intercept).
    X : numpy.ndarray
        Design matrix with one example per row; ``X.dot(theta)`` must be
        defined, so its column count has to match ``theta``.
    y : numpy.ndarray
        1-D vector of 0/1 labels, one per row of ``X``.
    lambda_ : float
        Regularisation strength.

    Returns
    -------
    J : float
        Mean cross-entropy plus ``lambda_ / (2 m)`` times the sum of the
        squared non-intercept parameters.
    grad : numpy.ndarray
        Gradient of ``J`` with respect to ``theta`` (same shape as ``theta``).
    """
    m = y.size
    z = X.dot(theta)
    h = sigmoid(z)

    # log(h) = -log(1 + exp(-z)) and log(1 - h) = -log(1 + exp(z)).
    # Using logaddexp keeps the cost finite when h saturates to exactly 0 or 1,
    # where log(h) / log(1 - h) would give -inf and 0 * -inf would give NaN.
    data_cost = y.dot(np.logaddexp(0, -z)) + (1 - y).dot(np.logaddexp(0, z))
    reg_cost = (lambda_ / (2 * m)) * np.sum(np.square(theta[1:]))
    J = (1 / m) * data_cost + reg_cost

    grad = (1 / m) * X.T.dot(h - y)
    # The intercept term theta[0] is not regularised.
    grad[1:] += (lambda_ / m) * theta[1:]
    return J, grad

# Small fixed test case from ex3.m with known expected results.
theta_t = np.array([-2, -1, 1, 2])
# MATLAB's reshape(1:15, 5, 3) fills the matrix column by column, so the
# equivalent NumPy reshape needs order='F' (the default is row by row).
X_t = np.hstack([np.ones((5, 1)), np.arange(1, 16).reshape(5, 3, order='F') / 10])
y_t = (np.array([1, 0, 1, 0, 1]) >= 0.5).astype(int)
lambda_t = 3
J, grad = lrCostFunction(theta_t, X_t, y_t, lambda_t)
print('Cost:', J)
print('Expected cost: 2.534819')
print('Gradients:', grad)
print('Expected gradients: 0.146561 -0.548558 0.724722 1.398003')

# Fail loudly if the implementation drifts from the reference values.
np.testing.assert_allclose(J, 2.534819, rtol=0, atol=1e-5)
np.testing.assert_allclose(grad, [0.146561, -0.548558, 0.724722, 1.398003], rtol=0, atol=1e-5)

In [None]:
# [Y] Train One-vs-All Logistic Regression
def oneVsAll(X, y, num_labels, lambda_):
    """Train one regularised logistic-regression classifier per label.

    For every label ``c`` in ``range(num_labels)`` a binary problem
    ("is the label equal to c?") is minimised with SciPy's TNC solver,
    starting from an all-zero parameter vector.

    Parameters
    ----------
    X : numpy.ndarray
        2-D feature matrix with one example per row, without an intercept
        column (a column of ones is prepended here).
    y : array_like
        Labels, one per row of ``X``; flattened to 1-D before use.
    num_labels : int
        Number of classes; labels are compared against ``0 .. num_labels - 1``.
    lambda_ : float
        Regularisation strength passed on to ``lrCostFunction``.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(num_labels, n + 1)``; row ``c`` holds the fitted
        parameters of the classifier for label ``c``.
    """
    m, n = X.shape
    # A column-vector y would broadcast against the 1-D hypothesis inside
    # lrCostFunction, so always work with a flat label vector.
    y = np.ravel(y)
    all_theta = np.zeros((num_labels, n + 1))
    X = np.hstack([np.ones((m, 1)), X])

    for c in range(num_labels):
        initial_theta = np.zeros(n + 1)
        # Binary targets for this classifier, built once per label instead of
        # on every objective evaluation.
        y_c = (y == c).astype(int)
        # jac=True tells minimize that the objective returns (cost, gradient),
        # so each step evaluates lrCostFunction once rather than twice.
        res = minimize(fun=lrCostFunction,
                       x0=initial_theta,
                       args=(X, y_c, lambda_),
                       jac=True,
                       method='TNC')
        all_theta[c, :] = res.x
    return all_theta

# Regularisation strength used for every one-vs-all classifier.
lambda_ = 0.1
# Fit one classifier per digit label (10 labels).
all_theta = oneVsAll(X, y, num_labels=10, lambda_=lambda_)

In [None]:
# [Y] Predict Using One-vs-All Classifier
def predictOneVsAll(all_theta, X):
    """Predict a label for each example from the one-vs-all parameters.

    Parameters
    ----------
    all_theta : numpy.ndarray
        2-D array with one classifier's parameters per row (intercept first).
    X : numpy.ndarray
        2-D feature matrix with one example per row, without an intercept
        column.

    Returns
    -------
    numpy.ndarray
        1-D integer array; entry ``i`` is the row index of ``all_theta``
        whose linear score for example ``i`` is largest.
    """
    num_examples = X.shape[0]
    # Prepend the intercept column of ones.
    bias_column = np.ones((num_examples, 1))
    X_with_bias = np.concatenate((bias_column, X), axis=1)
    # Raw linear scores, one column per classifier (no sigmoid is applied).
    class_scores = X_with_bias @ all_theta.T
    return class_scores.argmax(axis=1)

# Classify every training example with the fitted one-vs-all parameters.
pred = predictOneVsAll(all_theta, X)
# Percentage of examples whose predicted label equals the true label.
accuracy = 100 * np.mean(pred == y)
print(f'Training Set Accuracy: {accuracy:.2f}%')