In [1]:
import numpy as np
import pandas as pd
import scipy.io
from scipy.special import expit

In [2]:
# loadmat returns a dict keyed by the variable names stored in the .mat file.
data = scipy.io.loadmat("../data/ex3data1.mat")
raw_X, raw_y = data['X'], data['y']

# Relabel every 10 as 0, in place (raw_y is the same array object as data['y']).
# NOTE(review): presumably the dataset encodes the digit 0 as label 10 — confirm.
np.putmask(raw_y, raw_y == 10, 0)

# Append a constant column of ones as the LAST feature so that the final
# coefficient of each classifier acts as the intercept term.
raw_X = np.concatenate(
    (raw_X, np.ones((raw_X.shape[0], 1), dtype=raw_X.dtype)),
    axis=1,
)

In [3]:
### One-vs-all logistic regression fitted with scipy.optimize ###

In [4]:
from scipy.optimize import minimize
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

def logistic_sigmoid(z):
    """Element-wise logistic function ``1 / (1 + exp(-z))``.

    Delegates to ``scipy.special.expit`` instead of evaluating the formula
    literally: the literal form computes ``exp(-z)``, which overflows (and
    emits a RuntimeWarning) for large negative ``z``.

    Args:
        z: Scalar or array-like of real values.

    Returns:
        The logistic function of ``z``, with the same shape as ``z``.
    """
    return expit(z)

def cross_entropy_loss_regularized(theta, X, y, lambda_reg=0.1):
    """Mean binary cross-entropy of a logistic model plus an L2 penalty.

    The penalty is ``lambda_reg / (2 * m) * sum(theta[:-1] ** 2)``; the last
    coefficient is excluded from it.

    Args:
        theta: Coefficient vector with one entry per column of ``X``
            (any shape that flattens to that length).
        X: Design matrix of shape ``(m, n)``.
        y: Binary targets (0/1), either shape ``(m,)`` or ``(m, 1)``.
        lambda_reg: L2 regularization strength.

    Returns:
        The regularized loss as a scalar.
    """
    theta = np.asarray(theta, dtype=float).reshape(-1, 1)
    # Force a column vector: a 1-D y would broadcast against the (m, 1)
    # scores into an (m, m) matrix and silently yield a wrong loss.
    y = np.asarray(y, dtype=float).reshape(-1, 1)
    m = y.shape[0]

    scores = np.dot(X, theta)
    # Work in log space so saturated scores never hit log(0):
    #   log(sigmoid(s))     = -logaddexp(0, -s)
    #   log(1 - sigmoid(s)) = -logaddexp(0,  s)
    log_p = -np.logaddexp(0, -scores)
    log_one_minus_p = -np.logaddexp(0, scores)

    data_term = -np.mean(y * log_p + (1 - y) * log_one_minus_p)
    regularization_term = (lambda_reg / (2 * m)) * np.sum(theta[:-1] ** 2)
    return data_term + regularization_term

def compute_gradient_regularized(theta, X, y, learning_rate=0.004, lambda_reg=0.1, num_epochs=100):
    """Gradient of the L2-regularized logistic cross-entropy w.r.t. ``theta``.

    Computes ``X.T @ (sigmoid(X @ theta) - y) / m`` and adds
    ``(lambda_reg / m) * theta`` to every component except the last one,
    which is not regularized.

    Args:
        theta: Coefficient vector with one entry per column of ``X``.
        X: Design matrix of shape ``(m, n)``.
        y: Binary targets (0/1), either shape ``(m,)`` or ``(m, 1)``.
        learning_rate: Unused; kept so existing callers keep working.
        lambda_reg: L2 regularization strength.
        num_epochs: Unused; kept so existing callers keep working.

    Returns:
        1-D array of length ``n`` holding the gradient.
    """
    theta = np.asarray(theta, dtype=float).reshape(-1, 1)
    # Force a column vector: with a 1-D y, ``predictions - y`` would broadcast
    # to (m, m) and the flattened result would have the wrong length.
    y = np.asarray(y, dtype=float).reshape(-1, 1)
    m = y.shape[0]

    residuals = logistic_sigmoid(np.dot(X, theta)) - y
    gradients = np.dot(np.transpose(X), residuals) / m
    # Penalize every coefficient except the last.
    gradients[:-1] += (lambda_reg / m) * theta[:-1]
    return gradients.flatten()

def one_vs_all(X, y, num_labels=10, lambda_reg=0.1):
    """Fit one regularized logistic classifier per label (one-vs-all).

    For each label ``k`` in ``range(num_labels)`` the targets are binarized
    (1 where ``y == k``, else 0) and the regularized cross-entropy is
    minimized with SciPy's TNC method, starting from all-zero coefficients.

    Args:
        X: Design matrix of shape ``(m, n)``.
        y: Integer labels, either shape ``(m,)`` or ``(m, 1)``.
        num_labels: Number of classes; labels are expected in
            ``0 .. num_labels - 1``.
        lambda_reg: L2 regularization strength forwarded to both the loss
            and its gradient.

    Returns:
        Array of shape ``(num_labels, n)``; row ``k`` holds the coefficients
        of the classifier for label ``k``.
    """
    n_features = X.shape[1]
    # Normalize labels to a column so every binary target is (m, 1),
    # whatever shape the caller passed in.
    labels = np.asarray(y).reshape(-1, 1)
    all_theta = np.zeros((num_labels, n_features))

    for label in range(num_labels):
        binary_y = np.where(labels == label, 1, 0)

        # lambda_reg is forwarded by keyword: the loss and the gradient take
        # it at different positions, so a shared ``args`` tuple cannot carry it.
        def objective(theta, target=binary_y):
            return cross_entropy_loss_regularized(theta, X, target, lambda_reg=lambda_reg)

        def objective_gradient(theta, target=binary_y):
            return compute_gradient_regularized(theta, X, target, lambda_reg=lambda_reg)

        result = minimize(fun=objective,
                          x0=np.zeros(n_features),
                          jac=objective_gradient,
                          method='TNC')
        all_theta[label] = result.x

    return all_theta

def predict_one_vs_all(all_theta, X):
    """Predict the label of each row of ``X`` from one-vs-all classifiers.

    Args:
        all_theta: Array of shape ``(num_labels, n)``; one coefficient row
            per label.
        X: Design matrix of shape ``(m, n)``.

    Returns:
        1-D integer array of length ``m`` with the index of the
        highest-scoring classifier for each row.
    """
    # The sigmoid is monotonic, so the argmax of the raw linear scores equals
    # the argmax of the probabilities. Skipping it also avoids ties when
    # several probabilities saturate to exactly 1.0 (or 0.0).
    scores = np.dot(X, np.transpose(all_theta))
    return np.argmax(scores, axis=1)


# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    raw_X,
    raw_y,
    test_size=0.2,
    random_state=42,
)
print(X_train.shape)

# Fit the per-label classifiers on the training portion only.
all_theta = one_vs_all(X=X_train, y=y_train)

# Predicted label for every held-out row.
preds = predict_one_vs_all(all_theta=all_theta, X=X_test)

# Fraction of held-out rows whose predicted label matches the true label.
accuracy = accuracy_score(y_true=y_test, y_pred=preds)
print("Test Set Accuracy:", accuracy)

(4000, 401)
Test Set Accuracy: 0.903
