## Training Models Exercises

12. Batch GD for a softmax regression from scratch

Loading the iris dataset

In [127]:
import numpy as np
from sklearn.datasets import load_iris

iris = load_iris(as_frame=True)
X = iris.data[["petal length (cm)", "petal width (cm)"]].values
y = iris["target"]


Data prepoccessing (changed to author's method because initially used sklearn stuff)

In [128]:
def to_one_hot(y):
    n_classes = y.unique().shape[0]
    m = len(y)
    Y_one_hot = np.zeros((m, n_classes))
    Y_one_hot[np.arange(m), y] = 1
    return Y_one_hot

X = np.c_[np.ones([len(X), 1]), X]
rnd_indices = np.random.permutation(len(X))
X_test = X[rnd_indices[:30]]
y_test_onehot = to_one_hot(y[rnd_indices[:30]])
X_train = X[rnd_indices[30:]]
y_train_onehot = to_one_hot(y[rnd_indices[30:]])
y_test = y[rnd_indices[:30]]

Regular batch GD, without early stopping

In [129]:
def softmax_function(theta, x):
    scores = x.dot(theta)
    exp_scores = np.exp(scores)
    return exp_scores / np.sum(exp_scores, axis=1, keepdims=True)
eta = 0.01
n_epochs = 10001
m = len(X)
theta = np.random.randn(3, X_train.shape[1])
for epoch in range(n_epochs):
    gradients = 1 / m * X_train.T.dot(softmax_function(theta, X_train) - y_train_onehot)
    theta = theta - eta * gradients
theta

array([[ 3.79290443, -0.95195374, -3.74163003],
       [-1.20250358,  0.43128873, -0.13282586],
       [-1.41644888, -0.81589018,  2.60360352]])

In [130]:
Y_proba = softmax_function(theta, X_test)
y_predict = np.argmax(Y_proba, axis=1)

accuracy_score = np.mean(y_predict == y_test)

accuracy_score

np.float64(0.8666666666666667)

Implementing early stopping

In [131]:
eta = 0.01
n_epochs = 1000001
m = len(X)
alpha = 0.1

minimum_loss = float("inf")
theta = np.random.randn(3, X_train.shape[1])

for epoch in range(n_epochs):
    probabilities = softmax_function(theta, X_train)
    gradients = 1 / m * X_train.T.dot(probabilities - y_train_onehot) + np.r_[np.zeros([1, 3]), alpha * theta[1:]]
    theta = theta - eta * gradients
    loss = -np.mean(np.sum(y_train_onehot * np.log(probabilities + 1e-7), axis=1)) + (alpha / 2 * np.sum(theta[1:] ** 2))
    if loss < minimum_loss:
        minimum_loss = loss
    else:
        print(epoch)
        break

298509


In [132]:
Y_proba = softmax_function(theta, X_test)
y_predict = np.argmax(Y_proba, axis=1)

accuracy_score = np.mean(y_predict == y_test)

accuracy_score

np.float64(0.9)