<a href="https://colab.research.google.com/github/nfilipas/handson-ml3/blob/main/exercises/chapter4/ex12.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [56]:
from sklearn.datasets import load_iris
from copy import deepcopy
from matplotlib import pyplot as plt

import numpy as np

In [2]:
# Fetch the iris dataset; as_frame=True requests pandas objects, which the
# splitting helper further down relies on (it indexes with .iloc).
iris = load_iris(as_frame=True)

# Feature table (X) and class labels (y)
X, y = iris.data, iris.target

In [3]:
def shuffle_and_split_data(data, test_ratio, seed):
    """Randomly split *data* into a training part and a test part.

    The global NumPy random generator is re-seeded with *seed* on every
    call, so two calls with the same seed on inputs of equal length pick
    the same row positions. This is what keeps separately split features
    and targets aligned.

    Args:
        data: Object supporting ``len()`` and positional ``.iloc``
            indexing (e.g. a pandas DataFrame or Series).
        test_ratio: Fraction of the rows assigned to the test part; the
            resulting row count is truncated towards zero.
        seed: Seed passed to ``np.random.seed``.

    Returns:
        A ``(train, test)`` tuple holding the selected rows of *data*.
    """
    n_rows = len(data)

    np.random.seed(seed)
    order = np.random.permutation(n_rows)

    # The first n_test shuffled positions form the test part, the rest train.
    n_test = int(n_rows * test_ratio)
    train_rows = data.iloc[order[n_test:]]
    test_rows = data.iloc[order[:n_test]]
    return train_rows, test_rows

In [4]:
# Two-stage split: first hold out the test set, then carve the validation
# set out of what remains. Features and targets are split by separate calls
# that share the same seed, so both receive the same row permutation.

seed = 42

(X_train_val, X_test), (y_train_val, y_test) = (
    shuffle_and_split_data(part, 0.2, seed) for part in (X, y)
)

(X_train, X_val), (y_train, y_val) = (
    shuffle_and_split_data(part, 0.2, seed) for part in (X_train_val, y_train_val)
)

In [43]:
class softmax_regressor:
    """Softmax regression classifier trained with batch gradient descent.

    Attributes set by ``train``:
        theta: Parameter matrix of shape (n_classes, n_features + 1); the
            first column holds the bias terms.
        classes_: Sorted array of the distinct labels seen during training.
            Row ``k`` of ``theta`` belongs to ``classes_[k]``.
        trained: True once ``train`` has completed.
    """

    def __init__(self):
        self.trained = False
        self.classes_ = None

    @staticmethod
    def _softmax(scores):
        """Return column-wise softmax of a (n_classes, n_instances) matrix."""
        # Subtracting the per-column maximum leaves the result unchanged
        # mathematically but keeps np.exp from overflowing to inf (which
        # would otherwise turn the probabilities, and then theta, into NaN).
        shifted = scores - scores.max(axis=0, keepdims=True)
        exp_scores = np.exp(shifted)
        return exp_scores / exp_scores.sum(axis=0, keepdims=True)

    def train(self, X, y, n_epochs, eta, seed):
        """Fit the model with full-batch gradient descent on cross-entropy.

        Args:
            X: 2-D feature table (pandas DataFrame or array-like) with one
                row per instance.
            y: 1-D labels (pandas Series or array-like), one per row of X.
                Labels may be any values accepted by ``np.unique``.
            n_epochs: Number of gradient steps; 0 leaves theta at its
                random initial value.
            eta: Learning rate.
            seed: Seed for the random initialization of theta (re-seeds the
                global NumPy random generator).

        Raises:
            ValueError: If X is not 2-D, is empty, or y does not hold
                exactly one label per row of X.
        """
        X_arr = np.asarray(X, dtype=float)
        y_arr = np.asarray(y)
        if X_arr.ndim != 2:
            raise ValueError("X must be two-dimensional.")
        m, n = X_arr.shape  # number of instances, number of features
        if m == 0:
            raise ValueError("X must contain at least one instance.")
        if y_arr.ndim != 1 or len(y_arr) != m:
            raise ValueError("y must hold exactly one label per row of X.")

        self.classes_ = np.unique(y_arr)
        K = len(self.classes_)  # number of classes

        # random initialization of matrix theta
        np.random.seed(seed)
        self.theta = np.random.randn(K, n + 1)

        # extend X with a leading column of ones for the bias term
        X_ext = np.concatenate((np.ones((m, 1)), X_arr), axis=1)

        # one-hot targets: classes on rows, instances on columns
        y_matrix = (y_arr[np.newaxis, :] == self.classes_[:, np.newaxis]).astype(float)

        for _ in range(n_epochs):
            # class probabilities, shape (K, m)
            p_hat = self._softmax(self.theta @ X_ext.T)

            # cross-entropy gradient averaged over the batch, shape (K, n + 1)
            gradients = (p_hat - y_matrix) @ X_ext / m

            # make a step
            self.theta -= eta * gradients

        self.trained = True

    def predict(self, X):
        """Return the most likely class label for every row of X.

        Args:
            X: 2-D feature table with the same columns used for training.

        Returns:
            1-D array with one predicted label per row of X.

        Raises:
            ValueError: If the classifier has not been trained.
        """
        if not self.trained:
            raise ValueError("Classifier has not been trained yet.")

        X_arr = np.asarray(X, dtype=float)
        m = len(X_arr)
        X_ext = np.concatenate((np.ones((m, 1)), X_arr), axis=1)
        # Softmax is monotonic, so the largest raw score is the largest
        # probability; no need to normalize before taking argmax.
        best = np.argmax(self.theta @ X_ext.T, axis=0)
        if self.classes_ is None:
            # theta was set without train(); fall back to row indices
            return best
        return self.classes_[best]


In [None]:
# Early stopping by exhaustive search: for every epoch budget below
# n_epochs_tot, train a fresh classifier from the same seed, record its
# training and validation accuracy, and keep a copy of the first model that
# reaches the highest validation accuracy.
n_epochs_tot = 1000
eta = 0.1
seed = 42
accuracy_train_all, accuracy_val_all = [], []
best_accuracy_val = 0

for n_epochs in range(n_epochs_tot):
    softmax_clf = softmax_regressor()
    softmax_clf.train(X_train, y_train, n_epochs=n_epochs, eta=eta, seed=seed)

    # fraction of correctly classified training instances
    y_train_pred = softmax_clf.predict(X_train)
    accuracy_train = np.sum(y_train == y_train_pred) / len(y_train)
    accuracy_train_all.append(accuracy_train)

    # fraction of correctly classified validation instances
    y_val_pred = softmax_clf.predict(X_val)
    accuracy_val = np.sum(y_val == y_val_pred) / len(y_val)
    accuracy_val_all.append(accuracy_val)

    # strict comparison: ties keep the model with the smaller epoch budget
    if not accuracy_val > best_accuracy_val:
        continue
    best_accuracy_val = accuracy_val
    best_model = deepcopy(softmax_clf)

In [None]:
# Training vs. validation accuracy for each epoch budget tried above
epochs = range(n_epochs_tot)

plt.figure(figsize=(16, 6))
plt.plot(epochs, accuracy_train_all, label="Training set")
plt.plot(epochs, accuracy_val_all, label="Validation set", alpha=0.7)
plt.title("Early stopping")
plt.xlabel("Epoch")
plt.ylabel("Accuracy")
plt.legend()
plt.grid()
plt.show()

In [None]:
# Accuracy of the model selected on the validation set, measured on the
# held-out test set
y_test_pred = best_model.predict(X_test)
n_correct = np.sum(y_test == y_test_pred)
accuracy_test = n_correct / len(y_test)
print(accuracy_test)