In [2]:
import numpy as np
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, log_loss
from sklearn.preprocessing import StandardScaler, OneHotEncoder



In [None]:
# 4 - 2

# Fetch the 'mnist_784' dataset (version 1) from OpenML.
dataset = fetch_openml('mnist_784', version=1)

# Features: cast to float32, divide by 255 (presumably 8-bit pixel values, so
# this maps them into [0, 1] -- TODO confirm), then subtract each column's mean.
# NOTE(review): the mean is taken over the full dataset, before the train/test
# split below, so held-out rows contribute to the centering statistics.
X = dataset.data.astype('float32')
X = X / 255.0
X = X - X.mean(axis=0)

# Labels: integer class ids reshaped to an (n_samples, 1) column, the 2-D
# layout OneHotEncoder is fitted on below.
y = dataset.target.astype('int')
y = y.to_numpy()
y = y.reshape(len(y), 1)

# scikit-learn 1.2 renamed OneHotEncoder's `sparse` argument to `sparse_output`
# and 1.4 removed the old spelling. Try the new keyword first and fall back to
# the old one so a dense one-hot matrix is produced on either version.
try:
    onehot = OneHotEncoder(sparse_output=False)
except TypeError:
    onehot = OneHotEncoder(sparse=False)
y_onehot = onehot.fit_transform(y)

# Hold out 20% of the samples; the fixed random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y_onehot, test_size = 0.2, random_state=72)
# Network dimensions: d input features -> d1 hidden units -> k output classes.
d = X_train.shape[1]
d1 = 300
# One output unit per one-hot label column (instead of a hard-coded 10), so
# W2 always matches the width of y_train.
k = y_train.shape[1]

# Draw the initial weights from a seeded generator so repeated runs start from
# the same point; the seed mirrors the random_state used for the data split.
# The 0.01 factor keeps the initial weights small.
rng = np.random.default_rng(72)
W1 = rng.standard_normal((d1, d)) * .01
W2 = rng.standard_normal((k, d1)) * .01

# Optimisation settings for the mini-batch gradient descent loop.
lr = .01
epochs = 10
batch_size = 32

def _sigmoid(z):
    """Element-wise logistic function 1 / (1 + exp(-z))."""
    return 1 / (1 + np.exp(-z))


def _softmax(z):
    """Column-wise softmax of a (classes, samples) score matrix.

    The per-column maximum is subtracted before exponentiating. This does not
    change the mathematical result but keeps np.exp from overflowing on large
    scores.
    """
    shifted = z - np.max(z, axis=0, keepdims=True)
    exp_z = np.exp(shifted)
    return exp_z / np.sum(exp_z, axis=0, keepdims=True)


def _forward(X, W1, W2):
    """Run the two-layer (sigmoid hidden, softmax output) network on X.

    Args:
        X: (n_samples, d) feature matrix, one sample per row.
        W1: (d1, d) hidden-layer weights.
        W2: (k, d1) output-layer weights.

    Returns:
        Tuple (a1, y_hat): hidden activations of shape (d1, n_samples) and
        class probabilities of shape (k, n_samples); samples are columns.
    """
    a1 = _sigmoid(np.dot(W1, X.T))
    y_hat = _softmax(np.dot(W2, a1))
    return a1, y_hat


# Work on plain ndarrays so the row slicing below behaves the same whether the
# split produced pandas objects or arrays.
X_train_arr = np.asarray(X_train)
y_train_arr = np.asarray(y_train)
n_train = X_train_arr.shape[0]

for epoch in range(epochs):
    # One pass over the training set in consecutive mini-batches.
    for i in range(0, n_train, batch_size):
        X_batch = X_train_arr[i:i + batch_size]
        y_batch = y_train_arr[i:i + batch_size]
        # Actual number of rows: the final batch is shorter than batch_size
        # when the training set size is not a multiple of it.
        m = X_batch.shape[0]

        a1, y_hat = _forward(X_batch, W1, W2)

        # Backward pass. With softmax + cross-entropy the gradient w.r.t. the
        # output scores is (prediction - one-hot target).
        dz2 = y_hat - y_batch.T
        dW2 = np.dot(dz2, a1.T) / m
        da1 = np.dot(W2.T, dz2)
        dz1 = da1 * a1 * (1 - a1)  # sigmoid'(z1) expressed through a1
        dW1 = np.dot(dz1, X_batch) / m

        # Both gradients are computed before either weight matrix is updated.
        W1 -= lr * dW1
        W2 -= lr * dW2

    # Evaluate on the full training set once per epoch (single forward pass).
    _, y_hat_train = _forward(X_train_arr, W1, W2)
    train_loss = log_loss(y_train_arr, y_hat_train.T)
    train_acc = accuracy_score(np.argmax(y_train_arr, axis = 1), np.argmax(y_hat_train, axis = 0))

    print(f'Epoch {epoch + 1} -- Loss: {train_loss} -- Accuracy: {train_acc}')