In [1]:
import numpy as np
from torchvision.datasets import MNIST

def download_mnist(is_train: bool):
    dataset = MNIST(root='./data',
    transform=lambda x: np.array(x).flatten(),
    download=True,
    train=is_train)
    mnist_data = []
    mnist_labels = []
    for image, label in dataset:
        mnist_data.append(image)
        mnist_labels.append(label)
    return mnist_data, mnist_labels

train_X, train_Y = download_mnist(True)
test_X, test_Y = download_mnist(False)

train_X = np.array(train_X)
test_X = np.array(test_X)

train_Y = np.array(train_Y)
test_Y = np.array(test_Y)

In [2]:
def one_hot_encode(labels, num_classes=10):
    return np.eye(num_classes)[labels]

def normalize_data(data):
    return data / 255.0

In [3]:
train_X = normalize_data(train_X)
test_X = normalize_data(test_X)

train_Y_encoded = one_hot_encode(train_Y)
test_Y_encoded = one_hot_encode(test_Y)

In [4]:
def softmax(z):
    exp_z = np.exp(z)
    return exp_z / np.sum(exp_z, axis=1, keepdims=True)


def forward_propagation(X, W, b):
    z = np.dot(X, W) + b
    return softmax(z)

In [5]:
def cross_entropy_loss(y_pred, y_true):
    return -np.mean(np.sum(y_true * np.log(y_pred + 1e-8), axis=1))


def compute_gradients(X, y_true, y_pred):
    m = X.shape[0]
    grad_W = np.dot(X.T, (y_true - y_pred)) / m
    grad_b = np.mean(y_true - y_pred, axis=0)
    return grad_W, grad_b

In [6]:
# np.random.seed(4)
input_size = 784
output_size = 10
learning_rate = 0.01
num_epochs = 500
batch_size = 100

W = np.random.randn(input_size, output_size) * 0.01
b = np.zeros(output_size)

In [7]:
for epoch in range(num_epochs):
    indices = np.arange(train_X.shape[0])
    train_X = train_X[indices]
    train_Y_encoded = train_Y_encoded[indices]

    for start_idx in range(0, train_X.shape[0], batch_size):
        end_idx = min(start_idx + batch_size, train_X.shape[0])
        batch_X = train_X[start_idx:end_idx]
        batch_Y = train_Y_encoded[start_idx:end_idx]

        y_pred = forward_propagation(batch_X, W, b)

        grad_W, grad_b = compute_gradients(batch_X, batch_Y, y_pred)

        W += learning_rate * grad_W
        b += learning_rate * grad_b

    train_predictions = forward_propagation(train_X, W, b)
    train_loss = cross_entropy_loss(train_predictions, train_Y_encoded)
    print(f"Epoch {epoch + 1}/{num_epochs}, Loss: {train_loss:.4f}")

Epoch 1/500, Loss: 0.7402
Epoch 2/500, Loss: 0.5727
Epoch 3/500, Loss: 0.5047
Epoch 4/500, Loss: 0.4662
Epoch 5/500, Loss: 0.4406
Epoch 6/500, Loss: 0.4222
Epoch 7/500, Loss: 0.4080
Epoch 8/500, Loss: 0.3967
Epoch 9/500, Loss: 0.3874
Epoch 10/500, Loss: 0.3796
Epoch 11/500, Loss: 0.3729
Epoch 12/500, Loss: 0.3671
Epoch 13/500, Loss: 0.3619
Epoch 14/500, Loss: 0.3573
Epoch 15/500, Loss: 0.3532
Epoch 16/500, Loss: 0.3495
Epoch 17/500, Loss: 0.3461
Epoch 18/500, Loss: 0.3430
Epoch 19/500, Loss: 0.3401
Epoch 20/500, Loss: 0.3375
Epoch 21/500, Loss: 0.3350
Epoch 22/500, Loss: 0.3327
Epoch 23/500, Loss: 0.3306
Epoch 24/500, Loss: 0.3286
Epoch 25/500, Loss: 0.3267
Epoch 26/500, Loss: 0.3249
Epoch 27/500, Loss: 0.3232
Epoch 28/500, Loss: 0.3216
Epoch 29/500, Loss: 0.3201
Epoch 30/500, Loss: 0.3187
Epoch 31/500, Loss: 0.3173
Epoch 32/500, Loss: 0.3160
Epoch 33/500, Loss: 0.3148
Epoch 34/500, Loss: 0.3136
Epoch 35/500, Loss: 0.3124
Epoch 36/500, Loss: 0.3113
Epoch 37/500, Loss: 0.3103
Epoch 38/5

In [8]:
def evaluate_accuracy(X, Y, W, b):
    y_pred = forward_propagation(X, W, b)
    predicted_labels = np.argmax(y_pred, axis=1)
    true_labels = np.argmax(Y, axis=1)
    return np.mean(predicted_labels == true_labels)

In [10]:
train_accuracy = evaluate_accuracy(train_X, train_Y_encoded, W, b)
test_accuracy = evaluate_accuracy(test_X, test_Y_encoded, W, b)

print(f"Training Accuracy: {train_accuracy * 100:.2f}%")
print(f"Testing Accuracy: {test_accuracy * 100:.2f}%")

Training Accuracy: 93.20%
Testing Accuracy: 92.51%
