In [37]:
from tensorflow.keras.datasets import mnist
import numpy as np
# Load MNIST through Keras. The call returns a (train, test) pair, and each
# element unpacks into an image array and its matching label array.
(X_train, y_train), (X_test, y_test) = mnist.load_data()

In [38]:
# Flatten each 28x28 image into a single 784-long row and divide by 255.0
# (presumably mapping 8-bit pixel intensities into [0, 1] -- confirm the
# dtype returned by the loader).
# NOTE: both names are rebound to their processed versions, so running this
# cell a second time would divide the pixel values by 255 again.
X_train = X_train.reshape(-1, 28 * 28) / 255.0
X_test = X_test.reshape(-1, 28 * 28) / 255.0

In [39]:
def one_hot_encode(y, num_classes=10):
    """Convert integer class labels into one-hot rows.

    Args:
        y: Integer label(s) used to index the rows of an identity matrix.
            Typically a 1-D integer array; every label must lie in
            ``[0, num_classes)``.
        num_classes: Width of each one-hot row (default 10).

    Returns:
        A float array holding, for each label, the matching row of
        ``np.eye(num_classes)``.

    Raises:
        IndexError: If any integer label is negative, or (raised by NumPy
            itself) if a label is ``>= num_classes``.
    """
    labels = np.asarray(y)
    # Negative integers are valid NumPy indices that count from the end, so
    # without this guard a label of -1 would silently be encoded as the last
    # class instead of being reported as invalid.
    if (
        labels.size
        and np.issubdtype(labels.dtype, np.integer)
        and labels.min() < 0
    ):
        raise IndexError(
            f"labels must be in [0, {num_classes}); got minimum {labels.min()}"
        )
    # Index with the caller's original object so every previously valid input
    # keeps exactly the same result.
    return np.eye(num_classes)[y]
# Replace both label arrays with their one-hot encodings (num_classes is left
# at its default of 10).
# NOTE: y_train / y_test are rebound to the encoded arrays, so this cell is not
# idempotent -- re-running it would pass the already-encoded arrays back into
# one_hot_encode.
y_train = one_hot_encode(y_train)
y_test = one_hot_encode(y_test)

In [40]:
def initialize_parameters(input_size, hidden_size, output_size, seed=None):
    """Create the initial weights and biases of the two-layer network.

    Weights are standard-normal draws scaled by ``sqrt(2 / fan_in)``; biases
    start at zero.

    Args:
        input_size: Number of input features (rows of ``W1``).
        hidden_size: Number of hidden units (columns of ``W1``, rows of ``W2``).
        output_size: Number of output units (columns of ``W2``).
        seed: Optional seed for reproducible weights. Anything accepted by
            ``np.random.default_rng`` works (an int, or an existing
            ``Generator``). When ``None`` (the default) the draws come from
            NumPy's global random state, exactly as before, so a preceding
            ``np.random.seed(...)`` call is still honoured.

    Returns:
        Tuple ``(W1, b1, W2, b2)`` with shapes ``(input_size, hidden_size)``,
        ``(1, hidden_size)``, ``(hidden_size, output_size)`` and
        ``(1, output_size)``.
    """
    if seed is None:
        # Legacy path: keep using the global RandomState so existing callers
        # (and any np.random.seed they set) behave exactly as they did.
        sample_normal = np.random.randn
    else:
        generator = np.random.default_rng(seed)

        def sample_normal(rows, cols):
            return generator.standard_normal((rows, cols))

    W1 = sample_normal(input_size, hidden_size) * np.sqrt(2.0 / input_size)
    b1 = np.zeros((1, hidden_size))
    W2 = sample_normal(hidden_size, output_size) * np.sqrt(2.0 / hidden_size)
    b2 = np.zeros((1, output_size))
    return W1, b1, W2, b2

In [41]:
def relu(Z):
    """Rectified linear unit: the element-wise maximum of 0 and ``Z``.

    Args:
        Z: Scalar or array of pre-activation values.

    Returns:
        ``Z`` with every negative entry replaced by 0.
    """
    rectified = np.maximum(0, Z)
    return rectified

In [42]:
def softmax(Z):
    """Row-wise softmax of a 2-D array of scores.

    Args:
        Z: Array whose axis 1 indexes the classes (one row per sample).

    Returns:
        Array of the same shape in which each row is non-negative and sums
        to 1.
    """
    # Subtracting each row's maximum leaves the result unchanged but keeps
    # the exponentials from overflowing for large scores.
    row_peak = np.max(Z, axis=1, keepdims=True)
    unnormalized = np.exp(Z - row_peak)
    row_total = np.sum(unnormalized, axis=1, keepdims=True)
    return unnormalized / row_total

In [43]:
def forward_propagation(X, W1, b1, W2, b2):
    """Run one forward pass through the two-layer network.

    The hidden layer is an affine map followed by ``relu``; the output layer
    is an affine map followed by ``softmax``.

    Args:
        X: Input batch, one sample per row.
        W1, b1: Hidden-layer weights and bias.
        W2, b2: Output-layer weights and bias.

    Returns:
        Tuple ``(Z1, A1, Z2, A2)``: hidden pre-activation, hidden activation,
        output pre-activation and softmax output.
    """
    hidden_pre = np.dot(X, W1) + b1
    hidden_act = relu(hidden_pre)
    output_pre = np.dot(hidden_act, W2) + b2
    output_prob = softmax(output_pre)
    return hidden_pre, hidden_act, output_pre, output_prob

In [50]:
def backward_propagation(X, y, Z1, A1, Z2, A2, W1, W2, lambda_reg=0.01):
    """Compute parameter gradients for the two-layer network.

    The output-layer error is taken as ``A2 - y`` (the form the gradient has
    for a softmax output paired with a cross-entropy loss). Each weight
    gradient is the batch-averaged gradient plus ``lambda_reg * W``; the bias
    gradients carry no regularisation term.

    Args:
        X: Input batch; ``X.shape[0]`` is used as the batch size.
        y: Targets with the same shape as ``A2``.
        Z1: Hidden pre-activations; only their sign is used (ReLU mask).
        A1: Hidden activations.
        Z2: Output pre-activations. Accepted but not used by this function.
        A2: Network outputs.
        W1: Hidden-layer weights (used only for the regularisation term).
        W2: Output-layer weights.
        lambda_reg: Coefficient of the weight penalty (default 0.01).

    Returns:
        Tuple ``(dW1, db1, dW2, db2)``.
    """
    batch_size = X.shape[0]

    # Output layer.
    output_error = A2 - y
    grad_W2 = np.dot(A1.T, output_error) / batch_size + lambda_reg * W2
    grad_b2 = np.sum(output_error, axis=0, keepdims=True) / batch_size

    # Hidden layer: the error only flows through units whose pre-activation
    # was positive.
    active_units = Z1 > 0
    hidden_error = np.dot(output_error, W2.T) * active_units
    grad_W1 = np.dot(X.T, hidden_error) / batch_size + lambda_reg * W1
    grad_b1 = np.sum(hidden_error, axis=0, keepdims=True) / batch_size

    return grad_W1, grad_b1, grad_W2, grad_b2

In [51]:
def update_parameters(W1, b1, W2, b2, dW1, db1, dW2, db2, learning_rate):
    """Take one gradient-descent step on every parameter.

    Each parameter has ``learning_rate * gradient`` subtracted with an
    augmented assignment, so array arguments are modified in place and the
    same objects are handed back.

    Args:
        W1, b1, W2, b2: Current parameters.
        dW1, db1, dW2, db2: Matching gradients.
        learning_rate: Step size.

    Returns:
        Tuple ``(W1, b1, W2, b2)`` after the step.
    """
    stepped = []
    pairs = zip((W1, b1, W2, b2), (dW1, db1, dW2, db2))
    for param, grad in pairs:
        # Augmented assignment keeps the in-place update for arrays.
        param -= learning_rate * grad
        stepped.append(param)
    return tuple(stepped)

In [54]:
# Layer widths: flattened 28x28 input, one hidden layer, 10 output units.
input_size, hidden_size, output_size = 28 * 28, 256, 10

# Gradient-descent settings: step size and number of training epochs.
learning_rate, epochs = 0.06, 150

In [55]:
# Seed NumPy's global random state before the weights are drawn so that the
# initial parameters -- and therefore every accuracy printed below -- are the
# same on each Restart & Run All.
np.random.seed(42)
W1, b1, W2, b2 = initialize_parameters(input_size, hidden_size, output_size)

# The integer training labels do not change between epochs, so recover them
# from the one-hot targets once instead of on every logging step.
train_labels = np.argmax(y_train, axis=1)

# Full-batch gradient descent: every epoch runs one forward and one backward
# pass over the whole training set, followed by a single parameter update.
for epoch in range(epochs):
    Z1, A1, Z2, A2 = forward_propagation(X_train, W1, b1, W2, b2)
    dW1, db1, dW2, db2 = backward_propagation(X_train, y_train, Z1, A1, Z2, A2, W1, W2)
    W1, b1, W2, b2 = update_parameters(W1, b1, W2, b2, dW1, db1, dW2, db2, learning_rate)
    if epoch % 10 == 0:
        # A2 was produced before this epoch's update, so the figure reported
        # is the training accuracy of the parameters the epoch started with.
        predictions = np.argmax(A2, axis=1)
        accuracy = np.mean(predictions == train_labels)
        print(f"Epoch {epoch}, Accuracy: {accuracy}")

Epoch 0, Accuracy: 0.10598333333333333
Epoch 10, Accuracy: 0.5884333333333334
Epoch 20, Accuracy: 0.725
Epoch 30, Accuracy: 0.7744166666666666
Epoch 40, Accuracy: 0.8026166666666666
Epoch 50, Accuracy: 0.82155
Epoch 60, Accuracy: 0.83505
Epoch 70, Accuracy: 0.8442333333333333
Epoch 80, Accuracy: 0.8508166666666667
Epoch 90, Accuracy: 0.8573
Epoch 100, Accuracy: 0.86235
Epoch 110, Accuracy: 0.8665166666666667
Epoch 120, Accuracy: 0.87025
Epoch 130, Accuracy: 0.87295
Epoch 140, Accuracy: 0.8755
