In [3]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.datasets import fetch_openml

mnist = fetch_openml('mnist_784')
X, y = mnist.data.astype('float32'), mnist.target.astype('int')
X /= 255.0
encoder = OneHotEncoder(sparse=False)
y_onehot = encoder.fit_transform(y.to_numpy().reshape(-1, 1))
X_train, X_test, y_train, y_test = train_test_split(X, y_onehot, test_size=0.2, random_state=42)
input_size = 784
hidden_size1 = 128
hidden_size2 = 64
output_size = 10
learning_rate = 0.01
epochs = 10

np.random.seed(42)
weights = {
    'W1': np.random.randn(input_size, hidden_size1),
    'W2': np.random.randn(hidden_size1, hidden_size2),
    'W3': np.random.randn(hidden_size2, output_size),
}

biases = {
    'b1': np.zeros((1, hidden_size1)),
    'b2': np.zeros((1, hidden_size2)),
    'b3': np.zeros((1, output_size)),
}


def sigmoid(x):
    return 1 / (1 + np.exp(-x))


def softmax(x):
    exp_x = np.exp(x - np.max(x, axis=1, keepdims=True))
    return exp_x / np.sum(exp_x, axis=1, keepdims=True)


def cross_entropy_loss(y_true, y_pred):
    m = y_true.shape[0]
    return -np.sum(y_true * np.log(y_pred + 1e-10)) / m


for epoch in range(epochs):
    # Forward pass
    z1 = np.dot(X_train, weights['W1']) + biases['b1']
    a1 = sigmoid(z1)
    
    z2 = np.dot(a1, weights['W2']) + biases['b2']
    a2 = sigmoid(z2)
    
    z3 = np.dot(a2, weights['W3']) + biases['b3']
    a3 = softmax(z3)
    
    loss = cross_entropy_loss(y_train, a3)
   
    dz3 = a3 - y_train
    dw3 = np.dot(a2.T, dz3)
    db3 = np.sum(dz3, axis=0, keepdims=True)
    
    dz2 = np.dot(dz3, weights['W3'].T) * (a2 * (1 - a2))
    dw2 = np.dot(a1.T, dz2)
    db2 = np.sum(dz2, axis=0, keepdims=True)
    
    dz1 = np.dot(dz2, weights['W2'].T) * (a1 * (1 - a1))
    dw1 = np.dot(X_train.T, dz1)
    db1 = np.sum(dz1, axis=0, keepdims=True)
    
    weights['W1'] -= learning_rate * dw1
    biases['b1'] -= learning_rate * db1
    
    weights['W2'] -= learning_rate * dw2
    biases['b2'] -= learning_rate * db2
    
    weights['W3'] -= learning_rate * dw3
    biases['b3'] -= learning_rate * db3
    
    print(f"Epoch {epoch + 1}/{epochs}, Loss: {loss}")

z1_test = np.dot(X_test, weights['W1']) + biases['b1']
a1_test = sigmoid(z1_test)

z2_test = np.dot(a1_test, weights['W2']) + biases['b2']
a2_test = sigmoid(z2_test)

z3_test = np.dot(a2_test, weights['W3']) + biases['b3']
a3_test = softmax(z3_test)

predictions = np.argmax(a3_test, axis=1)
actual_labels = np.argmax(y_test, axis=1)


accuracy = np.mean(predictions == actual_labels)
print(f"Accuracy on test set: {accuracy}")


  warn(


Epoch 1/10, Loss: 5.648567830046001


  return 1 / (1 + np.exp(-x))


Epoch 2/10, Loss: 20.74557616652671


  return 1 / (1 + np.exp(-x))


Epoch 3/10, Loss: 20.028478107804627


  return 1 / (1 + np.exp(-x))


Epoch 4/10, Loss: 20.777541056985793


  return 1 / (1 + np.exp(-x))


Epoch 5/10, Loss: 20.444899746228607


  return 1 / (1 + np.exp(-x))


Epoch 6/10, Loss: 20.64514241413806


  return 1 / (1 + np.exp(-x))


Epoch 7/10, Loss: 20.772606946072212


  return 1 / (1 + np.exp(-x))


Epoch 8/10, Loss: 20.719154077841768


  return 1 / (1 + np.exp(-x))


Epoch 9/10, Loss: 20.6788588387142


  return 1 / (1 + np.exp(-x))


Epoch 10/10, Loss: 20.752459326508426
Accuracy on test set: 0.09092857142857143


  return 1 / (1 + np.exp(-x))
