In [90]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.datasets import mnist
from sklearn.preprocessing import OneHotEncoder

Нормализуем данные

In [91]:
(X_train, Y_train), (X_test, Y_test) = mnist.load_data()

X_train = X_train.reshape(-1, 28*28).T / 255.0
X_test = X_test.reshape(-1, 28*28).T / 255.0
input_size = X_train.shape[0]
hidden_size = 64
output_size = 10


In [92]:
def encode(Y):
    result = np.zeros((Y.size, 10))
    result[np.arange(Y.size), Y] = 1
    return result.T

In [93]:
def decode(Y):
    return np.argmax(Y, axis=0)

In [94]:
Y_train = encode(Y_train)
Y_test = encode(Y_test)

Функции активации


In [95]:
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

In [96]:
def sigmoid_derivative(x):
    return x * (1 - x)

In [97]:
def softmax(x):
    exp_x = np.exp(x - np.max(x, axis=1, keepdims=True))
    return exp_x / np.sum(exp_x, axis=1, keepdims=True)

Создание модели


In [98]:
class TwoLayerNN:
    def __init__(self, input_size, hidden_size, output_size, learning_rate=0.01):
        self.W1 = np.random.randn(hidden_size, input_size) * 0.01
        self.b1 = np.zeros((hidden_size, 1))
        self.W2 = np.random.randn(output_size, hidden_size) * 0.01
        self.b2 = np.zeros((output_size, 1))
        self.learning_rate = learning_rate

    def forward(self, X):
        self.Z1 = np.dot(self.W1, X) + self.b1
        self.A1 = sigmoid(self.Z1)
        self.Z2 = np.dot(self.W2, self.A1) + self.b2
        self.A2 = softmax(self.Z2)
        return self.A2

    def backward(self, X, Y):
        m = X.shape[1]
        dZ2 = 10 - Y
        dW2 = np.dot(dZ2, self.A1.T) / m
        db2 = np.sum(dZ2, axis=1, keepdims=True) / m

        dA1 = np.dot(self.W2.T, dZ2)
        dZ1 = dA1 * sigmoid_derivative(self.A1)
        dW1 = np.dot(dZ1, X.T) / m
        db1 = np.sum(dZ1, axis=1, keepdims=True) / m

        self.W1 -= self.learning_rate * dW1
        self.b1 -= self.learning_rate * db1
        self.W2 -= self.learning_rate * dW2
        self.b2 -= self.learning_rate * db2

    def train(self, X, Y, iterations):
        for i in range(iterations):
            self.forward(X)
            self.backward(X, Y)
            if i % 100 == 0:
                loss = self.compute_loss(Y)
                print(f"Iteration {i}, Loss: {loss:.4f}")

    def compute_loss(self, Y):
        m = Y.shape[0]
        log_probs = -np.log(self.A2[np.arange(m), np.argmax(Y, axis=1)])
        loss = np.sum(log_probs) / m
        return loss

    def predict(self, X):
        output = self.forward(X)
        return np.argmax(output, axis=0)

In [99]:
def accuracy(predictions, labels):
    return np.mean(predictions == labels)

In [100]:
iterations = 100
learning_rate = 0.1
nn = TwoLayerNN(input_size, hidden_size, output_size, learning_rate)
nn.train(X_train, Y_train, iterations)



Iteration 0, Loss: 11.0026


In [103]:
test_predictions = nn.predict(X_test)
acc = accuracy(test_predictions, Y_test)
print(f"Test accuracy: {acc * 100:.2f}%")

Test accuracy: 86.56%
