In [14]:
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Load the MNIST dataset through TensorFlow/Keras
mnist = tf.keras.datasets.mnist
(X_train, y_train), (X_test, y_test) = mnist.load_data()

# Keep only the first 500 training samples and the first 100 test samples.
# The subset is taken *before* normalisation so that only the retained images
# are converted to floating point instead of the whole dataset.
X_train, y_train = X_train[:500], y_train[:500]
X_test, y_test = X_test[:100], y_test[:100]

# Normalise the data by dividing every pixel value by 255
X_train, X_test = X_train / 255.0, X_test / 255.0

# Flatten every sample so that each array is two-dimensional
# (one row per sample, all remaining axes merged into one)
X_train = X_train.reshape(X_train.shape[0], -1)
X_test = X_test.reshape(X_test.shape[0], -1)

# One-hot encoding of labels
def one_hot_encode(y, num_classes):
    """Return the one-hot encoding of the integer labels ``y``.

    Each label selects the matching row of a ``num_classes`` x ``num_classes``
    identity matrix, so the result has one extra trailing axis of length
    ``num_classes`` holding a single 1.0 per label.
    """
    identity = np.eye(num_classes)
    return identity[y]

# One-hot encode the labels of both splits using 10 classes
y_train_encoded, y_test_encoded = (
    one_hot_encode(labels, 10) for labels in (y_train, y_test)
)

class ActivationFunction:
    """Common interface for activation functions.

    Subclasses are expected to override both :meth:`activate` and
    :meth:`derivative`; the versions defined here only raise
    ``NotImplementedError``.
    """

    def __init__(self):
        """Create the activation object; the base class keeps no state."""

    def activate(self, z):
        """Apply the activation to ``z``. Must be overridden."""
        raise NotImplementedError

    def derivative(self, z):
        """Return the derivative of the activation at ``z``. Must be overridden."""
        raise NotImplementedError

class ReLU(ActivationFunction):
    """Rectified linear unit: keeps positive values and zeroes out the rest."""

    def activate(self, z):
        """Return the element-wise maximum of 0 and ``z``."""
        rectified = np.maximum(0, z)
        return rectified

    def derivative(self, z):
        """Return 1 where ``z`` is strictly positive and 0 everywhere else."""
        is_positive = z > 0
        return np.where(is_positive, 1, 0)

class GELU(ActivationFunction):
    """Gaussian Error Linear Unit, computed with the tanh approximation.

    activate(z) = 0.5 * z * (1 + tanh(sqrt(2/pi) * (z + 0.044715 * z**3)))
    """

    def activate(self, z):
        """Return the tanh-approximated GELU of ``z`` element-wise."""
        return 0.5 * z * (1 + np.tanh(np.sqrt(2/np.pi) * (z + 0.044715 * z**3)))

    def derivative(self, z):
        """Return the element-wise derivative of :meth:`activate` at ``z``.

        With ``u = sqrt(2/pi) * (z + 0.044715 * z**3)`` the product and chain
        rules give::

            0.5 * (1 + tanh(u)) + 0.5 * z * (1 - tanh(u)**2) * du/dz

        where ``du/dz = sqrt(2/pi) * (1 + 3 * 0.044715 * z**2)``.
        """
        scale = np.sqrt(2 / np.pi)
        tanh_u = np.tanh(scale * (z + 0.044715 * z**3))
        # The cubic term differentiates to 3 * 0.044715 * z**2 (= 0.134145 * z**2);
        # deriving the coefficient from the constant used in activate() keeps
        # the gradient consistent with the forward pass.
        du_dz = scale * (1 + 3 * 0.044715 * z**2)
        return 0.5 * (1 + tanh_u) + 0.5 * z * (1 - tanh_u**2) * du_dz

class Softmax(ActivationFunction):
    """Row-wise softmax: turns each row of scores into values that sum to 1."""

    def activate(self, z):
        """Return the softmax of ``z`` computed along axis 1.

        ``z`` must be two-dimensional because the reductions use ``axis=1``.
        """
        # Subtracting the per-row maximum does not change the result
        # mathematically but keeps np.exp from overflowing on large scores.
        shifted = z - np.max(z, axis=1, keepdims=True)
        exp_z = np.exp(shifted)
        return exp_z / np.sum(exp_z, axis=1, keepdims=True)

    def derivative(self, z):
        """Always raise: softmax has no element-wise derivative.

        The derivative of softmax is a full Jacobian per row, so it cannot be
        applied as an element-wise factor the way the other activations are.
        Failing loudly here replaces the previous silent ``None`` return,
        which only surfaced later as an unrelated ``TypeError``.

        Raises:
            NotImplementedError: always.
        """
        raise NotImplementedError(
            "Softmax has no element-wise derivative; use it as the output "
            "layer together with the combined softmax/cross-entropy gradient."
        )

class NeuralNetwork:
    """Fully connected feed-forward network trained by full-batch gradient descent.

    Every layer computes ``a = activation(a_prev @ W + b)``.  The backward
    pass seeds the gradient with ``(output - y) / m``, i.e. it treats the last
    layer as softmax combined with cross-entropy loss, and it never calls
    ``derivative`` on the last layer's activation.
    """

    def __init__(self, input_size, layer_config, weight_scale=0.01):
        """Build the layers and randomly initialise their parameters.

        Args:
            input_size: number of features of one input sample.
            layer_config: sequence of dicts, one per layer, each with the keys
                ``'size'`` (number of units) and ``'activation'`` (a class or
                other zero-argument callable returning an object that offers
                ``activate(z)`` and ``derivative(z)``).
            weight_scale: factor applied to the standard-normal initial
                weights.  Defaults to 0.01, the value that used to be
                hard-coded.
        """
        self.layers = []
        self.weights = []
        self.biases = []

        prev_size = input_size
        for config in layer_config:
            activation = config['activation']()
            current_size = config['size']
            self.layers.append({
                'size': current_size,
                'activation': activation,
                'z': None,  # pre-activations cached by the last forward pass
                'a': None,  # activations cached by the last forward pass
            })
            self.weights.append(np.random.randn(prev_size, current_size) * weight_scale)
            self.biases.append(np.zeros((1, current_size)))
            prev_size = current_size

    def forward_propagation(self, X):
        """Run ``X`` through all layers and return the last layer's output.

        Each layer's pre-activation ``z`` and activation ``a`` are stored on
        the layer dict; :meth:`backward_propagation` reads them, so any later
        forward pass (including :meth:`predict`) overwrites that cache.
        """
        a = X
        for i, layer in enumerate(self.layers):
            z = np.dot(a, self.weights[i]) + self.biases[i]
            a = layer['activation'].activate(z)
            layer['z'] = z
            layer['a'] = a
        return a

    def backward_propagation(self, X, y, learning_rate):
        """Perform one gradient-descent update of all weights and biases.

        Must be called right after ``forward_propagation(X)`` on the same
        ``X``, because it uses the values cached by that call.

        Args:
            X: input batch, one sample per row.
            y: targets with the same shape as the network output.
            learning_rate: step size of the update.
        """
        m = X.shape[0]
        # Gradient of the mean cross-entropy loss w.r.t. the output
        # pre-activations when the last layer is softmax.
        dz = (self.layers[-1]['a'] - y) / m

        for i in reversed(range(len(self.layers))):
            layer_input = X if i == 0 else self.layers[i - 1]['a']
            dW = np.dot(layer_input.T, dz)
            db = np.sum(dz, axis=0, keepdims=True)
            if i != 0:
                # Propagate through the not-yet-updated weights of layer i,
                # then through the activation of the layer below.
                below = self.layers[i - 1]
                dz = np.dot(dz, self.weights[i].T) * below['activation'].derivative(below['z'])
            self.weights[i] -= learning_rate * dW
            self.biases[i] -= learning_rate * db

    def train(self, X, y, learning_rate, epochs, X_val=None, y_val=None):
        """Train on the full batch for ``epochs`` iterations, printing progress.

        The loss printed for an epoch is computed from the forward pass made
        before that epoch's weight update.

        Args:
            X: training inputs, one sample per row.
            y: training targets with the same shape as the network output.
            learning_rate: step size of every update.
            epochs: number of full-batch updates.
            X_val: optional inputs used for the per-epoch accuracy report.
            y_val: optional labels for ``X_val``, given as class indices
                because they are compared directly with :meth:`predict`.

        If ``X_val`` / ``y_val`` are not both supplied, module-level
        ``X_test`` / ``y_test`` are used when they exist (the behaviour this
        method originally relied on); if they do not exist either, the
        accuracy line is skipped instead of failing with ``NameError``.
        """
        if X_val is None or y_val is None:
            module_scope = globals()
            X_val = module_scope.get('X_test')
            y_val = module_scope.get('y_test')
        report_accuracy = X_val is not None and y_val is not None

        for epoch in range(epochs):
            output = self.forward_propagation(X)
            self.backward_propagation(X, y, learning_rate)
            loss = self.cross_entropy_loss(y, output)
            print(f'Epoch {epoch}, Loss: {loss}')
            if report_accuracy:
                print(f'Accuracy {self.accuracy(y_val, self.predict(X_val))}')

    def predict(self, X):
        """Return, for each row of ``X``, the index of the largest output."""
        output = self.forward_propagation(X)
        return np.argmax(output, axis=1)

    def accuracy(self, y_true, y_pred):
        """Return the fraction of positions where ``y_true`` equals ``y_pred``."""
        return np.mean(y_true == y_pred)

    def cross_entropy_loss(self, y_true, y_pred):
        """Return the cross-entropy between ``y_true`` and ``y_pred``, averaged over rows.

        A small constant (1e-15) is added inside the logarithm so that a
        predicted probability of exactly 0 does not produce ``-inf``.
        """
        m = y_true.shape[0]
        return -np.sum(np.log(y_pred + 1e-15) * y_true) / m

# Example usage: two hidden layers (ReLU, GELU) followed by a softmax output layer
input_size = X_train.shape[1]
layer_config = [
    {'size': size, 'activation': activation}
    for size, activation in ((128, ReLU), (64, GELU), (10, Softmax))
]

nn = NeuralNetwork(input_size, layer_config)
nn.train(X_train, y_train_encoded, learning_rate=0.05, epochs=1000)

Epoch 0, Loss: 2.3025565478578285
Accuracy 0.14
Epoch 1, Loss: 2.302442941523964
Accuracy 0.14
Epoch 2, Loss: 2.302330454476418
Accuracy 0.14
Epoch 3, Loss: 2.3022190751429226
Accuracy 0.14
Epoch 4, Loss: 2.3021087935754787
Accuracy 0.14
Epoch 5, Loss: 2.3019995984946084
Accuracy 0.14
Epoch 6, Loss: 2.301891479771177
Accuracy 0.14
Epoch 7, Loss: 2.3017844263597476
Accuracy 0.14
Epoch 8, Loss: 2.3016784281088105
Accuracy 0.14
Epoch 9, Loss: 2.3015734748220122
Accuracy 0.14
Epoch 10, Loss: 2.3014695560280325
Accuracy 0.14
Epoch 11, Loss: 2.3013666617519903
Accuracy 0.14
Epoch 12, Loss: 2.301264781913623
Accuracy 0.14
Epoch 13, Loss: 2.30116390637104
Accuracy 0.14
Epoch 14, Loss: 2.3010640247741514
Accuracy 0.14
Epoch 15, Loss: 2.3009651277778524
Accuracy 0.14
Epoch 16, Loss: 2.300867206048314
Accuracy 0.14
Epoch 17, Loss: 2.3007702503136205
Accuracy 0.14
Epoch 18, Loss: 2.300674250784455
Accuracy 0.14
Epoch 19, Loss: 2.30057919799479
Accuracy 0.14
Epoch 20, Loss: 2.3004850828211083
Accur