In [2]:
import numpy as np
from keras.api.datasets import mnist
from keras.api.utils import to_categorical

# 活性化関数とその導関数
def relu(x):
    """Rectified linear unit: element-wise maximum of 0 and ``x``."""
    rectified = np.maximum(0, x)
    return rectified

def relu_derivative(x):
    """Element-wise derivative of ReLU: 1.0 where ``x > 0``, otherwise 0.0."""
    is_positive = x > 0
    return is_positive.astype(float)

def softmax(x):
    """Softmax along axis 1, so every row of the result sums to 1.

    The per-row maximum is subtracted before exponentiating so the largest
    exponent is 0; this avoids overflow and does not change the result.
    """
    row_max = np.max(x, axis=1, keepdims=True)
    unnormalized = np.exp(x - row_max)
    row_total = np.sum(unnormalized, axis=1, keepdims=True)
    return unnormalized / row_total

def cross_entropy_loss(y_true, y_pred, eps=1e-12):
    """Mean cross-entropy between one-hot labels and predicted probabilities.

    Args:
        y_true: Array whose first axis indexes samples; the target class of
            each row is taken to be its argmax along axis 1.
        y_pred: Array of predicted probabilities, indexed as [sample, class].
        eps: Lower bound applied to the target-class probabilities before
            taking the logarithm.

    Returns:
        The sum of the negative log-probabilities of the target classes
        divided by the number of samples.
    """
    m = y_true.shape[0]
    target_classes = np.argmax(y_true, axis=1)
    target_probs = y_pred[np.arange(m), target_classes]
    # A probability that underflowed to exactly 0 would make -log(0) = inf
    # (and emit a RuntimeWarning); bounding it below keeps the loss finite.
    log_likelihood = -np.log(np.maximum(target_probs, eps))
    return np.sum(log_likelihood) / m

def cross_entropy_loss_derivative(y_true, y_pred):
    """Batch-averaged gradient: ``y_pred`` with 1 subtracted at each target class.

    The target class of each row is the argmax of ``y_true`` along axis 1.
    ``y_pred`` itself is not modified; the work is done on a copy, which is
    then divided by the number of samples.
    """
    batch_size = y_true.shape[0]
    target_classes = np.argmax(y_true, axis=1)
    delta = y_pred.copy()
    delta[np.arange(batch_size), target_classes] -= 1
    return delta / batch_size

# ネットワーククラス
class FourLayerNet:
    """Fully connected network with three ReLU hidden layers and a softmax output.

    Parameters live in ``self.params`` under the keys ``W1``..``W4`` (weights)
    and ``b1``..``b4`` (biases). ``forward`` stores the pre-activations
    (``Z1``..``Z4``) and activations (``A1``..``A4``) in ``self.cache``, which
    ``backward`` reads.
    """

    def __init__(self, input_size, hidden_size1, hidden_size2, hidden_size3, output_size, seed=None):
        """Create the parameters: small random weights and zero biases.

        Args:
            input_size: Number of input features.
            hidden_size1: Width of the first hidden layer.
            hidden_size2: Width of the second hidden layer.
            hidden_size3: Width of the third hidden layer.
            output_size: Number of output classes.
            seed: Optional seed. When given, the weights are drawn from a
                private ``np.random.RandomState(seed)`` so initialization is
                reproducible. When ``None`` the global ``np.random`` state is
                used.
        """
        rng = np.random if seed is None else np.random.RandomState(seed)
        layer_sizes = [input_size, hidden_size1, hidden_size2, hidden_size3, output_size]

        self.params = {}
        for idx, (fan_in, fan_out) in enumerate(zip(layer_sizes[:-1], layer_sizes[1:]), start=1):
            # Standard-normal draws scaled by 0.01; biases start at zero.
            self.params[f'W{idx}'] = rng.randn(fan_in, fan_out) * 0.01
            self.params[f'b{idx}'] = np.zeros((1, fan_out))

    def forward(self, X):
        """Run a forward pass on ``X`` and return the softmax output ``A4``.

        Every call replaces ``self.cache`` with the intermediate values
        computed for this ``X``.
        """
        params = self.params
        cache = self.cache = {}
        cache['Z1'] = np.dot(X, params['W1']) + params['b1']
        cache['A1'] = relu(cache['Z1'])
        cache['Z2'] = np.dot(cache['A1'], params['W2']) + params['b2']
        cache['A2'] = relu(cache['Z2'])
        cache['Z3'] = np.dot(cache['A2'], params['W3']) + params['b3']
        cache['A3'] = relu(cache['Z3'])
        cache['Z4'] = np.dot(cache['A3'], params['W4']) + params['b4']
        cache['A4'] = softmax(cache['Z4'])
        return cache['A4']

    def backward(self, X, y):
        """Backpropagate through the network and return the parameter gradients.

        Reads ``self.cache``, so ``forward`` must have been called on the same
        ``X`` immediately before.

        Args:
            X: Input batch that was passed to ``forward``.
            y: One-hot labels for ``X``.

        Returns:
            Dict with keys ``dW1``..``dW4`` and ``db1``..``db4``.
        """
        cache, params = self.cache, self.params
        grads = {}

        # Output layer: the loss derivative is used directly as the gradient
        # with respect to the pre-activation Z4.
        dZ4 = cross_entropy_loss_derivative(y, cache['A4'])
        grads['dW4'] = np.dot(cache['A3'].T, dZ4)
        grads['db4'] = np.sum(dZ4, axis=0, keepdims=True)

        # Hidden layers: push the gradient back through the weights, then
        # mask it with the ReLU derivative of that layer's pre-activation.
        dA3 = np.dot(dZ4, params['W4'].T)
        dZ3 = dA3 * relu_derivative(cache['Z3'])
        grads['dW3'] = np.dot(cache['A2'].T, dZ3)
        grads['db3'] = np.sum(dZ3, axis=0, keepdims=True)

        dA2 = np.dot(dZ3, params['W3'].T)
        dZ2 = dA2 * relu_derivative(cache['Z2'])
        grads['dW2'] = np.dot(cache['A1'].T, dZ2)
        grads['db2'] = np.sum(dZ2, axis=0, keepdims=True)

        dA1 = np.dot(dZ2, params['W2'].T)
        dZ1 = dA1 * relu_derivative(cache['Z1'])
        grads['dW1'] = np.dot(X.T, dZ1)
        grads['db1'] = np.sum(dZ1, axis=0, keepdims=True)

        return grads

    def update_params(self, grads, learning_rate):
        """Apply one gradient-descent step in place.

        For every parameter ``key`` the entry ``grads['d' + key]`` scaled by
        ``learning_rate`` is subtracted from ``self.params[key]``.
        """
        for key in self.params.keys():
            self.params[key] -= learning_rate * grads['d' + key]

    def compute_loss(self, y_true, y_pred):
        """Return the cross-entropy loss of ``y_pred`` against ``y_true``."""
        return cross_entropy_loss(y_true, y_pred)

    def predict(self, X):
        """Return class probabilities for ``X``.

        This delegates to ``forward`` and therefore overwrites ``self.cache``.
        """
        return self.forward(X)

# データの前処理
def load_mnist_data():
    """Load MNIST through Keras and prepare it for the network.

    Returns:
        Tuple ``(X_train, y_train, X_test, y_test)``. Images are reshaped to
        one row per sample, cast to float32 and divided by 255; labels are
        passed through ``to_categorical`` with 10 classes.
    """
    def _flatten_and_scale(images):
        # One row per image, values as float32 divided by 255.
        return images.reshape(images.shape[0], -1).astype(np.float32) / 255

    (train_images, train_labels), (test_images, test_labels) = mnist.load_data()
    return (
        _flatten_and_scale(train_images),
        to_categorical(train_labels, 10),
        _flatten_and_scale(test_images),
        to_categorical(test_labels, 10),
    )

# トレーニング関数
def train(X, y, input_size, hidden_size1, hidden_size2, hidden_size3, output_size, epochs, learning_rate):
    """Train a new FourLayerNet with full-batch gradient descent.

    Each epoch runs one forward pass over all of ``X``, computes the loss,
    backpropagates, applies a single parameter update, and prints the loss
    that was measured before the update.

    Args:
        X: Training inputs.
        y: One-hot training labels.
        input_size, hidden_size1, hidden_size2, hidden_size3, output_size:
            Layer sizes forwarded to ``FourLayerNet``.
        epochs: Number of update steps to perform.
        learning_rate: Step size passed to ``update_params``.

    Returns:
        The trained ``FourLayerNet`` instance.
    """
    model = FourLayerNet(input_size, hidden_size1, hidden_size2, hidden_size3, output_size)

    for epoch in range(epochs):
        probabilities = model.forward(X)
        loss = model.compute_loss(y, probabilities)
        # backward() relies on the cache filled by the forward pass above.
        gradients = model.backward(X, y)
        model.update_params(gradients, learning_rate)

        print(f'Epoch {epoch}/{epochs} - Loss: {loss:.4f}')

    return model


# Seed NumPy's global generator so the random weight initialization done
# inside train() is identical on every run (Restart Kernel -> Run All).
SEED = 42
np.random.seed(SEED)

X_train, y_train, X_test, y_test = load_mnist_data()

# Layer sizes: the input width comes from the flattened images.
input_size = X_train.shape[1]
hidden_size1 = 256
hidden_size2 = 128
hidden_size3 = 32
output_size = 10

# Each epoch is a single full-batch gradient step over the whole training set.
epochs = 800
learning_rate = 1

net = train(X_train, y_train, input_size, hidden_size1, hidden_size2, hidden_size3, output_size, epochs, learning_rate)


KeyboardInterrupt: 

In [26]:
# Class probabilities for every test sample.
y_pre = net.predict(X_test)

# Label index per sample: ground truth from the one-hot rows, prediction from
# the most probable class.
answers = np.argmax(y_test, axis=1)
results = np.argmax(y_pre, axis=1)

miss_count = 0

for i in range(len(y_pre)):
    answer, result = answers[i], results[i]
    is_miss = answer != result
    if is_miss:
        miss_count += 1
    # Misclassified samples are flagged with a trailing "x".
    print(answer, result, "x" if is_miss else "")

print(f'{miss_count} / {len(y_pre)}')

7 7 
2 2 
1 1 
0 0 
4 4 
1 1 
4 4 
9 9 
5 4 x
9 9 
0 0 
6 6 
9 9 
0 0 
1 1 
5 5 
9 9 
7 7 
3 3 
4 4 
9 9 
6 6 
6 6 
5 5 
4 4 
0 0 
7 7 
4 4 
0 0 
1 1 
3 3 
1 1 
3 3 
4 6 x
7 7 
2 2 
7 7 
1 1 
2 3 x
1 1 
1 1 
7 7 
4 4 
2 2 
3 3 
5 3 x
1 1 
2 2 
4 4 
4 4 
6 6 
3 3 
5 5 
5 5 
6 6 
0 0 
4 4 
1 1 
9 9 
5 5 
7 7 
8 8 
9 4 x
3 3 
7 7 
4 4 
6 4 x
4 4 
3 3 
0 0 
7 7 
0 0 
2 2 
9 9 
1 1 
7 7 
3 3 
2 7 x
9 9 
7 7 
7 9 x
6 6 
2 2 
7 7 
8 8 
4 4 
7 7 
3 3 
6 6 
1 1 
3 3 
6 6 
9 9 
3 3 
1 1 
4 4 
1 1 
7 9 x
6 6 
9 9 
6 6 
0 0 
5 5 
4 4 
9 9 
9 9 
2 2 
1 1 
9 9 
4 4 
8 8 
7 7 
3 3 
9 9 
7 7 
4 4 
4 4 
4 4 
9 9 
2 9 x
5 3 x
4 4 
7 7 
6 6 
7 4 x
9 9 
0 0 
5 5 
8 8 
5 5 
6 6 
6 6 
5 5 
7 7 
8 8 
1 1 
0 0 
1 1 
6 6 
4 4 
6 6 
7 7 
3 3 
1 1 
7 7 
1 1 
8 8 
2 2 
0 0 
2 9 x
9 9 
9 8 x
5 5 
5 5 
1 1 
5 5 
6 6 
0 0 
3 3 
4 4 
4 4 
6 6 
5 5 
4 4 
6 6 
5 3 x
4 4 
5 5 
1 1 
4 4 
4 4 
7 7 
2 3 x
3 3 
2 2 
7 9 x
1 1 
8 8 
1 1 
8 8 
1 1 
8 8 
5 5 
0 0 
8 8 
9 4 x
2 2 
5 3 x
0 0 
1 1 
1 1 
1 1 
0 0 
9 9 
0 0 
3 3 
1