In [9]:
import numpy as np
from keras.api.datasets import mnist
from keras.api.utils import to_categorical

# 活性化関数とその導関数
def relu(x):
    return np.maximum(0, x)

def relu_derivative(x):
    return (x > 0).astype(float)

def softmax(x):
    exp_x = np.exp(x - np.max(x, axis=1, keepdims=True))
    return exp_x / np.sum(exp_x, axis=1, keepdims=True)

def cross_entropy_loss(y_true, y_pred):
    m = y_true.shape[0]
    log_likelihood = -np.log(y_pred[range(m), np.argmax(y_true, axis=1)])
    loss = np.sum(log_likelihood) / m
    return loss

def cross_entropy_loss_derivative(y_true, y_pred):
    m = y_true.shape[0]
    grad = y_pred.copy()
    grad[range(m), np.argmax(y_true, axis=1)] -= 1
    grad = grad / m
    return grad

# ネットワーククラス
class FourLayerNet:
    def __init__(self, input_size, hidden_size1, hidden_size2, hidden_size3, output_size):
        self.params = {
            'W1': np.random.randn(input_size, hidden_size1) * 0.01,
            'b1': np.zeros((1, hidden_size1)),
            'W2': np.random.randn(hidden_size1, hidden_size2) * 0.01,
            'b2': np.zeros((1, hidden_size2)),
            'W3': np.random.randn(hidden_size2, hidden_size3) * 0.01,
            'b3': np.zeros((1, hidden_size3)),
            'W4': np.random.randn(hidden_size3, output_size) * 0.01,
            'b4': np.zeros((1, output_size))
        }

    def forward(self, X):
        self.cache = {}
        self.cache['Z1'] = np.dot(X, self.params['W1']) + self.params['b1']
        self.cache['A1'] = relu(self.cache['Z1'])
        self.cache['Z2'] = np.dot(self.cache['A1'], self.params['W2']) + self.params['b2']
        self.cache['A2'] = relu(self.cache['Z2'])
        self.cache['Z3'] = np.dot(self.cache['A2'], self.params['W3']) + self.params['b3']
        self.cache['A3'] = relu(self.cache['Z3'])
        self.cache['Z4'] = np.dot(self.cache['A3'], self.params['W4']) + self.params['b4']
        self.cache['A4'] = softmax(self.cache['Z4'])
        return self.cache['A4']

    def backward(self, X, y):
        m = X.shape[0]
        grads = {}

        dZ4 = cross_entropy_loss_derivative(y, self.cache['A4'])
        grads['dW4'] = np.dot(self.cache['A3'].T, dZ4)
        grads['db4'] = np.sum(dZ4, axis=0, keepdims=True)

        test = self.params['W4'].T

        dA3 = np.dot(dZ4, self.params['W4'].T)
        dZ3 = dA3 * relu_derivative(self.cache['Z3'])
        grads['dW3'] = np.dot(self.cache['A2'].T, dZ3)
        grads['db3'] = np.sum(dZ3, axis=0, keepdims=True)

        dA2 = np.dot(dZ3, self.params['W3'].T)
        dZ2 = dA2 * relu_derivative(self.cache['Z2'])
        grads['dW2'] = np.dot(self.cache['A1'].T, dZ2)
        grads['db2'] = np.sum(dZ2, axis=0, keepdims=True)

        dA1 = np.dot(dZ2, self.params['W2'].T)
        dZ1 = dA1 * relu_derivative(self.cache['Z1'])
        grads['dW1'] = np.dot(X.T, dZ1)
        grads['db1'] = np.sum(dZ1, axis=0, keepdims=True)

        return grads

    def update_params(self, grads, learning_rate):
        for key in self.params.keys():
            self.params[key] -= learning_rate * grads['d' + key]

    def compute_loss(self, y_true, y_pred):
        return cross_entropy_loss(y_true, y_pred)
    
    def predict(self, X):
        return self.forward(X)

# データの前処理
def load_mnist_data():
    (X_train, y_train), (X_test, y_test) = mnist.load_data()
    X_train = X_train.reshape(X_train.shape[0], -1).astype(np.float32) / 255
    X_test = X_test.reshape(X_test.shape[0], -1).astype(np.float32) / 255
    y_train = to_categorical(y_train, 10)
    y_test = to_categorical(y_test, 10)
    return X_train, y_train, X_test, y_test

# トレーニング関数
def train(X, y, input_size, hidden_size1, hidden_size2, hidden_size3, output_size, epochs, batch_size, learning_rate):
    net = FourLayerNet(input_size, hidden_size1, hidden_size2, hidden_size3, output_size)
    
    for epoch in range(epochs):
        permutation = np.random.permutation(X.shape[0])
        X_shuffled = X[permutation]
        y_shuffled = y[permutation]

        for i in range(0, X.shape[0], batch_size):
            X_batch = X_shuffled[i:i + batch_size]
            y_batch = y_shuffled[i:i + batch_size]
            y_pred = net.forward(X_batch)
            loss = net.compute_loss(y_batch, y_pred)
            grads = net.backward(X_batch, y_batch)
            net.update_params(grads, learning_rate)
        
        print(f'Epoch {epoch}/{epochs} - Loss: {loss:.4f}')

    return net

X_train, y_train, X_test, y_test = load_mnist_data()
input_size = X_train.shape[1]
hidden_size1 = 256
hidden_size2 = 128
hidden_size3 = 32
output_size = 10
epochs = 100
batch_size = 2000
learning_rate = 0.5

net = train(X_train, y_train, input_size, hidden_size1, hidden_size2, hidden_size3, output_size, epochs, batch_size ,learning_rate)


Epoch 0/100 - Loss: 2.3035
Epoch 1/100 - Loss: 2.3012
Epoch 2/100 - Loss: 2.3026
Epoch 3/100 - Loss: 2.3023
Epoch 4/100 - Loss: 2.3005
Epoch 5/100 - Loss: 2.3011
Epoch 6/100 - Loss: 2.3012
Epoch 7/100 - Loss: 2.3020
Epoch 8/100 - Loss: 2.3000
Epoch 9/100 - Loss: 2.3017
Epoch 10/100 - Loss: 2.2992
Epoch 11/100 - Loss: 2.3008
Epoch 12/100 - Loss: 2.2984
Epoch 13/100 - Loss: 2.2996
Epoch 14/100 - Loss: 2.2935
Epoch 15/100 - Loss: 2.0522
Epoch 16/100 - Loss: 1.9978
Epoch 17/100 - Loss: 1.7849
Epoch 18/100 - Loss: 1.4719
Epoch 19/100 - Loss: 1.3390
Epoch 20/100 - Loss: 1.0112
Epoch 21/100 - Loss: 0.9341
Epoch 22/100 - Loss: 0.7087
Epoch 23/100 - Loss: 0.6002
Epoch 24/100 - Loss: 0.4197
Epoch 25/100 - Loss: 0.3024
Epoch 26/100 - Loss: 0.2248
Epoch 27/100 - Loss: 0.2465
Epoch 28/100 - Loss: 0.1973
Epoch 29/100 - Loss: 0.1507
Epoch 30/100 - Loss: 5.2387
Epoch 31/100 - Loss: 0.9575
Epoch 32/100 - Loss: 0.4752
Epoch 33/100 - Loss: 0.2435
Epoch 34/100 - Loss: 0.2120
Epoch 35/100 - Loss: 0.1626
Ep

## predict test data

In [10]:
y_pre = net.predict(X_test)

miss_count = 0
conf_matrix = np.zeros((10, 10), dtype=int)

for i in range(len(y_pre)):
    answer = np.argmax(y_test[i])
    result = np.argmax(y_pre[i])
    conf_matrix[answer][result] += 1
    if answer != result: miss_count +=1
    # print(answer, result, "x" if answer != result else "")

import pandas as pd 

TP = np.array([conf_matrix[i][i] for i in range(10)])
FN = np.array([np.sum(conf_matrix[i]) - conf_matrix[i][i] for i in range(10) ])
FP = np.array([np.sum(conf_matrix.T[i]) - conf_matrix[i][i] for i in range(10) ])
TN = np.array([np.sum(conf_matrix) - TP[i] - FN[i] - FP[i] for i in range(10)])
print(conf_matrix)

Precision = np.array([TP / (TP + FP)])
Recall = np.array([TP / (TP + FN)])
F_value = np.array([ 2 * Recall * Precision / (Recall + Precision)])

df = pd.DataFrame({"Precision": Precision[0], "Recall": Recall[0], "F_value": F_value[0][0]})
print("\n\n", df)

acc = (len(y_pre) - miss_count) / len(y_pre)

print(f"\nAccuracy: {acc}")

[[ 969    0    1    1    1    4    0    1    3    0]
 [   0 1122    3    1    0    1    3    1    4    0]
 [   2    4  995   10    2    0    6    9    2    2]
 [   0    0   13  971    1   12    0    6    4    3]
 [   1    2    2    0  946    0    8    2    3   18]
 [   5    0    0   20    1  847    6    4    6    3]
 [   5    3    1    0    7    8  932    0    2    0]
 [   1    4    8    3    1    1    0 1002    1    7]
 [   3    1    5   13    3    9    6    5  924    5]
 [   6    7    2    7   15    4    1   10    6  951]]


    Precision    Recall   F_value
0   0.976815  0.988776  0.982759
1   0.981627  0.988546  0.985075
2   0.966019  0.964147  0.965082
3   0.946394  0.961386  0.953831
4   0.968270  0.963340  0.965799
5   0.955982  0.949552  0.952756
6   0.968815  0.972860  0.970833
7   0.963462  0.974708  0.969052
8   0.967539  0.948665  0.958009
9   0.961577  0.942517  0.951952

Accuracy: 0.9659


In [11]:
import numpy as np
from PIL import Image
import os

with os.scandir('./input') as entries:
    items = [entry.name for entry in entries if entry.is_file()]

data = []
file_name = []

for file in items:
    if file == '.DS_Store':
        continue
    data.append(Image.open(f'./input/{file}').convert('L'))
    file_name.append(file)

data = np.array(data)
x_pre = data.reshape(len(data), 784)
x_pre = x_pre.astype('float32')
x_pre /= 255
y_pre = net.predict(x_pre)
print(y_pre)

for i in range(len(data)):
    print(file_name[i], np.argmax(y_pre[i]))

[[1.49390477e-03 5.89338066e-04 2.91491985e-01 4.43597974e-02
  1.23365806e-03 7.78064583e-02 2.46905910e-03 5.47577769e-01
  1.59122691e-02 1.70657611e-02]
 [1.73087517e-03 9.41020522e-03 9.64326702e-01 7.98089237e-03
  2.38574456e-04 1.29773299e-03 4.25021560e-03 7.46583155e-03
  2.96731044e-03 3.31660245e-04]
 [1.41580238e-04 4.92007468e-04 1.47632573e-03 9.42676587e-01
  5.65276016e-06 3.74485153e-02 4.57690266e-04 1.47745726e-03
  8.02281962e-03 7.80136451e-03]
 [2.75010743e-03 5.53433621e-05 2.39541564e-02 8.91365616e-03
  1.24928042e-03 2.83206457e-01 6.12551736e-01 9.35667379e-05
  6.69561971e-02 2.69499246e-04]
 [4.59793257e-05 5.89944894e-07 4.72923605e-06 7.85358771e-03
  3.11640829e-08 9.91468425e-01 6.41742427e-06 3.83102700e-06
  1.14532828e-04 5.01875986e-04]
 [6.17141532e-04 2.42367277e-05 1.39934171e-04 4.39715035e-03
  5.44285941e-05 9.68392015e-01 1.31199105e-03 5.69728421e-05
  2.18022129e-02 3.20391682e-03]
 [8.69025984e-03 1.08366906e-02 7.24179627e-02 1.42359378e