In [None]:
import numpy as np
from keras.api.datasets import mnist
from keras.api.utils import to_categorical

# 活性化関数とその導関数
def relu(x):
    """Apply the rectified linear unit element-wise.

    Every entry of ``x`` below zero is replaced by zero; all other entries
    pass through unchanged.
    """
    lower_bound = 0
    return np.maximum(lower_bound, x)

def relu_derivative(x):
    """Return the element-wise ReLU gradient as floats.

    The result is 1.0 wherever ``x`` is strictly positive and 0.0 elsewhere
    (including at exactly zero).
    """
    is_positive = x > 0
    return is_positive.astype(float)

def softmax(x):
    """Compute the softmax of ``x`` along axis 1 (one distribution per row).

    The row maximum is subtracted before exponentiating so that ``np.exp``
    never receives a large positive argument; the shift cancels out in the
    normalisation and does not change the result.
    """
    row_max = np.max(x, axis=1, keepdims=True)
    exponentials = np.exp(x - row_max)
    normaliser = np.sum(exponentials, axis=1, keepdims=True)
    return exponentials / normaliser

def cross_entropy_loss(y_true, y_pred):
    """Return the mean categorical cross-entropy over a batch.

    Args:
        y_true: Array of shape (m, classes). The target class of each row is
            taken as the position of its maximum, so one-hot rows work.
        y_pred: Array of shape (m, classes) holding predicted probabilities.

    Returns:
        The negative log of the probability assigned to each row's target
        class, averaged over the m rows.
    """
    m = y_true.shape[0]
    target_index = np.argmax(y_true, axis=1)
    target_prob = y_pred[np.arange(m), target_index]
    # A probability that underflowed to exactly 0 would give log(0) = -inf
    # and make the whole batch loss infinite; floor it at a tiny positive
    # value so the loss stays large but finite.
    target_prob = np.maximum(target_prob, 1e-12)
    return np.sum(-np.log(target_prob)) / m

def cross_entropy_loss_derivative(y_true, y_pred):
    """Return ``(y_pred - one_hot(target)) / m`` for a batch of m rows.

    The target class of each row is the argmax of the matching ``y_true``
    row. ``y_pred`` itself is left unmodified; a new array is returned.
    """
    batch_size = y_true.shape[0]
    target_index = np.argmax(y_true, axis=1)

    # Build the one-hot targets explicitly, then subtract them in one step.
    one_hot = np.zeros_like(y_pred)
    one_hot[np.arange(batch_size), target_index] = 1
    return (y_pred - one_hot) / batch_size

# ネットワーククラス
class FourLayerNet:
    """Fully connected classifier with three ReLU layers and a softmax output.

    ``params`` maps 'W1'..'W4' and 'b1'..'b4' to the weight matrices and bias
    rows of the four layers. ``cache`` holds the pre-activations 'Z1'..'Z4'
    and activations 'A1'..'A4' of the most recent ``forward`` call; it is what
    ``backward`` differentiates through.
    """

    _NUM_LAYERS = 4

    def __init__(self, input_size, hidden_size1, hidden_size2, hidden_size3, output_size):
        """Create the parameters: N(0, 1) * 0.01 weights and zero biases."""
        layer_sizes = [input_size, hidden_size1, hidden_size2, hidden_size3, output_size]
        self.params = {}
        for layer, (fan_in, fan_out) in enumerate(zip(layer_sizes, layer_sizes[1:]), start=1):
            self.params[f'W{layer}'] = np.random.randn(fan_in, fan_out) * 0.01
            self.params[f'b{layer}'] = np.zeros((1, fan_out))
        # Empty until forward() runs; backward() checks for this.
        self.cache = {}

    def _propagate(self, X):
        """Run the forward pass and return its Z/A values without storing them."""
        values = {}
        layer_input = X
        for layer in range(1, self._NUM_LAYERS + 1):
            z = np.dot(layer_input, self.params[f'W{layer}']) + self.params[f'b{layer}']
            # Only the last layer uses softmax; the hidden layers use ReLU.
            a = softmax(z) if layer == self._NUM_LAYERS else relu(z)
            values[f'Z{layer}'] = z
            values[f'A{layer}'] = a
            layer_input = a
        return values

    def forward(self, X):
        """Compute class probabilities for ``X`` and cache the intermediates.

        Args:
            X: Array of shape (m, input_size).

        Returns:
            The softmax output 'A4', one row of probabilities per sample.
        """
        self.cache = self._propagate(X)
        return self.cache[f'A{self._NUM_LAYERS}']

    def backward(self, X, y):
        """Back-propagate the cross-entropy loss through the cached pass.

        Args:
            X: The same batch that was given to the preceding ``forward``.
            y: Targets for that batch, one row per sample.

        Returns:
            Dict with keys 'dW1'..'dW4' and 'db1'..'db4', each shaped like the
            parameter it belongs to.

        Raises:
            RuntimeError: If ``forward`` has not been called yet.
        """
        if f'A{self._NUM_LAYERS}' not in self.cache:
            raise RuntimeError('forward() must be called before backward()')

        grads = {}
        # Gradient of the loss w.r.t. the pre-activation of the current layer.
        delta = cross_entropy_loss_derivative(y, self.cache[f'A{self._NUM_LAYERS}'])

        for layer in range(self._NUM_LAYERS, 0, -1):
            layer_input = X if layer == 1 else self.cache[f'A{layer - 1}']
            grads[f'dW{layer}'] = np.dot(layer_input.T, delta)
            grads[f'db{layer}'] = np.sum(delta, axis=0, keepdims=True)

            if layer > 1:
                # Push the gradient through this layer's weights, then through
                # the ReLU of the layer below.
                upstream = np.dot(delta, self.params[f'W{layer}'].T)
                delta = upstream * relu_derivative(self.cache[f'Z{layer - 1}'])

        return grads

    def update_params(self, grads, learning_rate):
        """Apply one in-place gradient-descent step to every parameter."""
        for key in self.params:
            self.params[key] -= learning_rate * grads['d' + key]

    def compute_loss(self, y_true, y_pred):
        """Return the mean cross-entropy between ``y_true`` and ``y_pred``."""
        return cross_entropy_loss(y_true, y_pred)

    def predict(self, X):
        """Return class probabilities for ``X``.

        Unlike ``forward`` this leaves ``cache`` untouched, so predicting
        between a ``forward`` and its ``backward`` cannot corrupt the gradients.
        """
        return self._propagate(X)[f'A{self._NUM_LAYERS}']

# データの前処理
def load_mnist_data():
    """Load MNIST through keras and prepare it for the network.

    Returns:
        Tuple ``(X_train, y_train, X_test, y_test)``. Each image set is
        flattened to one row per sample, cast to float32 and divided by 255;
        each label set is passed through ``to_categorical`` with 10 classes.
    """
    def flatten_and_scale(images):
        # Keep the sample axis and collapse every remaining axis into one.
        flat = images.reshape(images.shape[0], -1)
        return flat.astype(np.float32) / 255

    (train_images, train_labels), (test_images, test_labels) = mnist.load_data()

    return (
        flatten_and_scale(train_images),
        to_categorical(train_labels, 10),
        flatten_and_scale(test_images),
        to_categorical(test_labels, 10),
    )

# トレーニング関数
def train(X, y, input_size, hidden_size1, hidden_size2, hidden_size3, output_size, epochs, batch_size, learning_rate):
    """Train a new FourLayerNet with mini-batch gradient descent.

    Args:
        X: Training inputs, one row per sample.
        y: Training targets, row-aligned with ``X``.
        input_size, hidden_size1, hidden_size2, hidden_size3, output_size:
            Layer widths forwarded to ``FourLayerNet``.
        epochs: Number of full passes over the data.
        batch_size: Number of rows per mini-batch; the last batch of an epoch
            may be smaller.
        learning_rate: Step size passed to ``update_params``.

    Returns:
        The trained ``FourLayerNet``.
    """
    net = FourLayerNet(input_size, hidden_size1, hidden_size2, hidden_size3, output_size)
    num_samples = X.shape[0]

    for epoch in range(epochs):
        # Reshuffle every epoch so the batches differ between epochs.
        order = np.random.permutation(num_samples)
        X_shuffled = X[order]
        y_shuffled = y[order]

        loss_sum = 0.0
        for start in range(0, num_samples, batch_size):
            X_batch = X_shuffled[start:start + batch_size]
            y_batch = y_shuffled[start:start + batch_size]
            y_pred = net.forward(X_batch)
            # Weight by the batch size so a short final batch does not skew
            # the epoch average.
            loss_sum += net.compute_loss(y_batch, y_pred) * X_batch.shape[0]
            grads = net.backward(X_batch, y_batch)
            net.update_params(grads, learning_rate)

        # Report the mean over the whole epoch rather than the last batch only;
        # an empty data set yields NaN instead of an unbound variable.
        epoch_loss = loss_sum / num_samples if num_samples else float('nan')
        print(f'Epoch {epoch + 1}/{epochs} - Loss: {epoch_loss:.4f}')

    return net

# Load the prepared data set; the test split is kept for later evaluation.
X_train, y_train, X_test, y_test = load_mnist_data()

# Network architecture: the input width follows the flattened training rows.
input_size = X_train.shape[1]
hidden_size1, hidden_size2, hidden_size3 = 256, 128, 32
output_size = 10

# Optimisation settings.
epochs, batch_size, learning_rate = 100, 2000, 0.5

net = train(
    X_train,
    y_train,
    input_size=input_size,
    hidden_size1=hidden_size1,
    hidden_size2=hidden_size2,
    hidden_size3=hidden_size3,
    output_size=output_size,
    epochs=epochs,
    batch_size=batch_size,
    learning_rate=learning_rate,
)


## Predict on the test data and evaluate the results

In [None]:
import pandas as pd


def _safe_ratio(numerator, denominator):
    """Divide element-wise, returning 0.0 wherever the denominator is 0."""
    numerator = np.asarray(numerator, dtype=float)
    denominator = np.asarray(denominator, dtype=float)
    return np.divide(
        numerator,
        denominator,
        out=np.zeros_like(numerator),
        where=denominator != 0,
    )


# Class probabilities for every test sample.
y_pre = net.predict(X_test)

# True and predicted class index of each sample.
answers = np.argmax(y_test, axis=1)
results = np.argmax(y_pre, axis=1)
num_classes = y_test.shape[1]

# conf_matrix[t, p] counts the samples of true class t predicted as class p.
# np.add.at accumulates correctly even when an index pair repeats.
conf_matrix = np.zeros((num_classes, num_classes), dtype=int)
np.add.at(conf_matrix, (answers, results), 1)
miss_count = int(np.sum(answers != results))

# Per-class counts derived from the confusion matrix.
TP = np.diag(conf_matrix).copy()
FN = conf_matrix.sum(axis=1) - TP  # row total minus the hits
FP = conf_matrix.sum(axis=0) - TP  # column total minus the hits
TN = conf_matrix.sum() - TP - FN - FP
print(conf_matrix)

# One value per class; a class with no predictions or no samples scores 0
# instead of NaN.
Precision = _safe_ratio(TP, TP + FP)
Recall = _safe_ratio(TP, TP + FN)
F_value = _safe_ratio(2 * Recall * Precision, Recall + Precision)

df = pd.DataFrame({"Precision": Precision, "Recall": Recall, "F_value": F_value})
print("\n\n", df)

acc = (len(y_pre) - miss_count) / len(y_pre)

print(f"\nAccuracy: {acc}")

In [None]:
import numpy as np
from PIL import Image
import os

# Regular files in ./input, sorted so the output order is reproducible
# (os.scandir yields entries in arbitrary order).
with os.scandir('./input') as entries:
    items = sorted(entry.name for entry in entries if entry.is_file())

data = []
file_name = []

for name in items:
    # Skip the macOS Finder metadata file; it is not an image.
    if name == '.DS_Store':
        continue
    # The context manager closes the file handle as soon as the pixels have
    # been copied out as an 8-bit grayscale array.
    with Image.open(f'./input/{name}') as img:
        pixels = np.asarray(img.convert('L'))
    # The reshape below needs exactly 784 pixels per image; fail early and
    # name the offending file instead of raising an opaque reshape error.
    if pixels.size != 784:
        raise ValueError(
            f'{name}: expected 784 pixels, got {pixels.size} (shape {pixels.shape})'
        )
    data.append(pixels)
    file_name.append(name)

data = np.array(data)
# One flattened row per image, divided by 255.
x_pre = data.reshape(len(data), 784).astype('float32')
x_pre /= 255
y_pre = net.predict(x_pre)
print(y_pre)

# Print the most probable class for each file.
for name, probabilities in zip(file_name, y_pre):
    print(name, np.argmax(probabilities))