In [2]:
import numpy as np
from keras.api.datasets import mnist
from keras.api.utils import to_categorical

# 活性化関数とその導関数
def relu(x):
    """Rectified linear unit: element-wise maximum of ``x`` and zero."""
    return np.maximum(x, 0)

def relu_derivative(x):
    """Element-wise derivative of ReLU.

    Returns a float array that is 1.0 where ``x`` is strictly positive and
    0.0 everywhere else (including at exactly zero).
    """
    is_positive = x > 0
    return is_positive.astype(float)

def softmax(x):
    """Row-wise softmax of a 2-D array of scores.

    The maximum of each row is subtracted before exponentiating so that large
    scores do not overflow; this does not change the result.
    """
    row_max = np.max(x, axis=1, keepdims=True)
    exps = np.exp(x - row_max)
    normaliser = np.sum(exps, axis=1, keepdims=True)
    return exps / normaliser

def cross_entropy_loss(y_true, y_pred, eps=1e-12):
    """Mean categorical cross-entropy over a batch.

    Args:
        y_true: 2-D array of targets, one row per sample. The true class of
            each sample is taken as the argmax of its row (e.g. one-hot rows).
        y_pred: 2-D array of predicted class probabilities, same shape.
        eps: Lower bound applied to the selected probabilities before taking
            the logarithm, so a probability that underflowed to 0 yields a
            large finite loss instead of ``inf``.

    Returns:
        The average negative log-probability assigned to the true classes.
    """
    m = y_true.shape[0]
    true_class = np.argmax(y_true, axis=1)
    # Probability the model assigned to the correct class of each sample.
    picked = y_pred[np.arange(m), true_class]
    # Guard against log(0); values above 1 (rounding) are capped as well.
    picked = np.clip(picked, eps, 1.0)
    return -np.sum(np.log(picked)) / m

def cross_entropy_loss_derivative(y_true, y_pred):
    """Gradient of the mean cross-entropy with respect to the softmax inputs.

    Computes ``(y_pred - onehot(true_class)) / batch_size`` where the true
    class of each row is the argmax of the corresponding row of ``y_true``.
    ``y_pred`` itself is left unmodified.
    """
    batch_size = y_true.shape[0]
    true_class = np.argmax(y_true, axis=1)
    sample_idx = np.arange(batch_size)

    delta = y_pred.copy()
    # Subtract 1 from the probability of the correct class in every row.
    delta[sample_idx, true_class] -= 1
    return delta / batch_size

# ネットワーククラス
class FourLayerNet:
    """Fully connected network: three ReLU hidden layers and a softmax output.

    Parameters live in ``self.params`` under the keys ``W1..W4`` / ``b1..b4``.
    ``forward`` stores every pre-activation (``Z1..Z4``) and activation
    (``A1..A4``) in ``self.cache``; ``backward`` reads them back, so it must be
    called after ``forward`` on the same batch.
    """

    def __init__(self, input_size, hidden_size1, hidden_size2, hidden_size3, output_size, seed=None):
        """Create the weights (Gaussian, std 0.01) and zero biases.

        Args:
            input_size: Number of input features.
            hidden_size1, hidden_size2, hidden_size3: Widths of the hidden layers.
            output_size: Number of output classes.
            seed: Optional integer seed for a private random generator. When
                omitted the weights are drawn from NumPy's global random
                state, in the order W1, W2, W3, W4.
        """
        rng = np.random if seed is None else np.random.RandomState(seed)
        self.params = {
            'W1': rng.randn(input_size, hidden_size1) * 0.01,
            'b1': np.zeros((1, hidden_size1)),
            'W2': rng.randn(hidden_size1, hidden_size2) * 0.01,
            'b2': np.zeros((1, hidden_size2)),
            'W3': rng.randn(hidden_size2, hidden_size3) * 0.01,
            'b3': np.zeros((1, hidden_size3)),
            'W4': rng.randn(hidden_size3, output_size) * 0.01,
            'b4': np.zeros((1, output_size))
        }

    def forward(self, X):
        """Run a forward pass and return the softmax class probabilities.

        Args:
            X: Batch of inputs, one sample per row (``input_size`` columns).

        Returns:
            Array with one row of class probabilities per sample. The
            intermediate values are kept in ``self.cache`` for ``backward``.
        """
        self.cache = {}
        self.cache['Z1'] = np.dot(X, self.params['W1']) + self.params['b1']
        self.cache['A1'] = relu(self.cache['Z1'])
        self.cache['Z2'] = np.dot(self.cache['A1'], self.params['W2']) + self.params['b2']
        self.cache['A2'] = relu(self.cache['Z2'])
        self.cache['Z3'] = np.dot(self.cache['A2'], self.params['W3']) + self.params['b3']
        self.cache['A3'] = relu(self.cache['Z3'])
        self.cache['Z4'] = np.dot(self.cache['A3'], self.params['W4']) + self.params['b4']
        self.cache['A4'] = softmax(self.cache['Z4'])
        return self.cache['A4']

    def backward(self, X, y):
        """Back-propagate the cross-entropy loss through the cached forward pass.

        Args:
            X: The same input batch that was last given to ``forward``.
            y: Targets for that batch (true class = argmax of each row).

        Returns:
            Dict of gradients keyed ``dW1..dW4`` / ``db1..db4``, each shaped
            like the matching entry of ``self.params``.
        """
        grads = {}

        # Output layer: gradient w.r.t. the softmax inputs, already averaged
        # over the batch by cross_entropy_loss_derivative.
        dZ4 = cross_entropy_loss_derivative(y, self.cache['A4'])
        grads['dW4'] = np.dot(self.cache['A3'].T, dZ4)
        grads['db4'] = np.sum(dZ4, axis=0, keepdims=True)

        # Hidden layer 3.
        dA3 = np.dot(dZ4, self.params['W4'].T)
        dZ3 = dA3 * relu_derivative(self.cache['Z3'])
        grads['dW3'] = np.dot(self.cache['A2'].T, dZ3)
        grads['db3'] = np.sum(dZ3, axis=0, keepdims=True)

        # Hidden layer 2.
        dA2 = np.dot(dZ3, self.params['W3'].T)
        dZ2 = dA2 * relu_derivative(self.cache['Z2'])
        grads['dW2'] = np.dot(self.cache['A1'].T, dZ2)
        grads['db2'] = np.sum(dZ2, axis=0, keepdims=True)

        # Hidden layer 1 (its input is the raw batch X).
        dA1 = np.dot(dZ2, self.params['W2'].T)
        dZ1 = dA1 * relu_derivative(self.cache['Z1'])
        grads['dW1'] = np.dot(X.T, dZ1)
        grads['db1'] = np.sum(dZ1, axis=0, keepdims=True)

        return grads

    def update_params(self, grads, learning_rate):
        """Apply one gradient-descent step in place.

        Args:
            grads: Dict as returned by ``backward`` (key ``'d' + name`` for
                every parameter ``name``).
            learning_rate: Step size multiplying each gradient.
        """
        for key in self.params.keys():
            self.params[key] -= learning_rate * grads['d' + key]

    def compute_loss(self, y_true, y_pred):
        """Return the mean cross-entropy between targets and predictions."""
        return cross_entropy_loss(y_true, y_pred)

    def predict(self, X):
        """Return class probabilities for ``X``.

        This is a plain forward pass, so it also overwrites ``self.cache``.
        """
        return self.forward(X)

# データの前処理
def load_mnist_data():
    """Load MNIST via keras and prepare it for a dense network.

    Each image is flattened to one row, cast to float32 and divided by 255;
    labels are converted with ``to_categorical`` into 10 columns.

    Returns:
        Tuple ``(X_train, y_train, X_test, y_test)``.
    """
    (train_images, train_labels), (test_images, test_labels) = mnist.load_data()

    def _flatten_and_scale(images):
        # One row per image, then divide the float32 pixel values by 255.
        flat = images.reshape(images.shape[0], -1)
        return flat.astype(np.float32) / 255

    return (
        _flatten_and_scale(train_images),
        to_categorical(train_labels, 10),
        _flatten_and_scale(test_images),
        to_categorical(test_labels, 10),
    )

# トレーニング関数
def train(X, y, input_size, hidden_size1, hidden_size2, hidden_size3, output_size, epochs, learning_rate):
    """Train a ``FourLayerNet`` with full-batch gradient descent.

    Every epoch performs one forward pass over all of ``X``, one backward
    pass, and one parameter update, then prints the loss measured before
    that update.

    Args:
        X: Training inputs, one sample per row.
        y: Training targets, one row per sample.
        input_size, hidden_size1, hidden_size2, hidden_size3, output_size:
            Layer widths passed to ``FourLayerNet``.
        epochs: Number of update steps to run.
        learning_rate: Gradient-descent step size.

    Returns:
        The trained network.
    """
    layer_sizes = (input_size, hidden_size1, hidden_size2, hidden_size3, output_size)
    net = FourLayerNet(*layer_sizes)

    for epoch in range(epochs):
        # The forward pass fills the cache that backward() relies on.
        loss = net.compute_loss(y, net.forward(X))
        net.update_params(net.backward(X, y), learning_rate)

        print(f'Epoch {epoch}/{epochs} - Loss: {loss:.4f}')

    return net


# Seed NumPy's global generator so the random weight initialisation (and
# therefore the whole training run) is reproducible across kernel restarts.
SEED = 42
np.random.seed(SEED)

X_train, y_train, X_test, y_test = load_mnist_data()

# Network architecture: input width comes from the data, output is 10 classes.
input_size = X_train.shape[1]
hidden_size1 = 256
hidden_size2 = 128
hidden_size3 = 32
output_size = 10

# Optimisation settings: each epoch is a single full-batch gradient step.
epochs = 800
learning_rate = 1

net = train(X_train, y_train, input_size, hidden_size1, hidden_size2, hidden_size3, output_size, epochs, learning_rate)


Epoch 0/800 - Loss: 2.3026
Epoch 1/800 - Loss: 2.3023
Epoch 2/800 - Loss: 2.3021
Epoch 3/800 - Loss: 2.3019
Epoch 4/800 - Loss: 2.3018
Epoch 5/800 - Loss: 2.3016
Epoch 6/800 - Loss: 2.3015
Epoch 7/800 - Loss: 2.3015
Epoch 8/800 - Loss: 2.3014
Epoch 9/800 - Loss: 2.3014
Epoch 10/800 - Loss: 2.3013
Epoch 11/800 - Loss: 2.3013
Epoch 12/800 - Loss: 2.3013
Epoch 13/800 - Loss: 2.3012
Epoch 14/800 - Loss: 2.3012
Epoch 15/800 - Loss: 2.3012
Epoch 16/800 - Loss: 2.3012
Epoch 17/800 - Loss: 2.3012
Epoch 18/800 - Loss: 2.3012
Epoch 19/800 - Loss: 2.3012
Epoch 20/800 - Loss: 2.3012
Epoch 21/800 - Loss: 2.3012
Epoch 22/800 - Loss: 2.3012
Epoch 23/800 - Loss: 2.3012
Epoch 24/800 - Loss: 2.3012
Epoch 25/800 - Loss: 2.3012
Epoch 26/800 - Loss: 2.3012
Epoch 27/800 - Loss: 2.3012
Epoch 28/800 - Loss: 2.3012
Epoch 29/800 - Loss: 2.3011
Epoch 30/800 - Loss: 2.3011
Epoch 31/800 - Loss: 2.3011
Epoch 32/800 - Loss: 2.3011
Epoch 33/800 - Loss: 2.3011
Epoch 34/800 - Loss: 2.3011
Epoch 35/800 - Loss: 2.3011
Ep

## Predict on the test data

In [49]:
import pandas as pd


def _safe_ratio(numerator, denominator):
    """Element-wise ``numerator / denominator``, giving 0.0 where the denominator is 0."""
    numerator = np.asarray(numerator, dtype=float)
    denominator = np.asarray(denominator, dtype=float)
    return np.divide(
        numerator,
        denominator,
        out=np.zeros_like(numerator),
        where=denominator != 0,
    )


# Class probabilities for every test sample (one row per sample).
y_pre = net.predict(X_test)

# Class indices: ground truth and the most probable predicted class.
answers = np.argmax(y_test, axis=1)
results = np.argmax(y_pre, axis=1)
n_classes = y_test.shape[1]

# Confusion matrix: rows are true classes, columns are predicted classes.
conf_matrix = np.zeros((n_classes, n_classes), dtype=int)
np.add.at(conf_matrix, (answers, results), 1)
miss_count = int(np.count_nonzero(answers != results))

# Per-class counts derived from the confusion matrix.
TP = np.diag(conf_matrix).copy()
FN = conf_matrix.sum(axis=1) - TP   # true class i, predicted as something else
FP = conf_matrix.sum(axis=0) - TP   # predicted class i, actually something else
TN = conf_matrix.sum() - TP - FN - FP
print(conf_matrix)

# A class that is never predicted (or never present) gets 0.0 instead of NaN.
Precision = _safe_ratio(TP, TP + FP)
Recall = _safe_ratio(TP, TP + FN)
F_value = _safe_ratio(2 * Recall * Precision, Recall + Precision)

df = pd.DataFrame({"Precision": Precision, "Recall": Recall, "F_value": F_value})
print("\n\n", df)

acc = (len(y_pre) - miss_count) / len(y_pre)

print(f"\nAccuracy: {acc}")

[[ 954    0   11    1    0    5    8    1    0    0]
 [   0 1107   11    2    0    0    4    5    2    4]
 [  64    0  861    9    1    4   67   15    4    7]
 [  25    1   58  829    0   60    4   12    6   15]
 [   2    0   34    0  802    1   23    5    2  113]
 [  25    1   11   15    0  800   14    0   10   16]
 [  27    3   28    0    4    9  886    1    0    0]
 [   4   10   33    0    3    0    0  959    0   19]
 [  12    0   85   11    8   61   26    8  706   57]
 [   5    6   15    4   10   12    1   46    0  910]]


    Precision    Recall   F_value
0   0.853309  0.973469  0.909438
1   0.981383  0.975330  0.978347
2   0.750654  0.834302  0.790271
3   0.951780  0.820792  0.881446
4   0.968599  0.816701  0.886188
5   0.840336  0.896861  0.867679
6   0.857696  0.924843  0.890005
7   0.911597  0.932879  0.922115
8   0.967123  0.724846  0.828638
9   0.797546  0.901883  0.846512

Accuracy: 0.8814


ValueError: Classification metrics can't handle a mix of multilabel-indicator and continuous-multioutput targets