In [6]:
import numpy as np
import pprint
import keras
from keras.api.datasets import mnist

In [2]:
# ニューラルネットワークの構造
input_size = 784    # 入力層のニューロン数
hidden_size_1 = 256   # 隠れ層のニューロン数
hidden_size_2 = 128   # 隠れ層のニューロン数
hidden_size_3 = 32   # 隠れ層のニューロン数
output_size = 10   # 出力層のニューロン数

# 学習率
learning_rate = 0.01

In [3]:
# 重みの初期化
np.random.seed(0) # 再現性のためのシード値設定
W1 = np.random.randn(input_size, hidden_size_1)  # 入力層から隠れ層1への重み
b1 = np.zeros((1, hidden_size_1))                # 隠れ層1のバイアス
W2 = np.random.randn(hidden_size_1, hidden_size_2) # 隠れ層1から隠れ層2への重み
b2 = np.zeros((1, hidden_size_2))                # 隠れ層2のバイアス
W3 = np.random.randn(hidden_size_2, hidden_size_3) # 隠れ層2から出力層への重み
b3 = np.zeros((1, hidden_size_3))                # 出力層のバイアス
W4 = np.random.randn(hidden_size_3, output_size) # 隠れ層2から出力層への重み
b4 = np.zeros((1, output_size))                # 出力層のバイアス

pprint.pprint(W1)
pprint.pprint(b1)
pprint.pprint(W2)
pprint.pprint(b2)
pprint.pprint(W3)
pprint.pprint(b3)
pprint.pprint(W4)
pprint.pprint(b4)

array([[ 1.76405235,  0.40015721,  0.97873798, ..., -0.31932842,
         0.69153875,  0.69474914],
       [-0.72559738, -1.38336396, -1.5829384 , ...,  1.30142807,
         0.89526027,  1.37496407],
       [-1.33221165, -1.96862469, -0.66005632, ..., -1.10290621,
        -0.10169727,  0.01927938],
       ...,
       [ 1.86140699, -0.78053759,  0.03168013, ..., -1.06306082,
         0.43608227, -1.17683911],
       [ 1.39020734, -0.51657398, -0.13652254, ...,  0.87964912,
         0.80342249, -0.29504386],
       [ 1.57119338, -0.15435302,  0.08733296, ..., -0.26611561,
         2.52287972,  0.73131543]])
array([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0.,

In [4]:
# シグモイド関数
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

# シグモイド関数の導関数
def sigmoid_derivative(x):
    return x * (1 - x)

# relu関数
def relu(x):
    return np.maximum(0, x)

# relu関数の導関数
def relu_derivative(x):
    return np.where(x > 0, 1, 0)

def softmax(u):
    e = np.exp(u)
    return e / np.sum(e)


In [5]:
def forward_propagation(X):
    global W1, b1, W2, b2, W3, b3, W4, b4
    # 隠れ層1への入力
    z1 = np.dot(X, W1) + b1
    a1 = relu(z1)

    # 隠れ層2への入力
    z2 = np.dot(a1, W2) + b2
    a2 = relu(z2)

    # 隠れ層3への入力
    z3 = np.dot(a2, W3) + b3
    a3 = relu(z3)

    # 出力層への入力
    z4 = np.dot(a3, W4) + b4
    a4 = softmax(z4)
    
    return a1, a2, a3, a4

In [6]:
def backward_propagation(X, y, a1, a2, a3, a4):
    global W1, b1, W2, b2, W3, b3, W4, b4

    # 出力層の誤差
    output_error = y - a4
    output_delta = output_error * relu_derivative(a4)

    # 隠れ層の誤差
    hidden_3_error = np.dot(output_delta, W4.T)
    hidden_3_delta = hidden_3_error * relu_derivative(a3)

    # 隠れ層の誤差
    hidden_2_error = np.dot(hidden_3_delta, W3.T)
    hidden_2_delta = hidden_2_error * relu_derivative(a2)

    # 隠れ層の誤差
    hidden_1_error = np.dot(hidden_2_delta, W2.T)
    hidden_1_delta = hidden_1_error * relu_derivative(a1)

    # 重みとバイアスの更新
    W4 += learning_rate * np.dot(a3.T, output_delta)
    b4 += learning_rate * np.sum(output_delta, axis=0, keepdims=True)
    W3 += learning_rate * np.dot(a2.T, hidden_3_delta)
    b3 += learning_rate * np.sum(hidden_3_delta, axis=0, keepdims=True)
    W2 += learning_rate * np.dot(a1.T, hidden_2_delta)
    b2 += learning_rate * np.sum(hidden_2_delta, axis=0, keepdims=True)
    W1 += learning_rate * np.dot(X.T, hidden_1_delta)
    b1 += learning_rate * np.sum(hidden_1_delta, axis=0, keepdims=True)

In [7]:
def train(X, y, iterations):
    for i in range(iterations):
        # フォワードプロパゲーション
        a1, a2, a3, a4 = forward_propagation(X)
        
        # バックプロパゲーション
        backward_propagation(X, y, a1, a2, a3, a4)
        
        if iterations <= 300:
            loss = np.mean(np.square(y - a4))
            print(f'Iteration {i+1}, Loss: {loss}')
            continue
            
        if (i+1) % 1000 == 0:
            loss = np.mean(np.square(y - a4))
            print(f'Iteration {i+1}, Loss: {loss}')

In [7]:
# トレーニングデータの例
X = np.array([[0, 0, 1],
              [0, 1, 1],
              [1, 0, 1],
              [1, 1, 1]])

y = np.array([[0, 1],
              [1, 0],
              [1, 0],
              [0, 1]])

# データ抽出
(x_train, y_train), (x_test, y_test) = mnist.load_data()
num_classes = 10
x_train = x_train.reshape(60000, 784)
x_test = x_test.reshape(10000, 784)
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_train /= 255
x_test /= 255
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')

# one-hot-label に変換
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

# 学習を実行
train(x_train, y_train, 300)

60000 train samples
10000 test samples


TypeError: train() missing 5 required positional arguments: 'hidden_size1', 'hidden_size2', 'output_size', 'epochs', and 'learning_rate'

In [None]:
def predict(X):
    # 隠れ層1への入力
    z1 = np.dot(X, W1) + b1
    a1 = relu(z1)

    # 隠れ層2への入力
    z2 = np.dot(a1, W2) + b2
    a2 = relu(z2)

    # 隠れ層3への入力
    z3 = np.dot(a2, W3) + b3
    a3 = relu(z3)

    # 出力層への入力
    z4 = np.dot(a3, W4) + b4
    a4 = softmax(z4)
    
    return a4
    
    
y_pre = predict(x_train)

for i in range(20):
    print(np.argmax(y_pre[i]), np.argmax(y_train[i]))

0 5
0 0
0 4
0 1
0 9
0 2
0 1
0 3
0 1
0 4
0 3
0 5
0 3
0 6
0 1
0 7
0 2
0 8
0 6
0 9


In [20]:
import numpy as np
from tensorflow.keras.datasets import mnist
from tensorflow.keras.utils import to_categorical

# 活性化関数とその導関数
def relu(x):
    return np.maximum(0, x)

def relu_derivative(x):
    return (x > 0).astype(float)

def softmax(x):
    exp_x = np.exp(x - np.max(x, axis=1, keepdims=True))
    return exp_x / np.sum(exp_x, axis=1, keepdims=True)

def cross_entropy_loss(y_true, y_pred):
    m = y_true.shape[0]
    log_likelihood = -np.log(y_pred[range(m), np.argmax(y_true, axis=1)])
    loss = np.sum(log_likelihood) / m
    return loss

def cross_entropy_loss_derivative(y_true, y_pred):
    m = y_true.shape[0]
    grad = y_pred.copy()
    grad[range(m), np.argmax(y_true, axis=1)] -= 1
    grad = grad / m
    return grad

# ネットワーククラス
class FourLayerNet:
    def __init__(self, input_size, hidden_size1, hidden_size2, hidden_size3, output_size):
        self.params = {
            'W1': np.random.randn(input_size, hidden_size1) * 0.01,
            'b1': np.zeros((1, hidden_size1)),
            'W2': np.random.randn(hidden_size1, hidden_size2) * 0.01,
            'b2': np.zeros((1, hidden_size2)),
            'W3': np.random.randn(hidden_size2, hidden_size3) * 0.01,
            'b3': np.zeros((1, hidden_size3)),
            'W4': np.random.randn(hidden_size3, output_size) * 0.01,
            'b4': np.zeros((1, output_size))
        }

    def forward(self, X):
        self.cache = {}
        self.cache['Z1'] = np.dot(X, self.params['W1']) + self.params['b1']
        self.cache['A1'] = relu(self.cache['Z1'])
        self.cache['Z2'] = np.dot(self.cache['A1'], self.params['W2']) + self.params['b2']
        self.cache['A2'] = relu(self.cache['Z2'])
        self.cache['Z3'] = np.dot(self.cache['A2'], self.params['W3']) + self.params['b3']
        self.cache['A3'] = relu(self.cache['Z3'])
        self.cache['Z4'] = np.dot(self.cache['A3'], self.params['W4']) + self.params['b4']
        self.cache['A4'] = softmax(self.cache['Z4'])
        return self.cache['A4']

    def backward(self, X, y):
        m = X.shape[0]
        grads = {}

        dZ4 = cross_entropy_loss_derivative(y, self.cache['A4'])
        grads['dW4'] = np.dot(self.cache['A3'].T, dZ4)
        grads['db4'] = np.sum(dZ4, axis=0, keepdims=True)

        dA3 = np.dot(dZ4, self.params['W4'].T)
        dZ3 = dA3 * relu_derivative(self.cache['Z3'])
        grads['dW3'] = np.dot(self.cache['A2'].T, dZ3)
        grads['db3'] = np.sum(dZ3, axis=0, keepdims=True)

        dA2 = np.dot(dZ3, self.params['W3'].T)
        dZ2 = dA2 * relu_derivative(self.cache['Z2'])
        grads['dW2'] = np.dot(self.cache['A1'].T, dZ2)
        grads['db2'] = np.sum(dZ2, axis=0, keepdims=True)

        dA1 = np.dot(dZ2, self.params['W2'].T)
        dZ1 = dA1 * relu_derivative(self.cache['Z1'])
        grads['dW1'] = np.dot(X.T, dZ1)
        grads['db1'] = np.sum(dZ1, axis=0, keepdims=True)

        return grads

    def update_params(self, grads, learning_rate):
        for key in self.params.keys():
            self.params[key] -= learning_rate * grads['d' + key]

    def compute_loss(self, y_true, y_pred):
        return cross_entropy_loss(y_true, y_pred)
    
    def predict(self, X):
        return self.forward(X)

# データの前処理
def load_mnist_data():
    (X_train, y_train), (X_test, y_test) = mnist.load_data()
    X_train = X_train.reshape(X_train.shape[0], -1).astype(np.float32) / 255
    X_test = X_test.reshape(X_test.shape[0], -1).astype(np.float32) / 255
    y_train = to_categorical(y_train, 10)
    y_test = to_categorical(y_test, 10)
    return X_train, y_train, X_test, y_test

# トレーニング関数
def train(X, y, input_size, hidden_size1, hidden_size2, hidden_size3, output_size, epochs, learning_rate):
    net = FourLayerNet(input_size, hidden_size1, hidden_size2, hidden_size3, output_size)
    
    for epoch in range(epochs):
        y_pred = net.forward(X)
        loss = net.compute_loss(y, y_pred)
        grads = net.backward(X, y)
        net.update_params(grads, learning_rate)
        
        print(f'Epoch {epoch}/{epochs} - Loss: {loss:.4f}')

    return net


X_train, y_train, X_test, y_test = load_mnist_data()
input_size = X_train.shape[1]
hidden_size1 = 256
hidden_size2 = 128
hidden_size3 = 32
output_size = 10
epochs = 800
learning_rate = 1

net = train(X_train, y_train, input_size, hidden_size1, hidden_size2, hidden_size3, output_size, epochs, learning_rate)


Epoch 0/800 - Loss: 2.3026
Epoch 1/800 - Loss: 2.3023
Epoch 2/800 - Loss: 2.3021
Epoch 3/800 - Loss: 2.3019
Epoch 4/800 - Loss: 2.3018
Epoch 5/800 - Loss: 2.3016
Epoch 6/800 - Loss: 2.3015
Epoch 7/800 - Loss: 2.3015
Epoch 8/800 - Loss: 2.3014
Epoch 9/800 - Loss: 2.3014
Epoch 10/800 - Loss: 2.3013
Epoch 11/800 - Loss: 2.3013
Epoch 12/800 - Loss: 2.3013
Epoch 13/800 - Loss: 2.3012
Epoch 14/800 - Loss: 2.3012
Epoch 15/800 - Loss: 2.3012
Epoch 16/800 - Loss: 2.3012
Epoch 17/800 - Loss: 2.3012
Epoch 18/800 - Loss: 2.3012
Epoch 19/800 - Loss: 2.3012
Epoch 20/800 - Loss: 2.3012
Epoch 21/800 - Loss: 2.3012
Epoch 22/800 - Loss: 2.3012
Epoch 23/800 - Loss: 2.3012
Epoch 24/800 - Loss: 2.3012
Epoch 25/800 - Loss: 2.3012
Epoch 26/800 - Loss: 2.3011
Epoch 27/800 - Loss: 2.3011
Epoch 28/800 - Loss: 2.3011
Epoch 29/800 - Loss: 2.3011
Epoch 30/800 - Loss: 2.3011
Epoch 31/800 - Loss: 2.3011
Epoch 32/800 - Loss: 2.3011
Epoch 33/800 - Loss: 2.3011
Epoch 34/800 - Loss: 2.3011
Epoch 35/800 - Loss: 2.3011
Ep

In [26]:
y_pre = net.predict(x_test)

miss_count = 0

for i in range(len(y_pre)):
    answer = np.argmax(y_test[i])
    result = np.argmax(y_pre[i])
    if answer != result:
        miss_count = miss_count + 1
    print(answer, result, "x" if answer != result else "")

print(f'{miss_count} / {len(y_pre)}')

7 7 
2 2 
1 1 
0 0 
4 4 
1 1 
4 4 
9 9 
5 4 x
9 9 
0 0 
6 6 
9 9 
0 0 
1 1 
5 5 
9 9 
7 7 
3 3 
4 4 
9 9 
6 6 
6 6 
5 5 
4 4 
0 0 
7 7 
4 4 
0 0 
1 1 
3 3 
1 1 
3 3 
4 6 x
7 7 
2 2 
7 7 
1 1 
2 3 x
1 1 
1 1 
7 7 
4 4 
2 2 
3 3 
5 3 x
1 1 
2 2 
4 4 
4 4 
6 6 
3 3 
5 5 
5 5 
6 6 
0 0 
4 4 
1 1 
9 9 
5 5 
7 7 
8 8 
9 4 x
3 3 
7 7 
4 4 
6 4 x
4 4 
3 3 
0 0 
7 7 
0 0 
2 2 
9 9 
1 1 
7 7 
3 3 
2 7 x
9 9 
7 7 
7 9 x
6 6 
2 2 
7 7 
8 8 
4 4 
7 7 
3 3 
6 6 
1 1 
3 3 
6 6 
9 9 
3 3 
1 1 
4 4 
1 1 
7 9 x
6 6 
9 9 
6 6 
0 0 
5 5 
4 4 
9 9 
9 9 
2 2 
1 1 
9 9 
4 4 
8 8 
7 7 
3 3 
9 9 
7 7 
4 4 
4 4 
4 4 
9 9 
2 9 x
5 3 x
4 4 
7 7 
6 6 
7 4 x
9 9 
0 0 
5 5 
8 8 
5 5 
6 6 
6 6 
5 5 
7 7 
8 8 
1 1 
0 0 
1 1 
6 6 
4 4 
6 6 
7 7 
3 3 
1 1 
7 7 
1 1 
8 8 
2 2 
0 0 
2 9 x
9 9 
9 8 x
5 5 
5 5 
1 1 
5 5 
6 6 
0 0 
3 3 
4 4 
4 4 
6 6 
5 5 
4 4 
6 6 
5 3 x
4 4 
5 5 
1 1 
4 4 
4 4 
7 7 
2 3 x
3 3 
2 2 
7 9 x
1 1 
8 8 
1 1 
8 8 
1 1 
8 8 
5 5 
0 0 
8 8 
9 4 x
2 2 
5 3 x
0 0 
1 1 
1 1 
1 1 
0 0 
9 9 
0 0 
3 3 
1