### 5.4 単純なレイヤの実装

In [32]:
class MulLayer:
    """Multiplication node of a computational graph.

    The forward pass remembers both operands because the backward pass
    needs each of them to form the gradient of the other one.
    """

    def __init__(self):
        # Operands of the most recent forward call (None until forward runs).
        self.x = self.y = None

    def forward(self, x, y):
        """Cache both inputs and return their product ``x * y``."""
        self.x, self.y = x, y
        return x * y

    def backward(self, dout):
        """Return ``(dx, dy)`` for the upstream gradient ``dout``.

        d(x*y)/dx = y and d(x*y)/dy = x, so each gradient is the upstream
        value multiplied by the *other* cached operand.
        """
        grad_x = dout * self.y
        grad_y = dout * self.x
        return grad_x, grad_y


# Problem setup: apple unit price, number of apples, tax multiplier.
apple, apple_num, tax = 100, 2, 1.1

# One multiplication layer per node of the graph.
mul_apple_layer, mul_tax_layer = MulLayer(), MulLayer()

# Forward pass: (apple * apple_num) * tax
apple_price = mul_apple_layer.forward(apple, apple_num)
price = mul_tax_layer.forward(apple_price, tax)
print(price)

# Backward pass: seed with d(price)/d(price) = 1 and visit the layers in
# the reverse of the forward order.
dprice = 1
dapple_price, dtax = mul_tax_layer.backward(dprice)
print(dapple_price, dtax)
dapple, dapple_num = mul_apple_layer.backward(dapple_price)
print(dapple, dapple_num, dtax)

220.00000000000003
1.1 200
2.2 110.00000000000001 200


In [33]:
class AddLayer:
    """Addition node of a computational graph; it keeps no state."""

    def __init__(self):
        # Nothing needs to be cached: the gradient of a sum does not depend
        # on the operand values.
        pass

    def forward(self, x, y):
        """Return the sum ``x + y``."""
        return x + y

    def backward(self, dout):
        """Return ``(dx, dy)``; both are the upstream gradient times 1.

        d(x+y)/dx = d(x+y)/dy = 1, so ``dout`` flows to both inputs unchanged
        in value.
        """
        return dout * 1, dout * 1


# Problem setup: unit prices, quantities, and the tax multiplier.
apple, apple_num = 100, 2
orange, orange_num = 150, 3
tax = 1.1

# One layer object per node of the graph.
mul_apple_layer = MulLayer()
mul_orange_layer = MulLayer()
add_apple_orange_layer = AddLayer()
mul_tax_layer = MulLayer()

# Forward pass: (apple * apple_num + orange * orange_num) * tax
apple_price = mul_apple_layer.forward(apple, apple_num)
orange_price = mul_orange_layer.forward(orange, orange_num)
all_price = add_apple_orange_layer.forward(apple_price, orange_price)
price = mul_tax_layer.forward(all_price, tax)
print(price)

# Backward pass: seed with 1 and walk the graph from the output back to
# the inputs.
dprice = 1
dall_price, dtax = mul_tax_layer.backward(dprice)
dapple_price, dorange_price = add_apple_orange_layer.backward(dall_price)
dorange, dorange_num = mul_orange_layer.backward(dorange_price)
dapple, dapple_num = mul_apple_layer.backward(dapple_price)

print(dapple_num, dapple, dorange, dorange_num, dtax)

715.0000000000001
110.00000000000001 2.2 3.3000000000000003 165.0 650


### 5.5 活性化関数レイヤの実装
ReLuレイヤ

In [34]:
from re import M
import numpy as np


class ReLu:
    """ReLU activation layer: keeps positive inputs and zeroes the rest."""

    def __init__(self):
        # Boolean NumPy array recorded by forward(): True where the input
        # element was <= 0, False elsewhere. None until forward() runs.
        self.mask = None

    def forward(self, x):
        """Return a copy of ``x`` with every element <= 0 replaced by 0.

        The <= 0 mask is stored on the instance so that backward() can block
        the gradient at the same positions.
        """
        self.mask = x <= 0
        out = x.copy()
        out[self.mask] = 0
        return out

    def backward(self, dout):
        """Return ``dout`` with the gradient zeroed where the input was <= 0.

        The masking is applied to a copy, so the caller's ``dout`` array is
        left untouched (previously it was overwritten in place and the very
        same array object was returned).
        """
        dx = dout.copy()
        dx[self.mask] = 0
        return dx


# Step-by-step illustration of the masking used for ReLU.
x = np.array([[1.0, -0.5], [-2.0, 3.0]])
print(x)

mask = x <= 0  # True where the element is zero or negative
print(mask)

# Take 0 at the masked positions and the original value everywhere else.
out = np.where(mask, 0, x)
print(out)

[[ 1.  -0.5]
 [-2.   3. ]]
[[False  True]
 [ True False]]
[[1. 0.]
 [0. 3.]]


Sigmoidレイヤ

In [35]:
import numpy as np


class Sigmoid:
    """Logistic sigmoid activation layer."""

    def __init__(self):
        # Output of the latest forward pass; backward() reuses it.
        self.out = None

    def forward(self, x):
        """Compute ``1 / (1 + exp(-x))``, cache the result and return it."""
        self.out = 1 / (1 + np.exp(-x))
        return self.out

    def backward(self, dout):
        """Return ``dout * (1 - y) * y`` where ``y`` is the cached output.

        The sigmoid derivative can be written purely in terms of its own
        output, which is why only ``self.out`` has to be kept.
        """
        y = self.out
        return dout * (1 - y) * y

### 5.6.1 Affineレイヤ
ニューラルネットワークの順伝播で行う行列積

In [36]:
import numpy as np

# Batched Affine layer: adding B to the 2-D array X_dot_W applies the same
# bias vector to every row.
X_dot_W = np.array([[0, 0, 0], [10, 10, 10]])
B = np.array([1, 2, 3])
print(X_dot_W)
print(X_dot_W + B)

# Upstream gradient with one row per sample.
dY = np.array([[1, 2, 3], [4, 5, 6]])
print(dY)

# Bias gradient: sum dY down each column (over axis 0).
dB = dY.sum(axis=0)
print(dB)

[[ 0  0  0]
 [10 10 10]]
[[ 1  2  3]
 [11 12 13]]
[[1 2 3]
 [4 5 6]]
[5 7 9]


In [37]:
import numpy as np


class Affine:
    """Affine (fully connected) layer computing ``np.dot(x, W) + b``."""

    def __init__(self, W, b):
        """Store the weight ``W`` and bias ``b`` (kept by reference, not copied)."""
        self.W = W
        self.b = b
        # Input of the latest forward pass; needed to compute dW.
        self.x = None
        # Parameter gradients filled in by backward(). The bias gradient is
        # named ``db`` to match what backward() writes; it used to be
        # initialised under the never-used name ``dB``, which left ``db``
        # undefined until the first backward call.
        self.dW = None
        self.db = None

    def forward(self, x):
        """Cache ``x`` and return ``np.dot(x, W) + b``.

        Assumes ``x`` is 2-D with one sample per row -- TODO confirm against
        callers.
        """
        self.x = x
        return np.dot(x, self.W) + self.b

    def backward(self, dout):
        """Store ``dW`` and ``db`` on the instance and return ``dx``.

        dx = dout . W^T, dW = x^T . dout, and db is ``dout`` summed over
        axis 0.
        """
        dx = np.dot(dout, self.W.T)
        self.dW = np.dot(self.x.T, dout)
        self.db = np.sum(dout, axis=0)
        return dx

### Softmax-with-Loss レイヤ

In [38]:
import numpy as np
from common.functions import softmax, cross_entropy_error


class SoftmaxWithLoss:
    """Final layer that applies ``softmax`` and then ``cross_entropy_error``.

    Both helpers are imported from ``common.functions``; their exact
    behaviour is not visible here.
    """

    def __init__(self):
        self.loss = None  # loss value returned by the latest forward()
        # Output of softmax. This was initialised under the misspelled name
        # ``yy``, so ``self.y`` did not exist until forward() had run.
        self.y = None
        self.t = None  # target data (one-hot, per the original author's note)

    def forward(self, x, t):
        """Cache ``t``, ``softmax(x)`` and the resulting loss; return the loss."""
        self.t = t
        self.y = softmax(x)
        self.loss = cross_entropy_error(self.y, self.t)
        return self.loss

    def backward(self, dout=1):
        """Return ``(y - t) / batch_size``.

        ``batch_size`` is ``t.shape[0]``, so the difference is averaged over
        the rows of the batch. NOTE(review): ``dout`` is accepted but not used
        in the computation -- confirm that every caller passes 1.
        """
        batch_size = self.t.shape[0]
        dx = (self.y - self.t) / batch_size  # error per row of the batch
        return dx

### 5.7 誤差逆伝播法の実装

In [39]:
from common.functions import *
from common.gradient import numerical_gradient
from collections import OrderedDict


class TwoLayerNet:
    """Two-layer fully connected network assembled from layer objects.

    Layers run in the order Affine1 -> ReLu1 -> Affine2; a separate
    SoftmaxWithLoss layer turns the final scores into a loss.
    """

    def __init__(self, input_size, hidden_size, output_size, weight_init_std=0.01):
        """Create the parameters and the layers that use them.

        Args:
            input_size: number of neurons in the input layer.
            hidden_size: number of neurons in the hidden layer.
            output_size: number of neurons in the output layer.
            weight_init_std: factor applied to the ``np.random.randn`` samples
                used as initial weights.
        """
        # Weights are scaled random samples; biases start at zero.
        first_weight = weight_init_std * np.random.randn(input_size, hidden_size)
        second_weight = weight_init_std * np.random.randn(hidden_size, output_size)
        self.params = {
            "W1": first_weight,  # layer-1 weight
            "b1": np.zeros(hidden_size),  # layer-1 bias
            "W2": second_weight,  # layer-2 weight
            "b2": np.zeros(output_size),  # layer-2 bias
        }

        # Ordered so the forward pass can simply iterate and the backward
        # pass can iterate in reverse. The Affine layers receive the very
        # same array objects that are stored in self.params.
        self.layers = OrderedDict(
            [
                ("Affine1", Affine(self.params["W1"], self.params["b1"])),
                ("ReLu1", ReLu()),
                ("Affine2", Affine(self.params["W2"], self.params["b2"])),
            ]
        )
        self.lastLayer = SoftmaxWithLoss()

    def predict(self, x):
        """Feed ``x`` (image data) through every layer in order; return the result."""
        out = x
        for layer in self.layers.values():
            out = layer.forward(out)
        return out

    def loss(self, x, t):
        """Return the loss of the last layer for inputs ``x`` and labels ``t``."""
        return self.lastLayer.forward(self.predict(x), t)

    def accuracy(self, x, t):
        """Return the fraction of rows whose arg-max prediction equals the label.

        ``t`` may be 1-D (used as-is) or have more dimensions, in which case it
        is reduced with arg-max over axis 1.
        """
        predicted = np.argmax(self.predict(x), axis=1)
        labels = t if t.ndim == 1 else np.argmax(t, axis=1)
        return np.sum(predicted == labels) / float(x.shape[0])

    def numerical_gradient(self, x, t):
        """Return gradients for all four parameters via the external helper.

        The loss callable ignores its argument and always evaluates
        ``self.loss(x, t)``. NOTE(review): this presumably relies on the
        imported ``numerical_gradient`` perturbing the passed parameter array
        in place -- verify against common.gradient.
        """

        def loss_W(_):
            return self.loss(x, t)

        return {
            name: numerical_gradient(loss_W, self.params[name])
            for name in ("W1", "b1", "W2", "b2")
        }

    def gradient(self, x, t):
        """Return gradients for all four parameters via backpropagation."""
        # Forward pass: lets every layer cache what it needs.
        self.loss(x, t)

        # Backward pass: start at the loss layer with 1, then go through the
        # remaining layers last-to-first.
        dout = self.lastLayer.backward(1)
        for layer in reversed(list(self.layers.values())):
            dout = layer.backward(dout)

        # Collect the gradients the Affine layers stored on themselves.
        affine1, affine2 = self.layers["Affine1"], self.layers["Affine2"]
        return {
            "W1": affine1.dW,
            "b1": affine1.db,
            "W2": affine2.dW,
            "b2": affine2.db,
        }

### 5.7.3 勾配確認
誤差逆伝播法と数値微分が一致するか

In [41]:
import numpy as np
from dataset.mnist import load_mnist

# Load the data (called with normalize=True and one_hot_label=True).
(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, one_hot_label=True)

network = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)

# Use only the first three training samples for the comparison.
x_batch, t_batch = x_train[:3], t_train[:3]

grad_numerical = network.numerical_gradient(x_batch, t_batch)
grad_backprop = network.gradient(x_batch, t_batch)

# Per parameter, report the mean absolute difference between the two
# gradients.
for key, numeric in grad_numerical.items():
    diff = np.average(np.abs(grad_backprop[key] - numeric))
    print(key + ":" + str(diff))

W1:4.937993158352796e-10
b1:2.9125316883081695e-09
W2:6.118761356933307e-09
b2:1.391683630280749e-07


### 5.7.4 誤差逆伝播法による学習

In [42]:
import numpy as np
from dataset.mnist import load_mnist

# Load the data (called with normalize=True and one_hot_label=True).
(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, one_hot_label=True)

network = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)

iters_num = 10000
train_size = x_train.shape[0]
batch_size = 100
learning_rate = 0.1

train_loss_list = []
train_acc_list = []
test_acc_list = []

# Iterations per epoch. Floor division keeps this an int: with true division
# the value is a float, and `i % iter_per_epoch == 0` would only hold beyond
# i = 0 when train_size happens to be an exact multiple of batch_size.
iter_per_epoch = max(train_size // batch_size, 1)

for i in range(iters_num):
    # Draw batch_size random indices out of range(train_size).
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]

    # Gradients via backpropagation.
    grad = network.gradient(x_batch, t_batch)

    # Gradient-descent step. The update is in place (-=) on the arrays held
    # in network.params.
    for key in ("W1", "b1", "W2", "b2"):
        network.params[key] -= learning_rate * grad[key]

    # Record the loss on the same mini-batch after the update.
    loss = network.loss(x_batch, t_batch)
    train_loss_list.append(loss)

    # Once per epoch, measure accuracy on the full training and test sets.
    if i % iter_per_epoch == 0:
        train_acc = network.accuracy(x_train, t_train)
        test_acc = network.accuracy(x_test, t_test)
        train_acc_list.append(train_acc)
        test_acc_list.append(test_acc)
        print(train_acc, test_acc)

0.10705 0.1047
0.9018833333333334 0.9075
0.92285 0.9252
0.9377333333333333 0.9364
0.9462833333333334 0.9453
0.9518833333333333 0.951
0.9587 0.9557
0.9617833333333333 0.9587
0.96425 0.9589
0.9685666666666667 0.9629
0.97 0.9653
0.9723166666666667 0.9669
0.9735166666666667 0.9682
0.9741666666666666 0.9673
0.9768166666666667 0.9682
0.9779166666666667 0.9696
0.9780666666666666 0.9692
