# 5章 誤差逆伝播法
## 5.4 単純なレイヤの実装
### 5.4.1 乗算レイヤの実装

In [1]:
class MulLayer:
  """Multiplication node of a computational graph.

  The forward pass remembers both operands because the backward pass
  needs each of them to compute the gradient of the other.
  """

  def __init__(self):
    # Operands of the most recent forward pass; None until forward() runs.
    self.x = None
    self.y = None

  def forward(self, x, y):
    """Cache ``x`` and ``y`` and return their product."""
    self.x, self.y = x, y
    return x * y

  def backward(self, dout):
    """Return ``(dx, dy)`` for the upstream gradient ``dout``.

    The gradient of each input is ``dout`` times the *other* cached input.
    """
    grad_x = dout * self.y
    grad_y = dout * self.x
    return grad_x, grad_y

In [2]:
# Inputs of the graph: apple unit price, number of apples, tax multiplier.
apple, apple_num, tax = 100, 2, 1.1

# One multiplication layer per multiplication node in the graph.
mul_apple_layer, mul_tax_layer = MulLayer(), MulLayer()

# Forward pass: subtotal for the apples first, then apply the tax multiplier.
apple_price = mul_apple_layer.forward(x=apple, y=apple_num)
price = mul_tax_layer.forward(x=apple_price, y=tax)

print(price)

220.00000000000003


In [3]:
# Backward pass: visit the layers in the reverse order of the forward pass,
# starting from d(price)/d(price) = 1.
dprice = 1
dapple_price, dtax = mul_tax_layer.backward(dout=dprice)
dapple, dapple_num = mul_apple_layer.backward(dout=dapple_price)

print(dapple, dapple_num, dtax)

2.2 110.00000000000001 200


### 5.4.2 加算レイヤの実装

In [4]:
class AddLayer:
  """Addition node of a computational graph.

  Addition needs no cached state: its backward pass only forwards the
  upstream gradient to both inputs.
  """

  def __init__(self):
    # Nothing to remember between forward and backward.
    pass

  def forward(self, x, y):
    """Return ``x + y``."""
    return x + y

  def backward(self, dout):
    """Return ``(dx, dy)``; both equal ``dout * 1``."""
    return dout * 1, dout * 1

In [7]:
# Inputs of the graph: unit prices, quantities and the tax multiplier.
apple, apple_num = 100, 2
orange, orange_num = 150, 3
tax = 1.1

# One layer object per node of the computational graph.
mul_apple_layer = MulLayer()
mul_orange_layer = MulLayer()
add_apple_orange_layer = AddLayer()
mul_tax_layer = MulLayer()

# Forward pass: per-fruit subtotals -> their sum -> sum times tax.
apple_price = mul_apple_layer.forward(x=apple, y=apple_num)
orange_price = mul_orange_layer.forward(x=orange, y=orange_num)
all_price = add_apple_orange_layer.forward(x=apple_price, y=orange_price)
price = mul_tax_layer.forward(x=all_price, y=tax)

# Backward pass: call the layers in the reverse order of the forward pass,
# starting from d(price)/d(price) = 1.
dprice = 1
dall_price, dtax = mul_tax_layer.backward(dout=dprice)
dapple_price, dorange_price = add_apple_orange_layer.backward(dout=dall_price)
dorange, dorange_num = mul_orange_layer.backward(dout=dorange_price)
dapple, dapple_num = mul_apple_layer.backward(dout=dapple_price)

print(dapple_num, dapple, dorange, dorange_num, dtax)

110.00000000000001 2.2 3.3000000000000003 165.0 650


## 5.5 活性化関数レイヤの実装
### 5.5.1 ReLUレイヤ

In [2]:
class Relu:
  """ReLU activation layer.

  forward() zeroes every element that is <= 0 and remembers where those
  elements were; backward() blocks the gradient at the same positions.
  Inputs must support boolean-mask assignment and ``.copy()`` (e.g. NumPy
  arrays).
  """

  def __init__(self):
    # Boolean mask of the positions that were <= 0 in the last forward pass.
    self.mask = None

  def forward(self, x):
    """Return a copy of ``x`` with all elements <= 0 replaced by 0."""
    self.mask = (x <= 0)
    out = x.copy()
    out[self.mask] = 0
    return out

  def backward(self, dout):
    """Return ``dout`` with the gradient zeroed where the input was <= 0.

    Works on a copy so the caller's ``dout`` array is left untouched,
    mirroring how forward() leaves ``x`` untouched.
    """
    dx = dout.copy()
    dx[self.mask] = 0
    return dx

In [3]:
import numpy as np
# 2x2 sample input mixing positive and negative values.
x = np.array([1.0, -0.5, -2.0, 3.0]).reshape(2, 2)
print(x)

[[ 1.  -0.5]
 [-2.   3. ]]


In [11]:
# Element-wise test: True wherever x is zero or negative.
mask = np.less_equal(x, 0)
print(mask)

[[False  True]
 [ True False]]


### 5.5.2 Sigmoidレイヤの実装

In [12]:
class Sigmoid:
  """Sigmoid activation layer.

  The forward output is cached because the backward pass is expressed
  purely in terms of that output.
  """

  def __init__(self):
    # Output of the most recent forward pass; None until forward() runs.
    self.out = None

  def forward(self, x):
    """Return ``1 / (1 + exp(-x))`` and cache it on ``self.out``."""
    self.out = 1 / (1 + np.exp(-x))
    return self.out

  def backward(self, dout):
    """Return ``dout * (1 - y) * y`` where ``y`` is the cached output."""
    y = self.out
    return dout * (1.0 - y) * y

## 5.6 Affine/Softmaxレイヤの実装
### 5.6.2 バッチ版Affineレイヤ

In [13]:
# Stand-in for the product X . W of a batch of two samples, plus a bias
# vector with one entry per column.
X_dot_W = np.array([[0] * 3, [10] * 3])
B = np.arange(1, 4)
X_dot_W

array([[ 0,  0,  0],
       [10, 10, 10]])

In [14]:
# Broadcasting adds the bias vector B to every row of X_dot_W.
np.add(X_dot_W, B)

array([[ 1,  2,  3],
       [11, 12, 13]])

In [17]:
# Sample upstream gradient: one row per sample in the batch.
dY = np.arange(1, 7).reshape(2, 3)
dY

array([[1, 2, 3],
       [4, 5, 6]])

In [18]:
# The bias gradient sums the upstream gradient over the batch axis (axis 0).
dB = dY.sum(axis=0)
dB

array([5, 7, 9])

In [19]:
class Affine:
  """Affine (fully connected) layer computing ``x . W + b``.

  backward() stores the parameter gradients on ``dW`` and ``db`` and
  returns the gradient with respect to the input.
  """

  def __init__(self, W, b):
    # Parameters of the layer.
    self.W, self.b = W, b
    # Input of the most recent forward pass, needed to compute dW.
    self.x = None
    # Parameter gradients; filled in by backward().
    self.dW = None
    self.db = None

  def forward(self, x):
    """Cache ``x`` and return ``np.dot(x, W) + b``."""
    self.x = x
    return np.dot(x, self.W) + self.b

  def backward(self, dout):
    """Store ``dW``/``db`` for ``dout`` and return the input gradient."""
    self.dW = np.dot(self.x.T, dout)
    # The bias is shared by every row, so its gradient sums over axis 0.
    self.db = np.sum(dout, axis=0)
    return np.dot(dout, self.W.T)

### 5.6.3 Softmax-with-Lossレイヤ

In [20]:
class SoftmaxWithLoss:
  """Softmax followed by a cross-entropy loss, fused into one layer.

  Relies on ``softmax`` and ``cross_entropy_error`` being available in the
  enclosing namespace; they are not defined in this class.
  """

  def __init__(self):
    self.loss = None  # loss value from the last forward pass
    self.y = None     # softmax output from the last forward pass
    self.t = None     # target labels from the last forward pass

  def forward(self, x, t):
    """Compute and return the loss of scores ``x`` against targets ``t``.

    The softmax output and the targets are cached for backward().
    """
    self.t = t
    self.y = softmax(x)
    self.loss = cross_entropy_error(self.y, self.t)
    # Return the computed loss; the previous `self.SoftmaxWithLoss` is not
    # an attribute of this class and raised AttributeError.
    return self.loss

  def backward(self, dout=1):
    """Return ``(y - t) / batch_size`` as the gradient w.r.t. the scores.

    ``dout`` is accepted for interface symmetry with the other layers but
    is not used. The subtraction requires ``t`` to have the same shape as
    ``y``.
    """
    batch_size = self.t.shape[0]
    dx = (self.y - self.t) / batch_size
    return dx

## 5.7 誤差逆伝播法の実装
### 5.7.2 誤差逆伝播法に対応したニューラルネットワークの実装
別コードで実装

### 5.7.3 誤差逆伝播法の勾配確認

In [1]:
import sys
sys.path.append('/content/drive/My Drive/Colab Notebooks/DeepLearningFromScratch/')
import numpy as np
from official.dataset.mnist import load_mnist
from twolayernet2 import *

# Load the data
(x_training, t_training), (x_test, t_test) = load_mnist(normalize=True, one_hot_label=True)

network = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)

# A tiny batch is enough for the check.
x_batch, t_batch = x_training[:3], t_training[:3]

grad_numerical = network.numerical_gradient(x_batch, t_batch)
grad_backprop = network.gradient(x_batch, t_batch)

# Mean absolute difference between the two gradients, per parameter.
# NOTE(review): the recorded output shows b2 at about 0.067, far larger than
# the other keys -- worth confirming the gradient code in twolayernet2.
for key in grad_numerical:
  diff = np.average(np.abs(grad_backprop[key] - grad_numerical[key]))
  print(key, diff, sep=":")

W1:0.0001046460918797607
b1:0.000817719102977889
W2:0.002297906712487563
b2:0.06666666678657075


### 5.7.4 誤差逆伝播法を使った学習
数値微分によるものよりかなり高速。

In [1]:
import sys
sys.path.append('/content/drive/My Drive/Colab Notebooks/DeepLearningFromScratch/')
import numpy as np
from official.dataset.mnist import load_mnist
from twolayernet2 import *

# Load the data
print("=== load mnist")
(x_training, t_training), (x_test, t_test) = load_mnist(normalize=True, one_hot_label=True)

print("=== make network")
network = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)

# Hyperparameters
iters_num = 10000
training_size = x_training.shape[0]
batch_size = 100
learning_rate = 0.1

# History recorded during training
training_loss_list = []
training_accuracy_list = []
test_accuracy_list = []
# Integer number of iterations per epoch. Floor division keeps this an int,
# so the `i % iter_per_epoch == 0` check below still fires once per epoch
# when training_size is not a multiple of batch_size (true division would
# give a float that only matches at i == 0).
iter_per_epoch = max(training_size // batch_size, 1)

print("=== start batches")
for i in range(iters_num):
    # Draw a random mini-batch
    batch_mask = np.random.choice(training_size, batch_size)
    x_batch = x_training[batch_mask]
    t_batch = t_training[batch_mask]
    # Compute the gradients
    grad = network.gradient(x_batch, t_batch)
    # Gradient-descent update of every parameter
    for key in ('W1', 'b1', 'W2', 'b2'):
        network.params[key] -= learning_rate * grad[key]
    # Record the loss on this mini-batch (measured after the update)
    loss = network.loss(x_batch, t_batch)
    training_loss_list.append(loss)
    # Evaluate accuracy once per epoch
    if i % iter_per_epoch == 0:
        training_accuracy = network.accuracy(x_training, t_training)
        test_accuracy = network.accuracy(x_test, t_test)
        training_accuracy_list.append(training_accuracy)
        test_accuracy_list.append(test_accuracy)
        print("training accuracy, test accuracy | " + str(training_accuracy) + ", " + str(test_accuracy))

=== load mnist
=== make network
=== start batches
training accuracy, test accuracy | 0.08923333333333333, 0.0881
training accuracy, test accuracy | 0.8939833333333334, 0.8999
training accuracy, test accuracy | 0.9226833333333333, 0.9239
training accuracy, test accuracy | 0.9344833333333333, 0.9353
training accuracy, test accuracy | 0.94215, 0.9416
training accuracy, test accuracy | 0.9511166666666667, 0.948
training accuracy, test accuracy | 0.95485, 0.9492
training accuracy, test accuracy | 0.9592166666666667, 0.9544
training accuracy, test accuracy | 0.9610166666666666, 0.955
training accuracy, test accuracy | 0.96345, 0.9589
training accuracy, test accuracy | 0.9678166666666667, 0.9625
training accuracy, test accuracy | 0.9696166666666667, 0.963
training accuracy, test accuracy | 0.9717333333333333, 0.9634
training accuracy, test accuracy | 0.97265, 0.9651
training accuracy, test accuracy | 0.97505, 0.9666
training accuracy, test accuracy | 0.9759666666666666, 0.9676
training accura

# 5章終わり