# 5. 오차역전파법

## 5.4 단순한 계층 구현하기

### 5.4.1 곱셈 계층

In [1]:
class MulLayer:
  def __init__(self):
    self.x = None
    self.y = None

  def forward(self, x, y):
    self.x = x
    self.y = y
    out = x * y
    return out

  def backward(self, dout): #dout : 상류에서 넘어온 미분
    dx = dout * self.y #x와 y를 바꿈
    dy = dout + self.x
    return dx, dy

- 순전파 구현

In [2]:
# 입력값
apple = 100
apple_num = 2
tax = 1.1

# 계층들
mul_apple_layer = MulLayer()
mul_tax_layer = MulLayer()

#순전파
apple_price = mul_apple_layer.forward(apple, apple_num)
price = mul_tax_layer.forward(apple_price, tax)

print(price)

220.00000000000003


- 역전파 구현 : 각 변수에 대한 미분

In [3]:
dprice = 1
dapple_price, dtax = mul_tax_layer.backward(dprice)
dapple, dapple_num = mul_apple_layer.backward(dapple_price)

print(dapple, dapple_num, dtax)

2.2 101.1 201


### 5.4.2 덧셈 계층

In [4]:
class AddLayer:
  def __init__(self):
    pass

  def forward(self, x, y):
    out = x + y
    return out

  def backward(self, dout):
    dx = dout * 1
    dy = dout * 1
    return dx, dy

In [5]:
apple = 100
apple_num = 2
orange = 150
orange_num = 3
tax = 1.1

#layer
mul_apple_layer = MulLayer()
mul_orange_layer = MulLayer()
add_total_layer = AddLayer()
mul_tax_layer = MulLayer()

#forward
apple_price = mul_apple_layer.forward(apple, apple_num)
orange_price = mul_orange_layer.forward(orange, orange_num)
total_price = add_total_layer.forward(apple_price, orange_price)
price = mul_tax_layer.forward(total_price, tax)

print(price)

#backward
dprice = 1
dtotal_price, dtax = mul_tax_layer.backward(dprice)
dapple_price, dorange_price = add_total_layer.backward(dtotal_price)
dorange, dorange_num = mul_orange_layer.backward(dorange_price)
dapple, dapple_num = mul_apple_layer.backward(dapple_price)

print(dapple_num, dapple, dorange, dorange_num, dtax)

715.0000000000001
101.1 2.2 3.3000000000000003 151.1 651


<br>

## 5.5 활성화 함수 계층 구현하기

### 5.5.1 ReLU 계층

In [6]:
class Relu:
  def __init__(self):
    self.mask = None

  def forward(self, x):
    self.mask = (x<=0)
    out = x.copy()
    out[self.mask] = 0
    return out

  def backward(self, dout):
    dout[self.mask] = 0
    dx = dout
    return dx

### 5.5.2 Sigmoid 계층

In [7]:
import numpy as np
class Sigmoid:
  def __init__(self):
    self.out = None

  def forward(self, x):
    out = 1 / (1 + np.exp(-x))
    self.out = out
    return out
  
  def backward(self, dout):
    dx = dout * (1.0 - self.out) * self.out
    return dx

<br>

## 5.6 Affine/Softmax 계층 구현하기

### 5.6.2 배치용 Affine 계층

In [8]:
# 데이터가 2개인 경우우
class Affine:
  def __init__(self, W, b):
    self.W = W
    self.b = b
    self.x = None
    self.dW = None
    self.db = None

  def forward(self, x):
    self.x = x
    out = np.dot(x, self.W) + self.b
    return out

  def backward(self, dout):
    dx = np.dot(dout, self.W.T)
    self.dW = np.dot(self.x.T, dout)
    self.db = np.sum(dout, axis = 0)
    return dx

### 5.6.3 Softmax-with-Loss 계층

In [9]:
class SoftmaxWithLoss:
  def __init__(self):
    self.loss = None #손실
    self.y = None #softmax의 출력
    self.t = None #정답 레이블(원-핫 인코딩)

  def forward(self, x, t):
    self.t = t
    self.y = softmax(x)
    self.loss = cross_entropy_error(self.y, self.t)
    return self.loss

  def backward(self, dout = 1):
    batch_size = self.t.shape[0]
    #역전파에서는 전파하는 값을 배치의 수로 나눠서 데이터 1개당 오차를 앞 계층으로 전파
    dx = (self.y - self.t) / batch_size
    return dx

<Br>


## 5.7 오차역전파법 구현하기

### 5.7.2 오차역전파법을 적용한 신경망 구현하기



In [10]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [11]:
import sys, os
sys.path.append('/content/drive/MyDrive/Colab Notebooks')
import numpy as np
from common.layers import *
from common.gradient import numerical_gradient
from collections import OrderedDict

In [12]:
class TwoLayerNet:
  def __init__(self, input_size, hidden_size, output_size,
               weight_init_std = 0.01):
    #가중치 초기화
    self.params = {}
    self.params['W1'] = weight_init_std * \
                      np.random.randn(input_size, hidden_size)
    self.params['b1'] = np.zeros(hidden_size)
    self.parmas['W2'] = weight_init_std * \
                      np.random.randn(hidden_size, output_size)
    self.params['b2'] = np.zeors(output_size)

    #계층 생성
    self.layers = OrderedDict()
    #신경망의 계층을 순서가 있는 딕셔너리에 보관
    #따라서 각 계층을 순서대로 유지함
    self.layers['Affine1'] = \
        Affine(self.params['W1'], self.params['b1'])
    self.layers['Relu1'] = Relu()
    self.layers['Affine2'] = \
        Affine(self.params['W2'], self.params['b2'])
    self.lastLayer = SoftmaxWithLoss() #신경망의 마지막계층


  def predict(self, x): #예측(추론)을 수행
    for layer in self.layers.values():
      x = layer.forward(x)
    return x

  def loss(self, x, t): #손실 함수의 값을 구함
    y = self.predict(x)
    return self.lastLayer(y, t)

  def accuracy(self, x, t): #정확도를 구함
    y = self.predict(x)
    y = np.aragmax(y, axis = 1)
    if t.ndim != 1 : t = np.argmax(t, axis = 1)
    accuracy = np.sum(y == t) / float(x.shape[0])
    return accuracy

  def numerical_gradient(self, x, t):
    #가중치 매개변수의 기울기를 수치 미분 방식으로 구함
    loss_W = lambda W: self.loss(x, t)

    grads = {}
    grads['W1'] = numerical_gradient(loss_W, self.params['W1'])
    grads['b1'] = numerical_gradient(loss_W, self.params['b1'])
    grads['W2'] = numerical_gradient(loss_W, self.params['W2'])
    grads['b2'] = numerical_gradient(loss_W, self.params['b2'])

    return grads

  def gradient(self, x, t):
    #가중치 매개변수의 기울기를 오차역전파법으로 구함
    #순전파
    self.loss(x, t)

    #역전파
    dout = 1
    dout = self.lastLayer.backward(dout)

    layers = list(self.layers.value())
    layers.reverse()
    for layer in layers:
      dout = layer.backward(dout)

    #결과 저장
    grads = {}
    grads['W1'] = self.layers['Affine1'].dW
    grads['b1'] = self.layers['Affine1'].db
    grads['W2'] = self.layers['Affine2'].dW
    grads['b2'] = self.layers['Affine2'].db

    return grads

### 5.7.3 오차역전파법으로 구한 기울기 검증하기

In [15]:
from dataset.mnist import load_mnist
from two_layer_net import TwoLayerNet

#데이터 읽기
(x_train, t_train), (x_test, t_test) = \
  load_mnist(normalize = True, one_hot_label = True)

network = TwoLayerNet(input_size = 784, hidden_size = 50, output_size = 10)

x_batch = x_train[:3]
t_batch = t_train[:3]

grad_numerical = network.numerical_gradient(x_batch, t_batch)
grad_backprop = network.gradient(x_batch, t_batch)

#각 가중치의 차이의 절댓값을 구한 후, 그 절댓값들의 평균을 구함
for key in grad_numerical.keys():
  diff = np.average(np.abs(grad_backprop[key]) - grad_numerical[key])
  print(key + ' : ' + str(diff))

W1 : 0.0006432390797180414
b1 : 0.0038673222949065743
W2 : 0.006932886937897687
b2 : 0.13977256668358157


### 5.7.4 오차역전파법을 사용한 학습 구현하기

In [18]:
#데이터 읽기
(x_train, t_train), (x_test, t_test) = \
  load_mnist(normalize = True, one_hot_label = True)

network = TwoLayerNet(input_size = 784, hidden_size = 50, output_size = 10)

iters_num = 1000
train_size = x_train.shape[0]
batch_size = 100
learning_rate = 0.1

train_loss_list = []
train_acc_list = []
test_acc_list = []

iter_per_epoch = max(train_size / batch_size, 1)

for i in range(iters_num):
  batch_mask = np.random.choice(train_size, batch_size)
  x_batch = x_train[batch_mask]
  t_batch = t_train[batch_mask]

  #오차역전파법으로 기울기 도출
  grad = network.gradient(x_batch, t_batch)

  #갱신
  for key in ('W1', 'b1','W2', 'b2'):
    network.params[key] -= learning_rate * grad[key]
  
  loss = network.loss(x_batch, t_batch)
  train_loss_list.append(loss)

  if i % iter_per_epoch == 0:
    train_acc = network.accuracy(x_train, t_train)
    test_acc = network.accuracy(x_test, t_test)
    train_acc_list.append(train_acc)
    test_acc_list.append(test_acc)
    print(train_acc, test_acc)

0.10558333333333333 0.103
0.9032 0.9082
