<a href="https://colab.research.google.com/github/mldlcl2022/dl_from_scratch_1/blob/main/Ch_5_%EC%98%A4%EC%B0%A8%EC%97%AD%EC%A0%84%ED%8C%8C%EB%B2%95.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

* 책의 모든 코드는 구글 코랩(Google Colab)을 활용하여 실습

# Chapter 5. 오차역전파법
240123

## 5.4 단순한 계층 구현하기(p.160)

### 5.4.1 곱셈 계층

In [1]:
class MulLayer:
    """Multiplication node of a computational graph.

    The forward pass caches both operands because the backward pass needs
    them to form the local gradients.
    """

    def __init__(self):
        # Operands seen by the most recent forward() call.
        self.x, self.y = None, None

    def forward(self, x, y):
        """Remember the operands and return their product ``x * y``."""
        self.x, self.y = x, y
        return x * y

    def backward(self, dout):
        """Return ``(dx, dy)`` for the upstream gradient ``dout``.

        Each operand's gradient is ``dout`` scaled by the *other* operand,
        which is why x and y appear swapped below.
        """
        grad_x = dout * self.y
        grad_y = dout * self.x
        return grad_x, grad_y

In [2]:
apple = 100   # price per apple: 100 won
apple_num = 2 # number of apples: 2
tax = 1.1     # consumption-tax multiplier: 1.1 (i.e. a 10% tax)

# Layers
mul_apple_layer = MulLayer()
mul_tax_layer = MulLayer()

# Forward pass: (apple * apple_num) * tax
apple_price = mul_apple_layer.forward(apple, apple_num)
price = mul_tax_layer.forward(apple_price, tax)

# Print the result
print(price)

220.00000000000003


In [3]:
# Backward pass: walk the layers in the reverse order of the forward pass,
# starting from d(price)/d(price) = 1.
dprice = 1
dapple_price, dtax = mul_tax_layer.backward(dprice)
dapple, dapple_num = mul_apple_layer.backward(dapple_price)

# Print the result
print(dapple, dapple_num, dtax)

2.2 110.00000000000001 200


### 5.4.2 덧셈 계층

In [4]:
class AddLayer:
    """Addition node of a computational graph.

    Addition needs no cached state: its local gradient is 1 for both inputs.
    """

    def __init__(self):
        # Nothing to remember between forward() and backward().
        pass

    def forward(self, x, y):
        """Return the sum ``x + y``."""
        return x + y

    def backward(self, dout):
        """Pass the upstream gradient through to both inputs as ``(dx, dy)``."""
        return dout * 1, dout * 1

In [5]:
apple = 100    # price per apple: 100 won
apple_num = 2  # number of apples: 2
orange = 150   # price per orange: 150 won
orange_num = 3 # number of oranges: 3
tax = 1.1      # consumption-tax multiplier: 1.1 (i.e. a 10% tax)

# Layers
mul_apple_layer = MulLayer()
mul_orange_layer = MulLayer()
add_apple_orange_layer = AddLayer()
mul_tax_layer = MulLayer()

# Forward pass: (apple * apple_num + orange * orange_num) * tax
apple_price = mul_apple_layer.forward(apple, apple_num)
orange_price = mul_orange_layer.forward(orange, orange_num)
all_price = add_apple_orange_layer.forward(apple_price, orange_price)
price = mul_tax_layer.forward(all_price, tax)

# Backward pass: call backward() on the layers in the reverse order of the
# forward pass, starting from d(price)/d(price) = 1.
dprice = 1
dall_price, dtax = mul_tax_layer.backward(dprice)
dapple_price, dorange_price = add_apple_orange_layer.backward(dall_price)
dorange, dorange_num = mul_orange_layer.backward(dorange_price)
dapple, dapple_num = mul_apple_layer.backward(dapple_price)

# Print the results (int() truncates the floating-point values for display)
print("price :", int(price))
print()
print("dApple :", dapple)
print("dApple_num :", int(dapple_num))
print("dOrange :", dorange)
print("dOrange_num :", int(dorange_num))
print("dTax :", dtax)

price : 715

dApple : 2.2
dApple_num : 110
dOrange : 3.3000000000000003
dOrange_num : 165
dTax : 650


## 5.5 활성화 함수 계층 구현하기(p.165)

### 5.5.1 ReLU 계층

In [6]:
class Relu:
    """ReLU activation layer.

    ``forward`` zeroes every non-positive entry of its input and remembers
    where those entries were; ``backward`` blocks the gradient at the same
    positions. Inputs must support ``.copy()`` and boolean-mask assignment
    (e.g. ``numpy.ndarray``).
    """

    def __init__(self):
        # Boolean mask from the last forward(): True where x <= 0.
        self.mask = None

    def forward(self, x):
        """Return a copy of ``x`` with entries ``<= 0`` replaced by 0."""
        self.mask = (x <= 0)  # True where x <= 0, False where x > 0
        out = x.copy()
        out[self.mask] = 0

        return out

    def backward(self, dout):
        """Return the gradient w.r.t. the forward input.

        Works on a copy so the caller's ``dout`` array is left untouched
        (the previous implementation zeroed entries of ``dout`` in place).
        """
        dx = dout.copy()
        dx[self.mask] = 0

        return dx

In [7]:
import numpy as np

# Sample 2x2 input containing both positive and negative entries.
x = np.array([[1.0,-0.5],[-2.0,3.0]])
print(x)

[[ 1.  -0.5]
 [-2.   3. ]]


In [8]:
# Boolean mask: True where the entry is <= 0, False where it is > 0
# (the same mask Relu.forward stores).
mask = (x <= 0)
print(mask)

[[False  True]
 [ True False]]


### 5.5.2 Sigmoid 계층

In [9]:
class Sigmoid:
    """Sigmoid activation layer, ``y = 1 / (1 + exp(-x))``.

    The forward output is cached because the derivative can be written purely
    in terms of it: ``dy/dx = y * (1 - y)``. Requires ``numpy`` to be imported
    as ``np``.
    """

    def __init__(self):
        # Output of the most recent forward() call.
        self.out = None

    def forward(self, x):
        """Apply the sigmoid element-wise to ``x`` and cache the result."""
        # np.exp, not a bare exp(): no plain `exp` name is defined here, so
        # the original call raised NameError.
        out = 1 / (1 + np.exp(-x))
        self.out = out

        return out

    def backward(self, dout):
        """Return ``dout * y * (1 - y)`` using the cached forward output ``y``."""
        dx = dout * self.out * (1.0 - self.out)

        return dx

## 5.6 Affine/Softmax 계층 구현하기(p.170)

### 5.6.2 배치용 Affine 계층

In [10]:
# Stand-in for the product X·W of a batch of two samples (one row per sample),
# and a bias vector with one entry per output column.
X_dot_W = np.array([[0,0,0],[10,10,10]])
B = np.array([1,2,3])

In [11]:
X_dot_W

array([[ 0,  0,  0],
       [10, 10, 10]])

In [12]:
B

array([1, 2, 3])

In [13]:
# Broadcasting: the length-3 vector B is added to every row of X_dot_W.
X_dot_W + B

array([[ 1,  2,  3],
       [11, 12, 13]])

In [14]:
# Example upstream gradient with the same 2x3 shape as X_dot_W + B.
dY = np.array([[1,2,3],[4,5,6]])

In [15]:
dY

array([[1, 2, 3],
       [4, 5, 6]])

In [16]:
# Bias gradient: sum dY over axis 0 (down the rows), giving one value per
# column, i.e. the same shape as B.
dB = np.sum(dY, axis= 0)
dB

array([5, 7, 9])

In [17]:
class Affine:
    """Affine (fully connected) layer computing ``np.dot(x, W) + b``.

    After ``backward`` the parameter gradients are available as ``dW`` and
    ``db``.
    """

    def __init__(self, W, b):
        self.W, self.b = W, b
        # Input cached by forward(); needed to compute dW.
        self.x = None
        # Parameter gradients, filled in by backward().
        self.dW = None
        self.db = None

    def forward(self, x):
        """Cache ``x`` and return ``np.dot(x, W) + b``."""
        self.x = x
        return np.dot(x, self.W) + self.b

    def backward(self, dout):
        """Store ``dW``/``db`` and return the gradient w.r.t. the input."""
        # The bias is shared by every row of the batch, so its gradient is
        # the sum of dout over axis 0.
        self.db = np.sum(dout, axis=0)
        self.dW = np.dot(self.x.T, dout)
        return np.dot(dout, self.W.T)

In [18]:
import numpy as np

# np.sum with axis=0 adds down the rows, leaving one total per column.
a = np.array([[1,2,3],[4,5,6]])
print(a)
print(np.sum(a, axis= 0))

[[1 2 3]
 [4 5 6]]
[5 7 9]


### 5.6.3 Softmax-with-Loss 계층

In [19]:
def softmax(a):
    """Return the softmax of ``a`` taken along its last axis.

    A 1-D input is treated as a single score vector; a 2-D input is treated
    as a batch with one score vector per row, and every row is normalised
    independently (the previous version normalised over the whole array).

    The maximum along the last axis is subtracted before exponentiating.
    This leaves the result mathematically unchanged but keeps ``np.exp`` from
    overflowing to ``inf`` (and the output from becoming ``nan``) for large
    scores.
    """
    a = np.asarray(a)
    shifted = a - np.max(a, axis=-1, keepdims=True)
    exp_a = np.exp(shifted)
    y = exp_a / np.sum(exp_a, axis=-1, keepdims=True)

    return y

def cross_entropy_error(y, t):
    """Return the cross-entropy between predictions ``y`` and one-hot targets ``t``.

    For a 1-D ``y`` (a single sample) this is ``-sum(t * log(y))``. For a
    batch (``y`` with more than one dimension, one sample per row) the summed
    loss is divided by the number of rows, so the result is the mean loss per
    sample. This matches the ``/ batch_size`` scaling used by
    ``SoftmaxWithLoss.backward``.
    """
    delta = 1e-7  # keeps np.log away from log(0) = -inf

    y = np.asarray(y)
    t = np.asarray(t)
    # A single sample counts as a batch of one.
    batch_size = y.shape[0] if y.ndim > 1 else 1

    return -np.sum(t * np.log(y + delta)) / batch_size

In [20]:
class SoftmaxWithLoss:
    """Softmax followed by cross-entropy loss, fused into one layer.

    ``forward`` relies on the module-level ``softmax`` and
    ``cross_entropy_error`` functions. Targets ``t`` are expected to be
    one-hot and to have the same shape as the scores.
    """

    def __init__(self):
        self.loss = None  # loss value from the last forward()
        self.y = None     # softmax output
        self.t = None     # target labels (one-hot vector)

    def forward(self, x, t):
        """Compute and return the loss for scores ``x`` and targets ``t``."""
        self.t = t
        self.y = softmax(x)
        self.loss = cross_entropy_error(self.y, self.t)

        return self.loss

    def backward(self, dout=1):
        """Return the gradient of the loss w.r.t. the scores passed to forward.

        The gradient is ``(y - t) / batch_size`` scaled by the upstream
        gradient ``dout``. A 1-D target is a single sample, so its batch size
        is 1; ``t.shape[0]`` would be the number of classes in that case.
        """
        batch_size = self.t.shape[0] if self.t.ndim > 1 else 1
        dx = dout * (self.y - self.t) / batch_size

        return dx

## 5.7 오차역전파법 구현하기(p.179)

### 5.7.2 오차역전파법을 적용한 신경망 구현하기
240125

In [21]:
# Mount Google Drive
from google.colab import drive
drive.mount('/content/drive')

# Change to the directory that contains the .py files
%cd /content/drive/MyDrive/Colab Notebooks/밑바닥부터 시작하는 딥러닝/dataset

import numpy as np
from common.layers import *
from common.gradient import numerical_gradient
from collections import OrderedDict

# Affine layer
class Affine:
    """Affine (fully connected) layer computing ``np.dot(x, W) + b``.

    After ``backward`` the parameter gradients are available as ``dW`` and
    ``db``.
    """

    # The constructor was previously misspelled ``__init`` and therefore never
    # ran: ``Affine(W, b)`` raised TypeError and no attribute was initialised.
    def __init__(self, W, b):
        self.W = W
        self.b = b
        self.x = None    # input cached by forward(); needed to compute dW
        self.dW = None   # gradient w.r.t. W, set by backward()
        self.db = None   # gradient w.r.t. b, set by backward()

    def forward(self, x):
        """Cache ``x`` and return ``np.dot(x, W) + b``."""
        self.x = x
        out = np.dot(x, self.W) + self.b

        return out

    def backward(self, dout):
        """Store ``dW``/``db`` and return the gradient w.r.t. the input."""
        dx = np.dot(dout, self.W.T)
        self.dW = np.dot(self.x.T, dout)
        # The bias is shared by every row of the batch, so its gradient is
        # the sum of dout over axis 0.
        self.db = np.sum(dout, axis=0)

        return dx

# ReLU layer
class Relu:
    """ReLU activation layer.

    ``forward`` zeroes every non-positive entry of its input and remembers
    where those entries were; ``backward`` blocks the gradient at the same
    positions. Inputs must support ``.copy()`` and boolean-mask assignment
    (e.g. ``numpy.ndarray``).
    """

    def __init__(self):
        # Boolean mask from the last forward(): True where x <= 0.
        self.mask = None

    def forward(self, x):
        """Return a copy of ``x`` with entries ``<= 0`` replaced by 0."""
        # True where x <= 0, False where x > 0
        self.mask = (x <= 0)
        out = x.copy()
        out[self.mask] = 0

        return out

    def backward(self, dout):
        """Return the gradient w.r.t. the forward input.

        Works on a copy so the caller's ``dout`` array is left untouched
        (the previous implementation zeroed entries of ``dout`` in place).
        """
        dx = dout.copy()
        dx[self.mask] = 0

        return dx

# Softmax-with-loss layer
class SoftmaxWithLoss:
    """Softmax followed by cross-entropy loss, fused into one layer.

    The two helpers are static methods and ``forward`` calls them through
    ``self``. Previously they were plain functions in the class body with no
    ``self`` parameter, and ``forward`` looked up bare ``softmax`` /
    ``cross_entropy_error`` names, so it only worked when same-named globals
    happened to exist. Targets ``t`` are expected to be one-hot and to have
    the same shape as the scores.
    """

    def __init__(self):
        self.loss = None  # loss value from the last forward()
        self.y = None     # softmax output
        self.t = None     # target labels (one-hot vector)

    @staticmethod
    def softmax(a):
        """Return the softmax of ``a`` along its last axis (row-wise for a batch)."""
        a = np.asarray(a)
        # Subtracting the maximum leaves the result unchanged but keeps
        # np.exp from overflowing for large scores.
        shifted = a - np.max(a, axis=-1, keepdims=True)
        exp_a = np.exp(shifted)
        y = exp_a / np.sum(exp_a, axis=-1, keepdims=True)

        return y

    @staticmethod
    def cross_entropy_error(y, t):
        """Return the cross-entropy of ``y`` against one-hot ``t``.

        For a batch (``y`` with more than one dimension) the summed loss is
        divided by the number of rows, matching the scaling in ``backward``.
        """
        delta = 1e-7  # keeps np.log away from log(0) = -inf
        y = np.asarray(y)
        t = np.asarray(t)
        batch_size = y.shape[0] if y.ndim > 1 else 1

        return -np.sum(t * np.log(y + delta)) / batch_size

    def forward(self, x, t):
        """Compute and return the loss for scores ``x`` and targets ``t``."""
        self.t = t
        self.y = self.softmax(x)
        self.loss = self.cross_entropy_error(self.y, self.t)

        return self.loss

    def backward(self, dout=1):
        """Return the gradient of the loss w.r.t. the scores passed to forward.

        The gradient is ``(y - t) / batch_size`` scaled by the upstream
        gradient ``dout``. A 1-D target is a single sample, so its batch size
        is 1; ``t.shape[0]`` would be the number of classes in that case.
        """
        batch_size = self.t.shape[0] if self.t.ndim > 1 else 1
        dx = dout * (self.y - self.t) / batch_size

        return dx

class TwoLayerNet:
    """Two-layer network: Affine1 -> Relu1 -> Affine2, then SoftmaxWithLoss.

    ``params`` holds the parameters under the keys 'W1', 'b1', 'W2', 'b2';
    ``layers`` holds the hidden layers in forward order; ``lastLayer`` is the
    loss layer.
    """

    def __init__(self, input_size, hidden_size, output_size, weight_init_std= 0.01):
        # Parameters: scaled Gaussian weights and zero biases. W1 is drawn
        # before W2 so the random stream is consumed in the same order.
        w1 = weight_init_std * np.random.randn(input_size, hidden_size)
        b1 = np.zeros(hidden_size)
        w2 = weight_init_std * np.random.randn(hidden_size, output_size)
        b2 = np.zeros(output_size)
        self.params = {'W1': w1, 'b1': b1, 'W2': w2, 'b2': b2}

        # Layers, kept in forward order; the Affine layers receive the very
        # same array objects that are stored in self.params.
        self.layers = OrderedDict([
            ('Affine1', Affine(w1, b1)),
            ('Relu1', Relu()),
            ('Affine2', Affine(w2, b2)),
        ])

        self.lastLayer = SoftmaxWithLoss()

    def predict(self, x):
        """Run ``x`` through every layer in ``self.layers`` and return the result."""
        out = x
        for layer in self.layers.values():
            out = layer.forward(out)
        return out

    def loss(self, x, t):
        """Return the loss for input data ``x`` and target labels ``t``."""
        scores = self.predict(x)
        return self.lastLayer.forward(scores, t)

    def accuracy(self, x, t):
        """Return the fraction of rows of ``x`` whose argmax prediction matches ``t``.

        ``t`` may be a 1-D array of class indices; any other rank is reduced
        with argmax along axis 1.
        """
        predicted = np.argmax(self.predict(x), axis= 1)
        labels = t if t.ndim == 1 else np.argmax(t, axis= 1)
        return np.sum(predicted == labels) / float(x.shape[0])

    def numerical_gradient(self, x, t):
        """Return gradients of the loss computed by the module-level ``numerical_gradient``.

        ``x`` is the input data and ``t`` the target labels. The result is a
        dict with the same keys as ``self.params``.
        """
        def loss_W(W):
            # The argument is ignored; the loss is always re-evaluated on (x, t).
            return self.loss(x, t)

        return {
            name: numerical_gradient(loss_W, self.params[name])
            for name in ('W1', 'b1', 'W2', 'b2')
        }

    def gradient(self, x, t):
        """Return gradients of the loss obtained by backpropagation.

        The result is a dict with the keys 'W1', 'b1', 'W2', 'b2'.
        """
        # Forward pass (each layer caches what its backward() needs).
        self.loss(x, t)

        # Backward pass: loss layer first, then the hidden layers in reverse.
        dout = self.lastLayer.backward(1)
        for layer in reversed(list(self.layers.values())):
            dout = layer.backward(dout)

        # Collect the gradients the Affine layers stored during backward().
        first, second = self.layers['Affine1'], self.layers['Affine2']
        return {
            'W1': first.dW,
            'b1': first.db,
            'W2': second.dW,
            'b2': second.db,
        }

Mounted at /content/drive
/content/drive/MyDrive/Colab Notebooks/밑바닥부터 시작하는 딥러닝/dataset


### 5.7.3 오차역전파법으로 구한 기울기 검증하기

In [23]:
# Mount Google Drive
from google.colab import drive
drive.mount('/content/drive')

# Change to the directory that contains the .py files
%cd /content/drive/MyDrive/Colab Notebooks/밑바닥부터 시작하는 딥러닝/dataset

import numpy as np
from mnist import load_mnist
from two_layer_net import TwoLayerNet

# Load the data
(x_train, t_train), (x_test, t_test) = load_mnist(normalize= True, one_hot_label= True)

# Net
network = TwoLayerNet(input_size= 784, hidden_size= 50, output_size= 10)

# Use only the first three training samples for the check.
x_batch = x_train[:3]
t_batch = t_train[:3]

grad_numerical = network.numerical_gradient(x_batch, t_batch)
grad_backprop = network.gradient(x_batch, t_batch)

# For each parameter, take the absolute difference between the two gradients
# and print its average; values near zero mean the two methods agree.
for key in grad_numerical.keys() :
    diff = np.average(np.abs(grad_backprop[key] - grad_numerical[key]))
    print(key + ' : ' + str(diff))

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
/content/drive/MyDrive/Colab Notebooks/밑바닥부터 시작하는 딥러닝/dataset
W1 : 2.074286660233999e-10
b1 : 1.0278634789991472e-09
W2 : 6.937796280880038e-08
b2 : 1.396461986338804e-07


### 5.7.4 오차역전파법을 사용한 학습 구현하기

In [24]:
# Mount Google Drive
from google.colab import drive
drive.mount('/content/drive')

# Change to the directory that contains the .py files
%cd /content/drive/MyDrive/Colab Notebooks/밑바닥부터 시작하는 딥러닝/dataset

import numpy as np
from mnist import load_mnist
from two_layer_net import TwoLayerNet

# Load the data
(x_train, t_train), (x_test, t_test) = load_mnist(normalize= True, one_hot_label= True)

# Net
network = TwoLayerNet(input_size= 784, hidden_size= 50, output_size= 10)

iters_num = 10000                # total number of mini-batch updates
train_size = x_train.shape[0]
batch_size = 100
learning_rate = 0.1

train_loss_list = []
train_acc_list = []
test_acc_list = []

# Whole number of iterations per epoch. Floor division keeps this an int so
# the `i % iter_per_epoch == 0` check below stays exact even when train_size
# is not a multiple of batch_size (true division produced a float here).
iter_per_epoch = max(train_size // batch_size, 1)

for i in range(iters_num) :
    # Draw a random mini-batch of sample indices.
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]

    # Compute the gradients by backpropagation
    grad = network.gradient(x_batch, t_batch)

    # Gradient-descent update; the arrays are modified in place.
    for key in ('W1','b1','W2','b2') :
        network.params[key] -= learning_rate * grad[key]

    # Record the loss on the current mini-batch after the update.
    loss = network.loss(x_batch, t_batch)
    train_loss_list.append(loss)

    # Once per epoch, evaluate accuracy on the full train and test sets.
    if i % iter_per_epoch == 0 :
        train_acc = network.accuracy(x_train, t_train)
        test_acc = network.accuracy(x_test, t_test)

        train_acc_list.append(train_acc)
        test_acc_list.append(test_acc)

        print(train_acc, test_acc)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
/content/drive/MyDrive/Colab Notebooks/밑바닥부터 시작하는 딥러닝/dataset
0.09963333333333334 0.0991
0.7816833333333333 0.7893
0.8778833333333333 0.8809
0.8989166666666667 0.9006
0.9083166666666667 0.9118
0.9148166666666666 0.9162
0.91915 0.9211
0.9236166666666666 0.9247
0.928 0.9283
0.9311833333333334 0.9322
0.9342666666666667 0.9339
0.9371333333333334 0.9374
0.93985 0.9384
0.9410166666666666 0.9402
0.9437666666666666 0.942
0.9453 0.9441
0.9471666666666667 0.9459
