In [8]:
# Multiplication layer
class MulLayer:
    """Multiplication node of a computational graph.

    ``forward`` remembers both operands because ``backward`` needs them:
    the gradient for each input is the upstream gradient multiplied by the
    *other* input.
    """

    def __init__(self):
        # Operands of the most recent forward call (None until then).
        self.x = self.y = None

    def forward(self, x, y):
        """Cache ``x`` and ``y`` and return their product."""
        self.x, self.y = x, y
        return x * y

    def backward(self, dout):
        """Return ``(dx, dy)`` for the upstream gradient ``dout``."""
        grad_x = dout * self.y
        grad_y = dout * self.x
        return grad_x, grad_y

In [9]:
# Inputs: unit price, quantity and the tax multiplier.
apple, apple_num, tax = 100, 2, 1.1

# One multiplication layer per product in the graph.
mul_apple_layer, mul_tax_layer = MulLayer(), MulLayer()

# Forward pass: (apple * apple_num) * tax
apple_price = mul_apple_layer.forward(apple, apple_num)
price = mul_tax_layer.forward(apple_price, tax)
print(price)

220.00000000000003


In [10]:
# Addition layer
class AddLayer:
    """Addition node of a computational graph.

    Nothing is cached in ``forward``: ``backward`` only scales the
    upstream gradient by 1 for each input.
    """

    def __init__(self):
        """Create the layer; it keeps no state."""

    def forward(self, x, y):
        """Return ``x + y``."""
        return x + y

    def backward(self, dout):
        """Return ``(dx, dy)``, each equal to ``dout * 1``."""
        # Two separate products so the results are independent objects.
        return dout * 1, dout * 1

In [11]:
# Inputs: unit prices, quantities and the tax multiplier.
apple, apple_num = 100, 2
orange, orange_num = 150, 3
tax = 1.1

# One layer per node of the computational graph.
mul_apple_layer, mul_orange_layer = MulLayer(), MulLayer()
add_apple_orange_layer = AddLayer()
mul_tax_layer = MulLayer()

# Forward pass: ((apple * apple_num) + (orange * orange_num)) * tax
apple_price = mul_apple_layer.forward(apple, apple_num)
orange_price = mul_orange_layer.forward(orange, orange_num)
all_price = add_apple_orange_layer.forward(apple_price, orange_price)
price = mul_tax_layer.forward(all_price, tax)

# Backward pass: walk the layers in the reverse of the forward order,
# starting from d(price)/d(price) = 1.
dprice = 1
dall_price, dtax = mul_tax_layer.backward(dprice)
dapple_price, dorange_price = add_apple_orange_layer.backward(dall_price)
dapple, dapple_num = mul_apple_layer.backward(dapple_price)
dorange, dorange_num = mul_orange_layer.backward(dorange_price)

print(price)
print(dapple_num, dapple, dorange_num, dorange, dtax)

715.0000000000001
110.00000000000001 2.2 165.0 3.3000000000000003 650


In [12]:
class ReLU:
    """ReLU activation layer for NumPy arrays.

    ``forward`` records which inputs were non-positive; ``backward`` blocks
    the gradient at exactly those positions.
    """

    def __init__(self):
        # Boolean array, True where the last forward input was <= 0.
        self.mask = None

    def forward(self, x):
        """Return a copy of ``x`` with every element <= 0 replaced by 0.

        ``x`` must be a NumPy array; it is not modified.
        """
        self.mask = (x <= 0)
        out = x.copy()
        out[self.mask] = 0

        return out

    def backward(self, dout):
        """Return the gradient w.r.t. the forward input.

        The result equals ``dout`` with the masked positions set to 0.
        ``dout`` itself is left untouched: it belongs to the caller and may
        be shared with other branches of the graph, so zeroing it in place
        would silently corrupt their gradients.
        """
        dx = dout.copy()
        dx[self.mask] = 0

        return dx

In [13]:
import numpy as np

# Sample input containing both positive and negative entries.
x = np.array([[1.0, -0.5], [-2.0, 3.0]])

# Boolean array that is True wherever x is zero or negative.
mask = np.less_equal(x, 0)

print(x, mask, sep="\n")

[[ 1.  -0.5]
 [-2.   3. ]]
[[False  True]
 [ True False]]


In [14]:
import numpy as np

class Sigmoid:
    """Sigmoid activation layer.

    The forward output is cached because the backward pass is expressed
    entirely in terms of it: dx = dout * (1 - out) * out.
    """

    def __init__(self):
        # Output of the most recent forward call (None until then).
        self.out = None

    def forward(self, x):
        """Return 1 / (1 + exp(-x)) and cache it on the instance."""
        self.out = 1 / (1 + np.exp(-x))
        return self.out

    def backward(self, dout):
        """Return the gradient w.r.t. the forward input."""
        cached = self.out
        return dout * (1.0 - cached) * cached

In [15]:
# Random input vector, weight matrix and bias for a 2 -> 3 affine map.
X = np.random.rand(2)
W = np.random.rand(2, 3)
B = np.random.rand(3)

# Values first, then the shape of each array, one per line.
print(X, W, B, sep="\n")
print(X.shape, W.shape, B.shape, sep="\n")

[0.41351484 0.48576136]
[[0.18572483 0.7321401  0.47878456]
 [0.72129491 0.38666096 0.21427247]]
[0.88327578 0.82840354 0.601393  ]
(2,)
(2, 3)
(3,)


In [17]:
# Affine layer for batches: the (3,) bias is broadcast onto every row
# of the (2, 3) product.
X_dot_W = np.array([[0, 0, 0], [10, 10, 10]])
B = np.array([1, 2, 3])
print(np.add(X_dot_W, B))

[[ 1  2  3]
 [11 12 13]]


In [19]:
# Upstream gradient for a batch of two samples.
dY = np.array([[1, 2, 3], [4, 5, 6]])

# The bias gradient is the column-wise sum of dY (sum along axis 0).
dB = dY.sum(axis=0)

print(dY, dB, sep="\n")

[[1 2 3]
 [4 5 6]]
[5 7 9]


In [20]:
class Affine:
    """Affine (fully connected) layer computing ``x . W + b``.

    The layer keeps references to the ``W`` and ``b`` it was given, caches
    the forward input, and stores the parameter gradients on ``dW`` and
    ``db`` after each backward call.
    """

    def __init__(self, W, b):
        self.W, self.b = W, b
        # Input of the most recent forward call.
        self.x = None
        # Parameter gradients, filled in by backward().
        self.dW = self.db = None

    def forward(self, x):
        """Cache ``x`` and return ``np.dot(x, W) + b``."""
        self.x = x
        return np.dot(x, self.W) + self.b

    def backward(self, dout):
        """Store ``dW`` and ``db`` and return the gradient w.r.t. ``x``."""
        # The bias gradient is dout summed along axis 0.
        self.db = np.sum(dout, axis=0)
        self.dW = np.dot(self.x.T, dout)
        return np.dot(dout, self.W.T)

In [21]:
# Softmax-with-Loss 계층
# 신경망에서 수행하는 작업은 학습과 추론이다. 일반적으로 추론에서 softmax 계층을  사용하지 않는다.
# softmax 계층 앞의 Affine 계층에서 점수를 전달하는데, 추론의 경우에는 가장 높은 점수만 알면 되니 softmax계층은 필요 없다.
# 반면 신경망을 학습할때는 Softmax계층이 필요하다.

# Softmax function
def softmax(a):
    """Return the softmax of ``a``, normalised along the last axis.

    A 1-D input yields one probability vector. For a 2-D (or higher) input
    every slice along the last axis is normalised independently, so each
    row of a batch of score vectors sums to 1 instead of the whole batch
    sharing a single normaliser.

    The maximum of each slice is subtracted before exponentiation. This
    leaves the result mathematically unchanged but keeps ``np.exp`` from
    overflowing on large scores.
    """
    a = np.asarray(a)
    # A 0-d input has no last axis; reduce over everything in that case.
    axis = -1 if a.ndim else None

    shifted = a - np.max(a, axis=axis, keepdims=True)
    exp_a = np.exp(shifted)
    sum_exp_a = np.sum(exp_a, axis=axis, keepdims=True)
    y = exp_a / sum_exp_a

    return y

# Cross-entropy error with batch support
def cross_entropy_error(y, t):
    """Return the cross-entropy of ``y`` against ``t``, averaged per sample.

    ``t`` is multiplied element-wise with ``log(y)``. When ``y`` is 1-D,
    both arrays are reshaped into a batch containing a single row. The
    number of rows of ``y`` is used as the batch size.
    """
    if y.ndim == 1:
        # Promote a single sample to a batch of one.
        y = y.reshape(1, -1)
        t = t.reshape(1, -1)

    n_samples = y.shape[0]
    # The small constant keeps log() finite when y contains zeros.
    log_likelihood = np.sum(t * np.log(y + 1e-7))
    return -log_likelihood / n_samples

class SoftmaxWithLoss:
    """Output layer that applies softmax and then the cross-entropy loss.

    ``forward`` caches the softmax output and the targets so that
    ``backward`` can form the gradient ``(y - t) / batch_size``.
    """

    def __init__(self):
        self.loss = None  # loss value of the most recent forward call
        self.y = None     # softmax output
        self.t = None     # targets, as passed to forward()

    def forward(self, x, t):
        """Compute and return the loss of the scores ``x`` against ``t``.

        ``t`` must have the same layout as the softmax output, because
        the loss multiplies the two element-wise.
        """
        self.t = t
        self.y = softmax(x)
        self.loss = cross_entropy_error(self.y, self.t)

        return self.loss

    def backward(self, dout=1):
        """Return the gradient of the loss w.r.t. the scores.

        The result is ``dout * (y - t) / batch_size``. ``forward`` must
        have been called first so that ``y`` and ``t`` are populated.
        """
        # A 1-D output is a single sample, matching how the loss treats
        # 1-D input (as a batch of one).
        batch_size = self.y.shape[0] if self.y.ndim > 1 else 1
        # The gradient is output minus target. The layer never stores the
        # raw scores, so there is no `self.x` to read here.
        dx = dout * (self.y - self.t) / batch_size

        return dx

In [22]:
# 오차역전파법을 적용한 신경망 구현하기
import sys, os
sys.path.append(os.pardir)
import numpy as np
from common.layers import *
from common.gradient import numerical_gradient
from collections import OrderedDict

class TwoLayerNet:
    """Two-layer network: Affine1 -> Relu1 -> Affine2, then SoftmaxWithLoss.

    Parameters live in ``self.params`` under the keys 'W1', 'b1', 'W2' and
    'b2'. The same array objects are handed to the Affine layers, so
    in-place updates to ``self.params[...]`` are seen by the layers.
    """

    def __init__(self, input_size, hidden_size, output_size,
                 weight_init_std=0.01):
        """Create the parameters and build the layer stack.

        Weights are drawn from a standard normal distribution scaled by
        ``weight_init_std``; biases start at zero.
        """
        # Initialise weights
        self.params = {}
        self.params['W1'] = weight_init_std * \
                            np.random.randn(input_size, hidden_size)
        self.params['b1'] = np.zeros(hidden_size)
        self.params['W2'] = weight_init_std * \
                            np.random.randn(hidden_size, output_size)
        self.params['b2'] = np.zeros(output_size)

        # Build layers; the OrderedDict makes the forward order explicit.
        self.layers = OrderedDict()
        self.layers['Affine1'] = \
            Affine(self.params['W1'], self.params['b1'])
        self.layers['Relu1'] = Relu()
        self.layers['Affine2'] = \
            Affine(self.params['W2'], self.params['b2'])

        # The loss layer is kept apart so predict() can skip it.
        self.lastLayer = SoftmaxWithLoss()

    def predict(self, x):
        """Run ``x`` through every layer except the loss layer."""
        for layer in self.layers.values():
            x = layer.forward(x)

        return x

    def loss(self, x, t):
        """Return the loss of the network's scores for ``x`` against ``t``."""
        y = self.predict(x)
        return self.lastLayer.forward(y, t)

    def accuracy(self, x, t):
        """Return the fraction of rows of ``x`` whose argmax matches ``t``.

        ``t`` may hold class indices (1-D) or per-class rows; the latter is
        reduced with argmax along axis 1.
        """
        y = self.predict(x)
        y = np.argmax(y, axis=1)
        if t.ndim != 1:
            t = np.argmax(t, axis=1)

        accuracy = np.sum(y == t) / float(x.shape[0])

        return accuracy

    def numerical_gradient(self, x, t):
        """Return parameter gradients computed by numerical differentiation.

        Each parameter array is handed to the module-level
        ``numerical_gradient`` together with a loss closure.
        """
        # The argument W is ignored: the closure re-evaluates the loss on
        # the network's current parameter arrays.
        loss_W = lambda W: self.loss(x, t)

        grads = {}
        grads['W1'] = numerical_gradient(loss_W, self.params['W1'])
        grads['b1'] = numerical_gradient(loss_W, self.params['b1'])
        grads['W2'] = numerical_gradient(loss_W, self.params['W2'])
        # Was misspelled `numercial_gradient`, which raised NameError.
        grads['b2'] = numerical_gradient(loss_W, self.params['b2'])

        return grads

    def gradient(self, x, t):
        """Return parameter gradients computed by backpropagation."""
        # Forward pass (populates the caches the layers need)
        self.loss(x, t)

        # Backward pass: loss layer first, then the stack in reverse
        dout = 1
        dout = self.lastLayer.backward(dout)

        layers = list(self.layers.values())
        layers.reverse()
        for layer in layers:
            dout = layer.backward(dout)

        # Collect the gradients the Affine layers stored on themselves
        grads = {}
        grads['W1'] = self.layers['Affine1'].dW
        grads['b1'] = self.layers['Affine1'].db
        grads['W2'] = self.layers['Affine2'].dW
        grads['b2'] = self.layers['Affine2'].db

        return grads

In [26]:
# 수치미분을 이용해 기울기 검증하기
import sys, os
sys.path.append(os.pardir)
import numpy as np
from dataset.mnist import load_mnist
from two_layer_net import TwoLayerNet

def numerical_gradient(f, x):
    """Return the central-difference gradient of ``f`` at ``x``.

    ``x`` is a NumPy array of any dimensionality. Each element is
    perturbed in place by ``+h`` and ``-h``, ``f(x)`` is evaluated for
    both, and the element is then restored, so ``x`` holds its original
    values on return. Use a floating-point array: the ``h``-sized
    perturbations would be truncated away in an integer array.

    The result has the same shape (and dtype) as ``x``.
    """
    h = 1e-4
    gradient = np.zeros_like(x)

    # Visit every element by its full multi-dimensional index. Indexing a
    # 2-D array with a single flat integer would select (and perturb) a
    # whole row, and would run past the first axis.
    for index in np.ndindex(x.shape):
        temp_val = x[index]

        x[index] = temp_val + h
        fxh1 = f(x)

        x[index] = temp_val - h
        fxh2 = f(x)

        gradient[index] = (fxh1 - fxh2) / (2 * h)
        x[index] = temp_val  # restore the original value

    return gradient

# Load the data
(x_train, t_train), (x_test, t_test) = \
    load_mnist(normalize=True, one_hot_label=True)

network = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)

# Three samples are enough for the check and keep the numerical
# gradient affordable.
x_batch = x_train[:3]
t_batch = t_train[:3]

grad_numerical = network.numerical_gradient(x_batch, t_batch)
grad_backprop = network.gradient(x_batch, t_batch)

# For each parameter, take the absolute difference between the two
# gradients and report its mean.
for key in grad_numerical:
    # Was misspelled `grad_numercial`, which raised NameError.
    diff = np.average(np.abs(grad_backprop[key] - grad_numerical[key]))
    print(key + ":" + str(diff))

W1:0.00028245681970871816
b1:0.0014061452408425518
W2:0.0036568568143519513
b2:0.0032463051765563343


In [32]:
# 오차역전파법을 사용한 학습 구현하기
import sys, os
sys.path.append(os.pardir)
import numpy as np
from dataset.mnist import load_mnist
from two_layer_net import TwoLayerNet

# Load the data and build the network
(x_train, t_train), (x_test, t_test) = \
    load_mnist(normalize=True, one_hot_label=True)
network = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)

iters_num = 10000
train_size = x_train.shape[0]
batch_size = 100
learning_rate = 0.1

train_loss_list = []
train_acc_list = []
test_acc_list = []

# Whole number of iterations per epoch. Floor division keeps this an
# int, so the `i % iter_per_epoch == 0` check below still fires once per
# epoch when train_size is not a multiple of batch_size.
iter_per_epoch = max(train_size // batch_size, 1)

for i in range(iters_num):
    # Draw a random mini-batch of indices into the training set
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]

    # Compute the gradients by backpropagation
    grad = network.gradient(x_batch, t_batch)

    # Update the parameters in place (plain gradient descent)
    for key in ('W1', 'b1', 'W2', 'b2'):
        network.params[key] -= learning_rate * grad[key]

    # Record the loss on the same mini-batch, after the update
    loss = network.loss(x_batch, t_batch)
    train_loss_list.append(loss)

    # Once per epoch, evaluate accuracy on the full train and test sets
    if i % iter_per_epoch == 0:
        train_acc = network.accuracy(x_train, t_train)
        test_acc = network.accuracy(x_test, t_test)
        train_acc_list.append(train_acc)
        test_acc_list.append(test_acc)
        print(train_acc, test_acc)

0.08406666666666666 0.0842
0.80045 0.8059
0.8770833333333333 0.8822
0.8992333333333333 0.9007
0.9087333333333333 0.9114
0.91595 0.9173
0.9202833333333333 0.9227
0.9247833333333333 0.9264
0.9282333333333334 0.9301
0.9312166666666667 0.9327
0.9345833333333333 0.9355
0.9375666666666667 0.9367
0.9395666666666667 0.9395
0.94195 0.9411
0.9436166666666667 0.9432
0.9452666666666667 0.9435
0.9470333333333333 0.9465


[8 1 0 5 4 6 0 1 3 0 6 8 5 5 7 1 8 2 3 8]


20