In [1]:
import sys, os
# Make the book repository root importable (presumably so that the `common`,
# `dataset` and `ch05` packages imported in this notebook can be resolved).
# The location can be overridden with the DLFS_ROOT environment variable so the
# notebook is not tied to one machine; the original author's path is kept as
# the fallback, so existing setups behave exactly as before.
sys.path.append(
    os.environ.get(
        'DLFS_ROOT',
        'C:\\Users\\pleiony_seo\\swoos91\\deep-learning-from-scratch',
    )
)

from common.layers import *
from common.gradient import numerical_gradient
import numpy as np
from collections import OrderedDict

In [8]:
class TwoLayerNets:
    """Network made of Affine -> Relu -> Affine layers plus a SoftmaxWithLoss
    layer that is applied only when the loss is computed.

    Attributes:
        params: dict holding the arrays 'W1', 'b1', 'W2', 'b2'.
        layers: OrderedDict with the keys 'Affine1', 'ReLU1', 'Affine2',
            applied in that order by ``predict``.
        lastLayer: the SoftmaxWithLoss instance used by ``loss``/``gradient``.
    """

    def __init__(self, input_size, hidden_size, output_size, weight_init_std = 0.1):
        """Create the parameters and wire them into the layer objects.

        Args:
            input_size: number of rows of W1.
            hidden_size: number of columns of W1 / rows of W2.
            output_size: number of columns of W2.
            weight_init_std: factor applied to the standard-normal samples
                used for W1 and W2.
        """
        # Weights are scaled np.random.randn samples; biases start at zero.
        w1 = weight_init_std * np.random.randn(input_size, hidden_size)
        b1 = np.zeros(hidden_size)
        w2 = weight_init_std * np.random.randn(hidden_size, output_size)
        b2 = np.zeros(output_size)
        self.params = {'W1': w1, 'b1': b1, 'W2': w2, 'b2': b2}

        # The layers receive the very same array objects stored in params.
        self.layers = OrderedDict([
            ('Affine1', Affine(self.params['W1'], self.params['b1'])),
            ('ReLU1', Relu()),
            ('Affine2', Affine(self.params['W2'], self.params['b2'])),
        ])

        self.lastLayer = SoftmaxWithLoss()

    def predict(self, x):
        """Feed ``x`` through every entry of ``self.layers`` in insertion
        order and return the final output (lastLayer is not applied)."""
        out = x
        for stage in self.layers.values():
            out = stage.forward(out)
        return out

    def loss(self, x, t):
        """Return ``lastLayer.forward`` evaluated on the prediction for ``x``
        and the targets ``t``."""
        scores = self.predict(x)
        return self.lastLayer.forward(scores, t)

    def accuracy(self, x, t):
        """Return the fraction of rows of ``x`` whose arg-max prediction
        equals the label.

        ``t`` is used as-is when it is 1-D; otherwise it is reduced with
        ``argmax`` along axis 1 (e.g. one-hot rows).
        """
        predicted = np.argmax(self.predict(x), axis = 1)
        labels = t if t.ndim == 1 else np.argmax(t, axis = 1)

        hits = np.sum( predicted == labels )
        return hits / float(x.shape[0])

    def gradient(self, x, t):
        """Run a forward pass, propagate backwards through all layers and
        return the gradients stored on the two Affine layers.

        Returns:
            dict with the keys 'W1', 'b1', 'W2', 'b2', taken from the
            ``dW``/``db`` attributes of 'Affine1' and 'Affine2'.
        """
        # Forward pass; the returned loss value itself is not needed here.
        self.loss(x, t)

        # Backward pass: start from lastLayer with an upstream value of 1,
        # then walk the regular layers from last to first.
        upstream = self.lastLayer.backward( 1 )
        for stage in reversed(list( self.layers.values() )):
            upstream = stage.backward( upstream )

        # Collect the results.
        first_affine = self.layers['Affine1']
        second_affine = self.layers['Affine2']
        return {
            'W1': first_affine.dW,
            'b1': first_affine.db,
            'W2': second_affine.dW,
            'b2': second_affine.db,
        }

### 기울기 검증

In [10]:
from dataset.mnist import load_mnist
from ch05.two_layer_net import TwoLayerNet

# Load the train/test splits returned by load_mnist.
# NOTE(review): presumably normalize=True scales the pixel values and
# one_hot_label=True returns one-hot label rows — confirm against dataset.mnist.
(x_train, t_train), (x_test, t_test) = \
    load_mnist(normalize = True, one_hot_label=True)

In [40]:
# NOTE(review): this instantiates TwoLayerNet imported from ch05.two_layer_net,
# not the TwoLayerNets class defined earlier in this notebook — confirm which
# implementation is meant to be checked here.
network = TwoLayerNet(input_size = 784, hidden_size = 50, output_size = 10)

# Only the first three training samples are used for this check.
x_batch = x_train[:3]
t_batch = t_train[:3]

# Gradients returned by the network's gradient() method (presumably computed
# by backpropagation, per the variable name).
grad_backprop = network.gradient(x_batch, t_batch)
# NOTE(review): printing the whole dict dumps every gradient array; a per-key
# summary (shape / mean absolute value) would be easier to read.
print(grad_backprop)

{'W1': array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]]), 'b1': array([-0.00563132,  0.00186932,  0.        , -0.0011059 ,  0.        ,
       -0.00015333, -0.00142112,  0.00020243,  0.00547082, -0.00060998,
        0.        ,  0.00382504, -0.00282845, -0.0001679 ,  0.00215384,
        0.00643584, -0.00448279,  0.        , -0.00065463,  0.00847223,
        0.        ,  0.        ,  0.00589056,  0.        ,  0.00561824,
        0.        , -0.00179807,  0.01038469,  0.0012163 ,  0.        ,
        0.        , -0.00260313,  0.0026442 , -0.00332606,  0.        ,
       -0.00093012,  0.00092646,  0.        , -0.00112808, -0.00293398,
        0.00264716,  0.00010499, -0.00087383, -0.0003632 ,  0.00273436,
        0.00396372,  0.00157995,  0.00050145,  0.00592062, -0.00490986]), 'W2': array([[ 2.473154