In [5]:
import sys, os
sys.path.append(os.pardir)  # 부모 디렉터리의 파일을 가져올 수 있도록 설정
import numpy as np
from common.layers import *
from common.functions import softmax, cross_entropy_error
from common.gradient import numerical_gradient
from collections import OrderedDict

class Net:
    """Fully connected classifier built from a chain of Affine/ReLU layers.

    The network consists of ``n`` Affine layers.  Every Affine layer except
    the last one is followed by a ReLU; the output of the last Affine layer
    is fed to ``SoftmaxWithLoss`` when the loss is computed.

    Attributes:
        n: Number of Affine layers.
        params: Dict mapping ``'W1'``, ``'b1'``, ..., ``'Wn'``, ``'bn'`` to
            the NumPy arrays holding the weights and biases.
        layers: ``OrderedDict`` of the Affine/ReLU layer objects in forward
            order, keyed ``'Affine1'``, ``'Relu1'``, ..., ``'Affinen'``.
        lastLayer: The ``SoftmaxWithLoss`` layer used by ``loss``.
    """

    def __init__(self, input_size, hidden_size, output_size, wis=0.01,
                 layer_num=None):
        """Create the parameters and layers of the network.

        Args:
            input_size: Number of input features.
            hidden_size: Width of every hidden layer.
            output_size: Number of output classes.
            wis: Scale applied to the standard-normal initial values of both
                weights and biases.
            layer_num: Number of Affine layers.  When ``None`` (the default)
                the value is read interactively from standard input, which
                is the original behaviour of this class.

        Raises:
            ValueError: If the layer count is smaller than 1, or if the text
                typed at the prompt is not an integer.
        """
        if layer_num is None:
            # Backward compatible path: ask the user, exactly as before.
            layer_num = int(input('layer number?' ))
        layer_num = int(layer_num)
        if layer_num < 1:
            raise ValueError(
                'layer number must be at least 1, got {0}'.format(layer_num))
        self.n = layer_num

        # Draw W then b for each layer, in layer order, so that seeded runs
        # produce the same values as the original implementation.
        self.params = {}
        shapes = self._layer_shapes(input_size, hidden_size, output_size,
                                    self.n)
        for idx, (fan_in, fan_out) in enumerate(shapes, start=1):
            self.params['W{0}'.format(idx)] = wis * np.random.randn(fan_in, fan_out)
            self.params['b{0}'.format(idx)] = wis * np.random.randn(fan_out)

        # The layer objects receive the very same arrays that are stored in
        # ``self.params``.
        self.layers = OrderedDict()
        for idx in range(1, self.n + 1):
            self.layers['Affine{0}'.format(idx)] = Affine(
                self.params['W{0}'.format(idx)],
                self.params['b{0}'.format(idx)])
            if idx != self.n:
                # No activation after the final Affine layer.
                self.layers['Relu{0}'.format(idx)] = Relu()

        self.lastLayer = SoftmaxWithLoss()

    @staticmethod
    def _layer_shapes(input_size, hidden_size, output_size, layer_num):
        """Return the ``(fan_in, fan_out)`` pair of every Affine layer.

        The first layer always starts at ``input_size`` and the last layer
        always ends at ``output_size`` -- including the single-layer case,
        where the only layer maps the input directly to the output.
        """
        widths = [input_size] + [hidden_size] * (layer_num - 1) + [output_size]
        return list(zip(widths[:-1], widths[1:]))

    def predict(self, x):
        """Run ``x`` forward through every Affine/ReLU layer.

        Returns the output of the last Affine layer; ``lastLayer`` is not
        applied here.
        """
        for layer in self.layers.values():
            x = layer.forward(x)
        return x

    def loss(self, x, t):
        """Return ``lastLayer.forward`` applied to the prediction for ``x`` and ``t``."""
        y = self.predict(x)
        return self.lastLayer.forward(y, t)

    def accuracy(self, x, t):
        """Return the fraction of rows of ``x`` whose predicted class equals ``t``.

        ``t`` may be a 1-D array of class indices; any other rank is reduced
        with ``argmax`` along axis 1 (e.g. one-hot rows).
        """
        predicted = np.argmax(self.predict(x), axis=1)
        if t.ndim != 1:
            t = np.argmax(t, axis=1)
        return np.sum(predicted == t) / float(x.shape[0])

    def numerical_gradient(self, x, t):
        """Compute the gradient of the loss for every parameter numerically.

        Returns:
            Dict with the same keys as ``params``.
        """
        # The argument is ignored on purpose: the module-level
        # ``numerical_gradient`` helper presumably perturbs the parameter
        # array in place (TODO confirm), and the layers share that array.
        loss_W = lambda W: self.loss(x, t)

        grads = {}
        for idx in range(1, self.n + 1):
            for prefix in ('W', 'b'):
                key = '{0}{1}'.format(prefix, idx)
                # Inside a method this name resolves to the module-level
                # function, not to this method.
                grads[key] = numerical_gradient(loss_W, self.params[key])
        return grads

    def gradient(self, x, t):
        """Compute the gradient of the loss for every parameter by backprop.

        Returns:
            Dict with the same keys as ``params``, read from the ``dW`` and
            ``db`` attributes of the Affine layers after the backward pass.
        """
        # Forward pass first, so the backward calls below follow a forward
        # pass on this batch.
        self.loss(x, t)

        # Backward pass, from the loss layer back to the first layer.
        dout = self.lastLayer.backward(1)
        for layer in reversed(list(self.layers.values())):
            dout = layer.backward(dout)

        grads = {}
        for idx in range(1, self.n + 1):
            affine = self.layers['Affine{0}'.format(idx)]
            grads['W{0}'.format(idx)] = affine.dW
            grads['b{0}'.format(idx)] = affine.db
        return grads
    
# Gradient check: compare the numerically estimated gradients with the ones
# obtained by backpropagation on a tiny MNIST batch.
# NOTE(review): load_mnist is not imported in this cell; presumably it comes
# from an earlier notebook cell -- confirm.
(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, one_hot_label=True)

network = Net(input_size=784, hidden_size=50, output_size=10)

# Three samples are enough here.
x_batch = x_train[:3]
t_batch = t_train[:3]

# Debugging aid: the shape of every parameter can be inspected through
# network.params['W<i>'].shape and network.params['b<i>'].shape.

grad_numerical = network.numerical_gradient(x_batch, t_batch)
grad_backprop = network.gradient(x_batch, t_batch)

# Print the mean absolute difference between both gradients for each parameter.
for key, numeric in grad_numerical.items():
    diff = np.mean(np.abs(grad_backprop[key] - numeric))
    print('{0}:{1}'.format(key, diff))


layer number?10
W1:1.452954809821416e-14
b1:9.61275272868052e-14
W2:1.1680665413923712e-13
b2:6.947891820398404e-13
W3:1.2940771506280374e-13
b3:3.204007230173829e-13
W4:2.0713062462335653e-13
b4:4.811131008006953e-13
W5:2.6446816236062787e-13
b5:8.646465103253858e-11
W6:2.569742685531784e-13
b6:1.737957575536654e-09
W7:3.26173805098564e-13
b7:1.1620366345242415e-07
W8:5.013525276581033e-13
b8:7.326637593114364e-11
W9:7.2251948852351894e-12
b9:2.5647007094925552e-05
W10:6.659137819425636e-10
b10:1.3946742661169463e-07
