In [3]:
from common.functions import *
from common.gradient import numerical_gradient
from util.layers import *
from collections import OrderedDict

In [6]:
class TwoLayerNet:
    """Two-layer fully connected classifier: Affine -> ReLU -> Affine, plus a softmax/loss head.

    The network keeps its learnable arrays in ``self.params`` and hands the very
    same array objects to the Affine layers when they are constructed.
    """

    def __init__(self, input_size, hidden_size, output_size, weight_init_std = 0.01):
        """Create the parameters and assemble the layer pipeline.

        Args:
            input_size: Number of input features (first dimension of ``W1``).
            hidden_size: Number of hidden units (second dimension of ``W1``,
                first dimension of ``W2``).
            output_size: Number of output scores (second dimension of ``W2``).
            weight_init_std: Factor multiplied onto the standard-normal samples
                used as initial weights.
        """
        # Learnable parameters, keyed by 'W1', 'b1', 'W2', 'b2'.
        self.params = {}

        # Input -> hidden: scaled random weights, zero biases.
        self.params['W1'] = weight_init_std * np.random.randn(input_size, hidden_size)
        self.params['b1'] = np.zeros(hidden_size)

        # Hidden -> output: scaled random weights, zero biases.
        self.params['W2'] = weight_init_std * np.random.randn(hidden_size, output_size)
        self.params['b2'] = np.zeros(output_size)

        # Insertion order is the forward order; gradient() walks it in reverse.
        # Each affine step (xW + b) is followed by its activation, except the last.
        self.layers = OrderedDict()
        self.layers['Affine1'] = Affine(self.params['W1'], self.params['b1'])
        self.layers['relu1'] = Relu()
        self.layers['Affine2'] = Affine(self.params['W2'], self.params['b2'])

        # Output head combining softmax (multi-class scores) with the loss.
        self.last_layers = SoftmaxWithLoss()

    def predict(self, x):
        """Run ``x`` forward through every layer in ``self.layers`` and return the result.

        The softmax/loss head is not applied here, so the result is whatever the
        final Affine layer's ``forward`` returns.
        """
        for layer in self.layers.values():
            x = layer.forward(x)
        return x

    def loss(self, x, t):
        """Return ``SoftmaxWithLoss.forward`` applied to the predictions for ``x`` and targets ``t``."""
        scores = self.predict(x)
        return self.last_layers.forward(scores, t)

    def accuracy(self, x, t):
        """Return the fraction of rows of ``x`` whose arg-max prediction matches ``t``.

        Args:
            x: Input batch; ``x.shape[0]`` is used as the sample count.
            t: Targets, either one-hot rows (2-D) or class indices (1-D).

        Returns:
            Number of correct predictions divided by ``x.shape[0]``.
        """
        predicted = np.argmax(self.predict(x), axis=1)

        # One-hot targets are collapsed to class indices; targets that are already
        # 1-D class indices are used as they are (argmax over axis 1 would fail on them).
        t = np.asarray(t)
        if t.ndim != 1:
            t = np.argmax(t, axis=1)

        return np.sum(predicted == t) / x.shape[0]

    def numerical_gradient(self, x, t):
        """Return gradients of the loss for every parameter using the imported ``numerical_gradient`` helper.

        Returns:
            Dict with keys 'W1', 'b1', 'W2', 'b2' (in that order).
        """
        # The helper passes an argument that is ignored: the loss is always
        # evaluated on the network's current parameters.
        # NOTE(review): this presumably relies on the helper perturbing the passed
        # array in place and on the layers sharing that array - confirm in common.gradient.
        loss_W = lambda _ : self.loss(x, t)

        # Inside the method body the bare name resolves to the module-level helper,
        # not to this method.
        return {
            name: numerical_gradient(loss_W, self.params[name])
            for name in ('W1', 'b1', 'W2', 'b2')
        }

    def gradient(self, x, t):
        """Return gradients of the loss for every parameter via backpropagation.

        Returns:
            Dict with keys 'W1', 'b1', 'W2', 'b2' (in that order), read from the
            ``dW`` / ``db`` attributes of the two Affine layers.
        """
        # Forward pass first, so the layers see the current batch before backward runs.
        self.loss(x, t)

        # Seed the backward pass with an upstream derivative of 1.
        dout = self.last_layers.backward(1)

        # Backward visits the layers in the opposite order of the forward pass.
        for layer in reversed(list(self.layers.values())):
            dout = layer.backward(dout)

        # dW / db are presumably populated by Affine.backward above.
        first_affine = self.layers['Affine1']
        second_affine = self.layers['Affine2']
        return {
            'W1': first_affine.dW,
            'b1': first_affine.db,
            'W2': second_affine.dW,
            'b2': second_affine.db,
        }


In [2]:
from util.mnist import load_mnist

In [20]:
## Load the dataset (normalize and one-hot label options enabled).
(x_train, t_train), (x_test, t_test) = load_mnist(
    normalize=True,
    one_hot_label=True,
)

## Configure the network dimensions.
network = TwoLayerNet(
    input_size=784,
    hidden_size=50,
    output_size=10,
)

# Use only the first three samples as the batch for the comparison.
x_batch, t_batch = x_train[:3], t_train[:3]

## Gradients obtained through the numerical-differentiation method.
grad_numerical = network.numerical_gradient(x_batch, t_batch)

## Gradients obtained through the backward pass, reusing the forward results.
grad_backpropa = network.gradient(x_batch, t_batch)


In [22]:
# Report, per parameter, the shapes of both gradients and how far apart they are.
for key in grad_numerical:
    from_backprop = grad_backpropa[key]
    from_numeric = grad_numerical[key]

    print(key, from_backprop.shape, from_numeric.shape)

    ## Mean absolute element-wise difference between the two algorithms' results.
    diff = np.abs(from_backprop - from_numeric).mean()
    print(key + ':' + str(diff))

W1 (784, 50) (784, 50)
W1:5.9939684989628335e-09
b1 (50,) (50,)
b1:4.703852263208085e-08
W2 (50, 10) (50, 10)
W2:8.616688927349332e-08
b2 (10,) (10,)
b2:1.8002014676360422e-06


In [4]:
from tqdm import tqdm

In [7]:
(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, one_hot_label=True)

## History buffers
train_loss_list = []
## Accuracy history (one entry per reporting step)
train_acc_list = []
test_acc_list = []

## Hyperparameters
iter_num = 10000
train_size = x_train.shape[0]
batch_size = 100
learning_rate = 0.3

# Iterations per epoch, as an integer of at least 1. A float quotient made the
# `i % iter_per_epoch == 0` check below fire essentially only at i == 0 whenever
# train_size was not an exact multiple of batch_size.
iter_per_epoch = max(train_size // batch_size, 1)
network = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)

## Run iter_num update steps.
for i in tqdm(range(iter_num)):
    ## Draw batch_size random sample indices (np.random.choice default: with replacement).
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]

    ## Gradients for this mini-batch via backpropagation.
    grad = network.gradient(x_batch, t_batch)

    for key in ('W1', 'b1', 'W2', 'b2'):
        ## Gradient-descent step: subtract learning_rate * gradient from each parameter.
        ## The in-place `-=` matters: the Affine layers were constructed with these
        ## same array objects (presumably kept by reference - confirm in util.layers).
        network.params[key] -= learning_rate * grad[key]

    ## Loss on the same batch, evaluated after the update.
    loss = network.loss(x_batch, t_batch)
    train_loss_list.append(loss)

    ## Once per epoch: record and report accuracy on the full train and test sets.
    if i % iter_per_epoch == 0:
        train_acc = network.accuracy(x_train, t_train)
        test_acc = network.accuracy(x_test, t_test)
        train_acc_list.append(train_acc)
        test_acc_list.append(test_acc)
        # tqdm.write prints the message without breaking up the active progress bar.
        tqdm.write(f'train acc : {str(train_acc)}/ test acc {str(test_acc)}')
        

  2%|▏         | 216/10000 [00:00<00:15, 650.73it/s]

train acc : 0.11925/ test acc 0.1227


  7%|▋         | 717/10000 [00:01<00:15, 581.30it/s]

train acc : 0.9246333333333333/ test acc 0.9243


 14%|█▎        | 1363/10000 [00:02<00:13, 630.37it/s]

train acc : 0.9504/ test acc 0.9502


 19%|█▉        | 1905/10000 [00:02<00:13, 612.08it/s]

train acc : 0.95835/ test acc 0.9531


 25%|██▌       | 2542/10000 [00:03<00:11, 631.75it/s]

train acc : 0.97275/ test acc 0.9675


 31%|███       | 3080/10000 [00:04<00:11, 617.35it/s]

train acc : 0.974/ test acc 0.9677


 37%|███▋      | 3727/10000 [00:05<00:12, 504.13it/s]

train acc : 0.9741833333333333/ test acc 0.9677


 43%|████▎     | 4273/10000 [00:06<00:10, 554.88it/s]

train acc : 0.9783666666666667/ test acc 0.9691


 50%|████▉     | 4952/10000 [00:07<00:09, 544.82it/s]

train acc : 0.9822/ test acc 0.9706


 56%|█████▌    | 5605/10000 [00:08<00:06, 697.55it/s]

train acc : 0.9836166666666667/ test acc 0.9735


 62%|██████▏   | 6154/10000 [00:09<00:06, 603.92it/s]

train acc : 0.9847666666666667/ test acc 0.974


 67%|██████▋   | 6673/10000 [00:10<00:06, 517.62it/s]

train acc : 0.9839/ test acc 0.9715


 73%|███████▎  | 7327/10000 [00:11<00:05, 511.43it/s]

train acc : 0.9849833333333333/ test acc 0.9709


 79%|███████▉  | 7921/10000 [00:12<00:03, 520.46it/s]

train acc : 0.9884166666666667/ test acc 0.9742


 85%|████████▌ | 8521/10000 [00:13<00:02, 556.56it/s]

train acc : 0.9890333333333333/ test acc 0.9751


 92%|█████████▏| 9170/10000 [00:14<00:01, 640.63it/s]

train acc : 0.9868833333333333/ test acc 0.973


 97%|█████████▋| 9717/10000 [00:15<00:00, 628.72it/s]

train acc : 0.9915333333333334/ test acc 0.9742


100%|██████████| 10000/10000 [00:15<00:00, 649.09it/s]
