In [1]:
cd /content/drive/MyDrive/AI부트캠프/SeSAC_DL/

/content/drive/MyDrive/AI부트캠프/SeSAC_DL


In [31]:
import numpy as np
from tqdm import tqdm

In [None]:
from common.functions import *
from common.gradient import numerical_gradient
from util.layers import *
from collections import OrderedDict

In [25]:
class TwoLayerNet:
    """Two-layer fully connected classifier: Affine -> Relu -> Affine, scored by SoftmaxWithLoss.

    Attributes:
        params: dict holding the weight/bias arrays under 'W1', 'b1', 'W2', 'b2'.
        layers: OrderedDict of the forward layers, in forward order.
        lastlayers: the SoftmaxWithLoss layer used by ``loss`` and ``gradient``.
    """

    def __init__(self, input_size, hidden_size, output_size,
                 weight_init_std=0.01):
        """Create the parameters and wire them into the layer stack.

        Args:
            input_size: number of input features.
            hidden_size: number of hidden units.
            output_size: number of output classes.
            weight_init_std: scale applied to the standard-normal weight draws.
        """
        self.params = {}
        # Weights: scaled standard-normal draws; biases: zeros.
        self.params['W1'] = weight_init_std * np.random.randn(input_size, hidden_size)   # (input_size, hidden_size)
        self.params['b1'] = np.zeros(hidden_size)
        self.params['W2'] = weight_init_std * np.random.randn(hidden_size, output_size)  # (hidden_size, output_size)
        self.params['b2'] = np.zeros(output_size)

        # The layers are handed the very same array objects stored in
        # self.params. NOTE(review): presumably Affine keeps references (not
        # copies), so in-place parameter updates are seen by the layers — confirm
        # in util.layers.
        self.layers = OrderedDict()
        self.layers['Affine1'] = Affine(self.params['W1'], self.params['b1'])
        self.layers['Relu1'] = Relu()
        self.layers['Affine2'] = Affine(self.params['W2'], self.params['b2'])
        self.lastlayers = SoftmaxWithLoss()

    def predict(self, x):
        """Run ``x`` through every layer in ``self.layers`` and return the result.

        The SoftmaxWithLoss layer is not applied here, so the return value is the
        output of the last layer in ``self.layers``.
        """
        for layer in self.layers.values():
            x = layer.forward(x)

        return x

    def loss(self, x, y_true):
        """Return ``SoftmaxWithLoss.forward`` of the predictions for ``x`` against ``y_true``."""
        y = self.predict(x)
        loss = self.lastlayers.forward(y, y_true)

        return loss

    def accuracy(self, x, y_true):
        """Return the fraction of rows of ``x`` whose predicted class matches ``y_true``.

        Args:
            x: input batch; ``x.shape[0]`` is used as the sample count.
            y_true: either one-hot/score rows (more than one dimension, reduced
                with argmax over axis 1) or a 1-D array of integer class labels.
        """
        y_pred = np.argmax(self.predict(x), axis=1)

        # Accept label-encoded targets too: argmax over axis 1 only makes sense
        # (and only works) when the targets have a class axis.
        y_true = np.asarray(y_true)
        if y_true.ndim != 1:
            y_true = np.argmax(y_true, axis=1)

        accuracy = np.sum(y_pred == y_true) / x.shape[0]
        return accuracy

    def numerical_gradient(self, x, y_true):
        """Return gradients of the loss w.r.t. each parameter via the module-level ``numerical_gradient``.

        Inside this method the bare name ``numerical_gradient`` resolves to the
        imported helper, not to this method. The loss closure ignores its
        argument; NOTE(review): this presumably relies on the helper perturbing
        the passed parameter array in place — confirm in common.gradient.

        Returns:
            dict with keys 'W1', 'b1', 'W2', 'b2'.
        """
        loss_W = lambda _: self.loss(x, y_true)

        grads = {}
        grads['W1'] = numerical_gradient(loss_W, self.params['W1'])
        grads['b1'] = numerical_gradient(loss_W, self.params['b1'])
        grads['W2'] = numerical_gradient(loss_W, self.params['W2'])
        grads['b2'] = numerical_gradient(loss_W, self.params['b2'])

        return grads

    def gradient(self, x, y_true):
        """Return gradients of the loss w.r.t. each parameter via backpropagation.

        Returns:
            dict with keys 'W1', 'b1', 'W2', 'b2', read from the Affine layers'
            ``dW``/``db`` attributes after the backward pass.
        """
        # Forward pass first: the return value is unused, the call is made so the
        # layers hold the state their backward() needs.
        self.loss(x, y_true)

        # Backward pass: start from the loss layer, then walk the stack in reverse.
        dout = 1
        dout = self.lastlayers.backward(dout)

        layers = list(self.layers.values())
        layers.reverse()
        for layer in layers:
            dout = layer.backward(dout)

        grads = {}
        grads['W1'] = self.layers['Affine1'].dW
        grads['b1'] = self.layers['Affine1'].db
        grads['W2'] = self.layers['Affine2'].dW
        grads['b2'] = self.layers['Affine2'].db

        return grads

In [26]:
from util.mnist import load_mnist

In [27]:
# Load the MNIST train/test splits. Labels are requested one-hot encoded and
# inputs normalized. NOTE(review): normalize=True presumably rescales pixel
# values — confirm in util.mnist.
(x_train, t_train), (x_test, t_test) = load_mnist(
    normalize=True,
    one_hot_label=True,
)

In [28]:
# Gradient check: on a freshly initialised network, compute the gradients both
# numerically and by backpropagation so they can be compared.
network = TwoLayerNet(
    input_size=784,
    hidden_size=50,
    output_size=10,
)

# Only the first three samples are used for the check.
x_batch, t_batch = x_train[:3], t_train[:3]

grad_numerical = network.numerical_gradient(x_batch, t_batch)
grad_backprop = network.gradient(x_batch, t_batch)

In [29]:
# For every parameter, print both gradients' shapes and the summed absolute
# difference between the numerical and the backprop gradient.
for key, numeric in grad_numerical.items():
    analytic = grad_backprop[key]
    abs_diff_total = np.abs(numeric - analytic).sum()
    print(f'{key}.shape : {numeric.shape}, {analytic.shape}')
    print(f'{key} diff : {abs_diff_total}')

W1.shape : (784, 50), (784, 50)
W1 diff : 0.00826450967185012
b1.shape : (50,), (50,)
b1 diff : 0.00016754369697103826
W2.shape : (50, 10), (50, 10)
W2 diff : 4.68135225616928e-10
b2.shape : (10,), (10,)
b2 diff : 1.2012612987666316e-09


In [32]:
# Mini-batch loss recorded at every iteration (measured after the update).
train_loss_list = []

# Added: accuracy on the full train / test sets, recorded once per epoch.
train_acc_list = []
test_acc_list = []

iters_num = 10000
train_size = x_train.shape[0]
batch_size = 100
learning_rate = 0.1

# Added: number of iterations that make up one epoch. Kept an integer (and at
# least 1) so the `i % iter_per_epoch == 0` check below fires once per epoch even
# when train_size is not a multiple of batch_size; a float period such as 937.5
# would only match every second epoch.
iter_per_epoch = max(train_size // batch_size, 1)


network = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)

for i in tqdm(range(iters_num)):
    # Draw a random mini-batch of indices (np.random.choice samples with
    # replacement by default).
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]

    # Backprop gradients for this mini-batch.
    grad = network.gradient(x_batch, t_batch)

    # Gradient-descent step. The in-place `-=` keeps the same array objects that
    # were passed to the layers at construction. NOTE(review): presumably the
    # layers hold references to them — confirm in util.layers.
    for key in ('W1', 'b1', 'W2', 'b2'):
        network.params[key] -= learning_rate * grad[key]

    # Loss on the same mini-batch, evaluated with the updated parameters.
    loss = network.loss(x_batch, t_batch)
    train_loss_list.append(loss)

    # Once per epoch (including iteration 0): evaluate on the full datasets.
    if i % iter_per_epoch == 0:
        train_acc = network.accuracy(x_train, t_train)
        test_acc = network.accuracy(x_test, t_test)
        train_acc_list.append(train_acc)
        test_acc_list.append(test_acc)
        print('train acc, test acc | '+str(train_acc)+','+str(test_acc))

  0%|          | 20/10000 [00:00<04:23, 37.89it/s] 

train acc, test acc | 0.14556666666666668,0.1518


  6%|▋         | 636/10000 [00:04<01:25, 109.70it/s]

train acc, test acc | 0.9037666666666667,0.9046


 12%|█▏        | 1232/10000 [00:07<01:07, 129.62it/s]

train acc, test acc | 0.9257,0.9255


 18%|█▊        | 1838/10000 [00:10<01:07, 121.46it/s]

train acc, test acc | 0.9389333333333333,0.937


 24%|██▍       | 2439/10000 [00:13<00:59, 126.38it/s]

train acc, test acc | 0.9445666666666667,0.9425


 30%|███       | 3039/10000 [00:16<00:54, 127.48it/s]

train acc, test acc | 0.95325,0.9518


 36%|███▋      | 3648/10000 [00:19<00:47, 133.53it/s]

train acc, test acc | 0.9568,0.9538


 42%|████▏     | 4224/10000 [00:22<00:57, 100.79it/s]

train acc, test acc | 0.9602166666666667,0.9578


 48%|████▊     | 4844/10000 [00:25<00:41, 124.97it/s]

train acc, test acc | 0.9648666666666667,0.9608


 54%|█████▍    | 5445/10000 [00:28<00:34, 132.27it/s]

train acc, test acc | 0.9673833333333334,0.9616


 60%|██████    | 6035/10000 [00:31<00:38, 104.03it/s]

train acc, test acc | 0.9691666666666666,0.9619


 66%|██████▋   | 6638/10000 [00:34<00:27, 121.62it/s]

train acc, test acc | 0.9711333333333333,0.9655


 72%|███████▏  | 7244/10000 [00:37<00:21, 128.35it/s]

train acc, test acc | 0.9744666666666667,0.9673


 78%|███████▊  | 7839/10000 [00:40<00:16, 128.62it/s]

train acc, test acc | 0.97515,0.9675


 84%|████████▍ | 8434/10000 [00:43<00:12, 129.92it/s]

train acc, test acc | 0.9774833333333334,0.9697


 90%|█████████ | 9030/10000 [00:46<00:08, 110.43it/s]

train acc, test acc | 0.9775,0.9678


 96%|█████████▋| 9645/10000 [00:49<00:02, 126.36it/s]

train acc, test acc | 0.9797833333333333,0.9702


100%|██████████| 10000/10000 [00:50<00:00, 197.51it/s]
