## Naive backpropagation Layer
### MulLayer, AddLayer

In [1]:
class MulLayer:
    def __init__(self):
        self.x=None
        self.y=None
        return
    
    def forward(self,x,y):
        self.x=x
        self.y=y
        out = x*y
        return out
    def backward(self,dout):
        dx = dout*self.y
        dy = dout*self.x
        return dx,dy

In [2]:
apple=100
apple_num=2
tax=1.1

mul_apple_layer = MulLayer()
mul_tax_layer = MulLayer()

apple_price= mul_apple_layer.forward(apple,apple_num)
price = mul_tax_layer.forward(apple_price,tax)

print(price)

220.00000000000003


In [3]:
dprice=1
dapple_price,dtax = mul_tax_layer.backward(dprice)
dapple,dapple_num = mul_apple_layer.backward(dapple_price)
print(dapple,dapple_num,dtax)

2.2 110.00000000000001 200


In [4]:
class AddLayer:
    def __init__(self):
        pass
    
    def forward(self,x,y):
        out = x+y
        return out
    def backward(self,dout):
        dx = dout*1
        dy = dout*1
        return dx,dy

In [5]:
apple = 100
apple_num =2
orange=150
orange_num=3
tax=1.1

# 계층 정의
mul_apple_layer = MulLayer()
mul_orange_layer = MulLayer()
add_apple_orange_layer = AddLayer()
mul_tax_layer = MulLayer()


# 순전파
apple_price = mul_apple_layer.forward(apple,apple_num)
orange_price = mul_orange_layer.forward(orange,orange_num)
all_price = add_apple_orange_layer.forward(apple_price,orange_price)
price = mul_tax_layer.forward(all_price,tax)

# 역전파
dprice=1
dall_price,dtax = mul_tax_layer.backward(dprice)
dapple_price,dorange_price = add_apple_orange_layer.backward(dall_price)
dorange,dorange_num = mul_orange_layer.backward(dorange_price)
dapple,dapple_num = mul_apple_layer.backward(dapple_price)


In [6]:
print(price)
print("dapple num = " + str(dapple_num))
print("dorange num = " + str(dorange_num))
print("dapple = " + str(dapple))
print("dorange = " + str(dorange))
print("dapple_price = " + str(dapple_price))
print("dorange_price = " + str(dorange_price))
print("dall_price = " + str(dall_price))
print("dtax = " + str(dtax))

715.0000000000001
dapple num = 110.00000000000001
dorange num = 165.0
dapple = 2.2
dorange = 3.3000000000000003
dapple_price = 1.1
dorange_price = 1.1
dall_price = 1.1
dtax = 650


## Relu Function

In [7]:
class Relu:
    def __init__(self):
        self.mask = None
        return
    def forward(self,x):
        self.mask= (x<=0)
        out = x.copy()
        out[self.mask]=0
        return out
    def backward(self,dout):
        dout[self.mask]=0
        dx=dout
        return dx


In [8]:
import numpy as np
x = np.array([[1.0,-0.5],[-2.0,3.0]])
print(x)

mask = (x<=0)
print(mask)

r = x.copy()
print(r)
r[mask] =0
print(r)

[[ 1.  -0.5]
 [-2.   3. ]]
[[False  True]
 [ True False]]
[[ 1.  -0.5]
 [-2.   3. ]]
[[1. 0.]
 [0. 3.]]


## sigmoid function

In [9]:
class Sigmoid:
    def __init__(self):
        self.out = None
        return
    def forward(self,x):
        out= 1/(1+np.exp(-x))
        self.out=out
        return out
    def backward(self,dout):
        dx = dout * (1.0 - self.out) * self.out
        return dx        

## Affine

In [10]:
class Affine:
    def __init__(self,W,b):
        self.W=W
        self.b=b
        self.x=None
        self.dW=None
        self.db=None
        return
    def forward(self,x):
        self.x=x
        out = np.dot(x,self.W) +self.b
        return out
    def backward(self,dout):
        dx = np.dot(dout,self.W.T)
        self.dW = np.dot(self.x.T,dout)
        self.db = np.sum(dout,axis=0)
        
        return dx

## Softmax with loss

In [11]:
def cross_entropy_error(y,t):
    delta = 1e-7
    return -np.sum(t*np.log(y+delta))    

In [12]:
def softmax(x):
    c = np.max(x)
    exp_x = np.exp(x-c)
    exp_sum = np.sum(exp_x)
    y = exp_x / exp_sum
    return y

In [13]:
class SoftmaxWithLoss:
    def __init__(self):
        self.loss=None
        self.y = None
        self.t=None
        return
    
    def forward(self,x,t):
        self.t=t
        self.y=softmax(x)
        self.loss = cross_entropy_error(self.y,self.t)
        return self.loss
    
    def backward(self,dout=1):
        batch_size= self.t.shape[0]
        dx = (self.y - self.t) / batch_size
        
        return dx
        
        
        

## total implemantation

In [14]:
import sys,os
sys.path.append(os.curdir)
import numpy as np
from collections import OrderedDict

In [64]:
def numerical_gradient(f, x):
    h = 1e-4 # 0.0001
    grad = np.zeros_like(x)
    
    it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
    while not it.finished:
        idx = it.multi_index
        tmp_val = x[idx]
        x[idx] = float(tmp_val) + h
        fxh1 = f(x) # f(x+h)
        
        x[idx] = tmp_val - h 
        fxh2 = f(x) # f(x-h)
        grad[idx] = (fxh1 - fxh2) / (2*h)
        
        x[idx] = tmp_val # 값 복원
        it.iternext()   
        
    return grad

In [75]:
class TwoLayerNet:
    def __init__(self,input_size,hidden_size,output_size,weight_init_std=0.01):
        #가중치 초기화
        self.params = {}
        self.params['W1'] = weight_init_std * np.random.randn(input_size,hidden_size)
        self.params['W2'] = weight_init_std * np.random.randn(hidden_size,output_size)
        self.params['b1'] = np.zeros(hidden_size)
        self.params['b2'] = np.zeros(output_size)
        
        #계층 생성
        self.layers = OrderedDict()
        self.layers['Affine1'] = Affine(self.params['W1'],self.params['b1'])
        self.layers['Relu1'] = Relu()
        self.layers['Affine2'] = Affine(self.params['W2'],self.params['b2'])
        
        self.lastLayer = SoftmaxWithLoss()
        
        return
    
    def predict(self,x):
        for layer in self.layers.values():
            x = layer.forward(x)
            
        return x

    def loss(self,x,t):
        y = self.predict(x)
        return self.lastLayer.forward(y,t)
    
    def accuracy(self,x,t):
        y = self.predict(x)
        y = np.argmax(y,axis=1)
        if t.ndim !=1:
            t = np.argmax(t,axis=1)
            
        accuracy = np.sum(y==t) / float(x.shape[0])
        return accuracy
    
    def numerical_gradient(self,x,t):
        loss_W = lambda W: self.loss(x,t)
        
        grads={}
        grads['W1'] = numerical_gradient(loss_W,self.params['W1'])
        grads['b1'] = numerical_gradient(loss_W,self.params['b1'])
        grads['W2'] = numerical_gradient(loss_W,self.params['W2'])
        grads['b2'] = numerical_gradient(loss_W,self.params['b2'])
        return grads
    
    def gradient(self,x,t):
        self.loss(x,t)
        
        dout = 1 
        dout = self.lastLayer.backward(dout)
        
        layers = list(self.layers.values())
        layers.reverse()
        for layer in layers:
            dout = layer.backward(dout)
            
        grads={}
        grads['W1'] = self.layers['Affine1'].dW
        grads['W2'] = self.layers['Affine2'].dW
        grads['b1'] = self.layers['Affine1'].db
        grads['b2'] = self.layers['Affine2'].db
        
        return grads

## gradient checking

In [76]:
from dataset.mnist import load_mnist

In [77]:
(x_train,t_train),(x_test,t_test) = load_mnist(normalize=True,one_hot_label=True)

network = TwoLayerNet(input_size=784,hidden_size=50,output_size=10)

x_batch = x_train[:3]
t_batch = t_train[:3]

grad_numerical = network.numerical_gradient(x_batch,t_batch)
grad_backprop = network.gradient(x_batch,t_batch)


In [78]:
grad_backprop

{'W1': array([[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]]),
 'W2': array([[-1.26912650e-03,  2.34481080e-03,  2.35981651e-03,
          2.36162333e-03, -1.69137445e-02, -4.53704130e-02,
          2.36262892e-03,  2.36402740e-03,  2.33827985e-03,
          2.35313060e-03],
        [-1.07667632e-02,  3.70752887e-04,  3.74089746e-04,
          3.72786331e-04,  3.73557032e-04,  3.69427260e-04,
          3.73087332e-04,  3.73217496e-04,  3.67852092e-04,
          3.69018504e-04],
        [ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
          0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
          0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
          0.00000000e+00],
        [-1.92096857e-02,  1.31337984e-03,  1.32178010e-03,
          1.32114138e-03, -9.30350939e-03, -7.01397741e-03

In [79]:
print(np.shape(grad_numerical['W1']))
print(np.shape(grad_backprop['W1']))
print(np.shape(grad_numerical['W2']))
print(np.shape(grad_backprop['W2']))
print(np.shape(grad_numerical['b1']))
print(np.shape(grad_backprop['b1']))
print(np.shape(grad_numerical['b2']))
print(np.shape(grad_backprop['b2']))

(784, 50)
(784, 50)
(50, 10)
(50, 10)
(50,)
(50,)
(10,)
(10,)


In [80]:
grad_numerical

{'W1': array([[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]]),
 'b1': array([ 0.03549697, -0.01329112,  0.        , -0.00999065,  0.0310888 ,
         0.00843219, -0.0090872 ,  0.00752488, -0.00699499,  0.00023641,
         0.01088846, -0.0015998 , -0.0023065 ,  0.01149271,  0.        ,
         0.00401107,  0.00180943,  0.02950298, -0.03061877,  0.        ,
         0.        , -0.00198266,  0.        ,  0.        ,  0.02304056,
         0.        , -0.00316473, -0.00554028,  0.        , -0.01412044,
        -0.01344207, -0.00347675,  0.00209107,  0.00184195,  0.        ,
         0.01267874,  0.        , -0.00945831, -0.00846167,  0.01176266,
         0.01259323,  0.01354068, -0.00916379, -0.01537839, -0.00377523,
         0.0040857 ,  0.01109319,  0.01742014,  0.01110479, -0.01705378]),
 'W2': 

In [81]:
for key in grad_numerical.keys():
    diff = np.average(np.abs(grad_numerical[key]-grad_backprop[key]))
    print(key + ":" + str(diff))

W1:0.0009242580080515475
b1:0.005912813056416095
W2:0.011736522659001257
b2:0.30693487614812864


## Learning implementation using Error Backpropagation

In [96]:
sys.path.clear()
sys.path.append(os.curdir)
sys.path.append("deep-learning-from-scratch-master/ch05")

In [97]:
from two_layer_net import TwoLayerNet

In [98]:
(x_train,t_train),(x_test,t_test) = load_mnist(normalize=True,one_hot_label=True)
network = TwoLayerNet(input_size=784,hidden_size=50,output_size=10)

iters_num=10000
train_size= x_train.shape[0]
batch_size=100
learning_rate=0.1

train_loss_list=[]
train_acc_list=[]
test_acc_list=[]

iter_per_epoch = max(train_size/batch_size,1)
for i in range(iters_num):
    batch_mask = np.random.choice(train_size,batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]
    
    grad = network.gradient(x_batch,t_batch)
    
    for key in ('W1','b1','W2','b2'):
        network.params[key] -= learning_rate * grad[key]
        
    loss = network.loss(x_batch,t_batch)
    train_loss_list.append(loss)
    
    if i % iter_per_epoch==0:
        train_acc = network.accuracy(x_train,t_train)
        test_acc = network.accuracy(x_test,t_test)
        train_acc_list.append(train_acc)
        test_acc_list.append(test_acc)
        print(train_acc,test_acc)

0.12771666666666667 0.1272
0.9047333333333333 0.9087
0.92375 0.9253
0.9371833333333334 0.9365
0.9469166666666666 0.9448
0.95255 0.9495
0.9569666666666666 0.9521
0.9611666666666666 0.9569
0.9639833333333333 0.9602
0.9678 0.9622
0.9700166666666666 0.9638
0.9721166666666666 0.9668
0.9742666666666666 0.9659
0.9745666666666667 0.9668
0.97705 0.9708
0.9770666666666666 0.9698
0.9791333333333333 0.9711


In [99]:
train_loss_list

[2.299180956922827,
 2.299755871474717,
 2.300426796791976,
 2.2993846019145376,
 2.2992572066877695,
 2.2965135521814277,
 2.295878562692302,
 2.2969454427169587,
 2.2968372746287486,
 2.2930448113193136,
 2.294036753720423,
 2.2916488828173938,
 2.291576154698156,
 2.291200735704839,
 2.2925226617475025,
 2.291062260684916,
 2.291852700288524,
 2.2873489250780437,
 2.2840590527273794,
 2.2884321230550086,
 2.283204748120642,
 2.282986542455255,
 2.27790130788563,
 2.2833496943591665,
 2.274453083152085,
 2.264461346869373,
 2.2746744668126566,
 2.2790244857428905,
 2.2797344122255945,
 2.2604173344962164,
 2.2660021617309223,
 2.256270119267798,
 2.260969763921925,
 2.2591493540111203,
 2.248201220741891,
 2.2475540912422236,
 2.221140699964203,
 2.235183872694645,
 2.2144011052245487,
 2.2142464104814463,
 2.217777213056757,
 2.2022039446489874,
 2.1888094309896062,
 2.1906423071879426,
 2.197577042699381,
 2.1532953720123937,
 2.1373309198277157,
 2.1630683544685767,
 2.17122795504