In [1]:
class MulLayer:
    """Multiplication node of a computational graph.

    ``forward`` multiplies its two inputs and remembers them; ``backward``
    uses the remembered inputs to turn the upstream gradient into one
    gradient per input.
    """

    def __init__(self):
        # Operands of the most recent forward() call (None until then).
        self.x, self.y = None, None

    def forward(self, x, y):
        """Store both operands and return their product ``x * y``."""
        self.x, self.y = x, y
        return x * y

    def backward(self, dout):
        """Return ``(dx, dy)`` for the upstream gradient ``dout``.

        Each input's gradient is ``dout`` times the *other* input that was
        stored by ``forward``.
        """
        grad_x = dout * self.y
        grad_y = dout * self.x
        return grad_x, grad_y




In [2]:
# Inputs of the example: unit price, quantity and tax multiplier.
apple, apple_num, tax = 100, 2, 1.1

# One multiplication node per product in the graph.
mul_apple_layer, mul_tax_layer = MulLayer(), MulLayer()

# Forward pass: (apple * apple_num) is fed into the tax multiplication.
apple_price = mul_apple_layer.forward(apple, apple_num)  # 100 * 2 = 200
price = mul_tax_layer.forward(apple_price, tax)  # 200 * 1.1, roughly 220 in floating point

print(price)

220.00000000000003


In [3]:
# Backward pass: walk the graph in the reverse order of the forward pass,
# starting from a gradient of 1 at the final price.
dprice = 1

tax_node_grads = mul_tax_layer.backward(dprice)
dapple_price, dtax = tax_node_grads  # expected about 1.1 and 200

apple_node_grads = mul_apple_layer.backward(dapple_price)
dapple, dapple_num = apple_node_grads  # expected about 2.2 and 110

print(dapple, dapple_num, dtax)

2.2 110.00000000000001 200


In [4]:
class AddLayer:
    """Addition node of a computational graph.

    The node keeps no state: the gradient of a sum with respect to either
    input is 1, so ``backward`` does not need the forward operands.
    """

    def __init__(self):
        """Nothing to initialise; the node is stateless."""

    def forward(self, x, y):
        """Return the sum ``x + y``."""
        return x + y

    def backward(self, dout):
        """Return ``(dx, dy)``, each equal to ``dout * 1``."""
        return dout * 1, dout * 1

In [5]:
class Relu:
    """ReLU activation layer with a forward and a backward pass.

    ``forward`` zeroes every entry of the input that is ``<= 0`` and records
    where that happened in ``self.mask``; ``backward`` zeroes the upstream
    gradient at the same positions.
    """

    def __init__(self):
        # Boolean array set by forward(): True where the input was <= 0.
        self.mask = None

    def forward(self, x):
        """Return a copy of ``x`` with all non-positive entries set to 0.

        ``x`` must support ``<=`` comparison, ``.copy()`` and boolean-mask
        assignment (e.g. a NumPy array). The input itself is not modified.
        """
        self.mask = (x <= 0)
        out = x.copy()
        out[self.mask] = 0

        return out

    def backward(self, dout):
        """Return ``dout`` with entries zeroed where the forward input was <= 0.

        Must be called after ``forward`` so that ``self.mask`` is set. Works on
        a copy, so the caller's ``dout`` array is left untouched.
        """
        # Previously this method was nested inside forward() after its
        # ``return`` and therefore never existed on the class.
        dx = dout.copy()
        dx[self.mask] = 0

        return dx

In [6]:
import numpy as np
# 2x2 float example containing both positive and non-positive entries.
x = np.array([[1.0, -0.5],
              [-2.0, 3.0]])
print(x)

[[ 1.  -0.5]
 [-2.   3. ]]


In [7]:
# Element-wise test: True wherever the entry of x is zero or negative.
mask = np.less_equal(x, 0)
mask

array([[False,  True],
       [ True, False]])

In [8]:
# Stand-in for the product X . W of a batch of two samples with three outputs,
# plus a bias vector with one entry per output.
X_dot_W = np.array([[0] * 3, [10] * 3])
b = np.arange(1, 4)
X_dot_W

array([[ 0,  0,  0],
       [10, 10, 10]])

In [9]:
# The bias vector is broadcast across the rows, i.e. added to every sample.
np.add(X_dot_W, b)

array([[ 1,  2,  3],
       [11, 12, 13]])

In [10]:
# Example upstream gradient: two samples (rows) by three outputs (columns).
dy = np.arange(1, 7).reshape(2, 3)
dy

array([[1, 2, 3],
       [4, 5, 6]])

In [11]:
# Summing over axis 0 collapses the rows, leaving one total per column.
db = dy.sum(axis=0)
db

array([5, 7, 9])

In [12]:
class Affine:
    """Affine (fully connected) layer computing ``np.dot(x, W) + b``.

    ``forward`` caches its input; ``backward`` stores the parameter gradients
    in ``self.dW`` / ``self.db`` and returns the gradient with respect to the
    input so it can be passed on to the preceding layer.
    """

    def __init__(self, W, b):
        """Keep references to the weight matrix ``W`` and bias ``b``."""
        self.W = W
        self.b = b
        # Input of the most recent forward() call; needed to compute dW.
        self.x = None
        # Parameter gradients, filled in by backward().
        self.dW = None
        self.db = None

    def forward(self, x):
        """Cache ``x`` and return ``np.dot(x, self.W) + self.b``.

        Assumes ``x`` is a 2-D array with one sample per row -- TODO confirm
        against callers.
        """
        self.x = x
        out = np.dot(x, self.W) + self.b

        return out

    def backward(self, dout):
        """Compute all gradients for the upstream gradient ``dout``.

        Sets ``self.dW = np.dot(x.T, dout)`` and ``self.db = dout`` summed
        over axis 0, and returns ``dx = np.dot(dout, W.T)``.
        """
        dx = np.dot(dout, self.W.T)
        self.dW = np.dot(self.x.T, dout)
        # The bias is added to every row in forward(), so its gradient is the
        # sum of dout over the row axis.
        self.db = np.sum(dout, axis = 0)

        # dx was previously computed but never returned, which handed None to
        # the preceding layer during back-propagation.
        return dx



In [13]:
import sys, os
sys.path.append(os.pardir)
import numpy as np
from common.layers import *
from common.gradient import numerical_gradient
from collections import OrderedDict


In [14]:
class TwoLayerNet:
    """Two-layer network: Affine -> ReLU -> Affine, followed by a loss layer.

    Parameters live in ``self.params`` under the keys ``'W1'``, ``'b1'``,
    ``'W2'`` and ``'b2'``. The hidden layers are kept in an ``OrderedDict`` so
    they can be traversed in order for the forward pass and in reverse for
    back-propagation.

    Relies on ``Affine``, ``Relu``, ``SoftmaxWithLoss`` and the module-level
    ``numerical_gradient`` function being in scope (presumably provided by the
    ``common`` package imported by the notebook -- verify).
    """

    def __init__(self, input_size, hidden_size, output_size, weight_init_std = 0.01):
        """Create randomly initialised parameters and build the layers.

        The first parameter is named ``input_size`` because both the body and
        the notebook's caller use that name; it was previously declared as
        ``input``, which made construction fail.
        """
        # Initialize the weights: small Gaussian weights, zero biases.
        self.params = {}
        self.params['W1'] = weight_init_std * np.random.randn(input_size, hidden_size)
        self.params['b1'] = np.zeros(hidden_size)
        self.params['W2'] = weight_init_std * np.random.randn(hidden_size, output_size)
        self.params['b2'] = np.zeros(output_size)

        # Build the layers. The keys must match the ones read in gradient().
        self.layers = OrderedDict()
        self.layers['Affine1'] = Affine(self.params['W1'], self.params['b1'])
        self.layers['Relu1'] = Relu()
        self.layers['Affine2'] = Affine(self.params['W2'], self.params['b2'])
        self.lastLayer = SoftmaxWithLoss()

    def predict(self, x):
        """Run ``x`` through every hidden layer in order and return the result."""
        for layer in self.layers.values():
            x = layer.forward(x)
        return x

    def loss(self, x, t):
        """Return the loss of the network on inputs ``x`` with targets ``t``.

        x: input data, t: supervised (target) data.

        Also performs the forward pass that ``gradient`` relies on before it
        calls ``self.lastLayer.backward``. Assumes the loss layer exposes
        ``forward(y, t)`` -- confirm against ``SoftmaxWithLoss``.
        """
        y = self.predict(x)
        return self.lastLayer.forward(y, t)

    def accuracy(self, x, t):
        """Return the fraction of rows of ``x`` whose arg-max prediction matches ``t``.

        ``t`` may be given as class indices (1-D) or as one row per sample, in
        which case its arg-max along axis 1 is used.
        """
        y = self.predict(x)
        y = np.argmax(y, axis = 1)
        if t.ndim != 1:
            t = np.argmax(t, axis = 1)
        return np.sum(y == t) / float(x.shape[0])

    def numerical_gradient(self, x, t):
        """Return the gradients of every parameter, estimated numerically.

        x: input data, t: supervised (target) data.

        Delegates to the module-level ``numerical_gradient`` function (not to
        this method). The result is a dict keyed like ``self.params``.
        """
        # The helper passes the parameter array to the callable, but the loss
        # reads the parameters through the layers, so the argument is unused.
        loss_W = lambda W : self.loss(x, t)

        grads = {}
        for key in ('W1', 'b1', 'W2', 'b2'):
            grads[key] = numerical_gradient(loss_W, self.params[key])

        return grads

    def gradient(self, x, t):
        """Return the gradients of every parameter via back-propagation.

        The result is a dict keyed like ``self.params``.
        """
        # forward
        self.loss(x, t)

        # backward: start from the loss layer, then visit the hidden layers
        # in reverse order.
        dout = 1
        dout = self.lastLayer.backward(dout)

        for layer in reversed(list(self.layers.values())):
            dout = layer.backward(dout)

        # Collect the gradients the Affine layers stored during backward().
        grads = {}
        grads['W1'] = self.layers['Affine1'].dW
        grads['b1'] = self.layers['Affine1'].db
        grads['W2'] = self.layers['Affine2'].dW
        grads['b2'] = self.layers['Affine2'].db
        return grads


        

In [3]:
import sys, os
sys.path.append("/home/software/deep_learning/dive_into_deep_learning/ch05")
import numpy as np
from dataset.mnist import load_mnist
from two_layer_net import TwoLayerNet

In [11]:
# Gradient check: compare back-propagation against numerical differentiation
# on a tiny batch and report the mean absolute difference per parameter.
(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, one_hot_label=True)
network = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)

# Three samples are enough here; the numerical gradient is slow.
x_batch, t_batch = x_train[:3], t_train[:3]

grad_numerical = network.numerical_gradient(x_batch, t_batch)
grad_backprop = network.gradient(x_batch, t_batch)

for key, numeric in grad_numerical.items():
    diff = np.mean(np.abs(grad_backprop[key] - numeric))
    print(key, str(diff), sep=":")

W1:4.726012621751128e-06
b1:4.034930836167675e-05
W2:5.712039267222153e-09
b2:1.3993894873998158e-07


In [13]:
# Shape of the mini-batch used for the gradient check.
np.shape(x_batch)

(3, 784)

In [20]:
# Shape of the numerically estimated gradient of the first weight matrix.
np.shape(grad_numerical['W1'])

(784, 50)