In [1]:
# Multiplication layer
class MulLayer:
    """Multiplication node of a computational graph.

    ``forward`` caches both operands because the backward pass needs them:
    the gradient with respect to one input is the upstream gradient
    multiplied by the *other* input.
    """

    def __init__(self):
        # Operands of the most recent forward call; None until forward runs.
        self.x, self.y = None, None

    def forward(self, x, y):
        """Store ``x`` and ``y`` on the layer and return their product."""
        self.x, self.y = x, y
        return x * y

    def backward(self, dout):
        """Propagate the upstream gradient ``dout`` to both inputs.

        Returns:
            ``(dx, dy)`` where ``dx = dout * y`` and ``dy = dout * x``
            (the cached forward inputs are swapped).
        """
        return dout * self.y, dout * self.x

In [2]:
# Buying apples: price = (apple * apple_num) * tax
apple = 100
apple_num = 2
tax = 1.1

# One MulLayer per multiplication node; each layer caches its own forward inputs.
mul_apple_layer = MulLayer()
mul_tax_layer = MulLayer()

# forward
apple_price = mul_apple_layer.forward(apple, apple_num)
price = mul_tax_layer.forward(apple_price, tax)
print(price)

# backward: the layers are called in the reverse order of the forward pass
dprice = 1  # gradient fed into the last layer (d price / d price)
dapple_price, dtax = mul_tax_layer.backward(dprice)
dapple, dapple_num = mul_apple_layer.backward(dapple_price)
print(dapple_price, dtax, dapple, dapple_num)

220.00000000000003
1.1 200 2.2 110.00000000000001


In [3]:
# Addition layer
class AddLayer:
    """Addition node of a computational graph.

    The layer keeps no state: the backward pass of an addition does not need
    the forward inputs, because the local derivative with respect to either
    operand is 1.
    """

    def __init__(self):
        pass

    def forward(self, x, y):
        """Return ``x + y``."""
        return x + y

    def backward(self, dout):
        """Pass the upstream gradient ``dout`` on to both inputs.

        Returns:
            ``(dx, dy)``, each equal in value to ``dout``.
        """
        # Multiply by the local derivative (1). For array gradients this gives
        # each branch its own object, so a downstream layer that edits its
        # gradient in place cannot corrupt the other branch or the caller's
        # array (returning ``dout`` twice would alias all three).
        dx = dout * 1
        dy = dout * 1

        return dx, dy

![image.png](./add_mul.png)

In [4]:
apple = 100     # unit price of an apple
apple_num = 2
orange = 150    # unit price of an orange
orange_num = 3
tax = 1.1

# layer: one object per node of the graph, since MulLayer caches its inputs
mul_apple_layer = MulLayer()
mul_orange_layer = MulLayer()
add_apple_orange_layer = AddLayer()
mul_tax_layer = MulLayer()

# forward: nodes evaluated in the order (1) -> (4)
apple_price = mul_apple_layer.forward(apple, apple_num)                 # (1)
orange_price = mul_orange_layer.forward(orange, orange_num)             # (2)
all_price = add_apple_orange_layer.forward(apple_price, orange_price)   # (3)
price = mul_tax_layer.forward(all_price, tax)                                     # (4)

# backward: the same layers, visited in the reverse order (4) -> (1)
dprice = 1
dall_price, dtax = mul_tax_layer.backward(dprice)                           # (4)
dapple_price, dorange_price = add_apple_orange_layer.backward(dall_price)   # (3)
dorange, dorange_num = mul_orange_layer.backward(dorange_price)             # (2)
dapple, dapple_num = mul_apple_layer.backward(dapple_price)                 # (1)

# Bare last expression: the notebook displays this tuple as the cell output.
price, dapple_num, dapple, dorange_num, dorange

(715.0000000000001, 110.00000000000001, 2.2, 165.0, 3.3000000000000003)

In [5]:
# Using a boolean mask
import numpy as np
x = np.array([[1.0, -0.5], [-2.0, 3.0]])
mask = (x <=0 )  # boolean array, True where the element is <= 0
x, mask, x[mask], id(x) == id(x.copy())  # .copy() creates a new object that is an exact duplicate of the original array

(array([[ 1. , -0.5],
        [-2. ,  3. ]]),
 array([[False,  True],
        [ True, False]]),
 array([-0.5, -2. ]),
 False)

In [6]:
# ReLU layer
class Relu:
    """ReLU activation layer: element-wise ``max(x, 0)``.

    ``forward`` remembers which input elements were non-positive so that
    ``backward`` can block the gradient at exactly those positions.
    """

    def __init__(self):
        # Boolean array, True where the last forward input was <= 0.
        self.mask = None

    def forward(self, x):
        """Return a copy of array ``x`` with every element <= 0 set to 0.

        The input array itself is left unmodified.
        """
        self.mask = (x <= 0)
        out = x.copy()
        # Index with the mask computed from *this* input (self.mask), not a
        # notebook-global `mask` that may be undefined or have another shape.
        out[self.mask] = 0
        return out

    def backward(self, dout):
        """Return the gradient w.r.t. the forward input.

        The result equals ``dout`` where the forward input was positive and 0
        elsewhere. ``dout`` must have the shape of the last forward input.
        """
        # Work on a copy so the caller's gradient array is not modified.
        dx = dout.copy()
        dx[self.mask] = 0
        return dx

In [7]:
# Sigmoid layer
class Sigmoid:
    """Sigmoid activation layer: ``y = 1 / (1 + exp(-x))``.

    The forward output is cached because the derivative can be written in
    terms of it: ``dy/dx = y * (1 - y)``.
    """

    def __init__(self):
        # Output of the most recent forward call; None until forward runs.
        self.out = None

    def forward(self, x):
        """Apply the sigmoid element-wise to ``x`` (NumPy array or scalar)."""
        # The exponent must be negated: 1 / (1 + exp(x)) is the mirrored
        # function sigmoid(-x), whose slope has the opposite sign of the
        # derivative used in backward().
        out = 1 / (1 + np.exp(-x))
        self.out = out
        return out

    def backward(self, dout):
        """Return ``dout * y * (1 - y)`` using the cached forward output ``y``."""
        dx = dout * self.out * (1.0 - self.out)

        return dx
    

In [8]:
# Affine (fully connected) layer - built on np.dot
class Affine:
    """Affine layer computing ``np.dot(x, W) + b``.

    The parameter gradients are not returned by ``backward``; they are stored
    on the layer as ``dW`` and ``db``.
    """

    def __init__(self, W, b):
        self.W, self.b = W, b
        # Input of the most recent forward call; backward needs it for dW.
        self.x = None
        # Parameter gradients, filled in by backward().
        self.dW = self.db = None

    def forward(self, x):
        """Cache ``x`` and return ``np.dot(x, W) + b``."""
        self.x = x
        return np.dot(x, self.W) + self.b

    def backward(self, dout):
        """Store ``dW``/``db`` on the layer and return the input gradient.

        ``db`` sums ``dout`` over axis 0 and ``dW`` is ``np.dot(x.T, dout)``.
        """
        # The gradient w.r.t. the input is computed as well, because the
        # input may itself be the output of a preceding layer.
        grad_input = np.dot(dout, self.W.T)
        self.dW = np.dot(self.x.T, dout)
        self.db = np.sum(dout, axis=0)
        return grad_input

In [9]:
# softmax层
import sys, os
sys.path.append(os.pardir)
from common.functions import softmax, cross_entropy_error

class SoftmaxWithLoss:
    """Softmax followed by a cross-entropy loss, treated as a single layer.

    ``forward`` delegates to the imported ``softmax`` and
    ``cross_entropy_error`` helpers and caches what ``backward`` needs.
    """

    def __init__(self):
        self.loss = None  # value returned by the last forward()
        self.y = None     # output of softmax(x)
        self.t = None     # targets passed to forward()

    def forward(self, x, t):
        """Return ``cross_entropy_error(softmax(x), t)`` and cache ``y`` and ``t``."""
        self.t = t
        self.y = softmax(x)
        self.loss = cross_entropy_error(self.y, self.t)

        return self.loss

    def backward(self, dout=1):
        """Return the gradient of the loss with respect to the forward input.

        Args:
            dout: upstream gradient of the loss value; defaults to 1.

        Returns:
            ``dout * (y - t) / batch_size``.

        NOTE(review): ``y - t`` assumes ``t`` has the same layout as ``y``
        (e.g. one-hot targets) - confirm against the callers.
        """
        # A 1-D y is a single sample: its first dimension is the number of
        # classes, not a batch size, so the divisor must be 1 in that case.
        batch_size = self.y.shape[0] if self.y.ndim > 1 else 1
        # Scale by the upstream gradient (chain rule); with the default
        # dout=1 the result is unchanged.
        dx = (self.y - self.t) / batch_size * dout

        return dx

In [10]:
# OrderedDict - a dictionary that remembers insertion order
from collections import OrderedDict

od = OrderedDict()
od["Affine1"] = 1
od["Relu1"] = 2

# items(): (key, value) pairs, in the order the keys were inserted
for key, val in od.items():
    print(key, val)

# values(): values only, same order
for val in od.values():
    print(val)

# keys(): keys only, same order
for key in od.keys():
    print(key)

Affine1 1
Relu1 2
1
2
Affine1
Relu1


In [26]:
# Numerical differentiation vs. backpropagation
from TwoLayerNet import TwoLayerNet
from dataset.mnist import load_mnist

# NOTE(review): the keyword is spelled `one_hot_lable` here - confirm it matches
# the parameter name declared by dataset.mnist.load_mnist.
(x_train, t_train), (x_test, t_test) = load_mnist(one_hot_lable=True)

network = TwoLayerNet(784, 50, 10)
# Only the first 3 training samples are used for the comparison.
x_batch = x_train[:3]
t_batch = t_train[:3]


In [27]:
# Gradients for the same mini-batch, obtained from the network's two methods.
grad_numerical = network.numerical_gradient(x_batch, t_batch)
grad_backprop = network.gradient(x_batch, t_batch)

# For every parameter, print the mean absolute difference between the two results.
for key in grad_numerical.keys():
    diff = np.average(np.abs(grad_backprop[key] - grad_numerical[key]))
    print(f"{key}: {diff}")

W1: 9.636116188752832e-08
b1: 1.896506961625321e-06
W2: 6.379564505455199e-09
b2: 1.4036910548631543e-07


In [31]:
# Load the data
(x_train, t_train), (x_test, t_test) = load_mnist(one_hot_lable=True)
# Training settings
iters_num = 10000
train_size = x_train.shape[0]
batch_size = 100
learning_rate = 0.1
train_loss_list = []
train_acc_list = []
test_acc_list = []
# Whole number of iterations per epoch. Floor division keeps this an int, so
# the `i % iter_per_epoch == 0` check below fires once per epoch even when
# train_size is not an exact multiple of batch_size (true division would give
# a fractional period that the integer counter `i` rarely hits).
iter_per_epoch = max(train_size // batch_size, 1)
epoch = 0
network = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)

# Training
for i in range(iters_num):
    # Draw a random mini-batch of indices (np.random.choice samples with replacement by default).
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]

    # Gradients via backpropagation
    grad = network.gradient(x_batch, t_batch)
    # Gradient-descent update of every parameter
    for key in grad.keys():
        network.params[key] -= learning_rate * grad[key]

    # Loss on the current mini-batch, measured after the update
    loss = network.loss(x_batch, t_batch)
    train_loss_list.append(loss)

    # Once per epoch: record and report accuracy on the full train and test sets
    if i % iter_per_epoch == 0:
        train_acc = network.accuracy(x_train, t_train)
        test_acc = network.accuracy(x_test, t_test)
        train_acc_list.append(train_acc)
        test_acc_list.append(test_acc)
        print(f"epoch {epoch:2} | train_acc: {train_acc:.3f} | test_acc: {test_acc:.3f}")
        epoch += 1

epoch  0 | train_acc: 0.095 | test_acc: 0.094
epoch  1 | train_acc: 0.904 | test_acc: 0.907
epoch  2 | train_acc: 0.924 | test_acc: 0.926
epoch  3 | train_acc: 0.934 | test_acc: 0.935
epoch  4 | train_acc: 0.944 | test_acc: 0.942
epoch  5 | train_acc: 0.950 | test_acc: 0.948
epoch  6 | train_acc: 0.956 | test_acc: 0.952
epoch  7 | train_acc: 0.961 | test_acc: 0.957
epoch  8 | train_acc: 0.964 | test_acc: 0.961
epoch  9 | train_acc: 0.967 | test_acc: 0.962
epoch 10 | train_acc: 0.969 | test_acc: 0.963
epoch 11 | train_acc: 0.971 | test_acc: 0.966
epoch 12 | train_acc: 0.972 | test_acc: 0.966
epoch 13 | train_acc: 0.975 | test_acc: 0.969
epoch 14 | train_acc: 0.977 | test_acc: 0.968
epoch 15 | train_acc: 0.976 | test_acc: 0.967
epoch 16 | train_acc: 0.978 | test_acc: 0.969
