# 卷积神经网络
在本章节的练习中，我们将对卷积，池化，空间批量归一化等内容进行编码，并比较不同实现方式的执行效率。由于python语言运行效率太过缓慢，我们会在第八章的tensorflow介绍章节再次训练卷积网络，本章我们只需要针对卷积网络的各个模块进行编码练习即可。本章我们将逐步完成：
* 卷积前向传播编码练习
* 卷积反向传播编码练习
* 最大池化前向传播编码练习
* 最大池化反向传播编码练习
* 组合完整卷积层编码练习
* 空间批量归一化编码练习


In [None]:
#-*- coding: utf-8 -*-
import time
import numpy as np
import matplotlib.pyplot as plt
from classifiers.chapter6 import *
from utils import *

# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Default plot settings: figure size, no interpolation smoothing, grayscale colormap.
plt.rcParams['figure.figsize'] = (10.0, 8.0) 
plt.rcParams['image.interpolation'] = 'nearest'
plt.rcParams['image.cmap'] = 'gray'

# Load IPython's autoreload extension in mode 2 so that edits to imported
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

def rel_error(x, y):
  """Return the largest element-wise relative error between x and y.

  For each element the error is |x - y| / max(1e-8, |x| + |y|); the 1e-8
  floor keeps the division defined when both values are zero.
  """
  abs_diff = np.abs(x - y)
  scale = np.maximum(1e-8, np.abs(x) + np.abs(y))
  return np.max(abs_diff / scale)

In [None]:
# Load the dataset and print the shape of every entry in the returned mapping.

data = get_CIFAR10_data()
# NOTE: dict.iteritems() and the print statement below are Python 2 only.
for k, v in data.iteritems():
  print '%s: ' % k, v.shape

## 卷积前向传播(简单版本)

打开 `classifiers\chapter6\cnn_layers.py` 文件，完成 `conv_forward_naive` 函数，实现卷积层的前向传播。目前并不需要考虑效率的问题，只要编码成功即可。完成后运行下列代码进行测试。

In [None]:
x_shape = (2, 3, 4, 4)
w_shape = (3, 3, 4, 4)
x = np.linspace(-0.1, 0.5, num=np.prod(x_shape)).reshape(x_shape)
w = np.linspace(-0.2, 0.3, num=np.prod(w_shape)).reshape(w_shape)
b = np.linspace(-0.1, 0.2, num=3)

conv_param = {'stride': 2, 'pad': 1}
out, _ = conv_forward_naive(x, w, b, conv_param)
correct_out = np.array([[[[[-0.08759809, -0.10987781],
                           [-0.18387192, -0.2109216 ]],
                          [[ 0.21027089,  0.21661097],
                           [ 0.22847626,  0.23004637]],
                          [[ 0.50813986,  0.54309974],
                           [ 0.64082444,  0.67101435]]],
                         [[[-0.98053589, -1.03143541],
                           [-1.19128892, -1.24695841]],
                          [[ 0.69108355,  0.66880383],
                           [ 0.59480972,  0.56776003]],
                          [[ 2.36270298,  2.36904306],
                           [ 2.38090835,  2.38247847]]]]])

# 实现误差应该在1e-8左右
print '测试 conv_forward_naive函数'
print '误差: ', rel_error(out, correct_out)

In [None]:
# Scratch cell: build the reference conv output values as a 4-D array and
# print its shape.
a = np.array([[[[-0.08759809, -0.10987781],
                           [-0.18387192, -0.2109216 ]],
                          [[ 0.21027089,  0.21661097],
                           [ 0.22847626,  0.23004637]],
                          [[ 0.50813986,  0.54309974],
                           [ 0.64082444,  0.67101435]]],
                         [[[-0.98053589, -1.03143541],
                           [-1.19128892, -1.24695841]],
                          [[ 0.69108355,  0.66880383],
                           [ 0.59480972,  0.56776003]],
                          [[ 2.36270298,  2.36904306],
                           [ 2.38090835,  2.38247847]]]])
print a.shape

## 卷积层反向传播（简单版本）

打开 `classifiers\chapter6\cnn_layers.py` 文件，完成 `conv_backward_naive` 函数，实现卷积层的反向传播。目前并不需要考虑效率的问题，只要编码成功即可。完成后运行下列代码进行测试。

In [None]:
# Gradient check for conv_backward_naive on random inputs.
x = np.random.randn(4, 3, 5, 5)
w = np.random.randn(2, 3, 3, 3)
b = np.random.randn(2,)
dout = np.random.randn(4, 2, 5, 5)
conv_param = {'stride': 1, 'pad': 1}

# Numerical gradients of the forward output with respect to x, w and b, using
# dout as the upstream gradient. Each lambda parameter shadows the global of
# the same name, so only that one input is varied.
dx_num = eval_numerical_gradient_array(lambda x: conv_forward_naive(
        x, w, b, conv_param)[0], x, dout)
dw_num = eval_numerical_gradient_array(lambda w: conv_forward_naive(
        x, w, b, conv_param)[0], w, dout)
db_num = eval_numerical_gradient_array(lambda b: conv_forward_naive(
        x, w, b, conv_param)[0], b, dout)

# Analytic gradients from the implementation under test.
out, cache = conv_forward_naive(x, w, b, conv_param)
dx, dw, db = conv_backward_naive(dout, cache)

# Relative errors should be around 1e-9.
print '测试 conv_backward_naive 函数'
print 'dx 误差: ', rel_error(dx, dx_num)
print 'dw 误差: ', rel_error(dw, dw_num)
print 'db 误差: ', rel_error(db, db_num)

## 最大池化前向传播(简单版)

打开 `classifiers\chapter6\cnn_layers.py` 文件，完成 `max_pool_forward_naive` 函数，实现最大池化前向传播。目前并不需要考虑效率的问题，只要编码成功即可。完成后运行下列代码进行测试。

In [None]:
# Deterministic input so the pooled output can be compared with precomputed values.
x_shape = (2, 3, 4, 4)
x = np.linspace(-0.3, 0.4, num=np.prod(x_shape)).reshape(x_shape)
pool_param = {'pool_width': 2, 'pool_height': 2, 'stride': 2}

out, _ = max_pool_forward_naive(x, pool_param)

# Reference output, shape (2, 3, 2, 2).
correct_out = np.array([[[[-0.26315789, -0.24842105],
                          [-0.20421053, -0.18947368]],
                         [[-0.14526316, -0.13052632],
                          [-0.08631579, -0.07157895]],
                         [[-0.02736842, -0.01263158],
                          [ 0.03157895,  0.04631579]]],
                        [[[ 0.09052632,  0.10526316],
                          [ 0.14947368,  0.16421053]],
                         [[ 0.20842105,  0.22315789],
                          [ 0.26736842,  0.28210526]],
                         [[ 0.32631579,  0.34105263],
                          [ 0.38526316,  0.4       ]]]])

# The relative error should be around 1e-8.
print '测试 max_pool_forward_naive 函数:'
print '误差: ', rel_error(out, correct_out)

## 最大池化反向传播(简单版本)
打开 `classifiers\chapter6\cnn_layers.py` 文件，完成 `max_pool_backward_naive` 函数，实现最大池化反向传播。目前并不需要考虑效率的问题，只要编码成功即可。完成后运行下列代码进行测试。

In [None]:
# Gradient check for max_pool_backward_naive on random inputs.
x = np.random.randn(3, 2, 8, 8)
dout = np.random.randn(3, 2, 4, 4)
pool_param = {'pool_height': 2, 'pool_width': 2, 'stride': 2}

# Numerical gradient of the pooled output with respect to x, with dout as the
# upstream gradient.
dx_num = eval_numerical_gradient_array(
    lambda x: max_pool_forward_naive(x, pool_param)[0], x, dout)

# Analytic gradient from the implementation under test.
out, cache = max_pool_forward_naive(x, pool_param)
dx = max_pool_backward_naive(dout, cache)

# The relative error should be around 1e-12.
print '测试 max_pool_backward_naive 函数:'
print 'dx 误差: ', rel_error(dx, dx_num)

## 快速卷积
执行下列代码，比较快速卷积与你实现的卷积操作

In [None]:
# Time conv_forward_naive against conv_forward_fast on the same inputs and
# compare their outputs.
from classifiers.chapter6.cnn_layers import conv_forward_fast
# NOTE: this rebinds the notebook-level name `time` from the module (imported
# in the setup cell) to the function time.time.
from time import time

x = np.random.randn(100, 3, 31, 31)
w = np.random.randn(25, 3, 3, 3)
b = np.random.randn(25,)
# NOTE(review): dout is not used anywhere in this cell.
dout = np.random.randn(100, 25, 16, 16)
conv_param = {'stride': 2, 'pad': 1}

# t1 - t0 is the naive run time, t2 - t1 the fast run time.
t0 = time()
out_naive, cache_naive = conv_forward_naive(x, w, b, conv_param)
t1 = time()
out_fast, cache_fast = conv_forward_fast(x, w, b, conv_param)
t2 = time()

print '测试 conv_forward_fast:'
print '慢速版本: %fs' % (t1 - t0)
print '快速版本: %fs' % (t2 - t1)
# Speed-up factor: naive time divided by fast time.
print '加速: %fx' % ((t1 - t0) / (t2 - t1))
print '误差: ', rel_error(out_naive, out_fast)



## 快速池化操作
执行下列代码，比较快速池化与你实现的池化操作

In [None]:

# Time the naive max-pooling forward and backward passes against the fast
# versions. This cell calls time() as a function, so the name `time` must
# already be bound to time.time (not the time module) when it runs.
x = np.random.randn(100, 3, 32, 32)
dout = np.random.randn(100, 3, 16, 16)
pool_param = {'pool_height': 2, 'pool_width': 2, 'stride': 2}

# Forward pass: t1 - t0 is the naive run time, t2 - t1 the fast run time.
t0 = time()
out_naive, cache_naive = max_pool_forward_naive(x, pool_param)
t1 = time()
out_fast, cache_fast = max_pool_forward_fast(x, pool_param)
t2 = time()

print '测试 pool_forward_fast:'
print '慢速版本: %fs' % (t1 - t0)
print '快速版本: %fs' % (t2 - t1)
print '加速: %fx' % ((t1 - t0) / (t2 - t1))
print '误差: ', rel_error(out_naive, out_fast)

# Backward pass, each implementation using the cache from its own forward pass.
t0 = time()
dx_naive = max_pool_backward_naive(dout, cache_naive)
t1 = time()
dx_fast = max_pool_backward_fast(dout, cache_fast)
t2 = time()

print '\n测试 pool_backward_fast:'
print '慢速版本: %fs' % (t1 - t0)
print '快速版本: %fs' % (t2 - t1)
print '加速: %fx' % ((t1 - t0) / (t2 - t1))
print 'dx 误差: ', rel_error(dx_naive, dx_fast)

## 完整的卷积层
打开 `classifiers\chapter6\cnn_layers.py` 文件，阅读 `conv_relu_pool_forward` 和 `conv_relu_pool_backward`，阅读完成后运行下列代码进行测试。

In [None]:
# Gradient check for the combined conv_relu_pool forward/backward pair.
x = np.random.randn(2, 3, 16, 16)
w = np.random.randn(3, 3, 3, 3)
b = np.random.randn(3,)
dout = np.random.randn(2, 3, 8, 8)
conv_param = {'stride': 1, 'pad': 1}
pool_param = {'pool_height': 2, 'pool_width': 2, 'stride': 2}

# Analytic gradients from the implementation under test.
out, cache = conv_relu_pool_forward(x, w, b, conv_param, pool_param)
dx, dw, db = conv_relu_pool_backward(dout, cache)

# Numerical gradients with respect to x, w and b; each lambda parameter
# shadows the global of the same name, so only that one input is varied.
dx_num = eval_numerical_gradient_array(lambda x: conv_relu_pool_forward(
        x, w, b, conv_param, pool_param)[0], x, dout)
dw_num = eval_numerical_gradient_array(lambda w: conv_relu_pool_forward(
        x, w, b, conv_param, pool_param)[0], w, dout)
db_num = eval_numerical_gradient_array(lambda b: conv_relu_pool_forward(
        x, w, b, conv_param, pool_param)[0], b, dout)

print '测试 conv_relu_pool'
print 'dx 误差: ', rel_error(dx_num, dx)
print 'dw 误差: ', rel_error(dw_num, dw)
print 'db 误差: ', rel_error(db_num, db)

In [None]:

# Gradient check for the combined conv_relu forward/backward pair.
x = np.random.randn(2, 3, 8, 8)
w = np.random.randn(3, 3, 3, 3)
b = np.random.randn(3,)
dout = np.random.randn(2, 3, 8, 8)
conv_param = {'stride': 1, 'pad': 1}

# Analytic gradients from the implementation under test.
out, cache = conv_relu_forward(x, w, b, conv_param)
dx, dw, db = conv_relu_backward(dout, cache)

# Numerical gradients with respect to x, w and b.
dx_num = eval_numerical_gradient_array(lambda x: conv_relu_forward(x, w, b, conv_param)[0], x, dout)
dw_num = eval_numerical_gradient_array(lambda w: conv_relu_forward(x, w, b, conv_param)[0], w, dout)
db_num = eval_numerical_gradient_array(lambda b: conv_relu_forward(x, w, b, conv_param)[0], b, dout)

print '测试 conv_relu:'
print 'dx 误差: ', rel_error(dx_num, dx)
print 'dw 误差: ', rel_error(dw_num, dw)
print 'db 误差: ', rel_error(db_num, db)

## 浅层卷积网络

打开 `classifiers\chapter6\cnn.py` 文件，实现 `ThreeLayerConvNet` 类，完成后运行下列代码进行测试。

### 损失函数检验

在不添加正则化的情况下，c分类任务，初始时的损失值应该接近于`log(c)`，运行下列代码进行损失值检验：

In [None]:
# Sanity-check the initial loss of a default-constructed model on random data.
model = ThreeLayerConvNet()
N = 50
X = np.random.randn(N, 3, 32, 32)
y = np.random.randint(10, size=N)

loss, grads = model.loss(X, y)
# exp(loss) is printed instead of the loss itself: if the loss is close to
# log(c), the printed value is close to the number of classes c.
print '初始损失值所对应分类 (无正则化): ', np.exp(loss)

# Repeat the evaluation with the model's reg attribute set to 0.5.
model.reg = 0.5
loss, grads = model.loss(X, y)
print '初始损失值 (正则化): ', loss

### 梯度检验

运行下列代码进行梯度检验：

In [None]:
num_inputs = 2
input_dim = (3, 16, 16)
reg = 0.0
num_classes = 10
X = np.random.randn(num_inputs, *input_dim)
y = np.random.randint(num_classes, size=num_inputs)

model = ThreeLayerConvNet(num_filters=3, filter_size=3,
                          input_dim=input_dim, hidden_dim=7)
loss, grads = model.loss(X, y)
for param_name in sorted(grads):
    f = lambda _: model.loss(X, y)[0]
    param_grad_num = eval_numerical_gradient(
        f, model.params[param_name], verbose=False, h=1e-6)
    e = rel_error(param_grad_num, grads[param_name])
    print '%s 最大相对误差: %e' % (param_name, rel_error(
            param_grad_num, grads[param_name]))

### 过拟合小量数据

运行下列代码，确保在小数据集上出现明显的过拟合现象

In [None]:
# Train on only the first num_train training examples (validation data is
# kept in full) to check that the model can overfit a small dataset.
num_train = 100
small_data = {
  'X_train': data['X_train'][:num_train],
  'y_train': data['y_train'][:num_train],
  'X_val': data['X_val'],
  'y_val': data['y_val'],
}

model = ThreeLayerConvNet(weight_scale=1e-2)

# 20 epochs of Adam with batch size 50 and learning rate 1e-3.
trainer = Trainer(model, small_data,
                num_epochs=20, batch_size=50,
                update_rule='adam',
                updater_config={
                  'learning_rate': 1e-3,
                },
                verbose=True, print_every=5)
trainer.train()

In [None]:
# Top panel: the trainer's loss history, one marker per recorded value.
plt.subplot(2, 1, 1)
plt.title('Training loss',fontsize=18)
plt.plot(trainer.loss_history, 'o')
plt.xlabel('iteration',fontsize=18)
plt.ylabel('loss',fontsize=18)

# Bottom panel: training vs. validation accuracy histories.
plt.subplot(2, 1, 2)
# Adjust margins and spacing between the two panels.
plt.subplots_adjust(left=0.08, right=0.95, wspace=0.25, hspace=0.3)
plt.title('train accuracy VS val accuracy',fontsize=18)
plt.plot(trainer.train_acc_history, '-o')
plt.plot(trainer.val_acc_history, '-*')
plt.legend(['train', 'val'], loc='upper left')
plt.xlabel('epoch',fontsize=18)
plt.ylabel('accuracy',fontsize=18)
plt.show()

## 空间批量归一化

BN算法是一种非常高效的技术，大大加快了网络训练。但BN算法通常使用在全连接网络中，因此在卷积网络中使用时需要做一点修改，我们将其称为 "spatial batch normalization"（空间批量归一化，SBN）。

正常的BN接收(N,D)数据，现在我们需要将BN算法调整为接受(N,C,H,W)数据。
由于之前我们已经编写好了BN算法，你只需要将图片数据重塑成(N,D)然后调用我们实现好了的BN算法，之后再将其输出结果重塑回(N,C,H,W)


### SBN前向传播

打开 `classifiers\chapter6\cnn_layers.py` 文件，实现 `spatial_batchnorm_forward`，完成后运行下列代码进行测试。

In [None]:
#训练阶段：SBN前向传播
N, C, H, W = 2, 3, 4, 5
x = 4 * np.random.randn(N, C, H, W) + 10

print '使用BN之前:'
print '  数据形状: ', x.shape
print '  均值: ', x.mean(axis=(0, 2, 3))
print '  标准差: ', x.std(axis=(0, 2, 3))


gamma, beta = np.ones(C), np.zeros(C)
bn_param = {'mode': 'train'}
out, _ = spatial_batchnorm_forward(x, gamma, beta, bn_param)
print '使用BN后:'
print '  输出数据形状: ', out.shape
print '  均值: ', out.mean(axis=(0, 2, 3))
print '  标准差: ', out.std(axis=(0, 2, 3))

gamma, beta = np.asarray([3, 4, 5]), np.asarray([6, 7, 8])
out, _ = spatial_batchnorm_forward(x, gamma, beta, bn_param)
print '在BN后使用(gamma, beta)进行缩放:'
print '  输出数据形状: ', out.shape
print '  均值: ', out.mean(axis=(0, 2, 3))
print '  标准差: ', out.std(axis=(0, 2, 3))

In [None]:
#测试阶段：SBN前向传播
N, C, H, W = 10, 4, 11, 12

bn_param = {'mode': 'train'}
gamma = np.ones(C)
beta = np.zeros(C)
for t in xrange(50):
  x = 2.3 * np.random.randn(N, C, H, W) + 13
  spatial_batchnorm_forward(x, gamma, beta, bn_param)
bn_param['mode'] = 'test'
x = 2.3 * np.random.randn(N, C, H, W) + 13
a_norm, _ = spatial_batchnorm_forward(x, gamma, beta, bn_param)

print '  均值: ', a_norm.mean(axis=(0, 2, 3))
print '  标准差: ', a_norm.std(axis=(0, 2, 3))

### SBN反向传播
打开 `classifiers\chapter6\cnn_layers.py` 文件，实现 `spatial_batchnorm_backward`，完成后运行下列代码进行测试。

In [None]:
N, C, H, W = 2, 3, 4, 5
x = 5 * np.random.randn(N, C, H, W) + 12
gamma = np.random.randn(C)
beta = np.random.randn(C)
dout = np.random.randn(N, C, H, W)

bn_param = {'mode': 'train'}
fx = lambda x: spatial_batchnorm_forward(x, gamma, beta, bn_param)[0]
fg = lambda a: spatial_batchnorm_forward(x, gamma, beta, bn_param)[0]
fb = lambda b: spatial_batchnorm_forward(x, gamma, beta, bn_param)[0]

dx_num = eval_numerical_gradient_array(fx, x, dout)
da_num = eval_numerical_gradient_array(fg, gamma, dout)
db_num = eval_numerical_gradient_array(fb, beta, dout)

_, cache = spatial_batchnorm_forward(x, gamma, beta, bn_param)
dx, dgamma, dbeta = spatial_batchnorm_backward(dout, cache)
print 'dx 误差: ', rel_error(dx_num, dx)
print 'dgamma 误差: ', rel_error(da_num, dgamma)
print 'dbeta 误差: ', rel_error(db_num, dbeta)

## 训练卷积网络



In [None]:



# Train a ThreeLayerConvNet on the full `data` mapping loaded earlier in the
# notebook: 10 epochs of Adam with batch size 50 and learning rate 1e-3.
model = ThreeLayerConvNet(weight_scale=0.001, hidden_dim=200, reg=0.001)

trainer = Trainer(model, data,
                num_epochs=10, batch_size=50,
                update_rule='adam',
                updater_config={
                  'learning_rate': 1e-3,
                },
                verbose=True, print_every=2)
trainer.train()