In [2]:
# 7.4 卷积层和池化层的实现

In [None]:
# CNN中各层间传递的数据是4维数据。所谓4维数据，比如数据的形状是(10, 1, 28, 28)，
# 则它对应10个高为28、长为28、通道为1的数据。用Python来实现的话，如下所示。

In [4]:
import numpy as np
# Build a random 4-D batch: 10 samples, 1 channel, height 28, width 28.
# The values are random, so any printed numbers change on every run.
x = np.random.rand(10, 1, 28, 28)
x.shape

(10, 1, 28, 28)

In [None]:
# 如果要访问第1个数据，只要写x[0]就可以了（注意Python的索引是从0开始的）。
# 同样地，用x[1]可以访问第2个数据。

In [11]:
# Shape of the first sample in the batch (indexing is 0-based): (channels, height, width).
x[0].shape

(1, 28, 28)

In [12]:
# Display the values of the first sample; this prints the whole (1, 28, 28) array.
x[0]

array([[[0.33422283, 0.25017584, 0.10998635, 0.98133287, 0.4564739 ,
         0.60860829, 0.72935937, 0.66182336, 0.00830906, 0.32200186,
         0.68526191, 0.41233357, 0.98638851, 0.56388968, 0.77847805,
         0.61996636, 0.77045825, 0.61165972, 0.99565704, 0.5936943 ,
         0.62735811, 0.29561036, 0.6126446 , 0.33312011, 0.71034213,
         0.3707909 , 0.78017753, 0.01974643],
        [0.65750555, 0.16993749, 0.08097377, 0.83609296, 0.52948281,
         0.66646797, 0.84111156, 0.1501675 , 0.05444247, 0.68799729,
         0.01151102, 0.11954533, 0.49806426, 0.80024149, 0.59143208,
         0.33735674, 0.62703941, 0.49377166, 0.1450928 , 0.08141418,
         0.31204156, 0.7176734 , 0.61790289, 0.38790059, 0.42274397,
         0.95762967, 0.1306233 , 0.07289688],
        [0.07566849, 0.96522596, 0.62051577, 0.19302344, 0.39374249,
         0.07661005, 0.77216253, 0.73041675, 0.38789843, 0.66691874,
         0.41794234, 0.38896464, 0.10118004, 0.26104856, 0.67891512,
         0.

In [13]:
# To access the spatial data of the first channel of the first sample, index as below.
x[0, 0] # equivalently x[0][0]

array([[0.33422283, 0.25017584, 0.10998635, 0.98133287, 0.4564739 ,
        0.60860829, 0.72935937, 0.66182336, 0.00830906, 0.32200186,
        0.68526191, 0.41233357, 0.98638851, 0.56388968, 0.77847805,
        0.61996636, 0.77045825, 0.61165972, 0.99565704, 0.5936943 ,
        0.62735811, 0.29561036, 0.6126446 , 0.33312011, 0.71034213,
        0.3707909 , 0.78017753, 0.01974643],
       [0.65750555, 0.16993749, 0.08097377, 0.83609296, 0.52948281,
        0.66646797, 0.84111156, 0.1501675 , 0.05444247, 0.68799729,
        0.01151102, 0.11954533, 0.49806426, 0.80024149, 0.59143208,
        0.33735674, 0.62703941, 0.49377166, 0.1450928 , 0.08141418,
        0.31204156, 0.7176734 , 0.61790289, 0.38790059, 0.42274397,
        0.95762967, 0.1306233 , 0.07289688],
       [0.07566849, 0.96522596, 0.62051577, 0.19302344, 0.39374249,
        0.07661005, 0.77216253, 0.73041675, 0.38789843, 0.66691874,
        0.41794234, 0.38896464, 0.10118004, 0.26104856, 0.67891512,
        0.6547055 , 0.6601

In [14]:
import sys, os
from collections import OrderedDict
sys.path.append(os.pardir)
from common.util import im2col

In [15]:
# Unfold one 3-channel 7x7 sample with a 5x5 window (stride 1, no padding).
x1 = np.random.rand(1, 3, 7, 7)
col1 = im2col(x1, 5, 5, stride=1, pad=0)
print(col1.shape) # (9, 75): 9 = 3*3 window positions, 75 = 3*5*5 values per window

(9, 75)


In [20]:
# First channel of the single sample in x1, shown so it can be compared with col1[0].
x1[0, 0]

array([[0.03839591, 0.24788389, 0.30235029, 0.08191092, 0.41408943,
        0.01270931, 0.12752221],
       [0.71850661, 0.75066788, 0.45521606, 0.9317958 , 0.17290206,
        0.66372715, 0.17402088],
       [0.2954876 , 0.48134555, 0.13000129, 0.05147708, 0.64526114,
        0.38252742, 0.80410039],
       [0.38144241, 0.80397651, 0.24307599, 0.62918336, 0.94118704,
        0.38737915, 0.09172245],
       [0.98844763, 0.02761019, 0.5893923 , 0.75958281, 0.85564032,
        0.00177706, 0.87044051],
       [0.55277764, 0.29453557, 0.70165366, 0.03315095, 0.61735683,
        0.91846378, 0.18907312],
       [0.23316863, 0.38232395, 0.24437651, 0.59393741, 0.52680464,
        0.55943772, 0.67311005]])

In [17]:
# First row of the unfolded matrix. In the recorded output its first 25 values
# are the top-left 5x5 patch of x1[0, 0], read row by row.
col1[0]

array([0.03839591, 0.24788389, 0.30235029, 0.08191092, 0.41408943,
       0.71850661, 0.75066788, 0.45521606, 0.9317958 , 0.17290206,
       0.2954876 , 0.48134555, 0.13000129, 0.05147708, 0.64526114,
       0.38144241, 0.80397651, 0.24307599, 0.62918336, 0.94118704,
       0.98844763, 0.02761019, 0.5893923 , 0.75958281, 0.85564032,
       0.89385831, 0.25544756, 0.27892884, 0.31122035, 0.21801504,
       0.11792877, 0.48624226, 0.47540864, 0.70697923, 0.53771164,
       0.63178935, 0.03395237, 0.54431881, 0.26075442, 0.97089694,
       0.22184166, 0.04115981, 0.8805996 , 0.55990329, 0.0829363 ,
       0.58685257, 0.13151869, 0.53379512, 0.24328321, 0.79169923,
       0.77360492, 0.68987932, 0.41297011, 0.65934844, 0.13740372,
       0.87412263, 0.21571147, 0.18482165, 0.45983048, 0.56971224,
       0.0475127 , 0.74434783, 0.79941088, 0.22791843, 0.83375254,
       0.52459545, 0.60625093, 0.65949355, 0.10853401, 0.93656728,
       0.22922065, 0.63441701, 0.90290005, 0.4122449 , 0.43182

In [21]:
# Same unfolding with a batch of 10 samples: the row count scales with the batch size.
x2 = np.random.rand(10, 3, 7, 7) # 10 samples
col2 = im2col(x2, 5, 5, stride=1, pad=0)
print(col2.shape) # (90, 75): 90 = 10 samples * 9 window positions

(90, 75)


In [None]:
# 使用im2col来实现卷积层。

In [None]:
class Convolution:
    """Convolution layer whose forward pass is one matrix product built with im2col.

    Attributes:
        W: 4-D filter array, unpacked as (filter count, channels, filter height,
            filter width).
        b: bias added to the matrix product (broadcast by NumPy).
        stride: step between neighbouring filter positions.
        pad: padding amount forwarded to ``im2col``.
    """

    def __init__(self, W, b, stride=1, pad=0):
        self.W, self.b = W, b
        self.stride, self.pad = stride, pad

    def forward(self, x):
        """Apply the filters to ``x`` and return the feature maps.

        Args:
            x: 4-D input array, unpacked as (batch, channels, height, width).

        Returns:
            Array of shape (batch, filter count, out_h, out_w).
        """
        filter_num, _, filter_h, filter_w = self.W.shape
        batch, _, in_h, in_w = x.shape

        def _out_len(in_len, filter_len):
            # Output size along one spatial axis for the configured stride/pad.
            return int(1 + (in_len + 2*self.pad - filter_len) / self.stride)

        out_h = _out_len(in_h, filter_h)
        out_w = _out_len(in_w, filter_w)

        # One row per filter position, one column per value under the filter.
        patches = im2col(x, filter_h, filter_w, self.stride, self.pad)
        # Flatten every filter into a column so a single dot product applies all of them.
        kernels = self.W.reshape(filter_num, -1).T
        flat = np.dot(patches, kernels) + self.b

        # Rows are ordered (batch, out_h, out_w); move the filter axis next to batch.
        return flat.reshape(batch, out_h, out_w, -1).transpose(0, 3, 1, 2)

In [22]:
# 池化层的实现

In [None]:
class Pooling:
    """Max-pooling layer implemented with im2col.

    Attributes:
        pool_h: height of the pooling window.
        pool_w: width of the pooling window.
        stride: step between neighbouring windows.
        pad: padding amount forwarded to ``im2col``.
            NOTE(review): presumably im2col pads with zeros, in which case a
            padded window over all-negative data yields 0 -- confirm against
            the im2col implementation.
    """

    def __init__(self, pool_h, pool_w, stride=1, pad=0):
        self.pool_h = pool_h
        self.pool_w = pool_w
        self.stride = stride
        self.pad = pad

    def forward(self, x):
        """Return the per-window maximum of every channel of ``x``.

        Args:
            x: 4-D input array, unpacked as (batch, channels, height, width).

        Returns:
            Array of shape (batch, channels, out_h, out_w).
        """
        N, C, H, W = x.shape
        # The padding must be part of the output size: im2col below is called
        # with self.pad, so leaving it out makes the final reshape fail for pad != 0.
        out_h = (H + 2*self.pad - self.pool_h) // self.stride + 1
        out_w = (W + 2*self.pad - self.pool_w) // self.stride + 1

        # (1) Unfold, then regroup so that each row holds exactly one window of
        #     one channel (pool_h*pool_w values).
        col = im2col(x, self.pool_h, self.pool_w, self.stride, self.pad)
        col = col.reshape(-1, self.pool_h*self.pool_w)

        # (2) Maximum of every window.
        out = np.max(col, axis=1)

        # (3) Rows are ordered (batch, out_h, out_w, channel); restore the
        #     (batch, channel, out_h, out_w) layout.
        out = out.reshape(N, out_h, out_w, C).transpose(0, 3, 1, 2)
        return out

In [None]:
# 7.5 CNN的实现

首先来看一下SimpleConvNet的初始化（__init__），取下面这些参数。
参数
- input_dim―输入数据的维度：（通道，高，长） 
- conv_param―卷积层的超参数（字典）。字典的关键字如下：
    - filter_num―滤波器的数量
    - filter_size―滤波器的大小
    - stride―步幅
    - pad―填充
- hidden_size―隐藏层（全连接）的神经元数量
- output_size―输出层（全连接）的神经元数量
- weight_init_std―初始化时权重的标准差

In [23]:
class SimpleConvNet:
    """Small CNN: Conv -> ReLU -> 2x2 max-pool -> Affine -> ReLU -> Affine.

    ``last_layer`` holds the softmax-with-loss layer.

    Args:
        input_dim: input shape as (channels, height, width). Only the height is
            used for the size calculation, so the input is assumed to be square.
        conv_param: dict with the keys 'filter_num', 'filter_size', 'pad' and
            'stride'. It is only read, never modified.
        hidden_size: number of units in the hidden fully connected layer.
        output_size: number of units in the output fully connected layer.
        weight_init_std: standard deviation used to scale the initial weights.

    NOTE(review): Convolution, Relu, Pooling, Affine and SoftmaxWithLoss must be
    in scope when this class is instantiated. Relu, Affine and SoftmaxWithLoss
    are not defined in this notebook -- presumably they come from the project's
    ``common`` package; confirm and import them.
    """

    def __init__(self, input_dim=(1, 28, 28),
                 conv_param={'filter_num':30, 'filter_size':5,'pad':0, 'stride':1},
                 hidden_size=100, output_size=10, weight_init_std=0.01):
        filter_num = conv_param['filter_num']
        filter_size = conv_param['filter_size']
        filter_pad = conv_param['pad']
        filter_stride = conv_param['stride']
        input_size = input_dim[1]

        # Integer arithmetic mirrors how the Convolution and Pooling layers
        # truncate their output sizes, so W2 also has the right number of rows
        # when the convolution output size is odd.
        conv_output_size = (input_size - filter_size + 2*filter_pad) // filter_stride + 1
        pooled_size = conv_output_size // 2  # 2x2 pooling with stride 2
        pool_output_size = filter_num * pooled_size * pooled_size

        # The parameter dict has to exist before entries are assigned to it.
        self.params = {}
        self.params['W1'] = weight_init_std * np.random.randn(
            filter_num, input_dim[0], filter_size, filter_size)
        self.params['b1'] = np.zeros(filter_num)
        self.params['W2'] = weight_init_std * np.random.randn(pool_output_size, hidden_size)
        self.params['b2'] = np.zeros(hidden_size)
        self.params['W3'] = weight_init_std * np.random.randn(hidden_size, output_size)
        self.params['b3'] = np.zeros(output_size)

        # Layers are stored in forward order.
        self.layers = OrderedDict()
        self.layers['Conv1'] = Convolution(self.params['W1'], self.params['b1'],
                                           filter_stride, filter_pad)
        self.layers['Relu1'] = Relu()
        self.layers['Pool1'] = Pooling(pool_h=2, pool_w=2, stride=2)
        self.layers['Affine1'] = Affine(self.params['W2'], self.params['b2'])
        self.layers['Relu2'] = Relu()
        self.layers['Affine2'] = Affine(self.params['W3'], self.params['b3'])
        # Class name spelled in CamelCase like the other layers.
        self.last_layer = SoftmaxWithLoss()
                             
  