# CNN 卷积神经网络


## CNN的结构：
卷积神经网络CNN的结构一般包含这几个层：
- 输入层：用于数据的输入
- 卷积层：使用卷积核进行特征提取和特征映射
- 激励层：由于卷积也是一种线性运算，因此需要增加非线性映射
- 池化层：进行下采样，对特征图稀疏处理，减少数据运算量。
- 全连接层：通常在CNN的尾部进行重新拟合，减少特征信息的损失
- 输出层：用于输出结果

当然中间还可以使用一些其他的功能层:

- 归一化层（Batch Normalization）：在CNN中对特征的归一化
- 切分层：对某些（图片）数据进行分区域的单独学习
- 融合层：对独立进行特征学习的分支进行融合

## CNN主要层次示例

### 应用CNN进行汽车图像识别用例
![CNN_1.png](CNN_1.jpeg)


### 通过卷积计算进行局部特征提取
![CNN_2.png](CNN_2.jpeg)
![CNN_3.png](CNN_3.jpeg)
![CNN_4.png](CNN_4.jpeg)
![CNN_5.png](CNN_5.jpeg)


### 池化示例（最大池化）
![CNN_6.png](CNN_6.jpeg)


### CNN算法汇总
![CNN_7.png](CNN_7.jpeg)


## 使用Tensorflow 实现CNN

使用一个简单的CNN网络结构如下，括号里边表示tensor经过本层后的输出shape：

- 输入层（28 x 28 x 1）
- 卷积层1（28 x 28 x 32）
- pooling层1（14 x 14 x 32）
- 卷积层2（14 x 14 x 64）
- pooling层2（7 x 7 x 64）
- 全连接层（1 x 1024）
- softmax层（10）


In [1]:
"""Functions for downloading and reading MNIST data."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

# pylint: disable=unused-import
import gzip
import os
import tempfile

import numpy
from six.moves import urllib
from six.moves import xrange  # pylint: disable=redefined-builtin
import tensorflow as tf
from tensorflow.contrib.learn.python.learn.datasets.mnist import read_data_sets
# pylint: enable=unused-import

In [2]:
#coding:utf-8
import MNIST_data.input_data as input_data
import time

"""
权重初始化
初始化为一个接近0的很小的正数
"""
def weight_variable(shape):
    """Create a trainable weight variable of the given shape.

    The initial values are drawn from a truncated normal distribution with
    a standard deviation of 0.1.
    """
    return tf.Variable(tf.truncated_normal(shape, stddev=0.1))

def bias_variable(shape):
    """Create a trainable bias variable of the given shape, filled with the constant 0.1."""
    return tf.Variable(tf.constant(0.1, shape=shape))

"""
卷积和池化，使用卷积步长为1（stride size）,0边距（padding size）
池化用简单传统的2x2大小的模板做max pooling
"""
def conv2d(x, W):
    """Convolve x with the filter W using a stride of 1 and 'SAME' padding.

    Layouts, following
    tf.nn.conv2d(input, filter, strides, padding, use_cudnn_on_gpu=None,
                 data_format=None, name=None):
        x (input)  : [batch, in_height, in_width, in_channels]
        W (filter) : [filter_height, filter_width, in_channels, out_channels]
        strides    : the stride of the sliding window for each dimension of
                     the input; for the common case of equal horizontal and
                     vertical strides, strides = [1, stride, stride, 1]
    """
    unit_strides = [1, 1, 1, 1]
    return tf.nn.conv2d(x, W, strides=unit_strides, padding='SAME')

def max_pool_2x2(x):
    """Apply max pooling with a 2x2 window, a stride of 2 and 'SAME' padding.

    Layouts, following
    tf.nn.max_pool(value, ksize, strides, padding, data_format='NHWC', name=None):
        x (value) : [batch, height, width, channels]
        ksize     : size of the pooling window for each dimension of the input
        strides   : stride of the sliding window for each dimension of the input
    """
    window = [1, 2, 2, 1]
    step = [1, 2, 2, 1]
    return tf.nn.max_pool(x, ksize=window, strides=step, padding='SAME')

# NOTE(review): time.clock() was removed in Python 3.8; on newer interpreters
# this call and the matching one at the end of the script would need
# time.perf_counter() instead - confirm the target Python version.
start = time.clock() # record the start time
# NOTE(review): the cell output recorded in this file reports that the
# MNIST_data module cannot be imported; read_data_sets is also imported from
# tensorflow.contrib at the top of the file and may be usable directly - TODO confirm.
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True) # load the MNIST data with one-hot labels

"""
第一层 卷积层

x_image(batch, 28, 28, 1) -> h_pool1(batch, 14, 14, 32)
"""
# Flattened input images: 784 values per example.
x = tf.placeholder(tf.float32, shape=[None, 784])
# Restore the image layout; the last dimension is the number of channels
# (it would be 3 for an RGB image).
x_image = tf.reshape(x, shape=[-1, 28, 28, 1])
W_conv1 = weight_variable([5, 5, 1, 32])
b_conv1 = bias_variable([32])

# Convolution + ReLU:
#   x_image [batch, in_height, in_width, in_channels]                = [batch, 28, 28, 1]
#   W_conv1 [filter_height, filter_width, in_channels, out_channels] = [5, 5, 1, 32]
#   output  [batch, out_height, out_width, out_channels]             = [batch, 28, 28, 32]
h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)
# 2x2 max pooling:
#   h_conv1 [batch, 28, 28, 32] -> output [batch, 14, 14, 32]
h_pool1 = max_pool_2x2(h_conv1)

"""
第二层 卷积层

h_pool1(batch, 14, 14, 32) -> h_pool2(batch, 7, 7, 64)
"""
# 5x5 filters mapping the 32 input channels to 64 output channels.
W_conv2 = weight_variable([5, 5, 32, 64])
b_conv2 = bias_variable([64])

# Convolution + ReLU on the pooled output of the first layer.
h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
# h_pool1 -> [batch, 14, 14, 32]
# W_conv2 -> [5, 5, 32, 64]
# output  -> [batch, 14, 14, 64]
# 2x2 max pooling halves the spatial size again.
h_pool2 = max_pool_2x2(h_conv2)
# h_conv2 -> [batch, 14, 14, 64]
# output  -> [batch, 7, 7, 64]

"""
第三层 全连接层

h_pool2(batch, 7, 7, 64) -> h_fc1(1, 1024)
"""
# Fully connected layer: 7 * 7 * 64 flattened inputs -> 1024 units.
W_fc1 = weight_variable([7 * 7 * 64, 1024])
b_fc1 = bias_variable([1024])

# Flatten each example's pooled feature maps into one row before the matmul.
h_pool2_flat = tf.reshape(h_pool2, [-1, 7 * 7 * 64])
# Affine transform followed by ReLU; one row of 1024 activations per example.
h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)

"""
Dropout

h_fc1 -> h_fc1_drop, 训练中启用，测试中关闭
"""
# Keep probability for dropout; it is fed at run time so that dropout can be
# active while training (0.5) and switched off while evaluating (1.0).
keep_prob = tf.placeholder(tf.float32)
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)

"""
第四层 Softmax输出层
"""
# Output layer: maps the 1024 (dropout-regularised) features to 10 class scores.
W_fc2 = weight_variable([1024, 10])
b_fc2 = bias_variable([10])

# Softmax over the 10 class scores of each example.
y_conv = tf.nn.softmax(tf.matmul(h_fc1_drop, W_fc2) + b_fc2)

"""
训练和评估模型

ADAM优化器来做梯度最速下降,feed_dict中加入参数keep_prob控制dropout比例
"""
# One-hot ground-truth labels, one row of 10 values per example.
y_ = tf.placeholder("float", [None, 10])
# Cross-entropy summed over the batch. The softmax output is clipped before
# taking the log: a probability that underflows to exactly 0 would otherwise
# give log(0) = -inf and turn the loss (and every gradient) into NaN.
cross_entropy = -tf.reduce_sum(y_ * tf.log(tf.clip_by_value(y_conv, 1e-10, 1.0)))
# Adam optimizer with a learning rate of 0.0001.
train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)
# Per-example check of whether the predicted class matches the true class.
correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))
# Accuracy: fraction of examples predicted correctly.
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))

sess = tf.Session()  # session used to run the graph built above
# Initialise all variables. tf.global_variables_initializer() replaces the
# deprecated tf.initialize_all_variables().
sess.run(tf.global_variables_initializer())

# Train for 5000 steps on mini-batches of 50 examples.
for i in range(5000):
    batch = mnist.train.next_batch(50)
    if i % 100 == 0:
        # Every 100 steps report the accuracy on the current batch,
        # with dropout disabled (keep_prob = 1.0).
        train_accuracy = sess.run(
            accuracy,
            feed_dict={x: batch[0], y_: batch[1], keep_prob: 1.0})
        print("step %d, train_accuracy %g" % (i, train_accuracy))
    # One optimisation step; each unit's output is kept with probability 0.5.
    sess.run(train_step,
             feed_dict={x: batch[0], y_: batch[1], keep_prob: 0.5})

# Accuracy on the test set; keep_prob = 1.0 keeps every unit's output.
print("test accuracy %g" % sess.run(
    accuracy,
    feed_dict={x: mnist.test.images, y_: mnist.test.labels, keep_prob: 1.0}))

# NOTE(review): time.clock() was removed in Python 3.8; it is kept here so that
# `end` stays comparable with `start`, which is taken with the same call.
end = time.clock()  # record the end time
# Format the message before printing: print() returns None, so applying the
# % operator to its result raises a TypeError.
print("running time is %g s" % (end - start))

ModuleNotFoundError: No module named 'MNIST_data'

## 通过搭建卷积神经网络来实现sklearn库中的手写数字识别，搭建的卷积神经网络结构如下图所示：
![CNN_8.png](CNN_8.png)

In [35]:
import tensorflow as tf

from sklearn.datasets import load_digits
import numpy as np

In [36]:
# load training data
digits = load_digits()
# Features cast to float32.
X_data = digits.data.astype(np.float32)
# Labels cast to float32 and reshaped into a single column.
Y_data = digits.target.astype(np.float32).reshape(-1,1)
print(X_data.shape)
print(Y_data.shape)

(1797, 64)
(1797, 1)


In [37]:
from sklearn.preprocessing import MinMaxScaler

# Apply min-max scaling to the pixel features; X_data is replaced by the scaled values.
scaler = MinMaxScaler()
X_data = scaler.fit_transform(X_data)

In [38]:
X_data

array([[0.    , 0.    , 0.3125, ..., 0.    , 0.    , 0.    ],
       [0.    , 0.    , 0.    , ..., 0.625 , 0.    , 0.    ],
       [0.    , 0.    , 0.    , ..., 1.    , 0.5625, 0.    ],
       ...,
       [0.    , 0.    , 0.0625, ..., 0.375 , 0.    , 0.    ],
       [0.    , 0.    , 0.125 , ..., 0.75  , 0.    , 0.    ],
       [0.    , 0.    , 0.625 , ..., 0.75  , 0.0625, 0.    ]],
      dtype=float32)

In [39]:
from sklearn.preprocessing import OneHotEncoder
Y = OneHotEncoder().fit_transform(Y_data).todense() # one-hot encode the labels, then convert the result to a dense matrix

In [40]:
Y

matrix([[1., 0., 0., ..., 0., 0., 0.],
        [0., 1., 0., ..., 0., 0., 0.],
        [0., 0., 1., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 1., 0.],
        [0., 0., 0., ..., 0., 0., 1.],
        [0., 0., 0., ..., 0., 1., 0.]])

In [41]:
# Reshape into image layout (batch, height, width, channels)
X = X_data.reshape(-1,8,8,1)

In [42]:
batch_size = 8 # mini-batch gradient descent (MBGD) with a batch size of 8

In [43]:
def generatebatch(X, Y, n_examples, batch_size):
    """Yield consecutive (inputs, labels) mini-batches of batch_size items.

    Only n_examples // batch_size full batches are produced, so any
    trailing examples that do not fill a whole batch are not yielded.

    Args:
        X: sliceable sequence of inputs.
        Y: sliceable sequence of labels, aligned with X.
        n_examples: number of examples to draw batches from.
        batch_size: number of examples per batch.

    Yields:
        Tuples (X[start:end], Y[start:end]) for each full batch, in order.
    """
    covered = (n_examples // batch_size) * batch_size
    for lo in range(0, covered, batch_size):
        hi = lo + batch_size
        yield X[lo:hi], Y[lo:hi]

In [44]:
# Start from an empty default graph.
tf.reset_default_graph()
# Input layer: placeholders for batches of 8x8 single-channel images and 10-value label rows
tf_X = tf.placeholder(tf.float32,[None,8,8,1])
tf_Y = tf.placeholder(tf.float32,[None,10])

In [45]:
# Convolution layer + activation: ten 3x3 filters over the single input
# channel, stride 1, 'SAME' padding, followed by ReLU.
conv_filter_w1 = tf.Variable(tf.random_normal([3, 3, 1, 10]))
conv_filter_b1 = tf.Variable(tf.random_normal([10]))
relu_feature_maps1 = tf.nn.relu(
    tf.nn.conv2d(tf_X, conv_filter_w1, strides=[1, 1, 1, 1], padding='SAME')
    + conv_filter_b1
)

In [46]:
# Pooling layer: 3x3 max pooling with stride 2 and 'SAME' padding
# NOTE(review): within the visible source max_pool1 is only printed; the next
# convolution reads relu_feature_maps1 instead - confirm this is intended.
max_pool1 = tf.nn.max_pool(relu_feature_maps1,ksize=[1,3,3,1],strides=[1,2,2,1],padding='SAME')

print(max_pool1)

Tensor("MaxPool:0", shape=(?, 4, 4, 10), dtype=float32)


In [47]:
# Convolution layer: 3x3 filters mapping 10 channels to 5, stride 2, 'SAME' padding
# NOTE(review): the input is relu_feature_maps1 rather than max_pool1 - TODO confirm
# against the intended architecture.
conv_filter_w2 = tf.Variable(tf.random_normal([3, 3, 10, 5]))
conv_filter_b2 =  tf.Variable(tf.random_normal([5]))
conv_out2 = tf.nn.conv2d(relu_feature_maps1, conv_filter_w2,strides=[1, 2, 2, 1], padding='SAME') + conv_filter_b2
print(conv_out2)

Tensor("add_1:0", shape=(?, 4, 4, 5), dtype=float32)


In [48]:
# Batch-normalisation layer + activation.
# Mean and variance are computed over axes [0, 1, 2] with the reduced
# dimensions kept, i.e. one statistic per entry of the last (channel) axis.
batch_mean, batch_var = tf.nn.moments(conv_out2, [0, 1, 2], keep_dims=True)
shift = tf.Variable(tf.zeros([5]))   # learnable offset, starts at 0
scale = tf.Variable(tf.ones([5]))    # learnable scale, starts at 1
epsilon = 1e-3                       # added to the variance for numerical stability
BN_out = tf.nn.batch_normalization(
    conv_out2,
    mean=batch_mean,
    variance=batch_var,
    offset=shift,
    scale=scale,
    variance_epsilon=epsilon,
)
print(BN_out)
relu_BN_maps2 = tf.nn.relu(BN_out)

Tensor("batchnorm/add_1:0", shape=(?, 4, 4, 5), dtype=float32)


In [49]:
# Pooling layer: 3x3 max pooling with stride 2 and 'SAME' padding
max_pool2 = tf.nn.max_pool(relu_BN_maps2,ksize=[1,3,3,1],strides=[1,2,2,1],padding='SAME')
print(max_pool2)

Tensor("MaxPool_1:0", shape=(?, 2, 2, 5), dtype=float32)


In [50]:
# Flatten the feature maps of each example into one row of 2*2*5 values
max_pool2_flat = tf.reshape(max_pool2, [-1, 2*2*5])

In [51]:
# Fully connected layer: 2*2*5 inputs -> 50 units, followed by ReLU
fc_w1 = tf.Variable(tf.random_normal([2*2*5,50]))
fc_b1 =  tf.Variable(tf.random_normal([50]))
fc_out1 = tf.nn.relu(tf.matmul(max_pool2_flat, fc_w1) + fc_b1)

In [58]:
# Output layer: 50 units -> 10 class scores, normalised with softmax
out_w1 = tf.Variable(tf.random_normal([50,10]))
out_b1 = tf.Variable(tf.random_normal([10]))
pred = tf.nn.softmax(tf.matmul(fc_out1,out_w1)+out_b1)

In [59]:
# Cross-entropy style loss: the negated mean, over all elements, of
# tf_Y * log(pred). pred is clipped to [1e-11, 1.0] first so that log(0)
# is never evaluated.
loss = -tf.reduce_mean(tf_Y*tf.log(tf.clip_by_value(pred,1e-11,1.0)))

In [60]:
# One optimisation step: Adam with a learning rate of 1e-3, minimising loss.
train_step = tf.train.AdamOptimizer(1e-3).minimize(loss)

In [61]:
# Predicted class index per example (position of the largest softmax output).
# tf.argmax replaces the deprecated alias tf.arg_max.
y_pred = tf.argmax(pred, 1)
# True where the predicted index equals the index of the one-hot label.
bool_pred = tf.equal(tf.argmax(tf_Y, 1), y_pred)

In [62]:
accuracy = tf.reduce_mean(tf.cast(bool_pred,tf.float32)) # accuracy: mean of the per-example correctness flags

In [64]:
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # Train for 1000 epochs.
    for epoch in range(1000):
        # One pass of mini-batch gradient descent over the batches of this epoch.
        for batch_xs, batch_ys in generatebatch(X, Y, Y.shape[0], batch_size):
            sess.run(train_step, feed_dict={tf_X: batch_xs, tf_Y: batch_ys})
        # Every 100 epochs report the accuracy on the full data set.
        if epoch % 100 == 0:
            res = sess.run(accuracy, feed_dict={tf_X: X, tf_Y: Y})
            print(epoch, res)
    # Predictions are requested for a whole batch of samples at once;
    # the input placeholder expects a leading batch dimension.
    res_ypred = sess.run(y_pred, feed_dict={tf_X: X, tf_Y: Y}).flatten()
    print(res_ypred)

0 0.17306623
100 0.6967167
200 0.69894266
300 0.6978297
400 0.69894266
500 0.69894266
600 0.69894266
700 0.69894266
800 0.69894266
900 0.8942682
[0 1 2 ... 8 9 8]


In [66]:
from sklearn.metrics import  accuracy_score
# Compare the labels with the predictions, reshaped into a single column like Y_data.
# NOTE(review): res_ypred was computed on the same X that was used for
# training, so this figure is training-set accuracy.
print(accuracy_score(Y_data,res_ypred.reshape(-1,1)))

0.8953811908736784
