## 深入了解CNN

In [1]:
%matplotlib inline

In [2]:
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
import time
from datetime import timedelta 

In [3]:
tf.__version__

'1.1.0'

### 导入数据

In [4]:
from tensorflow.examples.tutorials.mnist import input_data

mnist = input_data.read_data_sets('mnist_data/', one_hot=True)

Extracting mnist_data/train-images-idx3-ubyte.gz
Extracting mnist_data/train-labels-idx1-ubyte.gz
Extracting mnist_data/t10k-images-idx3-ubyte.gz
Extracting mnist_data/t10k-labels-idx1-ubyte.gz


将标签提出出来

In [5]:
mnist.test.cls = np.argmax(mnist.test.labels, axis=1)

In [6]:
mnist.test.cls[:10]

array([7, 2, 1, 0, 4, 1, 4, 9, 5, 9], dtype=int64)

### 配置参数

In [7]:
img_size = 28 # 图片长/宽
img_size_flat = img_size * img_size # 扁平图片大小
img_shape = (img_size, img_size) # 图片尺寸
num_channels = 1 
num_classes = 10

### 搭建模型

![](images/cnn_model.png)

生成变量函数

In [8]:
def weights(shape):
    return tf.Variable(tf.truncated_normal(shape, stddev=0.05))


def bias(shape):
    return tf.Variable(tf.constant(0.05, shape=shape))

卷积函数 + 全连接函数

In [9]:
def conv_layer(images, filter_size, in_channels, out_channels, use_pooling=True):
    """卷积+max pool函数
    
    parameters
    ---
    filter_size : int
        conv2d的filter宽，高
    in_channels : int
        输入images的channels
    out_channels : int
        输出images的channels
    """
    shape = [filter_size, filter_size, in_channels, out_channels]
    _filter = weights(shape)
    _bias = bias([out_channels])
    
    layer = tf.nn.conv2d(images, _filter, strides=[1, 1, 1, 1], padding='SAME') + _bias
    layer = tf.nn.relu(layer)    
    
    if use_pooling:
         layer = tf.nn.max_pool(layer, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')   
    
    return layer, _filter

In [10]:
def flatten_layer(layer):
    """flatten features"""
    layer_shape = layer.get_shape()
    # 计算width * height * channels
    num_features = layer_shape[1:4].num_elements()
    layer_flat = tf.reshape(layer, shape=[-1, num_features])
    return layer_flat, num_features

In [11]:
def fc_layer(layer, num_inputs, num_outputs, use_relu=True):
    """全连接网络
    
    parameters
    ---
    num_inputs  : int
        输入layer的特征数
    num_outputs : int
        输出layer的特征数
    """
    W = weights([num_inputs, num_outputs])
    b = bias([num_outputs])
    
    fc_layer = tf.matmul(layer, W) + b
    if use_relu:
        fc_layer = tf.nn.relu(fc_layer)
    return fc_layer

#### 终于开始搭建模型了

In [12]:
x = tf.placeholder(dtype=tf.float32, shape=[None, img_size_flat])
x_images = tf.reshape(x, shape=[-1, img_size, img_size, num_channels])
y_true = tf.placeholder(dtype=tf.float32, shape=[None, 10])
y_true_cls = tf.argmax(y_true, axis=1)

In [13]:
# 第一层卷积
layer_conv1, weights_conv1 = conv_layer(x_images, 
                                        filter_size=5, in_channels=1, out_channels=16)

In [14]:
layer_conv1

<tf.Tensor 'MaxPool:0' shape=(?, 14, 14, 16) dtype=float32>

In [15]:
# 第二层卷积
layer_conv2, weights_conv2 = conv_layer(layer_conv1, 
                                       filter_size=5, in_channels=16, out_channels=36)

In [16]:
layer_conv2

<tf.Tensor 'MaxPool_1:0' shape=(?, 7, 7, 36) dtype=float32>

In [17]:
layer_flat, num_features = flatten_layer(layer_conv2)
print('layer_flat', layer_flat)
print(num_features)

layer_flat Tensor("Reshape_1:0", shape=(?, 1764), dtype=float32)
1764


In [18]:
# fc咯
fc_layer1 = fc_layer(layer_flat, num_features, 128)
fc_layer1

<tf.Tensor 'Relu_2:0' shape=(?, 128) dtype=float32>

In [19]:
fc_layer2 = fc_layer(fc_layer1, 128, 10)
fc_layer2

<tf.Tensor 'Relu_3:0' shape=(?, 10) dtype=float32>

虽然最后的FC输出已经是我们的num_classes了，当时这些数字输出可能大，可能小（方差大），**毫无解释性**。我们需要将它normalize一下，这里使用softmax，它会固定输出再0到1之内，这也符合了我们要使用的概率挑选。

In [20]:
y_pred = tf.nn.softmax(fc_layer2)
y_pred_cls = tf.argmax(y_pred, axis=1)

#### cost fucntion

In [21]:
cross_entropy = tf.nn.softmax_cross_entropy_with_logits(labels=y_true, logits=fc_layer2)
loss = tf.reduce_mean(cross_entropy)

#### optimizer

In [22]:
optimizer = tf.train.AdamOptimizer(learning_rate=1e-4).minimize(loss)

#### performance measures

In [23]:
correct_prediction = tf.equal(y_pred_cls, y_true_cls)
accuracy = tf.reduce_mean(tf.cast(correct_prediction, dtype=tf.float32))

#### Run  = =

In [24]:
sess = tf.InteractiveSession()
sess.run(tf.global_variables_initializer())

In [25]:
total_iterations = 0

def optimize(num_iterations, batch_size=64):
    global total_iterations
    
    start_time = time.time()
    
    for step in range(total_iterations, total_iterations + num_iterations):
        batch_xs, batch_ys = mnist.train.next_batch(batch_size)
        feed_dict = {x: batch_xs, y_true: batch_ys}
        sess.run(optimizer, feed_dict)
        
        if step % 100 == 0:
            acc = accuracy.eval(feed_dict)
            msg = 'Optimization Iteration: {0:>6}, Traing Accuracy: {1:>6.1%}'
            print(msg.format(step + 1, acc))
            
    total_iterations += num_iterations
    end_time = time.time()
    time_dif = end_time - start_time
    print('Time Usage: ' + str(timedelta(seconds=int(round(time_dif)))))

### 配置一些有用的函数

In [26]:
def plot_images(images, cls_true, cls_pred=None):
    assert len(images) == len(cls_true) == 9
    
    fig, axes = plt.subplots(3, 3)
    fig.subplots_adjust(wspace=0.3, hspace=0.3)
    
    for i, ax in enumerate(axes.flat):
        ax.imshow(images[i].reshape(img_shape), cmap=plt.cm.binary)
        if cls_pred is None:
            ax.set_title('True:{0}'.format(cls_true[i]))
        else:
            ax.set_title('True:{0},Pred:{1}'.format(cls_true[i]), cls_pred[i])
        ax.set_xticks([])
        ax.set_yticks([])
    plt.show()