# MNIST: training a small convolutional network

Some utility functions.

In [80]:
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
# Load the MNIST dataset from the 'MNIST_data' directory with one-hot encoded labels.
mnist = input_data.read_data_sets('MNIST_data', one_hot=True)

Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz


In [81]:
# Initialize weights and biases so that symmetry is broken, and make sure some
# values are positive to avoid dead neurons (ReLU).
def weight_variable(shape):
    """Return a tf.Variable of the given shape with random initial weights.

    The initial values are drawn from a truncated normal distribution
    (a normal distribution with overly large or small draws cut off)
    using a standard deviation of 0.1.
    """
    return tf.Variable(tf.truncated_normal(shape, stddev=0.1))

def bias_variable(shape):
    """Return a tf.Variable of the given shape with every entry set to 0.1."""
    return tf.Variable(tf.constant(.1, shape=shape))

In [82]:
# Helpers that build the kinds of network layers we need.
def conv2d(x, W):
    """Convolve input `x` with filter `W` using stride 1 and 'SAME' padding."""
    unit_strides = [1, 1, 1, 1]
    return tf.nn.conv2d(x, W, strides=unit_strides, padding='SAME')

def max_pool_2x2(x):
    """Max-pool `x` with a 2x2 window moved in steps of 2.

    Arguments passed to tf.nn.max_pool:
      x:       the tensor being pooled.
      ksize:   size of the pooling window.
      strides: step size of the pooling operation.
      padding: with 'VALID', the remainder is dropped when the window does
               not evenly divide the corresponding dimension; with 'SAME'
               (used here), the input is padded until it divides evenly.
    """
    pool_window = [1, 2, 2, 1]
    pool_strides = [1, 2, 2, 1]
    return tf.nn.max_pool(x, ksize=pool_window, strides=pool_strides, padding='SAME')

### Step1.
Construct the NN.

In [83]:
# define the input data
# x holds flattened images (784 values each, reshaped to 28x28 below);
# y_ holds the target labels, one row of 10 class entries per example.
x = tf.placeholder(tf.float32, shape=[None, 784])
y_ = tf.placeholder(tf.float32, shape=[None, 10])

# layer 1: convolution layer
W_conv1 = weight_variable([5,5,1,32]) # [filter_height, filter_width, in_channels, out_channels]
b_conv1 = bias_variable([32]) # one bias per output channel

x_images = tf.reshape(x, [-1, 28, 28, 1]) # [batch, in_height, in_width, channels]
h_conv1 = tf.nn.relu(conv2d(x_images, W_conv1) + b_conv1) # ReLU: features = max(features, 0)
h_pool1 = max_pool_2x2(h_conv1)

# layer 2: convolution layer
W_conv2 = weight_variable([5,5,32,64]) # 32 input channels -> 64 output channels
b_conv2 = bias_variable([64])

h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2)+b_conv2)
h_pool2 = max_pool_2x2(h_conv2)

# layer 3: fully-connected layer
# Each image is turned into a vector of 1024 features.
# The input size 7*7*64 assumes the two stride-2 poolings shrink 28x28 to 7x7
# with 64 channels from the second convolution.
W_fc1 = weight_variable([7*7*64, 1024])
b_fc1 = bias_variable([1024])

h_pool2_flat = tf.reshape(h_pool2, [-1, 7*7*64])
h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1)+b_fc1)

# layer 4: dropout layer
# Used to avoid overfitting; the keep probability is fed as an input so that
# training and testing can use different values.
keep_prob = tf.placeholder(tf.float32) # probability that each node is kept (a kept node outputs its value divided by this probability; a dropped node outputs 0)
h_fc1_dropout = tf.nn.dropout(h_fc1, keep_prob)

# layer 5: readout layer (fully-connected layer)
# Produces one unnormalized score (logit) per class, in the same 10-entry
# layout as the labels in y_.
W_fc2 = weight_variable([1024, 10])
b_fc2 = bias_variable([10])

h_fc2 = tf.matmul(h_fc1_dropout, W_fc2) + b_fc2

### Step2.
Define required operations of the network.
- cost
- train
- accuracy

In [84]:
# Cost: softmax cross-entropy between the labels and the logits,
# averaged over the batch.
per_example_loss = tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=h_fc2)
cross_entropy = tf.reduce_mean(per_example_loss)

# Train: one Adam update (learning rate 1e-4) that minimizes the cost.
adam = tf.train.AdamOptimizer(1e-4)
train_step = adam.minimize(cross_entropy)

# Accuracy: fraction of examples whose highest-scoring class matches the label.
predicted_class = tf.argmax(h_fc2, 1)
labelled_class = tf.argmax(y_, 1)
correct_prediction = tf.equal(predicted_class, labelled_class)
accuracy = tf.reduce_mean(tf.cast(correct_prediction, dtype=tf.float32))

### Step3.
Train the model

Interactive Session vs. Session
- 在交互式的会话中，可以一边构造计算图，一边进行运算
- 在静态的会话中，先构造整个网络（包括y / cost / train / accuracy），然后执行所有的run操作（在with tf.Session() as session中进行）
- with ... as ... 会在退出之后，自动清除session

In [85]:

# Train for 5000 mini-batch steps inside a static session, then report the
# accuracy on the test set. The session is closed when the `with` block exits.
with tf.Session() as session:
    # Variables must be initialized before any op that reads them is run.
    session.run(tf.global_variables_initializer())

    # The loop variable is named `step` (not `iter`) so the builtin iter()
    # is not shadowed at module level after this cell has run.
    for step in range(5000):
        # get next batch of train data: batch[0] is fed to x, batch[1] to y_
        batch = mnist.train.next_batch(50)
        if step % 100 == 0:
            # Progress report: accuracy on the current training batch, measured
            # before this step's update and with dropout disabled (keep_prob=1.0).
            accu = accuracy.eval(feed_dict={x: batch[0], y_: batch[1], keep_prob: 1.0})
            print(("Step {}: {}").format(step, accu))
        # One optimizer update with dropout enabled (each node kept with probability 0.5).
        train_step.run(feed_dict={x: batch[0], y_: batch[1], keep_prob: 0.5})

    # Final evaluation on the whole test set in a single run, dropout disabled.
    print("Test accuracy: %.2f" % accuracy.eval(feed_dict={x: mnist.test.images, y_: mnist.test.labels, keep_prob: 1.0}))

Step 0: 0.03999999910593033
Step 100: 0.8799999952316284
Step 200: 0.8999999761581421
Step 300: 0.9800000190734863
Step 400: 0.9200000166893005
Step 500: 0.9200000166893005
Step 600: 0.9200000166893005
Step 700: 0.9800000190734863
Step 800: 0.9800000190734863
Step 900: 0.9599999785423279
Step 1000: 0.9200000166893005
Step 1100: 0.9200000166893005
Step 1200: 0.9599999785423279
Step 1300: 0.9399999976158142
Step 1400: 0.9800000190734863
Step 1500: 1.0
Step 1600: 0.9800000190734863
Step 1700: 0.9800000190734863
Step 1800: 0.9599999785423279
Step 1900: 1.0
Step 2000: 0.9599999785423279
Step 2100: 0.9800000190734863
Step 2200: 0.9599999785423279
Step 2300: 0.9800000190734863
Step 2400: 0.9800000190734863
Step 2500: 1.0
Step 2600: 0.9800000190734863
Step 2700: 0.9599999785423279
Step 2800: 1.0
Step 2900: 0.9399999976158142
Step 3000: 0.9800000190734863
Step 3100: 0.9599999785423279
Step 3200: 0.9599999785423279
Step 3300: 1.0
Step 3400: 1.0
Step 3500: 0.9800000190734863
Step 3600: 1.0
Step 3

<hr/>

### Summary
The basic steps:
1. Data: acquire, preprocess
2. Model: construct the model to be trained
3. Computational graph: model, cost, train node, accuracy
4. Train & Testing: using a static session