# Convolution Neural Network in Tensorflow

### author: qhduan@memect.co

In [1]:
import tempfile
import tensorflow as tf

In [2]:
from tensorflow.examples.tutorials.mnist import input_data

In [3]:
# 需要下载
# tempfile.gettempdir() 获取系统的临时目录，在linux下一般是 '/tmp'
mnist = input_data.read_data_sets(tempfile.gettempdir(), one_hot=True)

Successfully downloaded train-images-idx3-ubyte.gz 9912422 bytes.
Extracting /tmp/train-images-idx3-ubyte.gz
Successfully downloaded train-labels-idx1-ubyte.gz 28881 bytes.
Extracting /tmp/train-labels-idx1-ubyte.gz
Successfully downloaded t10k-images-idx3-ubyte.gz 1648877 bytes.
Extracting /tmp/t10k-images-idx3-ubyte.gz
Successfully downloaded t10k-labels-idx1-ubyte.gz 4542 bytes.
Extracting /tmp/t10k-labels-idx1-ubyte.gz


In [4]:
# 训练集，有55000条数据，每条数据784维（即784个变量，每条数据是28*28的字符方阵）
print(mnist.train.images.shape)

(55000, 784)


In [5]:
# 训练集标签，每个字符方阵代表0~9这10个数字
# 采用one hot的表示方法，即0~9编码为10维数组，0对应的数组第0位为1，其他位为0
# 1对应数字第1位为1，其他位为0，以此类推
print(mnist.train.labels.shape)

(55000, 10)


In [6]:
# 验证集
print(mnist.validation.images.shape)
print(mnist.validation.labels.shape)
# 测试集
print(mnist.test.images.shape)
print(mnist.test.labels.shape)

(5000, 784)
(5000, 10)
(10000, 784)
(10000, 10)


In [7]:
# 其实输入数据保存的就是字符图像的抽象，例如下面训练集的第 1 条数据，保存的应该是数字 3
index = 1
a_char = mnist.train.images[index]
a_char = a_char.reshape([28, 28])
for i in range(28):
    t = ['*' if x > 0 else '-' for x in a_char[i] > 0]
    print(''.join(t))
print('第 {} 条数据表示的数字是： {}'.format(index, mnist.train.labels[index].argmax()))

----------------------------
----------------------------
----------------------------
----------------------------
----------------------------
-------------*******--------
-----------**********-------
----------***********-------
----------******--***-------
---------********--***------
----------**--***-****------
--------------*******-------
-----------**********-------
-----------*********--------
-----------********---------
-----------*******----------
--------------*****---------
------*---------***---------
-----***--------***---------
----***--------****---------
----***-------*****---------
----*****--*******----------
-----*************----------
-----***********------------
------********--------------
----------------------------
----------------------------
----------------------------
第 1 条数据表示的数字是： 3


In [8]:
# 其实输入数据保存的就是字符图像的抽象，例如下面训练集的第 3 条数据，保存的应该是数字 6
index = 3
a_char = mnist.train.images[index]
a_char = a_char.reshape([28, 28])
for i in range(28):
    t = ['*' if x > 0 else '-' for x in a_char[i] > 0]
    print(''.join(t))
print('第 {} 条数据表示的数字是： {}'.format(index, mnist.train.labels[index].argmax()))

----------------------------
----------------------------
----------------------------
---------------****---------
--------------*****---------
-------------******---------
------------******----------
------------****------------
-----------*****------------
----------*****-------------
----------****--------------
----------***---------------
---------****---------------
---------***-------*--------
---------***-----****-------
--------****---******-------
--------****--*******-------
--------*************-------
---------************-------
---------***********--------
----------********----------
----------******------------
----------***---------------
----------------------------
----------------------------
----------------------------
----------------------------
----------------------------
第 3 条数据表示的数字是： 6


In [9]:
# 学习率
learning_rate = 0.001
# 迭代次数（批次）
n_iter = 100000
# 批次大小
batch_size = 128

In [10]:
# 输入大小
input_size = 784
# 输出大小
target_size = 10

In [11]:
# 输入占位符
X = tf.placeholder(tf.float32, [None, input_size])
# 输出占位符
y = tf.placeholder(tf.float32, [None, target_size])

In [12]:
# 第一个卷积层
pitch_1 = tf.Variable(tf.random_normal([5, 5, 1, 32]), name='pitch_1')
pitch_1_bias = tf.Variable(tf.random_normal([32]), name='pitch_1_bias')

# 第二个卷积层
pitch_2 = tf.Variable(tf.random_normal([5, 5, 32, 64]), name='pitch_2')
pitch_2_bias = tf.Variable(tf.random_normal([64]), name='pitch_2_bias')

# 全连接层
weight_1 = tf.Variable(tf.random_normal([7 * 7 * 64, 1024]), name='weight_1')
bias_1 = tf.Variable(tf.random_normal([1024]), name='bias_1')

# 输出层
weight_2 = tf.Variable(tf.random_normal([1024, target_size]), name='weight_2')
bias_2 = tf.Variable(tf.random_normal([target_size]), name='bias_2')

In [13]:
X_ = tf.reshape(X, shape=[-1, 28, 28, 1])

In [14]:
conv_1 = tf.nn.relu(
    tf.nn.bias_add(
        tf.nn.conv2d(
            X_, pitch_1, strides=[1, 1, 1, 1], padding='SAME'
        ),
        pitch_1_bias,
        name='bias_add_1'
    ),
    name='relu_1'
)

In [15]:
maxpool_1 = tf.nn.max_pool(
    conv_1,
    ksize=[1, 2, 2, 1],
    strides=[1, 2, 2, 1],
    padding='SAME',
    name='max_pool_1'
)

In [16]:
conv_2 = tf.nn.relu(
    tf.nn.bias_add(
        tf.nn.conv2d(
            maxpool_1, pitch_2, strides=[1, 1, 1, 1], padding='SAME'
        ),
        pitch_2_bias,
        name='bias_add_2'
    ),
    name='relu_2'
)

In [17]:
maxpool_2 = tf.nn.max_pool(
    conv_2,
    ksize=[1, 2, 2, 1],
    strides=[1, 2, 2, 1],
    padding='SAME',
    name='max_pool_2'
)

In [18]:
flatten = tf.reshape(maxpool_2, [-1, 7 *7 * 64])

In [19]:
full_connect_1 = tf.nn.relu(
    tf.add(
        tf.matmul(flatten, weight_1, name='matmul_1'),
        bias_1,
        name='add_1'
    ),
    name='relu_3'
)

In [20]:
full_connect_2 = tf.add(
    tf.matmul(full_connect_1, weight_2, name='matmul_2'),
    bias_2,
    name='add_2'
)

In [21]:
pred = full_connect_2

In [22]:
cost = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(
        pred, y
    )
)

In [23]:
opt = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

In [24]:
correct_pred = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

In [25]:
init = tf.initialize_all_variables()

In [26]:
with tf.Session() as sess:
    sess.run(init)
    n_trained = 0
    while n_trained < n_iter:
        batch_x, batch_y = mnist.train.next_batch(batch_size)
        sess.run(opt, feed_dict={X: batch_x, y: batch_y})
        if n_trained > 0 and n_trained % (100 * batch_size) == 0:
            loss, acc = sess.run([cost, accuracy], feed_dict={X: batch_x, y: batch_y})
            print('{}/{}, loss: {:.4f}, acc: {:.4f}'.format(n_trained, n_iter, loss, acc))
        n_trained += batch_size
    print('calculate train accuracy')
    train_acc = []
    for _ in range(int(mnist.train.num_examples / batch_size)):
        batch_x, batch_y = mnist.train.next_batch(batch_size)
        train_acc.append(sess.run(accuracy, feed_dict={X: batch_x, y: batch_y}))
    print('calculate test accuracy')
    test_acc = []
    for _ in range(int(mnist.test.num_examples / batch_size)):
        batch_x, batch_y = mnist.test.next_batch(batch_size)
        test_acc.append(sess.run(accuracy, feed_dict={X: batch_x, y: batch_y}))
    print('Done')

12800/100000, loss: 2108.4458, acc: 0.8359
25600/100000, loss: 1438.3669, acc: 0.8203
38400/100000, loss: 361.9883, acc: 0.9609
51200/100000, loss: 604.2684, acc: 0.9375
64000/100000, loss: 912.9480, acc: 0.9141
76800/100000, loss: 373.8579, acc: 0.9609
89600/100000, loss: 524.4198, acc: 0.9609
calculate train accuracy
calculate test accuracy
Done


In [27]:
# 训练集准确率
print('train accuracy:', sum(train_acc) / len(train_acc))

train accuracy: 0.955510635198


In [28]:
# 测试集准确率
print('train accuracy:', sum(test_acc) / len(test_acc))

train accuracy: 0.952824519231
