In [2]:
import warnings
# Suppress every Python warning for the rest of the session so cell output stays short.
warnings.filterwarnings(action='ignore')

In [3]:
import tensorflow as tf
import input_data

## 数据变换
![mnist_xy](./pic/1.mnist_xy.png)

## 数据读取
input_data提供读取方式，输入目录即可

In [4]:
# Read the MNIST archives found in the current directory; labels are requested
# in one-hot form.
image_data = input_data.read_data_sets(
    './',
    one_hot=True,
)

Instructions for updating:
Please use alternatives such as: tensorflow_datasets.load('mnist')
Instructions for updating:
Please write your own downloading logic.
Instructions for updating:
Please use tf.data to implement this functionality.
Extracting ./train-images-idx3-ubyte.gz
Instructions for updating:
Please use tf.data to implement this functionality.
Extracting ./train-labels-idx1-ubyte.gz
Instructions for updating:
Please use tf.one_hot on tensors.
Extracting ./t10k-images-idx3-ubyte.gz
Extracting ./t10k-labels-idx1-ubyte.gz
Instructions for updating:
Please use alternatives such as official/mnist/_DataSet.py from tensorflow/models.


In [5]:
# Show the training split object exposed by the loaded dataset.
image_data.train

<input_data._DataSet at 0x1942c99e470>

## 占位
定义一个占位符，方便每条实例占据x位置。

In [6]:
# Input placeholder: one row of 784 float features per example; the batch
# dimension is left open (None) so any batch size can be fed.
x = tf.placeholder(dtype="float", shape=[None, 784])

## 变量
W和b是可变，需要调整的，定义为变量Variable。

In [7]:
# Trainable parameters of the linear layer, both starting at zero:
# a 784x10 weight matrix and a 10-element bias vector.
W = tf.Variable(tf.zeros(shape=[784, 10]))
b = tf.Variable(tf.zeros(shape=[10]))

## softmax模型
定义y的输出

In [9]:
# Affine class scores, then softmax to turn them into a probability distribution.
logits = tf.matmul(x, W) + b
y = tf.nn.softmax(logits)

## 交叉熵代价函数

In [10]:
# Ground-truth labels: one 10-element float row per example.
y_ = tf.placeholder("float", [None, 10])
# Cross-entropy summed over the batch. The softmax output is clipped away from
# zero before taking the log: if a probability underflows to exactly 0,
# log(0) = -inf would turn the loss and its gradients into NaN.
cross_entropy = -tf.reduce_sum(y_ * tf.log(tf.clip_by_value(y, 1e-10, 1.0)))

## 训练优化

In [11]:
# Plain gradient descent with a fixed learning rate of 0.01 on the cross-entropy loss.
optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.01)
train_step = optimizer.minimize(cross_entropy)

## 初始化

In [13]:
# Open a session, then build and run the op that gives every variable
# its initial value.
sess = tf.Session()
init = tf.global_variables_initializer()
sess.run(init)

## 迭代训练
运行训练图，用feed_dict把数据映射到占位符。每次取一批（100条）数据进行训练。

In [15]:
# 1000 optimisation steps, each on a batch of 100 training examples.
for i in range(1000):
    batch_xs, batch_ys = image_data.train.next_batch(100)
    step_feed = {x: batch_xs, y_: batch_ys}
    sess.run(train_step, feed_dict=step_feed)

## 模型评估
取预测值中概率最大的索引和实际值中为1的索引，判断是否相同，计算准确率。

In [16]:
# Index of the largest entry along axis 1 for the prediction and for the label;
# an example counts as correct when the two indices agree.
predicted_class = tf.argmax(y, 1)
true_class = tf.argmax(y_, 1)
correct_prediction = tf.equal(predicted_class, true_class)
# Cast the booleans to floats so their mean is the fraction of correct predictions.
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))

In [19]:
# Evaluate accuracy on the whole test split in a single run.
test_feed = {x: image_data.test.images, y_: image_data.test.labels}
sess.run(accuracy, feed_dict=test_feed)

0.8967

# 多层卷积神经网络
在python中高效运算，会用到numpy等库，但是切回原环境时，开销巨大，尤其是在GPU或分布式环境中。（数据迁移的开销）

所以tensorflow先只建立图结构来描述一系列计算操作，然后全部放在python之外一起执行，最后取回结果。

In [20]:
import tensorflow as tf

# Close the session opened for the softmax model before rebinding `sess`;
# otherwise its resources stay allocated with no handle left to release them.
sess.close()
# An InteractiveSession installs itself as the default session, which is what
# lets later cells call Tensor.eval() / Operation.run() without passing `sess`.
sess = tf.InteractiveSession()

## 权重初始化
模型中的权重在初始化时应该加入少量的噪声来打破对称性以及避免0梯度。

使用ReLU神经元，比较好的做法是用一个较小的正数来初始化偏置项，以避免神经元节点输出恒为0的问题。

1. tf.truncated_normal，获得正态分布数据
2. tf.Variable可用tf.constant先初始化获得

In [22]:
def weight_variable(shape, stddev=0.1):
    """Create a weight variable initialised with small random noise.

    Args:
        shape: Shape of the variable, e.g. ``[5, 5, 1, 32]``.
        stddev: Standard deviation passed to ``tf.truncated_normal``.
            Defaults to 0.1, the value that used to be hard-coded.

    Returns:
        A ``tf.Variable`` whose initial value is drawn by ``tf.truncated_normal``.
    """
    initial = tf.truncated_normal(shape, stddev=stddev)
    return tf.Variable(initial)

def bias_variable(shape, value=0.1):
    """Create a bias variable filled with a constant.

    Args:
        shape: Shape of the variable, e.g. ``[32]``.
        value: Constant every element starts at. Defaults to 0.1, the value
            that used to be hard-coded.

    Returns:
        A ``tf.Variable`` whose initial value is ``tf.constant(value, shape=shape)``.
    """
    initial = tf.constant(value, shape=shape)
    return tf.Variable(initial)

## 卷积和池化
需要设置步长和边距。

1. tf.nn.conv2d：x进行卷积计算，然后通过W映射到下一层。
2. tf.nn.max_pool：最大池化

In [24]:
def conv2d(x, W):
    """Convolve ``x`` with filter ``W`` using stride 1 in every dimension and 'SAME' padding."""
    unit_strides = [1, 1, 1, 1]
    return tf.nn.conv2d(x, W, strides=unit_strides, padding='SAME')

def max_pool_2x2(x):
    """Max-pool ``x`` over 2x2 windows with a stride of 2 and 'SAME' padding."""
    window = [1, 2, 2, 1]
    step = [1, 2, 2, 1]
    return tf.nn.max_pool(x, ksize=window, strides=step, padding='SAME')

## 第一层卷积
1. 卷积的权重张量形状是[5, 5, 1, 32]，前两个维度是patch的大小，接着是输入的通道数目，最后是输出的通道数目。 
2. 对于每一个输出通道都有一个对应的偏置量。

In [25]:
# First conv layer: 5x5 patches, 1 input channel, 32 output channels,
# with one bias entry per output channel.
conv1_out_channels = 32
W_conv1 = weight_variable([5, 5, 1, conv1_out_channels])
b_conv1 = bias_variable([conv1_out_channels])

图像变回平面表示，方便卷积。

In [26]:
# Turn each flat 784-element row back into a 28x28 grid with a single channel;
# -1 lets the batch dimension be inferred.
x_image = tf.reshape(x, shape=[-1, 28, 28, 1])

利用relu进行输出，然后最大池化。

In [27]:
# Convolution plus bias, ReLU activation, then 2x2 max pooling.
conv1_preact = conv2d(x_image, W_conv1) + b_conv1
h_conv1 = tf.nn.relu(conv1_preact)
h_pool1 = max_pool_2x2(h_conv1)

## 第二层卷积

In [28]:
# Second conv layer: 5x5 patches taking the 32 channels of h_pool1 to 64 channels.
conv2_out_channels = 64
W_conv2 = weight_variable([5, 5, 32, conv2_out_channels])
b_conv2 = bias_variable([conv2_out_channels])

# Same pipeline as the first layer: convolution + bias, ReLU, 2x2 max pooling.
conv2_preact = conv2d(h_pool1, W_conv2) + b_conv2
h_conv2 = tf.nn.relu(conv2_preact)
h_pool2 = max_pool_2x2(h_conv2)

## 密集连接层
图片尺寸减小到7x7，加入一个有1024个神经元的全连接层，用于处理整个图片。

把池化层输出的张量reshape成向量，乘上权重矩阵，加上偏置，再经过ReLU，得到这一层的输出。

In [29]:
# Fully connected layer with 1024 units on top of the flattened pooling output
# (7 * 7 * 64 values per example).
flat_size = 7 * 7 * 64
W_fc1 = weight_variable([flat_size, 1024])
b_fc1 = bias_variable([1024])

# Flatten each example to a vector, then apply the affine map followed by ReLU.
h_pool2_flat = tf.reshape(h_pool2, [-1, flat_size])
fc1_preact = tf.matmul(h_pool2_flat, W_fc1) + b_fc1
h_fc1 = tf.nn.relu(fc1_preact)

## Dropout
对密集连接层进行dropout，保持比例。

In [30]:
# The keep probability is a placeholder so it can be fed per run
# (0.5 while training, 1.0 when measuring accuracy).
keep_prob = tf.placeholder(dtype="float")
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)

## 输出层
10个输出

In [31]:
# Output layer: map the 1024 dense features to 10 class scores.
n_classes = 10
W_fc2 = weight_variable([1024, n_classes])
b_fc2 = bias_variable([n_classes])

# Softmax turns the scores into class probabilities.
logits_conv = tf.matmul(h_fc1_drop, W_fc2) + b_fc2
y_conv = tf.nn.softmax(logits_conv)

# 模型评估
在图后面连接上评估。

In [36]:
# Cross-entropy summed over the batch. y_conv is clipped away from zero before
# the log: once the network becomes confident a softmax output can underflow to
# exactly 0, and log(0) would make the loss NaN for the rest of training.
cross_entropy = -tf.reduce_sum(y_ * tf.log(tf.clip_by_value(y_conv, 1e-10, 1.0)))
# Adam optimizer with a learning rate of 1e-4.
train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)
correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))
# Accuracy: fraction of examples whose predicted class matches the label.
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))

# Train. The initializer runs after minimize() so that variables created by the
# optimizer are initialised too; tf.global_variables_initializer() replaces the
# deprecated tf.initialize_all_variables().
sess.run(tf.global_variables_initializer())
for i in range(5000):
    batch = image_data.train.next_batch(50)
    if i % 100 == 0:
        # Every 100 steps report accuracy on the current batch with dropout disabled.
        train_accuracy = accuracy.eval(feed_dict={x: batch[0], y_: batch[1], keep_prob: 1.0})
        print("step %d, training accuracy %g" % (i, train_accuracy))
    # Training step with half of the dense-layer units dropped.
    train_step.run(feed_dict={x: batch[0], y_: batch[1], keep_prob: 0.5})

# Final evaluation on the full test split, dropout disabled.
print("test accuracy %g" % accuracy.eval(feed_dict={x: image_data.test.images, y_: image_data.test.labels, keep_prob: 1.0}))

step 0, training accuracy 0.12
step 100, training accuracy 0.8
step 200, training accuracy 0.96
step 300, training accuracy 0.9
step 400, training accuracy 0.96
step 500, training accuracy 0.96
step 600, training accuracy 1
step 700, training accuracy 1
step 800, training accuracy 1
step 900, training accuracy 0.9
step 1000, training accuracy 0.94
step 1100, training accuracy 0.98
step 1200, training accuracy 0.96
step 1300, training accuracy 0.98
step 1400, training accuracy 0.96
step 1500, training accuracy 1
step 1600, training accuracy 0.98
step 1700, training accuracy 0.96
step 1800, training accuracy 0.94
step 1900, training accuracy 0.98
step 2000, training accuracy 1
step 2100, training accuracy 0.98
step 2200, training accuracy 0.98
step 2300, training accuracy 1
step 2400, training accuracy 0.98
step 2500, training accuracy 1
step 2600, training accuracy 1
step 2700, training accuracy 1
step 2800, training accuracy 0.96
step 2900, training accuracy 0.98
step 3000, training ac