In [1]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

  from ._conv import register_converters as _register_converters


In [2]:
def reset_graph(seed = 318):
    """Start from an empty default TensorFlow graph with reproducible randomness.

    Seeds NumPy's global generator and, after clearing the default graph,
    sets TensorFlow's random seed to the same value.

    Args:
        seed: Integer seed shared by NumPy and TensorFlow (default 318).
    """
    np.random.seed(seed)
    # The TF seed is set only after the reset so that it is applied to the
    # default graph that is current once the old one has been cleared.
    tf.reset_default_graph()
    tf.set_random_seed(seed)

In [4]:
import input_data

In [5]:
# Alternative (disabled): load from another directory with one-hot encoded labels.
# mnist = input_data.read_data_sets("/data/stu12/mnist/", one_hot = True)
# Active load: one_hot is not passed, so labels use the loader's default encoding.
mnist = input_data.read_data_sets("../mnist/MNIST_data")

Extracting ../mnist/MNIST_data\train-images-idx3-ubyte.gz
Extracting ../mnist/MNIST_data\train-labels-idx1-ubyte.gz
Extracting ../mnist/MNIST_data\t10k-images-idx3-ubyte.gz
Extracting ../mnist/MNIST_data\t10k-labels-idx1-ubyte.gz


In [7]:
mnist.train.images.shape

(55000, 784)

In [6]:
mnist.train.labels.shape
# shape(images) == [None, 784]; shape(labels) == [None] (one label per image, not one-hot)

(55000,)

### constant

In [8]:
# Image geometry (MNIST digits)
img_height = 28
img_width = 28
img_size = img_height*img_width  # flattened pixel count per image
img_channels = 1 # grayscale image
img_classes = 10  # number of target classes

# Spatial size of every convolution kernel
kernel_height = 5
kernel_width = 5

# Layout of one input example: n_steps rows of n_inputs values each
n_inputs = img_width
n_steps = img_height

learning_rate = 1e-4  # passed to the Adam optimizer

# Number of iterations of the training loop; every iteration draws one mini-batch,
# so this counts training steps rather than full passes over the data
n_epochs = 10000

# Convolution layer shapes (the kernel count of one layer is the channel count of the next)
# INPUT:
#   shape(x) == [None, img_height, img_width, last_n_kernels]
#   shape(kernel) == [kernel_height, kernel_width, last_n_kernels, cur_n_kernels]
#   padding == 'SAME'
#
# OUTPUT:
#   shape(conv) == [None, img_height, img_width, cur_n_kernels]
#   (conv_layer then applies a 2x2 max-pool with stride 2 to this result)

n_kernels_conv1 = 24
n_kernels_conv2 = 18

n_neurons_dense1 = 24
n_neurons_dense2 = 18
n_outputs = 10  # width of the final dense layer

# NOTE(review): n_neurons is not referenced by any cell visible here — confirm before removing
n_neurons = 50

batch_size = 10  # examples requested per next_batch call

### placeholder variables

In [9]:
reset_graph()

# Input images: one 2-D array of n_steps rows by n_inputs columns per example.
x = tf.placeholder(tf.float32, [None, n_steps, n_inputs])

# Target labels: one integer class id per example (NOT one-hot).
# mnist is loaded without one_hot=True, so label batches have shape [batch];
# tf.nn.sparse_softmax_cross_entropy_with_logits and tf.nn.in_top_k both
# require rank-1 integer targets, which a float [None, img_classes]
# placeholder cannot provide.
y = tf.placeholder(tf.int64, [None])

### model

In [10]:
def conv_layer(x, n_kernels, activation = None):
    """Convolution + bias, optional activation, then 2x2 max-pooling.

    Uses the module-level kernel_height / kernel_width for the kernel size,
    stride 1 and 'SAME' padding for the convolution, and a 2x2 max-pool with
    stride 2 and 'SAME' padding afterwards.

    Args:
        x: 4-D tensor [batch, height, width, in_channels] whose channel
            dimension is statically known.
        n_kernels: Number of kernels, i.e. output channels.
        activation: Optional callable applied to the biased convolution
            output; None leaves it linear.

    Returns:
        4-D tensor [batch, ceil(height/2), ceil(width/2), n_kernels].
    """
    # shape(W) = [kernel_height, kernel_width, in_channels, n_kernels]
    # shape(b) = [n_kernels]
    in_channels = int(x.get_shape()[-1])

    # Each output value sums over a kernel_height x kernel_width window across
    # all input channels, so the fan-in is the whole receptive field. Scaling
    # by the channel count alone (stddev = 2 for a 1-channel input) makes the
    # activations grow layer by layer and saturates the tanh layers downstream.
    fan_in = kernel_height * kernel_width * in_channels
    stddev = 2 / np.sqrt(fan_in)
    W_init = tf.truncated_normal([kernel_height, kernel_width, in_channels, n_kernels], stddev = stddev)
    W = tf.Variable(W_init)

    b_init = tf.zeros([n_kernels], tf.float32)
    b = tf.Variable(b_init)

    # Convolution (pre-activation); the bias is broadcast over batch and space
    conv = tf.nn.conv2d(x, W, strides = [1,1,1,1], padding = 'SAME')
    sigma = conv + b

    # Optional non-linear transform
    act = sigma if activation is None else activation(sigma)

    # Pooling halves the spatial dimensions
    pool = tf.nn.max_pool(act, ksize = [1,2,2,1], strides = [1,2,2,1], padding = 'SAME')

    return pool

In [11]:
def dense_layer(x, n_neurons, activation = None):
    """Fully connected layer computing activation(x @ W + b).

    Args:
        x: 2-D tensor [batch, n_features] whose last dimension is statically known.
        n_neurons: Number of output units.
        activation: Optional callable applied to the affine output; when None
            the affine output is returned unchanged.

    Returns:
        2-D tensor [batch, n_neurons].
    """
    # shape(W) = [n_features, n_neurons], shape(b) = [n_neurons]
    with tf.name_scope('dense'):
        fan_in = int(x.get_shape()[-1])
        # Weights: truncated normal scaled by the number of incoming features
        weights = tf.Variable(
            tf.truncated_normal([fan_in, n_neurons], stddev = 2 / np.sqrt(fan_in)),
            name='W_dense')
        # Biases start at zero
        bias = tf.Variable(tf.zeros([n_neurons]), name = 'b_dense')

        pre_activation = tf.matmul(x, weights) + bias

    # The activation op is created outside the 'dense' name scope
    if activation is None:
        return pre_activation
    return activation(pre_activation)

In [12]:
# Reshape the input into the 4-D layout expected by the convolution layers
x_conv = tf.reshape(x, [-1, img_height, img_width, img_channels])

# First convolution layer
conv1 = conv_layer(x_conv, n_kernels_conv1, tf.nn.relu)

# Second convolution layer
conv2 = conv_layer(conv1, n_kernels_conv2, tf.nn.relu)

# Each conv_layer halves height and width with its 2x2 pooling (28 -> 14 -> 7):
# shape(conv2) == [None, 7, 7, n_kernels_conv2]

# Flatten the convolution output into the 2-D layout expected by the dense layers
# shape(x_dense) == [None, 7*7*n_kernels_conv2]
x_dense = tf.reshape(conv2, [-1, 7*7*n_kernels_conv2])

# Two hidden dense layers
dense1 = dense_layer(x_dense, n_neurons_dense1, tf.nn.tanh)
dense2 = dense_layer(dense1, n_neurons_dense2, tf.nn.tanh)

# Output layer: raw logits, no softmax. The cross-entropy loss applies softmax
# internally, so a softmax here would be applied twice and flatten the gradients.
# tf.nn.in_top_k ranks scores, so it works on logits as well.
prediction = dense_layer(dense2, n_outputs)

#### Second approach: build each layer with TensorFlow's built-in layer functions

In [None]:
# # Reshape the input into the format required by the convolution layers
# x_conv = tf.reshape(x, [-1, img_height, img_width, img_channels])

# # First convolution layer
# conv1 = tf.layers.conv2d(x_conv, n_kernels_conv1, 
#                          kernel_size = (kernel_height, kernel_width), 
#                          strides = (1,1), 
#                          padding = 'same', activation = tf.nn.relu)
# pool1 = tf.layers.max_pooling2d(conv1, (2, 2), strides = (2,2), padding = 'same')

# # Second convolution layer
# conv2 = tf.layers.conv2d(pool1, n_kernels_conv2,
#                         kernel_size = (kernel_height, kernel_width),
#                         strides = (1,1),
#                         padding = 'same', activation = tf.nn.relu)
# pool2 = tf.layers.max_pooling2d(conv2, (2, 2), strides = (2,2), padding = 'same')

In [None]:
# x_conv = tf.reshape(x, [-1, img_height, img_width, img_channels])

# conv1 = tf.layers.conv2d(x_conv, n_kernels_conv1, (5,5), padding='same', activation = tf.nn.relu)
# print(conv1.shape)
# pool1 = tf.layers.max_pooling2d(conv1, (2,2), (2,2), padding='same')
# print(pool1.shape)
# conv2 = tf.layers.conv2d(pool1, n_kernels_conv2, (5,5), padding='same', activation=tf.nn.relu)
# print(conv2.shape)
# pool1 = tf.layers.max_pooling2d(conv2, (2,2), (2,2), padding='same')
# print(pool1.shape)

# x_dense = tf.reshape(pool1, [-1, 7*7*n_kernels_conv2])
# print(x_dense.shape)

# dense1 = tf.layers.dense(x_dense, n_neurons_dense1, activation = tf.nn.tanh)
# print(dense1.shape)
# dense2 = tf.layers.dense(dense1, n_neurons_dense2, activation= tf.nn.tanh)
# print(dense2.shape)
# prediction = tf.layers.dense(dense2, n_outputs)
# print(prediction.shape)

In [14]:
# # Flatten the convolution output into the format required by the dense layers
# x_dense = tf.reshape(pool2, [-1, 7*7*n_kernels_conv2])

# # First dense layer
# dense1 = tf.layers.dense(x_dense, n_neurons_dense1, activation=tf.nn.tanh)

# # Second dense layer
# dense2 = tf.layers.dense(dense1, n_neurons_dense2, activation=tf.nn.tanh)

# # Output layer (originally used softmax)
# prediction = tf.layers.dense(dense2, img_classes)

### loss function(train)

In [16]:
# Per-example cross-entropy between the targets and the network output.
# NOTE(review): per the TensorFlow docs this op expects integer class ids of
# shape [batch] as `labels` and unnormalized scores as `logits` — confirm that
# y and prediction are defined accordingly.
cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
    logits = prediction,
    labels = y)

# Scalar training objective: mean of the per-example losses
loss = tf.reduce_mean(cross_entropy)

Instructions for updating:

Future major versions of TensorFlow will allow gradients to flow
into the labels input on backprop by default.

See `tf.nn.softmax_cross_entropy_with_logits_v2`.



### optimizer

In [None]:
# Adam optimizer; running `train` performs one update step that minimizes `loss`
optimizer = tf.train.AdamOptimizer(learning_rate)
train = optimizer.minimize(loss)

### model evaluation(test, validation, train)

In [None]:
# # tf.argmax(y, axis = 1)
# # shape(y) == [None, img_classes]  # (0, 1)

# correct = tf.equal(tf.argmax(y, 1), tf.argmax(prediction, 1)) 
# accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

or, for integer class-id labels (not one-hot):

In [None]:
# A prediction is correct when the target class has the highest score (top-1)
correct = tf.nn.in_top_k(predictions = prediction, targets = y, k = 1)

# Fraction of correct predictions in the batch
accuracy = tf.reduce_mean(tf.cast(correct, dtype = tf.float32))

# Build a session

In [None]:
# Scalar summaries for loss and accuracy
loss_summary = tf.summary.scalar('loss', loss)
acc_summary = tf.summary.scalar('acc', accuracy)

# Single op that evaluates the summaries collected in the graph
merged = tf.summary.merge_all()

# Write the graph definition and summary events to disk
log_dir = '../mnist_models'
file_writer = tf.summary.FileWriter(log_dir, tf.get_default_graph())

# NOTE(review): model_path is the same string as log_dir, so the checkpoint
# prefix equals the event-log directory name — confirm this is intended
model_path = '../mnist_models'
saver = tf.train.Saver()

In [None]:
init = tf.global_variables_initializer()

# Per-image layout used to reshape the flat MNIST batches below
n_steps = img_height
n_inputs = img_width

with tf.Session() as sess:

    sess.run(init)
#     saver.restore(sess, model_path)

    try:
        # NOTE: each "epoch" is one mini-batch step, not a full pass over the data
        for epoch in range(n_epochs):

            # train: one optimizer step on a training mini-batch
            x_batch, y_batch = mnist.train.next_batch(batch_size)
            x_batch_rnn = np.reshape(x_batch, [-1, n_steps, n_inputs])
            merged_str, train_acc, _ = sess.run(
                [merged, accuracy, train],
                feed_dict = {x:x_batch_rnn, y:y_batch})

            # test: accuracy on one test mini-batch (no weight update)
            x_batch, y_batch = mnist.test.next_batch(batch_size)
            x_batch_rnn = np.reshape(x_batch, [-1, n_steps, n_inputs])
            test_acc = sess.run(accuracy,
                                feed_dict = {x:x_batch_rnn, y:y_batch})

            # Summaries come from the training batch of this step
            file_writer.add_summary(merged_str, epoch)

            print("epoch: ", str(epoch), "; train_acc: ", str(train_acc),
                 "; test_acc: ", str(test_acc))

            # Periodic checkpoint
            if epoch % 100 == 0:
                saver.save(sess, model_path)

        # Final checkpoint: the last periodic save happens at a multiple of 100,
        # so without this the weights from the trailing steps would be lost.
        saver.save(sess, model_path)
    finally:
        # Flush and close the event file even if training fails or is interrupted
        file_writer.close()