In [1]:
import tensorflow as tf
import numpy as np

In [2]:
mnist = tf.keras.datasets.mnist.load_data()
(x_trainx, y_train), (x_testx, y_test) = mnist


x_train = np.reshape(x_trainx,(60000,784))
x_test = np.reshape(x_testx,(10000,784))

x_train = x_train / 255
x_test = x_test / 255

y_train = np.eye(10)[y_train]
y_test = np.eye(10)[y_test]



In [3]:
#batch
###very important
batch_size = 600
batch_num = 60000 // batch_size
x_train = np.split(np.array(x_train),batch_num)
y_train = np.split(np.array(y_train),batch_num)

max_step = 1000
keep_ = 0.8
log_dir = "logs/"

y_train[0].dtype

dtype('float64')

In [4]:

# 生成权重
def weight_variable(shape):
    return tf.Variable(tf.truncated_normal(shape,stddev=0.1),name='W')

# 生成偏差
def bias_vairable(shape):
    return tf.Variable(tf.constant(0.1, shape=shape),name='b')

# 记录变量
def variable_summaries(var):
    with tf.name_scope('summaries'):
        mean = tf.reduce_mean(var)
        tf.summary.scalar('mean', mean)
        with tf.name_scope('stddev'):
            stddev = tf.sqrt(tf.reduce_mean(tf.square(var-mean)))
        tf.summary.scalar('stddev', stddev)
        tf.summary.scalar('max', tf.reduce_max(var))
        tf.summary.scalar('min', tf.reduce_min(var))
        tf.summary.histogram('histogram', var)

def conv2d(x,W):
    return tf.nn.conv2d(x,W,strides=[1,1,1,1],padding='SAME',name='conv2d')
        
def conv_layer(input_tensor, weight_shape, layer_name, act=tf.nn.relu):
    with tf.name_scope(layer_name):
        with tf.name_scope('weights'):
            weights = weight_variable(weight_shape)
            variable_summaries(weights)
        with tf.name_scope('biases'):
            biases = bias_vairable([weight_shape[-1]])
            variable_summaries(biases)
        with tf.name_scope('conv_comput'):
            preactivate = conv2d(input_tensor,weights) + biases
        with tf.name_scope('activate'):
            activations = act(preactivate)
        return activations

def linear_layer(input_tensor, input_dim, output_dim, layer_name, act=tf.nn.relu):
    with tf.name_scope(layer_name):
        with tf.name_scope('weights'):
            weights = weight_variable([input_dim, output_dim])
            variable_summaries(weights)
        with tf.name_scope('biases'):
            biases = bias_vairable([output_dim])
            variable_summaries(biases)
        with tf.name_scope('linear_comput'):
            preactivate = tf.matmul(input_tensor,weights) + biases
        with tf.name_scope('activate'):
            activations = act(preactivate)
        return activations
        

def max_pool_2x2(x):
    return tf.nn.max_pool(x,ksize=[1,2,2,1],strides=[1,2,2,1],padding='SAME',name='Max_pool')

with tf.name_scope('Input'):
    x = tf.placeholder(tf.float32,[None,784],name='input_x')
    with tf.name_scope('Input_reshape'):
        x_image = tf.reshape(x,[-1,28,28,1],name='x-image')
        tf.summary.image('input',x_image,10)
    y = tf.placeholder(tf.float32,[None,10],name='input_y')
    keep_prob = tf.placeholder(tf.float32,name='keep_prob')

# 第一次卷积   28*28*1->28*28*32
conv_layer1 = conv_layer(x_image,[5,5,1,32],'conv_layer1')
# 池化之后变为 14*14*32
with tf.name_scope('Max_pool1'):
    h_pool1 = max_pool_2x2(conv_layer1)

# 第二次卷积 14*14*32->14*14*64
conv_layer2 = conv_layer(h_pool1,[5,5,32,64],'conv_layer2')
# 第二次池化之后变为 7*7*64
with tf.name_scope('Max_pool2'):
    h_pool2 = max_pool_2x2(conv_layer2)

with tf.name_scope('Flatten'):
    flatten_ = tf.reshape(h_pool2,[-1,7*7*64])
    
# 第一个全连接层 7*7*64 - 1024
fc1 = linear_layer(flatten_, 7*7*64, 1024, 'FC1')

with tf.name_scope('Dropput'):
    fc1_drop = tf.nn.dropout(fc1, keep_prob)
    
# 第二个全连接层 1024 - 10
logits = linear_layer(fc1_drop, 1024, 10, 'FC2',act=tf.nn.sigmoid)

with tf.name_scope('loss'):
    loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(labels=y, logits=logits))
    tf.summary.scalar('loss',loss)
with tf.name_scope('train'):
    train_step = tf.train.AdamOptimizer(0.001).minimize(loss)

with tf.name_scope('accuracy'):
    prediction = tf.nn.softmax(logits)
    correct_prediction = tf.equal(tf.argmax(prediction,1), tf.argmax(y,1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction,tf.float32))
    tf.summary.scalar('accuracy', accuracy)
    
merged = tf.summary.merge_all()

def get_dict(train,index):
    if train:
        xs, ys = x_train[index], y_train[index]
        k = keep_
    else:
        xs, ys = x_test, y_test
        k = 1.0
    return {x:xs, y:ys, keep_prob: k}
                                 

Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.


In [5]:
with tf.Session() as sess:
    #train_writer = tf.summary.FileWriter(log_dir + '/train', sess.graph)
    #test_writer = tf.summary.FileWriter(log_dir + '/test')
    sess.run(tf.global_variables_initializer())
    
    for epoch in range(21):
        for batch in range(batch_num):
            batch_xs, batch_ys = x_train[batch], y_train[batch]
            sess.run(train_step, feed_dict={x:batch_xs,y:batch_ys,keep_prob:0.7})
        acc = sess.run(accuracy, feed_dict={x:x_test, y:y_test, keep_prob:1.0})
        print("Iter: " + str(epoch) + ", acc: " + str(acc))
        
#     for i in range(max_step):
#         if i%10 == 0:
#             summary,acc = sess.run([merged,accuracy], feed_dict={x:x_test, y:y_test, keep_prob: keep_})
#             #test_writer.add_summary(summary, i)
#             print("Step: " + str(i) + ", acc: " + str(acc))
#         else:
#             summary,_ = sess.run([merged,train_step], feed_dict=get_dict(True, i % batch_num))

Iter: 0, acc: 0.9648
Iter: 1, acc: 0.9801
Iter: 2, acc: 0.9826
Iter: 3, acc: 0.986
Iter: 4, acc: 0.9872
Iter: 5, acc: 0.9884
Iter: 6, acc: 0.9884
Iter: 7, acc: 0.9892
Iter: 8, acc: 0.9888
Iter: 9, acc: 0.9887
Iter: 10, acc: 0.9897
Iter: 11, acc: 0.9916
Iter: 12, acc: 0.9908
Iter: 13, acc: 0.9924
Iter: 14, acc: 0.99
Iter: 15, acc: 0.9915
Iter: 16, acc: 0.9919
Iter: 17, acc: 0.9933
Iter: 18, acc: 0.9929
Iter: 19, acc: 0.9928
Iter: 20, acc: 0.9918
