使用神经网络对mnist的softmax拟合进行优化

In [1]:
import tensorflow as tf

In [2]:
#加载mnist数据
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets('MNIST_data/', one_hot=True)

Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz


In [3]:
#定义模型和模型参数
x = tf.placeholder(tf.float32, [None, 784])
w = tf.Variable(tf.zeros([784, 10]))
b = tf.Variable(tf.zeros([10]))
y = tf.nn.softmax(tf.matmul(x, w) + b)
print(y)

Tensor("Softmax:0", shape=(?, 10), dtype=float32)


In [4]:
#损失函数，使用交叉熵损失
yy = tf.placeholder(tf.float32, [None, 10])
cross_entropy = -tf.reduce_sum(yy * tf.log(y))

In [5]:
#梯度下降训练
train_step = tf.train.GradientDescentOptimizer(0.01).minimize(cross_entropy)

In [6]:
#初始化tf变量和图，开始训练
init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)

In [7]:
#迭代1000次，每隔100次打印输出
for i in range(1000):
    batch_xs, batch_ys = mnist.train.next_batch(100)
    sess.run(train_step, feed_dict={x:batch_xs,yy:batch_ys})
    if i % 100 == 0 :
        print("i:{},w:{},b:{}".format(i,sess.run(w), sess.run(b)))

i:0,w:[[ 0.  0.  0. ...,  0.  0.  0.]
 [ 0.  0.  0. ...,  0.  0.  0.]
 [ 0.  0.  0. ...,  0.  0.  0.]
 ..., 
 [ 0.  0.  0. ...,  0.  0.  0.]
 [ 0.  0.  0. ...,  0.  0.  0.]
 [ 0.  0.  0. ...,  0.  0.  0.]],b:[-0.01       -0.02999999 -0.01999997 -0.01       -0.01999997 -0.02999999
  0.04000003  0.02000003  0.03        0.03000006]
i:100,w:[[ 0.  0.  0. ...,  0.  0.  0.]
 [ 0.  0.  0. ...,  0.  0.  0.]
 [ 0.  0.  0. ...,  0.  0.  0.]
 ..., 
 [ 0.  0.  0. ...,  0.  0.  0.]
 [ 0.  0.  0. ...,  0.  0.  0.]
 [ 0.  0.  0. ...,  0.  0.  0.]],b:[-0.16080061  0.28654617 -0.05690459 -0.1044302   0.05088141  0.57086569
  0.01984028  0.27610818 -0.75028223 -0.13182306]
i:200,w:[[ 0.  0.  0. ...,  0.  0.  0.]
 [ 0.  0.  0. ...,  0.  0.  0.]
 [ 0.  0.  0. ...,  0.  0.  0.]
 ..., 
 [ 0.  0.  0. ...,  0.  0.  0.]
 [ 0.  0.  0. ...,  0.  0.  0.]
 [ 0.  0.  0. ...,  0.  0.  0.]],b:[-0.20494522  0.27545971 -0.03976974 -0.15389898  0.12952916  0.85522431
 -0.0420763   0.37827602 -0.99903202 -0.19876564]
i:3

In [8]:
#评估正确率
correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(yy, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
print("accuracy:{}".format(sess.run(accuracy, feed_dict={x : mnist.test.images, yy : mnist.test.labels})))

accuracy:0.9164000153541565


    cnn的主要结构：
    1.卷积层特征过滤
    2.特征过滤，自动提取获取卷积核
    3.池化层处理，减少计算量
    4.激活后通过全连接层进行识别分类

In [9]:
#初始化权重和偏置参数的方法
def init_weight(shape):
    init = tf.truncated_normal(shape, stddev=0.1)
    return tf.Variable(init)
def init_bias(shape):
    init = tf.constant(0.1, shape=shape)
    return tf.Variable(init)

In [10]:
#卷积核定义为2*2大小的数组，池化层也定义为2*2的数组
#卷积和池化操作定义为两个函数,池化方法为取最大值
def conv2(x, w):
    return tf.nn.conv2d(x, w, strides=[1, 1, 1, 1], padding="SAME")
    pass
def max_pooling(x):
    return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="SAME")
    pass

In [11]:
#实现第一层卷积，由一个卷积层和一个max_pooling层组成
w_conv1 = init_weight([5, 5, 1, 32])
b_conv1 = init_bias([32])

In [12]:
#将图片变为4d数组
x_image = tf.reshape(x, [-1, 28, 28, 1])
print(x_image)

Tensor("Reshape:0", shape=(?, 28, 28, 1), dtype=float32)


In [13]:
#对图片进行卷积和池化处理,然后用relu函数计算输出
h_conv1 = tf.nn.relu(conv2(x_image, w_conv1) + b_conv1)
h_pool1 = max_pooling(h_conv1)

In [14]:
#同理再实现第二层卷积
w_conv2 = init_weight([5, 5, 32, 64])
b_conv2 = init_bias([64])

h_conv2 = tf.nn.relu(conv2(h_pool1, w_conv2) + b_conv2)
h_pool2 = max_pooling(h_conv2)

In [15]:
#对处理的结果使用全连接层进行结果判定
w_fc1 = init_weight([7 * 7 * 64, 1024])
b_fc1 = init_bias([1024])

h_pool2_flat = tf.reshape(h_pool2, [-1, 7 * 7 * 64])
h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, w_fc1) + b_fc1)

In [16]:
#减少过拟合，添加dropout层
keep_prob = tf.placeholder("float")
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)

In [17]:
#输出层，使用softmax函数
w_fc2 = init_weight([1024, 10])
b_fc2 = init_bias([10])

y_conv = tf.nn.softmax(tf.matmul(h_fc1_drop, w_fc2) + b_fc2)

In [22]:
#训练和评估模型,使用交叉熵函数对结果进行评价
with tf.Session() as sess:
    cross_entropy = -tf.reduce_sum(yy * tf.log(y_conv))
    train_step = tf.train.AdagradOptimizer(1e-3).minimize(cross_entropy)
    cross_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(yy, 1))
    accuracy = tf.reduce_mean(tf.cast(cross_prediction, "float"))
    sess.run(tf.global_variables_initializer())
    for i in range(20000):
        batch = mnist.train.next_batch(50)
        if i % 100 == 0:
            train_accuracy = accuracy.eval(feed_dict={x : batch[0], yy : batch[1], keep_prob:1.0})
#             print("step:{}, train_accuracy:{}".format(i, train_accuracy))
            train_step.run(feed_dict={x : batch[0], yy : batch[1], keep_prob : 0.5})
    print("test accuracy:{}".format(accuracy.eval(feed_dict={x:mnist.test.images, yy : mnist.test.labels, keep_prob : 1.0})))

test accuracy:0.9218000173568726
