# 分类问题

下面介绍如何使用 TensorFlow(TF)对 MNIST 手写数字数据集进行分类。

## 获得数据

使用`tf.keras.datasets`获取数据。

In [132]:
import tensorflow as tf
import numpy as np

# Load the MNIST dataset through Keras.
# load_data() is unpacked into a (data, label) pair for the training split
# and another (data, label) pair for the test split.
mnist = tf.keras.datasets.mnist
(train_data, train_label), (test_data, test_label) = mnist.load_data()

def convert_to_one_hot(y, C):
    """Convert integer class labels into a one-hot encoded matrix.

    Args:
        y: Integer class labels as a NumPy array or any array-like
            (list, tuple, ...). Any shape is accepted; it is flattened first.
        C: Number of classes, i.e. the width of each one-hot row.

    Returns:
        A float array of shape (y.size, C) where row i is all zeros except
        for a 1.0 in column y[i].
    """
    # np.asarray lets plain Python sequences through; the original called
    # y.reshape directly and therefore only worked on ndarrays.
    labels = np.asarray(y).reshape(-1)
    # Row k of the identity matrix is the one-hot vector for class k, so
    # fancy-indexing with the labels picks the right row per sample.
    return np.eye(C)[labels]

## 预处理

In [133]:
# Divide both splits by 255.0 (presumably mapping 8-bit pixel intensities
# into [0, 1] -- confirm against the loaded dtype).
# NOTE(review): this cell rebinds its own inputs, so running it a second time
# without re-running the loading cell rescales the data again and passes the
# already one-hot labels back into convert_to_one_hot.
train_data, test_data = train_data/255.0, test_data/255.0
# Flatten every training sample into a 784-element row.
# NOTE(review): test_data is only rescaled above; it is not flattened here.
train_data = train_data.reshape([-1, 784])
print("Shape of train data:", train_data.shape)
print("Shape of train label:", train_label.shape)
print("Label samples:", train_label[:5])
# One-hot encode the training labels into 10 classes.
# NOTE(review): test_label is not converted in this cell.
train_label = convert_to_one_hot(train_label, 10)
print("Shape of train label after processing:", train_label.shape)
print("Label samples after processing:", train_label[:5])

Shape of train data: (60000, 784)
Shape of train label: (60000,)
Label samples: [5 0 4 1 9]
Shape of train label after processing: (60000, 10)
Label samples after processing: [[0. 0. 0. 0. 0. 1. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 1. 0. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 1.]]


In [134]:
# Graph inputs. The leading None leaves the batch dimension unspecified, so
# any number of samples can be fed at run time.
xs = tf.placeholder(dtype=tf.float32, shape=[None, 784])  # 784 features per sample
ys = tf.placeholder(dtype=tf.float32, shape=[None, 10])   # 10 label columns per sample

In [135]:
def add_layer(inputs, in_sz, out_sz, layer_index, activation_func=None):
    """Build one fully connected layer and return its output tensor.

    Args:
        inputs: Tensor multiplied on the left of the weight matrix.
        in_sz: Number of rows of the weight matrix.
        out_sz: Number of columns of the weight matrix and length of the bias.
        layer_index: Value appended to the variable names
            ("weights_<index>" / "biases_<index>").
        activation_func: Optional callable applied to the affine output.
            When None, the affine output is returned unchanged.

    Returns:
        activation_func(inputs @ W + b), or inputs @ W + b when no
        activation is given.
    """
    suffix = str(layer_index)
    # Weights start from tf.random_normal samples, biases from zeros.
    w_init = tf.random_normal((in_sz, out_sz))
    w = tf.Variable(w_init, name="weights_" + suffix, dtype=tf.float32)
    b = tf.Variable(tf.zeros(out_sz), name="biases_" + suffix)
    affine = tf.matmul(inputs, w) + b
    return affine if activation_func is None else activation_func(affine)

In [138]:
# Single linear layer producing raw class scores (logits). Softmax is applied
# separately so the loss can be computed from the logits directly.
logits = add_layer(xs, 784, 10, layer_index=1, activation_func=None)
pred = tf.nn.softmax(logits)
# Per-element cross-entropy terms, shape (batch, 10).
# log_softmax replaces tf.log(softmax(...)): if a softmax probability
# underflows to 0, tf.log returns -inf and 0 * -inf turns the loss (and its
# gradients) into NaN. log_softmax stays finite for finite logits.
cross_entropy1 = -(ys * tf.nn.log_softmax(logits))
# Sum over the class axis -> one loss value per sample.
# `axis` is the current name of the deprecated `reduction_indices` argument.
cross_entropy2 = tf.reduce_sum(cross_entropy1, axis=1)
# Average over the batch -> scalar loss that the optimizer minimizes.
cross_entropy3 = tf.reduce_mean(cross_entropy2)

In [137]:
# Plain gradient descent (learning rate 0.01) on the mean cross-entropy loss.
train = tf.train.GradientDescentOptimizer(0.01).minimize(cross_entropy3)
init = tf.global_variables_initializer()

# Build the diagnostic fetches once, outside the loop. Slicing a tensor adds
# new ops to the graph, so doing it inside the loop grows the graph on every
# report. Only the first five rows of each per-sample quantity are fetched;
# the scalar loss is fetched whole.
debug_fetches = [pred[:5], cross_entropy1[:5], cross_entropy2[:5], cross_entropy3]

with tf.Session() as sess:
    sess.run(init)
    # Full-batch training: every step feeds the entire training set.
    batch_xs = train_data
    batch_ys = train_label
    feed = {xs: batch_xs, ys: batch_ys}
    for i in range(100):
        sess.run(train, feed_dict=feed)
        if i % 50 == 0:
            # One sess.run evaluates all diagnostics in a single forward pass
            # instead of re-running the network once per printed value.
            pred_head, loss1_head, loss2_head, loss3 = sess.run(
                debug_fetches, feed_dict=feed)
            print("True label:", batch_ys[:5])
            print("Pred label:", pred_head)
            print("Loss1:", loss1_head)
            print("Loss2:", loss2_head)
            print("Loss3:", loss3)
            print()

True label: [[0. 0. 0. 0. 0. 1. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 1. 0. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 1.]]
Pred label: [[1.4511282e-09 6.2916195e-03 2.5573039e-09 6.6535590e-06 1.7740781e-08
  5.9213598e-06 1.8403912e-01 2.9989583e-03 2.1696816e-01 5.8968949e-01]
 [4.1566297e-09 7.1455366e-07 1.5454352e-09 1.1043180e-05 2.5775649e-05
  5.6889025e-06 1.3029569e-02 7.3704726e-08 2.3348871e-01 7.5343841e-01]
 [1.7582323e-20 4.9511345e-07 4.5375273e-07 9.6362371e-08 2.7559185e-05
  1.5442931e-05 4.1148797e-07 3.3592040e-04 1.5096773e-06 9.9961805e-01]
 [5.3631347e-03 4.1131851e-05 5.5006766e-01 1.7698538e-03 2.7956554e-01
  1.3417709e-01 8.7300014e-06 3.5056444e-05 1.5739401e-04 2.8814372e-02]
 [3.4017383e-10 7.7473550e-10 2.4801400e-03 4.2004888e-07 5.6926810e-06
  1.9614922e-02 5.5557389e-11 7.7949947e-04 9.7711915e-01 1.3279511e-07]]
Loss1: [[ 0.        0.        0.        0.        0.       12.036944  0.
   0.      