# RNN的使用

本文使用LSTM来做Mnist分类

## 加载Mnist数据

In [1]:
import tensorflow as tf
import numpy as np

# 加载数据用keras
mnist = tf.keras.datasets.mnist
(train_data, train_label), (test_data, test_label) = mnist.load_data()

def convert_to_one_hot(y, C):
    return np.eye(C)[y.reshape(-1)]

In [2]:
train_data, test_data = train_data/255.0, test_data/255.0
train_data = train_data.reshape([-1, 28, 28])  # (batch_size, n_steps, n_inputs)
train_label = convert_to_one_hot(train_label, 10)  # (batch_size, n_classes)

## 设置RNN参数

In [3]:
lr = 0.001
train_iters = 50
batch_size = 128
n_inputs = 28  # 每个单元的输入维度
n_steps = 28   # 时刻数目
n_hidden = 128
n_classes = 10

## 使用LSTM分类

In [4]:
inputs = tf.placeholder(name="inputs", shape=[None, 28, 28], dtype=tf.float32)
labels = tf.placeholder(name="labels", shape=[None, 10], dtype=tf.float32)
print("Inputs:", inputs)
print("Labels:", labels)

Inputs: Tensor("inputs:0", shape=(?, 28, 28), dtype=float32)
Labels: Tensor("labels:0", shape=(?, 10), dtype=float32)


In [5]:
with tf.variable_scope("test", reuse=tf.AUTO_REUSE) as scope:
    # 建立一个RNN CELL，需要给隐藏层的数目
    cell = tf.nn.rnn_cell.LSTMCell(num_units=n_hidden)
    # 设置初始的状态
    state = cell.zero_state(batch_size, dtype=tf.float32)
    # 这里其实是执行了时序上的连续操作
    outputs, final_state = tf.nn.dynamic_rnn(cell, inputs, 
                                             dtype=tf.float32, 
                                             initial_state=state)
    print("All outputs:", outputs)
    print("Final state:", final_state)
    
    w = tf.Variable(tf.random_normal((n_hidden, n_classes)), dtype=tf.float32)
    b = tf.Variable(tf.zeros((n_classes, )), dtype=tf.float32)
    pred = tf.matmul(final_state[1], w) + b
    print("Pred:", pred)
    
    #loss = tf.reduce_mean(tf.reduce_sum((-labels*tf.log(pred)), 
    #                           reduction_indices=[1]))
    # 调用必须要使用logits=*和labels=*
    loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=labels))
    train = tf.train.AdamOptimizer(lr).minimize(loss)
    
    # 计算准确度
    correct_pred = tf.equal(tf.argmax(pred, 1), tf.argmax(labels, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

All outputs: Tensor("test/rnn/transpose_1:0", shape=(128, 28, 128), dtype=float32)
Final state: LSTMStateTuple(c=<tf.Tensor 'test/rnn/while/Exit_3:0' shape=(128, 128) dtype=float32>, h=<tf.Tensor 'test/rnn/while/Exit_4:0' shape=(128, 128) dtype=float32>)
Pred: Tensor("test/add:0", shape=(128, 10), dtype=float32)
Instructions for updating:

Future major versions of TensorFlow will allow gradients to flow
into the labels input on backprop by default.

See @{tf.nn.softmax_cross_entropy_with_logits_v2}.



In [6]:
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    step = 0
    while step  < train_iters:
        indexs = np.random.choice(train_data.shape[0], size=(batch_size, ))
        batch_xs = np.take(train_data, axis=0, indices=indexs)
        batch_ys = np.take(train_label, axis=0, indices=indexs)
        sess.run(train, feed_dict={inputs:batch_xs, labels:batch_ys})
        print("Accuracy:", sess.run(accuracy, feed_dict={inputs:batch_xs, labels:batch_ys}))
        step += 1

Accuracy: 0.1875
Accuracy: 0.2265625
Accuracy: 0.2890625
Accuracy: 0.3046875
Accuracy: 0.40625
Accuracy: 0.3828125
Accuracy: 0.3359375
Accuracy: 0.3828125
Accuracy: 0.3046875
Accuracy: 0.3828125
Accuracy: 0.3984375
Accuracy: 0.3828125
Accuracy: 0.4609375
Accuracy: 0.515625
Accuracy: 0.4140625
Accuracy: 0.5625
Accuracy: 0.484375
Accuracy: 0.484375
Accuracy: 0.53125
Accuracy: 0.59375
Accuracy: 0.4921875
Accuracy: 0.5078125
Accuracy: 0.546875
Accuracy: 0.515625
Accuracy: 0.546875
Accuracy: 0.6640625
Accuracy: 0.5546875
Accuracy: 0.5859375
Accuracy: 0.625
Accuracy: 0.640625
Accuracy: 0.7109375
Accuracy: 0.6796875
Accuracy: 0.6328125
Accuracy: 0.578125
Accuracy: 0.625
Accuracy: 0.625
Accuracy: 0.65625
Accuracy: 0.7421875
Accuracy: 0.6171875
Accuracy: 0.71875
Accuracy: 0.7109375
Accuracy: 0.6328125
Accuracy: 0.7265625
Accuracy: 0.6953125
Accuracy: 0.71875
Accuracy: 0.734375
Accuracy: 0.828125
Accuracy: 0.7265625
Accuracy: 0.7421875
Accuracy: 0.734375


In [10]:
a = tf.random_normal((2, 3))
b = [tf.random_normal((3)), tf.random_normal((3))]
c = a + b

ValueError: Shape must be rank 1 but is rank 0 for 'random_normal_4/RandomStandardNormal' (op: 'RandomStandardNormal') with input shapes: [].