# 问题：[可变长度序列的分类](https://github.com/aymericdamien/TensorFlow-Examples/blob/master/examples/3_NeuralNetworks/dynamic_rnn.py)
- 方法： dynamic RNN
- 实现： tensorflow

In [1]:
import random
import tensorflow as tf
import numpy as np

  from ._conv import register_converters as _register_converters


---

## 数据 
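- Class 0: linear sequences (e.g. [0, 1, 2, 3, ...])
- Class 1: random sequences (e.g. [1, 3, 10, 7, ...])

Values are divided by `max_value` before being fed to the model, and every sequence is zero-padded to `max_seq_len`.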

In [2]:
class Sequences:
    """Toy dataset of zero-padded, variable-length sequences with two classes.

    Every sample is a list of ``max_seq_len`` one-element rows. The first
    ``seqlen[i]`` rows hold values divided by ``max_value``; the remaining
    rows are ``[0.0]`` padding.

    * Linear samples (consecutive integers from a random start) are labelled
      ``[1, 0]``.
    * Random samples (independent integers drawn from ``[0, max_value]``) are
      labelled ``[0, 1]``.

    ``data``, ``labels`` and ``seqlen`` are parallel lists with one entry per
    sample; ``batch_id`` is the cursor used by :meth:`next`.
    """

    def __init__(self, n_samples=1000, max_seq_len=20, min_seq_len=3,
                 max_value=1000, seed=None):
        """Generate ``n_samples`` labelled sequences.

        Args:
            n_samples: number of sequences to generate.
            max_seq_len: longest sequence length; also the padded length.
            min_seq_len: shortest sequence length.
            max_value: largest raw integer; also the divisor used for scaling.
            seed: optional seed. When given, a private ``random.Random(seed)``
                is used so the dataset is reproducible and the global
                ``random`` state is left untouched. When ``None`` (default)
                the module-level ``random`` generator is used, as before.

        Raises:
            ValueError: propagated from ``randint`` when a range is empty,
                e.g. ``min_seq_len > max_seq_len`` or a drawn length that
                exceeds ``max_value``.
        """
        rng = random if seed is None else random.Random(seed)

        self.data = []
        self.labels = []
        self.seqlen = []
        for _ in range(n_samples):
            _len = rng.randint(min_seq_len, max_seq_len)
            self.seqlen.append(_len)

            if rng.random() < .5:
                # Linear sequence: consecutive integers that fit in [0, max_value].
                _start = rng.randint(0, max_value - _len)
                s = [[float(v) / max_value] for v in range(_start, _start + _len)]
                self.labels.append([1, 0])
            else:
                # Random sequence: independent draws from [0, max_value].
                s = [[float(rng.randint(0, max_value)) / max_value]
                     for _step in range(_len)]
                self.labels.append([0, 1])

            # Pad to max_seq_len with independent float rows. The previous
            # ``[[0]] * k`` repeated one shared list object k times.
            s += [[0.0] for _pad in range(max_seq_len - _len)]
            self.data.append(s)
        self.batch_id = 0

    def next(self, batch_size):
        """Return the next mini-batch and advance the internal cursor.

        The cursor is reset to 0 once it has moved past the end of the data,
        so the last batch of a pass may hold fewer than ``batch_size`` items.

        Args:
            batch_size: maximum number of samples to return.

        Returns:
            A ``(data_batch, labels_batch, seqlen_batch)`` tuple of list slices.
        """
        if self.batch_id >= len(self.data):
            self.batch_id = 0
        _start, _end = self.batch_id, self.batch_id + batch_size
        data_batch = self.data[_start:_end]
        labels_batch = self.labels[_start:_end]
        seqlen_batch = self.seqlen[_start:_end]
        self.batch_id = _end
        return data_batch, labels_batch, seqlen_batch

    def test(self):
        """Placeholder hook; currently does nothing."""
        pass
# Smoke test: build one dataset with lengths in [2, 20] and call the
# (currently empty) test() hook.
s = Sequences(max_seq_len=20, min_seq_len=2)
s.test()

---
## 模型


In [3]:
# --- Model dimensions ---
max_seq_len = 20    # padded length of every input sequence
hidden_size = 64    # number of units in the LSTM cell
n_classes = 2       # linear vs. random

# --- Optimisation schedule ---
learning_rate = 0.01
batch_size = 128
training_steps = 10000
display_step = 200  # report loss/accuracy every this many steps

# --- Datasets (train first, then test, so the RNG is consumed in the same order) ---
train_set = Sequences(max_seq_len=max_seq_len, n_samples=1000)
test_set = Sequences(max_seq_len=max_seq_len, n_samples=100)

In [4]:
# Build the classification graph: an LSTM reads each padded sequence, the
# output at the last *valid* time step is projected to class logits.
g = tf.Graph()
with g.as_default():
    # Inputs: padded sequences (batch, max_seq_len, 1), one-hot labels
    # (batch, n_classes) and the true, unpadded length of each sequence (batch,).
    _x = tf.placeholder(tf.float32, [None, max_seq_len, 1])
    y = tf.placeholder(tf.float32, [None, n_classes])
    seqlen = tf.placeholder(tf.int32, [None])
    _xavier = tf.contrib.layers.xavier_initializer()
    _zeros = tf.zeros_initializer()
    # Output projection. The variable names are part of the checkpoint, so
    # they are kept exactly as they were.
    with tf.variable_scope('rnn', reuse=tf.AUTO_REUSE):
        U = tf.get_variable('out', shape=[hidden_size, n_classes],
                            dtype=tf.float32, initializer=_xavier)
        b = tf.get_variable('out_biase', shape=[n_classes], dtype=tf.float32, initializer=_zeros)
    # static_rnn expects a Python list with one tensor per time step:
    # unstack along axis 1 gives max_seq_len tensors of shape (batch, 1).
    x = tf.unstack(_x, max_seq_len, 1)
    lstm_cell = tf.contrib.rnn.BasicLSTMCell(hidden_size)
    # sequence_length tells the RNN where each sequence really ends.
    outputs, states = tf.nn.static_rnn(lstm_cell, x,
                                       dtype=tf.float32, sequence_length=seqlen)
    # Stack the per-step outputs: (max_seq_len, batch, hidden_size).
    outputs = tf.stack(outputs)

    # Make the tensor batch-major: (batch, max_seq_len, hidden_size).
    outputs = tf.transpose(outputs, [1, 0, 2])
    _batch_size = tf.shape(outputs)[0]
    # Flat row index of the last valid step of every sequence after the
    # reshape to (batch * max_seq_len, hidden_size) below.
    index = tf.range(0, _batch_size) * max_seq_len + (seqlen - 1)
    outputs = tf.gather(tf.reshape(outputs, [-1, hidden_size]), index)
    preds = tf.matmul(outputs, U) + b
    # The non-v2 op is deprecated. The labels come from a placeholder, so
    # letting gradients flow into them (the v2 behaviour) changes nothing.
    loss = tf.nn.softmax_cross_entropy_with_logits_v2(logits=preds, labels=y)
    cost = tf.reduce_mean(loss)
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
    train_op = optimizer.minimize(cost)
    correct_pred = tf.equal(tf.argmax(preds,1), tf.argmax(y,1))
    accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
    init = tf.global_variables_initializer()
    saver = tf.train.Saver()

Instructions for updating:

Future major versions of TensorFlow will allow gradients to flow
into the labels input on backprop by default.

See @{tf.nn.softmax_cross_entropy_with_logits_v2}.



  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


In [5]:
# Train on mini-batches, report progress periodically, then save a checkpoint.
model_saved = './dynamic rnn/'
with tf.Session(graph=g) as sess:
    sess.run(init)
    for step in range(training_steps):
        batch_x, batch_y, batch_seqlen = train_set.next(batch_size)
        # Build the feed once; the same batch is used for the update and the report.
        feed = {_x: batch_x, y: batch_y, seqlen: batch_seqlen}
        sess.run(train_op, feed_dict=feed)
        if step % display_step == 0 or step == 1:
            # Batch accuracy and loss, measured after the update above.
            acc, loss_ = sess.run([accuracy, cost], feed_dict=feed)
            # The number printed after "Step" is step * batch_size, i.e. a
            # sample count rather than the step index.
            print("Step {}, Minibatch Loss= {:.6f}, Training Accuracy= {:.5f}".format(
                step * batch_size, loss_, acc))
    saver.save(sess, model_saved)

Step 0, Minibatch Loss= 0.693309, Training Accuracy= 0.51562
Step 128, Minibatch Loss= 0.692693, Training Accuracy= 0.51562
Step 25600, Minibatch Loss= 0.693772, Training Accuracy= 0.58594
Step 51200, Minibatch Loss= 0.694127, Training Accuracy= 0.59375
Step 76800, Minibatch Loss= 0.694464, Training Accuracy= 0.58594
Step 102400, Minibatch Loss= 0.694789, Training Accuracy= 0.58594
Step 128000, Minibatch Loss= 0.695103, Training Accuracy= 0.57031
Step 153600, Minibatch Loss= 0.695408, Training Accuracy= 0.58594
Step 179200, Minibatch Loss= 0.695703, Training Accuracy= 0.56250
Step 204800, Minibatch Loss= 0.695988, Training Accuracy= 0.55469
Step 230400, Minibatch Loss= 0.696261, Training Accuracy= 0.52344
Step 256000, Minibatch Loss= 0.696522, Training Accuracy= 0.50781
Step 281600, Minibatch Loss= 0.696769, Training Accuracy= 0.51562
Step 307200, Minibatch Loss= 0.696998, Training Accuracy= 0.51562
Step 332800, Minibatch Loss= 0.697206, Training Accuracy= 0.51562
Step 358400, Minibatc

## Evaluation
Restore the saved checkpoint and report accuracy on the test set.

In [6]:
# Evaluate the saved model on the whole test set in one batch.
with tf.Session(graph=g) as sess:
    # No initializer run is needed: saver was created after every variable
    # in the graph, so restore() assigns all of them from the checkpoint.
    saver.restore(sess, model_saved)
    test_data = test_set.data
    test_label = test_set.labels
    test_seqlen = test_set.seqlen
    test_acc = sess.run(accuracy, feed_dict={_x: test_data, y: test_label,
                                             seqlen: test_seqlen})
    print("Testing Accuracy:", test_acc)

INFO:tensorflow:Restoring parameters from ./dynamic rnn/
Testing Accuracy: 0.84


## TODO
1. Add dropout
2. Encapsulation: wrap graph construction and training in reusable functions or a class