In [1]:
import math
import numpy as np
import sys
import tensorflow as tf

sys.path.append('/home/dong/Dropbox/Projects/NLP/seq2seq')
from seq2seq.encoders import rnn_encoder
from seq2seq.decoders import basic_decoder

In [2]:
from seq2seq.contrib.seq2seq import decoder as contrib_decoder

In [3]:
# Token ids 0 and 1 are kept out of the random data (vocab_lower=2 below)
# and are used as the padding / end-of-sequence markers.
PAD = 0
EOS = 1

vocab_size = 10
input_embedding_size = 50

# Hidden-state size of the first-level encoder RNN cell.
encoder_hidden_units = 50

# Kept consistent with HRED (original author's note): the decoder state is
# twice as wide as a single encoder cell.
decoder_hidden_units = 2 * encoder_hidden_units

import helpers as data_helpers
batch_size = 11
round_num = 20

# Generator of random sequences; every next() call is asked for
# batch_size * round_num sequences, i.e. one full minibatch of dialogues.
batches = data_helpers.random_sequences(
    length_from=3, length_to=8,
    vocab_lower=2, vocab_upper=10,
    batch_size=batch_size * round_num)

### demo print

In [4]:
def demo_mult_rounds(batches, batch_size, round_num):
    """Regroup one flat draw from ``batches`` into per-dialogue groups.

    A single item is pulled from the ``batches`` iterator and read by index
    as ``batch_size`` consecutive runs of ``round_num`` elements each.

    Args:
        batches: iterator whose next item is an indexable collection holding
            at least ``batch_size * round_num`` elements.
        batch_size: number of groups to build.
        round_num: number of elements per group.

    Returns:
        A list of ``batch_size`` lists, each containing ``round_num``
        elements in their original order.
    """
    flat = next(batches)
    return [
        [flat[group * round_num + offset] for offset in range(round_num)]
        for group in range(batch_size)
    ]

# Announce how many groups were generated, then echo a preview of them.
print('产生%d组的sequences, \n'
      '每一组sequence包含%d句长度不一（最短3，最长8）的sequence, \n'
      '其中前十组是:\n' % (batch_size, round_num))

# The message above promises "the first ten groups", so cap the preview at
# ten instead of printing every one of the batch_size groups.
for seq in demo_mult_rounds(batches, batch_size, round_num)[:10]:
    print('%s\n' % seq)

产生11组的sequences, 
每一组sequence包含20句长度不一（最短3，最长8）的sequence, 
其中前十组是:

[[5, 9, 3, 4, 9, 5], [8, 9, 5, 7], [4, 9, 8, 5, 5], [6, 5, 8, 4, 5, 9, 9, 3], [6, 6, 9, 3, 9, 5, 3], [2, 4, 5, 9, 2, 9, 9, 9], [2, 8, 6, 2, 8, 7], [3, 5, 8, 3, 7, 5], [2, 5, 8, 3, 5, 6, 9], [3, 4, 9, 3, 9, 9, 4], [6, 5, 3, 9], [8, 2, 5, 9, 7, 4, 2], [8, 8, 5, 7, 8, 8, 7], [9, 3, 7, 8, 9, 9, 3], [4, 9, 4, 2, 5, 3, 5, 5], [5, 6, 7, 2, 5, 8, 8, 4], [5, 3, 8, 6, 5], [8, 2, 5, 8, 9, 7, 2, 7], [5, 8, 8], [9, 8, 7]]

[[5, 2, 5, 9, 9, 7], [6, 7, 5], [6, 3, 3, 9, 4, 7, 7, 4], [3, 9, 2, 8, 8, 9, 8, 8], [7, 2, 3, 7, 5, 3], [3, 8, 2], [3, 8, 6, 2], [6, 4, 9, 3, 9], [2, 9, 5, 4, 6], [8, 9, 4, 9, 2, 5, 4], [2, 7, 9, 5, 3], [2, 5, 9], [7, 7, 2, 5, 4], [8, 8, 3, 4, 5, 6], [9, 9, 4, 2, 9, 5], [6, 9, 8], [8, 8, 5, 4, 6, 4, 5, 9], [2, 6, 4, 5, 3], [2, 3, 4, 6, 2], [3, 5, 3]]

[[9, 9, 8, 6, 7, 8, 4], [8, 5, 2, 9, 4, 4, 9, 5], [6, 3, 3, 8, 4, 6], [9, 6, 7, 7, 5, 8, 8, 9], [4, 2, 5, 5, 2, 9, 3, 4], [7, 7, 5, 2, 6, 8, 7], [6, 6, 7], [6, 6, 8

## 产生轮数为20的合成数据

#### 使用连续20个sequence模拟一个轮数为20的对话数据

#### 第 i 轮的decoder输出是从第1句到第 i 句输入的拼接

In [5]:
# Clear the default graph so re-running this cell starts from an empty graph,
# then open the session used by every later sess.run call.
tf.reset_default_graph()
sess = tf.InteractiveSession()
# Mode flag handed to the encoder and decoder constructors further down.
mode = tf.contrib.learn.ModeKeys.TRAIN

In [6]:
# Feed placeholders. One minibatch carries batch_size * round_num sequences,
# so that product is the fixed leading dimension everywhere; the second
# dimension of the token placeholders is left open for the padded length.
with tf.name_scope('minibatch_encoder'):
    encoder_inputs = tf.placeholder(shape=(batch_size * round_num, None),
                                    dtype=tf.int32,
                                    name='encoder_inputs')
    encoder_inputs_length = tf.placeholder(shape=(batch_size * round_num,),
                                           dtype=tf.int32,
                                           name='encoder_inputs_length')

# Scope spelled with an underscore to match 'minibatch_encoder' above.
with tf.name_scope('minibatch_decoder'):
    decoder_targets = tf.placeholder(shape=(batch_size * round_num, None),
                                     dtype=tf.int32,
                                     name='decoder_targets')

    decoder_inputs = tf.placeholder(shape=(batch_size * round_num, None),
                                    dtype=tf.int32,
                                    name='decoder_inputs')
    decoder_inputs_length = tf.placeholder(shape=(batch_size * round_num,),
                                           dtype=tf.int32,
                                           name='decoder_inputs_length')

## encoding阶段

In [7]:
# Hyper-parameters for encoding each individual sentence: start from the
# encoder's defaults and override the cell type, width and depth.
encoder_params = rnn_encoder.StackBidirectionalRNNEncoder.default_params()
encoder_params["rnn_cell"]["cell_params"]["num_units"] = encoder_hidden_units
encoder_params["rnn_cell"].update(cell_class="BasicLSTMCell", num_layers=2)

# Last expression of the cell: display the resulting configuration.
encoder_params

{'init_scale': 0.04,
 'rnn_cell': {'cell_class': 'BasicLSTMCell',
  'cell_params': {'num_units': 50},
  'dropout_input_keep_prob': 1.0,
  'dropout_output_keep_prob': 1.0,
  'num_layers': 2,
  'residual_combiner': 'add',
  'residual_connections': False,
  'residual_dense': False}}

In [8]:
# First level: the token embedding table, initialised uniformly in [-1, 1)
# with one row of input_embedding_size floats per vocabulary id.
with tf.name_scope('embedding'):
    input_embeddings = tf.Variable(
        tf.random_uniform([vocab_size, input_embedding_size], -1.0, 1.0),
        dtype=tf.float32)

# NOTE(review): `mode` is re-assigned here to the same value that the
# session-setup cell already gave it; the duplicate is harmless.
mode = tf.contrib.learn.ModeKeys.TRAIN
# Embed the encoder token ids, then run the stacked bidirectional encoder
# over the embedded sequences using their true lengths.
encoder_inputs_embedded = tf.nn.embedding_lookup(
    input_embeddings, encoder_inputs)
encode_fn = rnn_encoder.StackBidirectionalRNNEncoder(
    encoder_params, mode)
encoder_output = encode_fn(
    encoder_inputs_embedded, encoder_inputs_length)

INFO:tensorflow:Creating StackBidirectionalRNNEncoder in mode=train
INFO:tensorflow:
StackBidirectionalRNNEncoder:
  init_scale: 0.04
  rnn_cell:
    cell_class: BasicLSTMCell
    cell_params: {num_units: 50}
    dropout_input_keep_prob: 1.0
    dropout_output_keep_prob: 1.0
    num_layers: 2
    residual_combiner: add
    residual_connections: false
    residual_dense: false



In [9]:
# Print the repr of each encoder output field, one after another. Attributes
# are fetched lazily so the order of access and printing is unchanged.
for template, field in (('outputs: %s\n\n', 'outputs'),
                        ('final state: %s\n\n', 'final_state'),
                        ('attention values: %s', 'attention_values')):
    print(template % repr(getattr(encoder_output, field)))

outputs: <tf.Tensor 'stacked_bidi_rnn_encoder/stack_bidirectional_rnn/cell_1/concat:0' shape=(220, ?, 100) dtype=float32>


final state: ((LSTMStateTuple(c=<tf.Tensor 'stacked_bidi_rnn_encoder/stack_bidirectional_rnn/cell_0/bidirectional_rnn/fw/fw/while/Exit_2:0' shape=(?, 50) dtype=float32>, h=<tf.Tensor 'stacked_bidi_rnn_encoder/stack_bidirectional_rnn/cell_0/bidirectional_rnn/fw/fw/while/Exit_3:0' shape=(?, 50) dtype=float32>), LSTMStateTuple(c=<tf.Tensor 'stacked_bidi_rnn_encoder/stack_bidirectional_rnn/cell_1/bidirectional_rnn/fw/fw/while/Exit_2:0' shape=(?, 50) dtype=float32>, h=<tf.Tensor 'stacked_bidi_rnn_encoder/stack_bidirectional_rnn/cell_1/bidirectional_rnn/fw/fw/while/Exit_3:0' shape=(?, 50) dtype=float32>)), (LSTMStateTuple(c=<tf.Tensor 'stacked_bidi_rnn_encoder/stack_bidirectional_rnn/cell_0/bidirectional_rnn/bw/bw/while/Exit_2:0' shape=(?, 50) dtype=float32>, h=<tf.Tensor 'stacked_bidi_rnn_encoder/stack_bidirectional_rnn/cell_0/bidirectional_rnn/bw/bw/while/Exit_3:0' sh

#### 处理第一层encoder的输出

In [10]:
# final_state is indexed as [direction][layer]; take layer index 1 (the last
# of the two layers) from direction 0 and direction 1. The tensor names
# printed above suggest direction 0 is forward and 1 is backward.
top_state_dir0 = encoder_output.final_state[0][1]
top_state_dir1 = encoder_output.final_state[1][1]

# Join the two directions along the feature axis, cell state first and
# hidden state second (same op creation order as before).
encoder_final_state_c = tf.concat((top_state_dir0.c, top_state_dir1.c), axis=1)
encoder_final_state_h = tf.concat((top_state_dir0.h, top_state_dir1.h), axis=1)

# Package both halves as a single LSTM state tuple.
encoder_final_state = tf.nn.rnn_cell.LSTMStateTuple(
    c=encoder_final_state_c, h=encoder_final_state_h)

In [11]:
# Display the combined state tuple; it is later passed to the decoder call.
encoder_final_state

LSTMStateTuple(c=<tf.Tensor 'concat:0' shape=(?, 100) dtype=float32>, h=<tf.Tensor 'concat_1:0' shape=(?, 100) dtype=float32>)

## decoding阶段

In [12]:
# Prepare the decoder-side inputs.
# The helper module is imported here, next to its first use.
from seq2seq.contrib.seq2seq import helper as decode_helper
with tf.name_scope('decoder_input'):
    # Decoder tokens are embedded with the same table as the encoder tokens.
    decoder_inputs_embedded = tf.nn.embedding_lookup(
        input_embeddings, decoder_inputs)

In [13]:
# Training-time helper: it is given the embedded ground-truth decoder inputs
# together with their true lengths.
with tf.name_scope('decoder_helper'):
    helper_ = decode_helper.TrainingHelper(
        inputs=decoder_inputs_embedded,
        sequence_length=decoder_inputs_length,
    )

In [14]:
# Decoder hyper-parameters: library defaults with the cell width and the
# decode-length cap overridden.
decode_params = basic_decoder.BasicDecoder.default_params()
decode_params["rnn_cell"]["cell_params"]["num_units"] = decoder_hidden_units
# NOTE(review): this cap is derived from the number of sequences in a
# minibatch (batch_size * round_num), not from a token count. The longest
# cumulative target is round_num * 8 + 1 tokens, so the two only happen to be
# compatible for the current constants - confirm this is intended.
decode_params.update(max_decode_length=batch_size * round_num + 5)

# Last expression of the cell: display the resulting configuration.
decode_params

{'init_scale': 0.04,
 'max_decode_length': 225,
 'rnn_cell': {'cell_class': 'BasicLSTMCell',
  'cell_params': {'num_units': 100},
  'dropout_input_keep_prob': 1.0,
  'dropout_output_keep_prob': 1.0,
  'num_layers': 1,
  'residual_combiner': 'add',
  'residual_connections': False,
  'residual_dense': False}}

In [15]:
# Build the decoder object from the parameters above; its output layer is
# sized by vocab_size.
decoder_fn = basic_decoder.BasicDecoder(
    params=decode_params,
    mode=mode,
    vocab_size=vocab_size,
)

INFO:tensorflow:Creating BasicDecoder in mode=train
INFO:tensorflow:
BasicDecoder:
  init_scale: 0.04
  max_decode_length: 225
  rnn_cell:
    cell_class: BasicLSTMCell
    cell_params: {num_units: 100}
    dropout_input_keep_prob: 1.0
    dropout_output_keep_prob: 1.0
    num_layers: 1
    residual_combiner: add
    residual_connections: false
    residual_dense: false



In [16]:
# Run the decoder, seeded with the concatenated encoder state and driven by
# the training helper.
decoder_output, decoder_state = decoder_fn(encoder_final_state, helper_)

In [17]:
# Row index of the final round of every dialogue inside the flattened
# minibatch: round_num-1, 2*round_num-1, ... (one row per dialogue). Only
# these rows contribute to the loss.
indices = tf.constant(
    [[row] for row in range(round_num - 1, batch_size * round_num, round_num)],
    dtype=tf.int32)

# Integer target ids of the selected rows.
final_round_targets = tf.gather_nd(params=decoder_targets, indices=indices)

# The logits are transposed with perm [1, 0, 2] so that their first axis can
# be indexed with the same row indices as decoder_targets (presumably the
# decoder emits them time-major - confirm against the decoder).
final_round_logits = tf.gather_nd(
    params=tf.transpose(decoder_output.logits, perm=[1, 0, 2]),
    indices=indices)

# Sparse cross-entropy takes the integer ids directly; it yields the same
# value as one-hot encoding the targets and using the dense variant, without
# materialising the one-hot tensor.
# NOTE(review): no length mask is applied, so padded target positions are
# averaged into the loss as well - confirm this is intended.
loss = tf.reduce_mean(
    tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=final_round_targets,
        logits=final_round_logits))

train_op = tf.train.AdamOptimizer(learning_rate=0.001).minimize(loss)

In [18]:
# Initialise every variable created so far (embedding, encoder, decoder and
# the optimizer's slots) before any training step runs.
init_op = tf.global_variables_initializer()
sess.run(init_op)

In [20]:
import os

# Directory (under the current working directory) that receives both the
# TensorBoard event file and the checkpoint.
log_path = os.path.join(os.getcwd(), 'arch-basic_rnn')

# Write the graph definition for TensorBoard and flush it so the event file
# is on disk even if the writer is never explicitly closed.
summary_writer = tf.summary.FileWriter(log_path, sess.graph)
summary_writer.flush()

# Save the model. This cell runs right after variable initialisation and
# before the training cells, so the checkpoint holds untrained weights.
# NOTE(review): the original comment here described word2vec words/vectors
# being stored in metadata and ckpt files; nothing in this notebook writes
# such metadata, so it looks carried over from another notebook.
saver = tf.train.Saver()
saver.save(sess, os.path.join(log_path, "model.ckpt"))

'/home/dong/Dropbox/Projects/NLP/lecture5/arch-basic_rnn/model.ckpt'

## 训练阶段

In [None]:
# Draw one minibatch worth of raw sequences.
batch = next(batches)

# Running concatenation inside each dialogue of round_num sentences: the
# first sentence of a dialogue is kept as is; every later one is placed in
# front of the text accumulated so far.
cumbatch = []
for i, sentence in enumerate(batch):
    opens_dialogue = (i % round_num == 0)
    cumbatch.append(sentence if opens_dialogue else sentence + cumbatch[-1])

# Encoder side: the raw, un-accumulated sentences.
encoder_inputs_, encoder_inputs_length_ = data_helpers.batch(batch)

# Decoder side: targets end with EOS, inputs start with EOS, so both have
# the same length and the inputs are the targets shifted by one position.
decoder_targets_, _ = data_helpers.batch(
    [seq + [EOS] for seq in cumbatch])
decoder_inputs_, decoder_inputs_length_ = data_helpers.batch(
    [[EOS] + seq for seq in cumbatch])

In [None]:
# A cell only renders its last expression, so show both arrays as one tuple;
# as two separate statements the targets were evaluated and then discarded.
decoder_targets_.T, decoder_inputs_.T

In [None]:
def next_feed():
    """Draw the next minibatch and build the feed dict for one step.

    Within every block of ``round_num`` consecutive sequences, each sequence
    is replaced by itself followed by the accumulated text of the sequences
    before it in that block. Targets are the accumulated sequences with EOS
    appended; decoder inputs are the same sequences with EOS prepended.

    NOTE(review): the encoder is fed the accumulated sequences here, whereas
    the exploratory cell above feeds it the raw ones - confirm which is
    intended. Also note that newer sentences are placed in front of older
    ones.

    Returns:
        dict mapping the five graph placeholders to numpy arrays. The token
        arrays from ``data_helpers.batch`` are transposed before feeding,
        presumably from time-major to the batch-major placeholder layout.
    """
    raw_sequences = next(batches)

    accumulated = []
    for position, sentence in enumerate(raw_sequences):
        if position % round_num != 0:
            # Not the first round of its dialogue: prepend to the history.
            sentence = sentence + accumulated[-1]
        accumulated.append(sentence)

    enc_tokens, enc_lengths = data_helpers.batch(accumulated)
    target_tokens, _ = data_helpers.batch(
        [seq + [EOS] for seq in accumulated])
    dec_tokens, dec_lengths = data_helpers.batch(
        [[EOS] + seq for seq in accumulated])

    # Feed-dict keys may be the placeholder tensors themselves.
    feed = {}
    feed[encoder_inputs] = enc_tokens.T
    feed[decoder_inputs] = dec_tokens.T
    feed[decoder_targets] = target_tokens.T
    feed[encoder_inputs_length] = enc_lengths
    feed[decoder_inputs_length] = dec_lengths
    return feed

In [None]:
# Smoke test: run exactly one optimisation step on a fresh minibatch.
loss_track = []
fd = next_feed()
_, l = sess.run([train_op, loss], feed_dict=fd)


In [None]:
# Report for the single warm-up step above, in the same format the training
# loop uses. The step number is printed as 0: at this point `batch` is still
# the raw sequence list from the exploratory cell, not a step counter, and
# it is not the data held in `fd` either.
print('batch {}'.format(0))

# One forward pass yields both the loss and the predictions.
minibatch_loss, predict_ = sess.run(
    [loss, decoder_output.predicted_ids], fd)
print('  minibatch loss: {}'.format(minibatch_loss))

# predict_ is transposed so that each row lines up with one target row;
# only rows of the first dialogue (indices < round_num) are visited.
for i, (targ, pred) in enumerate(zip(fd[decoder_targets], predict_.T)):
    if i in [0, 2]:
        print('  sample {}:'.format(i + 1))
        print('    targets     > {}'.format(targ))
        print('    predicted > {}'.format(pred))
    if i == round_num-1:
        break
print()

In [None]:
loss_track = []
max_batches = 3001
batches_in_epoch = 100

try:
    # Training: one optimisation step per freshly drawn minibatch.
    for batch in range(max_batches):
        fd = next_feed()
        _, l = sess.run([train_op, loss], fd)
        loss_track.append(l)

        # Periodic report; batch 0 already satisfies the modulo test.
        if batch % batches_in_epoch == 0:
            print('batch {}'.format(batch))
            # Post-update loss and predictions from a single forward pass
            # instead of two separate sess.run calls.
            minibatch_loss, predict_ = sess.run(
                [loss, decoder_output.predicted_ids], fd)
            print('  minibatch loss: {}'.format(minibatch_loss))
            # Show the first and the last round of the first dialogue; the
            # last round is one of the rows the loss is computed on.
            for i, (targ, pred) in enumerate(
                    zip(fd[decoder_targets], predict_.T)):
                if i in [0, round_num-1]:
                    print('  sample {}:'.format(i + 1))
                    print('    targets     > {}'.format(targ))
                    print('    predicted > {}'.format(pred))
                if i == round_num-1:
                    break
            print()

except KeyboardInterrupt:
    # Allow stopping early from the notebook while keeping loss_track.
    print('training interrupted')

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
plt.plot(loss_track)
print('loss {:.4f} after {} examples (batch_size={})'.format(loss_track[-1], len(loss_track)*batch_size, batch_size))