# 演示seq2seq lib中的beam search使用方法

In [1]:
import math
import numpy as np
import sys
import tensorflow as tf

# NOTE(review): machine-specific absolute path, presumably a local checkout of
# the seq2seq library imported below -- adjust it to your own environment.
sys.path.append('/home/dong/Dropbox/Projects/NLP/seq2seq')
from seq2seq.encoders import rnn_encoder
from seq2seq.decoders import (basic_decoder, beam_search_decoder)

from seq2seq.inference import beam_search
from seq2seq.models import bridges

# 产生 demo 合成数据

In [2]:
# Special token ids.
PAD = 0
EOS = 1

# Vocabulary and layer sizes; the decoder width is tied to the encoder width.
vocab_size = 10
input_embedding_size = 16
encoder_hidden_units = 32
decoder_hidden_units = encoder_hidden_units

import helpers as data_helpers

batch_size = 7

# A generator: every next() call produces one minibatch of random samples.
batches = data_helpers.random_sequences(
    length_from=3,
    length_to=8,
    vocab_lower=2,
    vocab_upper=10,
    batch_size=batch_size,
)

print('产生%d个长度不一（最短3，最长8）的sequences, 其中前十个是:' % batch_size)
# Show at most ten sequences from the first minibatch.
for seq in next(batches)[:min(batch_size, 10)]:
    print(seq)

产生7个长度不一（最短3，最长8）的sequences, 其中前十个是:
[6, 6, 3, 5, 8, 2, 6]
[8, 9, 5, 7]
[4, 3, 2]
[2, 2, 7]
[5, 4, 7, 9, 5]
[8, 6, 9, 4]
[3, 9, 8]


# 定义使用beamsearch decoder的seq2seq模型

### 声明placeholder和定义encoder部分，同part2A

In [3]:
# Start from a clean default graph and open an interactive session.
tf.reset_default_graph()
sess = tf.InteractiveSession()
# The whole notebook builds the model in inference mode.
mode = tf.contrib.learn.ModeKeys.INFER

# Data section:
# 1-a. Declare the placeholders. Each group has a rank-2 int32 tensor and a
#      rank-1 int32 length tensor, all with unspecified dimensions.
with tf.name_scope('encoder-in'):
    encoder_inputs = tf.placeholder(
        tf.int32, shape=(None, None), name='encoder_inputs')
    encoder_inputs_length = tf.placeholder(
        tf.int32, shape=(None,), name='encoder_inputs_length')

with tf.name_scope('decoder-target'):
    decoder_targets = tf.placeholder(
        tf.int32, shape=(None, None), name='decoder_targets')
    decoder_targets_length = tf.placeholder(
        tf.int32, shape=(None,), name='decoder_targets_length')

with tf.name_scope('decoder-input'):
    decoder_inputs = tf.placeholder(
        tf.int32, shape=(None, None), name='decoder_inputs')
    decoder_inputs_length = tf.placeholder(
        tf.int32, shape=(None,), name='decoder_inputs_length')

# 1-b. Convert the token ids into embeddings.
# input_embeddings:  [vocab_size, input_embedding_size]
# output_embeddings: [vocab_size, decoder_hidden_units]
with tf.name_scope('embedding'):
    # Both tables are initialised uniformly in [-1.0, 1.0).
    input_embeddings = tf.Variable(
        tf.random_uniform(
            [vocab_size, input_embedding_size], minval=-1.0, maxval=1.0),
        dtype=tf.float32)
    output_embeddings = tf.Variable(
        tf.random_uniform(
            [vocab_size, decoder_hidden_units], minval=-1.0, maxval=1.0),
        dtype=tf.float32)

# Look up the embedding vector of every encoder input id.
with tf.name_scope('input-embeddings'):
    encoder_inputs_embedded = tf.nn.embedding_lookup(
        input_embeddings, encoder_inputs)

# 2. Define the encoder
# 2-a. Encoder hyperparameters: start from the library defaults, then pick the
#      cell class and the number of hidden units.
encoder_params = rnn_encoder.UnidirectionalRNNEncoder.default_params()
encoder_rnn_cell = encoder_params["rnn_cell"]
encoder_rnn_cell["cell_class"] = "BasicLSTMCell"
encoder_rnn_cell["cell_params"]["num_units"] = encoder_hidden_units

# 2-b. Encode the embedded inputs with UnidirectionalRNNEncoder.
encode_fn = rnn_encoder.UnidirectionalRNNEncoder(encoder_params, mode)
encoder_output = encode_fn(encoder_inputs_embedded, encoder_inputs_length)

INFO:tensorflow:Creating UnidirectionalRNNEncoder in mode=infer
INFO:tensorflow:
UnidirectionalRNNEncoder:
  init_scale: 0.04
  rnn_cell:
    cell_class: BasicLSTMCell
    cell_params: {num_units: 32}
    dropout_input_keep_prob: 1.0
    dropout_output_keep_prob: 1.0
    num_layers: 1
    residual_combiner: add
    residual_connections: false
    residual_dense: false



![alt text](figure/seq2seq_uniRNNencoder.png)
UnidirectionalRNNEncoder

![alt text](figure/seq2seq_biRNNencoder.png)
BidirectionalRNNEncoder

```
 /\
/  \
 ||
```
由上面的seq2seq的源代码可见，使用dynamic_rnn或者其变种(bidirectional_dynamic_rnn)，encoder过程通常是简单直观的。

## 定义decoding模型，使用
**seq2seq.decoders.beam_search_decoder.BeamSearchDecoder**


### 调用BeamSearchDecoder需要定义两组超参数

### 1. hyperparameter-group-1: 
**decoder RNN 的选项，任何基于RNN的decoding操作(e.g. BasicDecoder)都需要设定的超参数**

In [4]:
# Decoder RNN hyperparameters: library defaults with the hidden size overridden.
decode_params = beam_search_decoder.BeamSearchDecoder.default_params()
decoder_cell_params = decode_params["rnn_cell"]["cell_params"]
decoder_cell_params["num_units"] = decoder_hidden_units
# Last expression of the cell, so the resulting dict is displayed.
decode_params

{'init_scale': 0.04,
 'max_decode_length': 100,
 'rnn_cell': {'cell_class': 'BasicLSTMCell',
  'cell_params': {'num_units': 32},
  'dropout_input_keep_prob': 1.0,
  'dropout_output_keep_prob': 1.0,
  'num_layers': 1,
  'residual_combiner': 'add',
  'residual_connections': False,
  'residual_dense': False}}

### 2. hyperparameter-group-2

**设置 beam_search 的选项，即针对 beam_search 操作的超参数**

* beam_width
* length_penalty_weight
* choose_successors_fn

In [5]:
# Beam-search specific options, bundled into a BeamSearchConfig.
config = beam_search.BeamSearchConfig(
    beam_width=3,
    vocab_size=vocab_size,
    eos_token=EOS,
    length_penalty_weight=0.6,
    choose_successors_fn=beam_search.choose_top_k,
)

一个beam search设置的例子：

| hyper-param | value | 
| ------------ | --------- | 
| beam_width | 3 |
|vocab_size | 10 |
|eos_token | 1 |
|length_penalty_weight | 0.600000 |
|choose_successors_fn | ```<function choose_top_k at 0x7f83ec705840>``` |

![alt text](figure/seq2seq-bs-length.png)

In [6]:
from seq2seq.contrib.seq2seq import helper as decode_helper

![alt text](figure/seq2seq-bs-decoder-in.png)

In [7]:
# Initialize the BeamSearchDecoder around a freshly built BasicDecoder.
# BeamSearchDecoder arguments:
#   decoder: an instance of "RNNDecoder" to be used with beam search
#            (here a BasicDecoder configured from decode_params)
#   config:  a "BeamSearchConfig" that defines beam search decoding parameters
decoder_fn = beam_search_decoder.BeamSearchDecoder(
    decoder=basic_decoder.BasicDecoder(
        params=decode_params,
        mode=mode,
        vocab_size=vocab_size),
    config=config)



INFO:tensorflow:Creating BasicDecoder in mode=infer
INFO:tensorflow:
BasicDecoder:
  init_scale: 0.04
  max_decode_length: 100
  rnn_cell:
    cell_class: BasicLSTMCell
    cell_params: {num_units: 32}
    dropout_input_keep_prob: 1.0
    dropout_output_keep_prob: 1.0
    num_layers: 1
    residual_combiner: add
    residual_connections: false
    residual_dense: false

INFO:tensorflow:Creating BeamSearchDecoder in mode=infer
INFO:tensorflow:
BeamSearchDecoder:
  init_scale: 0.04
  max_decode_length: 100
  rnn_cell:
    cell_class: BasicLSTMCell
    cell_params: {num_units: 32}
    dropout_input_keep_prob: 1.0
    dropout_output_keep_prob: 1.0
    num_layers: 1
    residual_combiner: add
    residual_connections: false
    residual_dense: false



In [8]:
# Display the encoder's final state (symbolic tensors, not evaluated values).
encoder_output.final_state

LSTMStateTuple(c=<tf.Tensor 'forward_rnn_encoder/rnn/while/Exit_2:0' shape=(?, 32) dtype=float32>, h=<tf.Tensor 'forward_rnn_encoder/rnn/while/Exit_3:0' shape=(?, 32) dtype=float32>)

In [9]:
# One start token per beam; EOS is used both as the start and the end token.
start_tokens = tf.fill([config.beam_width], EOS)

# Greedy embedding helper backed by the output embedding table.
beam_helper = decode_helper.GreedyEmbeddingHelper(
    embedding=output_embeddings,
    start_tokens=start_tokens,
    end_token=EOS)

# Alternative left disabled in the notebook: initial_state = bridge()
# Zero state of the decoder cell, sized for batch_size examples.
initial_state = decoder_fn.cell.zero_state(batch_size, dtype=tf.float32)


In [10]:
# Display the zero state created from the decoder cell for batch_size examples.
initial_state

LSTMStateTuple(c=<tf.Tensor 'BasicLSTMCellZeroState/zeros:0' shape=(7, 32) dtype=float32>, h=<tf.Tensor 'BasicLSTMCellZeroState/zeros_1:0' shape=(7, 32) dtype=float32>)

In [11]:
# Display the encoder's final state again, for comparison with initial_state.
encoder_output.final_state

LSTMStateTuple(c=<tf.Tensor 'forward_rnn_encoder/rnn/while/Exit_2:0' shape=(?, 32) dtype=float32>, h=<tf.Tensor 'forward_rnn_encoder/rnn/while/Exit_3:0' shape=(?, 32) dtype=float32>)

In [12]:

# Call the beam-search decoder with the encoder's final state and the helper;
# the second element of the returned pair is discarded.
decoder_output, _ = decoder_fn(encoder_output.final_state, beam_helper)

In [13]:
# Display the (symbolic) structure returned by the beam-search decoder.
decoder_output

FinalBeamDecoderOutput(predicted_ids=<tf.Tensor 'basic_decoder/ExpandDims_8:0' shape=(?, 1, 3) dtype=int32>, beam_search_output=BeamDecoderOutput(logits=<tf.Tensor 'basic_decoder/ExpandDims:0' shape=(?, 1, 3, 10) dtype=float32>, predicted_ids=<tf.Tensor 'basic_decoder/ExpandDims_1:0' shape=(?, 1, 3) dtype=int32>, log_probs=<tf.Tensor 'basic_decoder/ExpandDims_2:0' shape=(?, 1, 3) dtype=float32>, scores=<tf.Tensor 'basic_decoder/ExpandDims_3:0' shape=(?, 1, 3) dtype=float32>, beam_parent_ids=<tf.Tensor 'basic_decoder/ExpandDims_4:0' shape=(?, 1, 3) dtype=int32>, original_outputs=DecoderOutput(logits=<tf.Tensor 'basic_decoder/ExpandDims_5:0' shape=(?, 1, 3, 10) dtype=float32>, predicted_ids=<tf.Tensor 'basic_decoder/ExpandDims_6:0' shape=(?, 1, 3) dtype=int32>, cell_output=<tf.Tensor 'basic_decoder/ExpandDims_7:0' shape=(?, 1, 3, 32) dtype=float32>)))

![alt text](figure/seq2seq-helper.png)