# TensorFlow

### author qhduan@memect.co

In [1]:
import sys
import math
import pickle
import numpy as np
from tqdm import tqdm
import tensorflow as tf
from tensorflow.python.layers.core import Dense

In [2]:
print(tf.__version__)

1.2.0-rc2


In [3]:
# https://github.com/JayParks/tf-seq2seq/blob/master/seq2seq_model.py

In [4]:
# Load the pickled corpus. The file is opened in a `with` block so the handle
# is closed as soon as loading finishes.
# NOTE: pickle.load can execute arbitrary code while unpickling; only load a
# data file that comes from a trusted source.
with open('诗句.dat', 'rb') as data_file:
    sentences = pickle.load(data_file)

In [5]:
# Split every entry of sentences['X'] into its first half (the "question", q)
# and its second half (the "answer", a), and record the longest half seen.
max_len = 0
data = []
sequence_length = []
for s in sentences['X']:
    half = len(s) // 2
    q = s[:half]
    a = s[half:]
    # Both halves must be considered: for an odd-length entry the answer is
    # one element longer than the question.
    max_len = max(max_len, len(q), len(a))
    sequence_length.append(len(a))
    data.append((q, a))

In [6]:
print('max_len is {}, size of data is {}'.format(max_len, len(data)))

max_len is 7, size of data is 15158


In [7]:
# Fix the TensorFlow random seed before any graph ops are created.
tf.set_random_seed(0)

In [8]:
# Marker strings for sequence start and end; they are placed at the front of
# the answer vocabulary when it is built.
start_tag = '<start>'
end_tag = '<end>'

In [9]:
# Build two lookup tables per side (token -> id and id -> token).
# Question tokens are the sorted distinct characters of all question halves;
# answer tokens are the two marker tags followed by the sorted distinct
# characters of all answer halves.
question_vocab = sorted(set(''.join(pair[0] for pair in data)))
answer_vocab = [start_tag, end_tag] + sorted(set(''.join(pair[1] for pair in data)))

question_2word = dict(enumerate(question_vocab))
question_2index = {word: index for index, word in question_2word.items()}

answer_2word = dict(enumerate(answer_vocab))
answer_2index = {word: index for index, word in answer_2word.items()}

question_size = len(question_2index)
answer_size = len(answer_2index)

In [10]:
# Integer ids of the two marker tokens in the answer vocabulary.
start_token = answer_2index[start_tag]
end_token = answer_2index[end_tag]

In [11]:
print(start_tag, start_token, end_tag, end_token)

<start> 0 <end> 1


In [12]:
print('question_size is {}'.format(question_size))
print('answer_size is {}'.format(answer_size))

question_size is 3798
answer_size is 3879


In [13]:
# Batch size; the placeholders below bake it into their static shapes, so
# every fed batch must contain exactly this many rows.
batch_size = 32

In [14]:
# Placeholder for the question token ids, static shape (batch_size, max_len).
encoder_inputs = tf.placeholder(
    dtype=tf.int32,
    shape=(batch_size, max_len),
    name='encoder_inputs'
)
print(encoder_inputs.shape)

(32, 7)


In [15]:
# Placeholder for one length per question; it is passed as sequence_length
# to the encoder RNN.
encoder_inputs_length = tf.placeholder(
    dtype=tf.int32,
    shape=(batch_size,),
    name='encoder_inputs_length'
)
print(encoder_inputs_length.shape)

(32,)


In [16]:
# Placeholder for the answer token ids without start/end markers; the marker
# columns are concatenated on in the cells that follow.
decoder_inputs = tf.placeholder(
    dtype=tf.int32,
    shape=(batch_size, max_len),
    name='decoder_inputs'
)
print(decoder_inputs.shape)

(32, 7)


In [17]:
# Placeholder for one length per answer; 1 is added to it later before it is
# used as the decoder sequence length.
decoder_inputs_length = tf.placeholder(
    dtype=tf.int32,
    shape=(batch_size,),
    name='decoder_inputs_length'
)
print(decoder_inputs_length.shape)

(32,)


In [18]:
# (batch_size, 1) int32 column filled with the start token id.
decoder_start_token = tf.ones(
    shape=(batch_size, 1),
    dtype=tf.int32
) * start_token

In [19]:
# (batch_size, 1) int32 column filled with the end token id.
decoder_end_token = tf.ones(
    shape=(batch_size, 1),
    dtype=tf.int32
) * end_token

In [20]:
# Training-time decoder input: the start-token column followed by the answer
# ids, giving max_len + 1 columns.
decoder_inputs_train = tf.concat([
    decoder_start_token, decoder_inputs
], axis=1)
print(decoder_inputs_train.shape)

(32, 8)


In [21]:
# Training targets: the answer ids followed by the end-token column, i.e. the
# decoder input shifted left by one position.
decoder_targets_train = tf.concat([
    decoder_inputs, decoder_end_token
], axis=1)
print(decoder_targets_train.shape)

(32, 8)


In [22]:
# Placeholder for per-position loss weights, one per target column
# (max_len + 1); it is passed as `weights` to sequence_loss.
decoder_inputs_weights = tf.placeholder(
    dtype=tf.float32,
    shape=(batch_size, max_len + 1),
    name='decoder_inputs_weights'
)
print(decoder_inputs_weights.shape)

(32, 8)


In [23]:
# Embedding width, and a uniform initializer over [-sqrt(3), sqrt(3)] that is
# shared by both embedding tables.
embedding_size = 128
sqrt3 = math.sqrt(3)
initializer = tf.random_uniform_initializer(
    -sqrt3,
    sqrt3,
    dtype=tf.float32
)

In [24]:
# Embedding table for question tokens: one row of embedding_size floats per
# entry of the question vocabulary.
encoder_embeddings = tf.get_variable(
    name='encoder_embeddings',
    shape=(question_size, embedding_size),
    initializer=initializer,
    dtype=tf.float32
)

In [25]:
# Look up the embedding row for every question token id.
encoder_inputs_embedded = tf.nn.embedding_lookup(
    params=encoder_embeddings,
    ids=encoder_inputs
)

In [26]:
# Encoder recurrent cell: a single LSTM cell with 256 units.
encoder_cell = tf.contrib.rnn.LSTMCell(256)

In [27]:
# Run the encoder cell over the embedded questions (batch-major input, with
# per-example lengths). encoder_last_state is later used as the decoder's
# initial state.
encoder_outputs, encoder_last_state = tf.nn.dynamic_rnn(
    cell=encoder_cell,
    inputs=encoder_inputs_embedded,
    sequence_length=encoder_inputs_length,
    dtype=tf.float32,
    time_major=False
)

In [28]:
# Embedding table for answer tokens: one row of embedding_size floats per
# entry of the answer vocabulary.
# NOTE(review): the graph variable name 'ecoder_embeddings' looks like a typo
# for 'decoder_embeddings'. Renaming it would change the key under which this
# variable is stored in checkpoints, so confirm nothing restores by the old
# name before fixing it.
decoder_embeddings = tf.get_variable(
    name='ecoder_embeddings',
    shape=(answer_size, embedding_size),
    initializer=initializer,
    dtype=tf.float32
)

In [29]:
# Dense layer with 512 outputs that is applied to the decoder token
# embeddings before they reach the decoder cell.
input_layer = Dense(
    512,
    dtype=tf.float32,
    name='input_projection'
)

In [30]:
# Embed the start-prefixed decoder inputs and project them through
# input_layer; the result has one 512-wide vector per input position.
decoder_inputs_embedded = input_layer(tf.nn.embedding_lookup(
    params=decoder_embeddings,
    ids=decoder_inputs_train
))
print(decoder_inputs_embedded.shape)

(32, 8, 512)


In [31]:
# Training sequence length per example: the fed answer length plus one for
# the extra marker column added to the decoder inputs/targets.
decoder_inputs_length_train = decoder_inputs_length + 1

In [32]:
# Helper built from the embedded ground-truth decoder inputs (batch-major)
# and their per-example lengths; it is handed to the training decoder.
training_helper = tf.contrib.seq2seq.TrainingHelper(
    inputs=decoder_inputs_embedded,
    sequence_length=decoder_inputs_length_train,
    time_major=False,
    name='training_helper'
)

In [33]:
# Decoder recurrent cell: a single LSTM cell with 256 units. The same cell
# object is passed to both the training and the inference decoder.
decoder_cell = tf.contrib.rnn.LSTMCell(256)

In [34]:
# Dense layer mapping decoder cell outputs to answer_size values (one logit
# per answer-vocabulary entry); shared by training and inference decoders.
output_layer = Dense(
    answer_size,
    name='output_projection'
)

In [35]:
# The decoders start from the encoder's final state, unchanged.
# (Earlier experiment, kept for reference:)
# initial_state = [state for state in encoder_last_state]
initial_state = encoder_last_state

In [36]:
# initial_state[-1] = decoder_cell.zero_state(
#     batch_size=batch_size,
#     dtype=tf.float32
# )

In [37]:
# initial_state = tuple(initial_state)

In [38]:
# Training decoder: decoder_cell driven by training_helper, starting from
# initial_state, with output_layer applied to the cell outputs.
training_decoder = tf.contrib.seq2seq.BasicDecoder(
    cell=decoder_cell,
    helper=training_helper,
    initial_state=initial_state,
    output_layer=output_layer
)

In [39]:
training_decoder.output_size

BasicDecoderOutput(rnn_output=TensorShape([Dimension(3879)]), sample_id=TensorShape([]))

In [40]:
# Largest training sequence length in the batch; used as maximum_iterations
# for training decoding and as maxlen for the sequence mask.
max_decoder_length = tf.reduce_max(
    decoder_inputs_length_train
)

In [41]:
# Unroll the training decoder. Outputs are batch-major and decoding is capped
# at max_decoder_length steps.
# NOTE(review): the third result belongs to the training decoder, yet it is
# bound to the name decoder_outputs_length_decode, which the inference
# decoding cell later rebinds.
(
    decoder_outputs_train,
    decoder_last_state_train,
    decoder_outputs_length_decode
) = tf.contrib.seq2seq.dynamic_decode(
    decoder=training_decoder,
    output_time_major=False,
    impute_finished=True,
    maximum_iterations=max_decoder_length
)

In [43]:
print(decoder_outputs_length_decode.shape)

(32,)


In [44]:
print(decoder_outputs_train.rnn_output)

Tensor("decoder/transpose:0", shape=(32, ?, 3879), dtype=float32)


In [45]:
# Training logits: an identity copy of the decoder's rnn_output (which
# already has output_layer applied), one value per answer-vocabulary entry.
decoder_logits_train = tf.identity(
    decoder_outputs_train.rnn_output
)
print(decoder_logits_train.shape)

(32, ?, 3879)


In [46]:
# Predicted token id at each training step: argmax over the vocabulary axis.
decoder_pred_train = tf.argmax(
    decoder_logits_train, axis=-1,
    name='decoder_pred_train'
)
print(decoder_pred_train.shape)

(32, ?)


In [47]:
# Float mask derived from the training sequence lengths.
# NOTE: the active loss cell does not use this tensor; it weights positions
# with the fed decoder_inputs_weights instead. Only the commented-out loss
# variant further down refers to `masks`.
masks = tf.sequence_mask(
    lengths=decoder_inputs_length_train,
    maxlen=max_decoder_length,
    dtype=tf.float32,
    name='masks'
)
print(masks.shape)

(32, ?)


In [48]:
print(decoder_logits_train.shape)

(32, ?, 3879)


In [49]:
print(decoder_targets_train.shape)

(32, 8)


In [50]:
print(masks.shape)

(32, ?)


In [51]:
# Training loss over the decoder logits, weighted per position by the fed
# decoder_inputs_weights and averaged across time steps and across the batch.
# NOTE: the logits have a dynamic time dimension while targets and weights
# have a fixed max_len + 1 columns. They line up because batch_flow always
# reports max_len as the answer length, so decoding runs max_len + 1 steps.
loss = tf.contrib.seq2seq.sequence_loss(
    logits=decoder_logits_train,
    targets=decoder_targets_train,
    weights=decoder_inputs_weights,
    average_across_timesteps=True,
    average_across_batch=True
)

In [52]:
# Snapshot of the trainable variables that exist at this point in the graph.
trainable_params = tf.trainable_variables()

In [53]:
# Adam optimizer with a fixed learning rate of 0.001.
opt = tf.train.AdamOptimizer(
    learning_rate=0.001
)

In [54]:
# Gradients of the loss with respect to each entry of trainable_params.
gradients = tf.gradients(loss, trainable_params)

In [55]:
# Clip the gradient list by global norm with a threshold of 1.0; the second
# return value is discarded.
clip_gradients, _ = tf.clip_by_global_norm(
    gradients, 1.0
)

In [56]:
# Non-trainable step counter; it is passed to apply_gradients and to the
# saver as the checkpoint suffix.
global_step = tf.Variable(0, trainable=False, name='global_step')

In [57]:
# Training op. It applies the norm-clipped gradients (clip_gradients), not the
# raw ones, so that the clipping computed by tf.clip_by_global_norm actually
# takes effect. global_step is passed along so the optimizer advances it.
updates = opt.apply_gradients(
    zip(clip_gradients, trainable_params),
    global_step=global_step
)

In [75]:
def embed_and_input_proj(inputs):
    """Embed decoder token ids and project them through ``input_layer``.

    Looks ``inputs`` up in ``decoder_embeddings`` and returns the result of
    applying ``input_layer`` to those embeddings.
    """
    embedded = tf.nn.embedding_lookup(decoder_embeddings, inputs)
    return input_layer(embedded)

# One start token id per batch row, used to kick off greedy decoding.
start_tokens = tf.ones([batch_size,], tf.int32) * start_token

# Greedy helper: it receives the start tokens, the end token id, and
# embed_and_input_proj as its embedding function.
decoding_helper = tf.contrib.seq2seq.GreedyEmbeddingHelper(
    start_tokens=start_tokens,
    end_token=end_token,
    embedding=embed_and_input_proj
)

# Inference decoder: it reuses the same cell, initial state and output layer
# objects as the training decoder.
inference_decoder = tf.contrib.seq2seq.BasicDecoder(
    cell=decoder_cell,
    helper=decoding_helper,
    initial_state=initial_state,
    output_layer=output_layer
)

# Decode for at most max_len steps, batch-major.
# NOTE: decoder_outputs_length_decode is rebound here; the training decoding
# cell stored its own third result under the same name.
(
    decoder_outputs_decode,
    decoder_last_state_decode,
    decoder_outputs_length_decode
) = tf.contrib.seq2seq.dynamic_decode(
    decoder=inference_decoder,
    output_time_major=False,
    # impute_finished=True,	# error occurs
    maximum_iterations=max_len
)

In [76]:
# Greedy-decoded token ids with a trailing axis of size 1 appended;
# get_result reads element [0] of each innermost item.
decoder_pred_decode = tf.expand_dims(
    decoder_outputs_decode.sample_id,
    -1
)

In [77]:
def batch_flow(
    data,
    question_2index, answer_2index, max_len, batch_size=4
):
    """Yield fixed-size batches built from (question, answer) pairs.

    Args:
        data: iterable of (question, answer) pairs; every element of a
            question/answer is looked up in the matching vocabulary.
        question_2index: mapping from question token to integer id.
        answer_2index: mapping from answer token to integer id.
        max_len: width every question/answer row is padded to.
        batch_size: number of examples per yielded batch (must be >= 1).

    Yields:
        Tuples ``(X, XL, Y, YL, W)`` of numpy arrays:
        X  -- (batch_size, max_len) question ids, padded with 1.
        XL -- (batch_size,) question lengths.
        Y  -- (batch_size, max_len) answer ids, padded with 1.
        YL -- (batch_size,) always ``max_len``.
        W  -- (batch_size, max_len + 1) weights: 1.0 on every answer position
              and on the position right after the answer, 0 elsewhere.

    Every yielded batch has exactly ``batch_size`` rows. A batch is emitted
    as soon as it is full, so the last complete batch of ``data`` is included;
    a trailing group of fewer than ``batch_size`` examples is dropped.

    Raises:
        ValueError: if ``batch_size`` is smaller than 1.
        KeyError: if a token is missing from its vocabulary.
        IndexError: if a question or answer is longer than ``max_len``.
    """
    if batch_size < 1:
        raise ValueError('batch_size must be at least 1')

    X, XL, Y, YL, W = [], [], [], [], []
    for q, a in data:
        x = [1] * max_len
        for ind, qq in enumerate(q):
            x[ind] = question_2index[qq]

        y = [1] * max_len
        w = [0] * (max_len + 1)
        for ind, aa in enumerate(a):
            y[ind] = answer_2index[aa]
            w[ind] = 1.0
        # Also weight the slot right after the answer. Indexing by len(a)
        # (instead of the loop variable) stays correct for an empty answer.
        w[len(a)] = 1.0

        X.append(x)
        Y.append(y)
        XL.append(len(q))
        # The reported answer length is always the padded width.
        YL.append(max_len)
        W.append(w)

        if len(X) == batch_size:
            yield (
                np.array(X),
                np.array(XL),
                np.array(Y),
                np.array(YL),
                np.array(W)
            )
            X, XL, Y, YL, W = [], [], [], [], []

In [78]:
# Preview the first batch of four examples: print the five array shapes on
# one line, then each array preceded by a dashed separator.
for batch in batch_flow(data, question_2index, answer_2index, max_len, 4):
    print(*(part.shape for part in batch))
    for part in batch:
        print('-' * 10)
        print(part)
    break

(4, 7) (4,) (4, 7) (4,) (4, 8)
----------
[[ 678 3494   49   21  157    1    1]
 [2056 3580  728 2830 2751    1    1]
 [3794 2867 2190  544 3588    1    1]
 [1670  665  153 1067 2745    1    1]]
----------
[5 5 5 5]
----------
[[ 483 2985 2392 3097 3538    1    1]
 [1198  582  669 2795 1645    1    1]
 [ 243 1190 3332 2056 2347    1    1]
 [ 509 1101 3182 3733   86    1    1]]
----------
[7 7 7 7]
----------
[[ 1.  1.  1.  1.  1.  1.  0.  0.]
 [ 1.  1.  1.  1.  1.  1.  0.  0.]
 [ 1.  1.  1.  1.  1.  1.  0.  0.]
 [ 1.  1.  1.  1.  1.  1.  0.  0.]]


In [79]:
# Op that initializes all global variables; it is run at the start of the
# training session.
init = tf.global_variables_initializer()

In [80]:
# Number of passes of the outer training loop.
n_epoch = 50

In [81]:
# Number of batches processed per epoch; the training loop breaks once this
# many costs have been collected.
steps = 100

In [88]:
# loss = tf.contrib.seq2seq.sequence_loss(
#     logits=decoder_logits_train,
#     targets=decoder_targets_train,
#     weights=masks,
#     average_across_timesteps=True,
#     average_across_batch=True
# )

In [89]:
def get_result(outputs):
    """Print the decoded tokens for every sequence in ``outputs``.

    Each sequence in ``outputs`` is iterated item by item; the first element
    of every item is looked up in ``answer_2word``. The nested list of tokens
    is printed and nothing is returned.
    """
    decoded = []
    for sequence in outputs:
        tokens = []
        for step in sequence:
            tokens.append(answer_2word[step[0]])
        decoded.append(tokens)
    print(decoded)

In [90]:
# Train the model, save a checkpoint, then greedy-decode one batch as a
# sanity check, all inside a single session.
with tf.Session() as sess:
    sess.run(init)
    for epoch in range(n_epoch):
        print('epoch {}'.format(
            epoch
        ))
        costs = []

        # batch_flow restarts from the beginning of `data` on every epoch and
        # the loop stops after `steps` batches, so each epoch iterates over
        # the same leading batches.
        for x, xl, y, yl, w in tqdm(batch_flow(
            data, question_2index, answer_2index, max_len, batch_size
        ), total=steps, file=sys.stdout):
            input_feed = {
                encoder_inputs: x,
                encoder_inputs_length: xl,
                decoder_inputs: y,
                decoder_inputs_length: yl,
                decoder_inputs_weights: w
            }
            output_feed = [updates, loss]
            _, c = sess.run(output_feed, input_feed)
            costs.append(c)
            if len(costs) >= steps:
                break
        print('')
        print('cost: {:.4f}'.format(
            np.mean(costs)
        ))

    # Save all variables; global_step is appended to the checkpoint prefix.
    saver = tf.train.Saver(None)
    save_path = saver.save(
        sess,
        save_path='model/',
        global_step=global_step
    )
    print('model saved at %s' % save_path)

    # Decode the first batch only. Inference needs just the encoder feeds.
    for x, xl, y, yl, w in batch_flow(
            data, question_2index, answer_2index, max_len, batch_size
    ):
        input_feed = {
            encoder_inputs: x,
            encoder_inputs_length: xl
        }
        outputs = sess.run([decoder_pred_decode], input_feed)
        # get_result prints the decoded batch itself and returns None, so it
        # is called directly rather than wrapped in print().
        get_result(outputs[0])
        break

epoch 0

  0%|          | 0/100 [00:00<?, ?it/s][A
  1%|          | 1/100 [00:00<00:16,  6.17it/s][A
  2%|▏         | 2/100 [00:00<00:14,  6.82it/s][A
  3%|▎         | 3/100 [00:00<00:13,  7.37it/s][A
  4%|▍         | 4/100 [00:00<00:12,  7.76it/s][A
  5%|▌         | 5/100 [00:00<00:11,  8.06it/s][A
  6%|▌         | 6/100 [00:00<00:11,  8.40it/s][A
  7%|▋         | 7/100 [00:00<00:11,  8.32it/s][A
  8%|▊         | 8/100 [00:00<00:10,  8.57it/s][A
  9%|▉         | 9/100 [00:01<00:10,  8.68it/s][A
 10%|█         | 10/100 [00:01<00:10,  8.71it/s][A
 11%|█         | 11/100 [00:01<00:10,  8.55it/s][A
 12%|█▏        | 12/100 [00:01<00:10,  8.57it/s][A
 13%|█▎        | 13/100 [00:01<00:09,  8.76it/s][A
 14%|█▍        | 14/100 [00:01<00:09,  8.89it/s][A
 15%|█▌        | 15/100 [00:01<00:09,  8.96it/s][A
 16%|█▌        | 16/100 [00:01<00:09,  9.07it/s][A
 17%|█▋        | 17/100 [00:01<00:09,  9.11it/s][A
 18%|█▊        | 18/100 [00:02<00:08,  9.12it/s][A
 19%|█▉        | 19/1