From 8dd401af7bc97644dac71ddd56c2ea558dd9e4a0 Mon Sep 17 00:00:00 2001
From: Jinming Hu <hjm946637@hotmail.com>
Date: Tue, 27 Dec 2016 17:34:16 +0800
Subject: [PATCH] Update nlp.py

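Convert the input sentence in basic_tokenizer and each stripped line in
initialize_vocabulary to bytes with tf.compat.as_bytes, so that both the
bytes-pattern word splitter and the vocabulary keys operate on bytes.

Fixes str/bytes mismatch bugs like
https://github.com/tensorflow/tensorflow/issues/5118.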
---
 tensorlayer/nlp.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tensorlayer/nlp.py b/tensorlayer/nlp.py
index 5705140e9..953f2b38b 100644
--- a/tensorlayer/nlp.py
+++ b/tensorlayer/nlp.py
@@ -744,6 +744,7 @@ def basic_tokenizer(sentence, _WORD_SPLIT=re.compile(b"([.,!?\"':;)(])")):
   - Code from ``/tensorflow/models/rnn/translation/data_utils.py``
   """
   words = []
+  sentence = tf.compat.as_bytes(sentence)
   for space_separated_fragment in sentence.strip().split():
     words.extend(re.split(_WORD_SPLIT, space_separated_fragment))
   return [w for w in words if w]
@@ -840,7 +841,7 @@ def initialize_vocabulary(vocabulary_path):
     rev_vocab = []
     with gfile.GFile(vocabulary_path, mode="rb") as f:
       rev_vocab.extend(f.readlines())
-    rev_vocab = [line.strip() for line in rev_vocab]
+    rev_vocab = [tf.compat.as_bytes(line.strip()) for line in rev_vocab]
     vocab = dict([(x, y) for (y, x) in enumerate(rev_vocab)])
     return vocab, rev_vocab
   else: