From 8dd401af7bc97644dac71ddd56c2ea558dd9e4a0 Mon Sep 17 00:00:00 2001 From: Jinming Hu Date: Tue, 27 Dec 2016 17:34:16 +0800 Subject: [PATCH] Update nlp.py Fix some bugs like https://github.com/tensorflow/tensorflow/issues/5118. --- tensorlayer/nlp.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tensorlayer/nlp.py b/tensorlayer/nlp.py index 5705140e9..953f2b38b 100644 --- a/tensorlayer/nlp.py +++ b/tensorlayer/nlp.py @@ -744,6 +744,7 @@ def basic_tokenizer(sentence, _WORD_SPLIT=re.compile(b"([.,!?\"':;)(])")): - Code from ``/tensorflow/models/rnn/translation/data_utils.py`` """ words = [] + sentence = tf.compat.as_bytes(sentence) for space_separated_fragment in sentence.strip().split(): words.extend(re.split(_WORD_SPLIT, space_separated_fragment)) return [w for w in words if w] @@ -840,7 +841,7 @@ def initialize_vocabulary(vocabulary_path): rev_vocab = [] with gfile.GFile(vocabulary_path, mode="rb") as f: rev_vocab.extend(f.readlines()) - rev_vocab = [line.strip() for line in rev_vocab] + rev_vocab = [tf.compat.as_bytes(line.strip()) for line in rev_vocab] vocab = dict([(x, y) for (y, x) in enumerate(rev_vocab)]) return vocab, rev_vocab else: