diff --git a/tensorlayer/nlp.py b/tensorlayer/nlp.py index 5705140e9..953f2b38b 100644 --- a/tensorlayer/nlp.py +++ b/tensorlayer/nlp.py @@ -744,6 +744,7 @@ def basic_tokenizer(sentence, _WORD_SPLIT=re.compile(b"([.,!?\"':;)(])")): - Code from ``/tensorflow/models/rnn/translation/data_utils.py`` """ words = [] + sentence = tf.compat.as_bytes(sentence) for space_separated_fragment in sentence.strip().split(): words.extend(re.split(_WORD_SPLIT, space_separated_fragment)) return [w for w in words if w] @@ -840,7 +841,7 @@ def initialize_vocabulary(vocabulary_path): rev_vocab = [] with gfile.GFile(vocabulary_path, mode="rb") as f: rev_vocab.extend(f.readlines()) - rev_vocab = [line.strip() for line in rev_vocab] + rev_vocab = [tf.compat.as_bytes(line.strip()) for line in rev_vocab] vocab = dict([(x, y) for (y, x) in enumerate(rev_vocab)]) return vocab, rev_vocab else: