Skip to content

Commit

Permalink
Raises AssertionError on Incomplete Vocabulary
Browse files Browse the repository at this point in the history
Fixes issue #621.
Added a new method, CheckVocab, to check whether a word is present in the vocabulary.
  • Loading branch information
pranay360 committed Dec 8, 2016
1 parent 821d3da commit 27a178a
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 4 deletions.
6 changes: 6 additions & 0 deletions textsum/data.py
Expand Up @@ -56,6 +56,12 @@ def __init__(self, vocab_file, max_size):
if self._count > max_size:
raise ValueError('Too many words: >%d.' % max_size)

def CheckVocab(self, word):
  """Return the id stored for `word`, or None if it is not in the vocabulary.

  Args:
    word: the token to look up in `self._word_to_id`.

  Returns:
    The id mapped to `word`, or None when `word` has no entry. A stored id
    may be 0, so test the result with `is None` rather than truthiness.
  """
  # dict.get does the membership test and the fetch in a single lookup and
  # yields None for a missing key, matching the original two-step check.
  return self._word_to_id.get(word)


def WordToId(self, word):
if word not in self._word_to_id:
return self._word_to_id[UNKNOWN_TOKEN]
Expand Down
8 changes: 4 additions & 4 deletions textsum/seq2seq_attention.py
Expand Up @@ -160,10 +160,10 @@ def _Eval(model, data_batcher, vocab=None):
def main(unused_argv):
vocab = data.Vocab(FLAGS.vocab_path, 1000000)
# Check for presence of required special tokens.
assert vocab.WordToId(data.PAD_TOKEN) > 0
assert vocab.WordToId(data.UNKNOWN_TOKEN) >= 0
assert vocab.WordToId(data.SENTENCE_START) > 0
assert vocab.WordToId(data.SENTENCE_END) > 0
assert vocab.CheckVocab(data.PAD_TOKEN) > 0
assert vocab.CheckVocab(data.UNKNOWN_TOKEN) >= 0
assert vocab.CheckVocab(data.SENTENCE_START) > 0
assert vocab.CheckVocab(data.SENTENCE_END) > 0

batch_size = 4
if FLAGS.mode == 'decode':
Expand Down

0 comments on commit 27a178a

Please sign in to comment.