Pass max_vocab as vocab_size to get_sentencepiece in pretrain_lm

n-waves · Nov 19, 2018 · 5ffbe8b · 5ffbe8b
1 parent 69ed7b4
commit 5ffbe8b
Showing 1 changed file with 1 addition and 1 deletion.
diff --git a/ulmfit/pretrain_lm.py b/ulmfit/pretrain_lm.py
@@ -70,7 +70,7 @@ def pretrain_lm(dir_path, lang='en', cuda_id=0, qrnn=True, subword=False, max_vo
         read_file(trn_path, 'train')
         read_file(val_path, 'valid')
 
-        sp = get_sentencepiece(dir_path, trn_path, name)
+        sp = get_sentencepiece(dir_path, trn_path, name, vocab_size=max_vocab)
 
         data_lm = TextLMDataBunch.from_csv(dir_path, **sp)
         itos = data_lm.train_ds.vocab.itos