"""Launch a Nematus training run for the en -> de translation model.

The script calls ``nematus.nmt.train`` with the 'MRT' objective on the
Europarl ``*.bpe.*`` corpus files under ``DATA_DIR``, validates on the
tuning files under ``VALID_DATA_DIR``, saves the model to
``model/en-de.model.npz`` and prints the value returned by ``train``.
"""
import os
import sys

import numpy

from nematus.nmt import train

# Vocabulary sizes passed to train() as n_words_src / n_words.
SRC_VOCAB_SIZE = 50000
TGT_VOCAB_SIZE = 50000

# Language codes; they are also used as the corpus file-name suffixes.
SRC = "en"
TGT = "de"

# Directories (relative to the working directory) holding the training
# and the tuning/validation corpora.
DATA_DIR = "pmt_corpus/en-de/train/model2"
VALID_DATA_DIR = "pmt_corpus/en-de/tuning/baseline"


def main():
    """Run training with the fixed configuration and print the result."""
    # Shared file-name prefixes; the language code (and '.json' for the
    # dictionaries) is appended below. Built by plain concatenation so the
    # resulting paths are exactly the ones the script always used.
    train_prefix = DATA_DIR + '/europarl.combined.de-en.txt.clean.norm.clean.bpe.'
    valid_prefix = VALID_DATA_DIR + '/europarl.txt.mix.tuning.norm.clean.bpe.'

    validerr = train(
        saveto='model/en-de.model.npz',
        dim_word=500,
        dim=1024,
        n_words=TGT_VOCAB_SIZE,
        n_words_src=SRC_VOCAB_SIZE,
        decay_c=0.,
        clip_c=1.,
        lrate=0.0001,
        optimizer='adadelta',
        maxlen=50,
        batch_size=20,
        valid_batch_size=80,
        # Each list is ordered [source side, target side].
        datasets=[train_prefix + SRC, train_prefix + TGT],
        valid_datasets=[valid_prefix + SRC, valid_prefix + TGT],
        dictionaries=[train_prefix + SRC + '.json',
                      train_prefix + TGT + '.json'],
        validFreq=10000,
        dispFreq=1000,
        saveFreq=30000,
        sampleFreq=10000,
        # NOTE(review): use_dropout is False while the rates below are
        # non-zero; presumably the rates are ignored in that case --
        # confirm against nematus.nmt.train.
        use_dropout=False,
        dropout_embedding=0.2,  # dropout for input embeddings (0: no dropout)
        dropout_hidden=0.2,  # dropout for hidden layers (0: no dropout)
        dropout_source=0.1,  # dropout source words (0: no dropout)
        dropout_target=0.1,  # dropout target words (0: no dropout)
        overwrite=False,
        objective='MRT',
    )
    # Parenthesised single-argument form prints identically under the
    # Python 2 print statement and the Python 3 print function.
    print(validerr)


if __name__ == '__main__':
    main()