import numpy
import os
import sys

# Vocabulary sizes for the source and target side (both 50000); the training
# call in this script passes them as n_words_src and n_words respectively.
SRC_VOCAB_SIZE = TGT_VOCAB_SIZE = 50000

# Language codes; they are appended as the final extension of the BPE corpus
# file names used by this script.
SRC, TGT = "en", "de"

# Directories holding the training corpus (plus its .json dictionaries) and
# the validation ("tuning") corpus.
DATA_DIR = "pmt_corpus/en-de/train/model2"
VALID_DATA_DIR = "pmt_corpus/en-de/tuning/baseline"
from nematus.nmt import train

def parallel_paths(directory, stem, langs, suffix=''):
    """Build the per-language file paths of one parallel corpus.

    Args:
        directory: directory containing the files (no trailing slash).
        stem: file name up to and including the dot before the language code.
        langs: iterable of language codes; one path is produced per code,
            in the given order.
        suffix: optional text appended after the language code
            (e.g. '.json' for the dictionary files).

    Returns:
        A list of ``directory + '/' + stem + lang + suffix`` strings.
    """
    return [directory + '/' + stem + lang + suffix for lang in langs]


if __name__ == '__main__':
    # Source side first, target side second: the same order is used for the
    # datasets, the validation sets and the dictionaries.
    langs = (SRC, TGT)
    train_stem = 'europarl.combined.de-en.txt.clean.norm.clean.bpe.'
    valid_stem = 'europarl.txt.mix.tuning.norm.clean.bpe.'

    validerr = train(saveto='model/en-de.model.npz',
                     dim_word=500,
                     dim=1024,
                     n_words=TGT_VOCAB_SIZE,
                     n_words_src=SRC_VOCAB_SIZE,
                     decay_c=0.,
                     clip_c=1.,
                     lrate=0.0001,
                     optimizer='adadelta',
                     maxlen=50,
                     batch_size=20,
                     valid_batch_size=80,
                     datasets=parallel_paths(DATA_DIR, train_stem, langs),
                     valid_datasets=parallel_paths(VALID_DATA_DIR, valid_stem, langs),
                     # Dictionaries sit next to the training files, with
                     # '.json' appended to the corpus file name.
                     dictionaries=parallel_paths(DATA_DIR, train_stem, langs, '.json'),
                     validFreq=10000,
                     dispFreq=1000,
                     saveFreq=30000,
                     sampleFreq=10000,
                     # NOTE(review): use_dropout is False while the dropout_*
                     # rates below are non-zero; presumably the rates are
                     # ignored in that case -- confirm against
                     # nematus.nmt.train and enable use_dropout if dropout
                     # was intended.
                     use_dropout=False,
                     dropout_embedding=0.2,  # dropout for input embeddings (0: no dropout)
                     dropout_hidden=0.2,  # dropout for hidden layers (0: no dropout)
                     dropout_source=0.1,  # dropout source words (0: no dropout)
                     dropout_target=0.1,  # dropout target words (0: no dropout)
                     overwrite=False,
                     objective='MRT')
    # Parenthesised form prints the same text under Python 2 and Python 3.
    print(validerr)