In [1]:
%reset
from collections import defaultdict
import dynet as dy
import random
import time

Once deleted, variables cannot be recovered. Proceed (y/[n])? y


In [2]:
# All four corpus files live in the same directory; only the split name
# (train/test) and the language suffix (.ja source side, .en target side) differ.
PARALLEL_DATA_DIR = "../nn4nlp2017-code-master/data/parallel/"

train_src_file = PARALLEL_DATA_DIR + "train.ja"
train_trg_file = PARALLEL_DATA_DIR + "train.en"
test_src_file = PARALLEL_DATA_DIR + "test.ja"
test_trg_file = PARALLEL_DATA_DIR + "test.en"

In [3]:
# Reserved symbols: sentence start, sentence end, and the out-of-vocabulary marker.
sos_sym = '<s>'
eos_sym = '</s>'
unk_sym = "<unk>"

# Auto-numbering vocabularies: looking up an unseen word assigns it the next
# free integer ID (the current size of the mapping).
w2i_src = defaultdict(lambda: len(w2i_src))
w2i_trg = defaultdict(lambda: len(w2i_trg))

# Register the reserved symbols first so they receive IDs 0, 1 and 2 in each
# vocabulary, in the order <s>, </s>, <unk>.
_reserved = (sos_sym, eos_sym, unk_sym)
sos_src, eos_src, unk_src = [w2i_src[sym] for sym in _reserved]
sos_trg, eos_trg, unk_trg = [w2i_trg[sym] for sym in _reserved]

### Read the data
def read_data(file_src, file_trg):
    """Yield ``(source_ids, target_ids)`` pairs from two line-aligned text files.

    Each line is whitespace-tokenised and every token is mapped to an integer
    through the module-level ``w2i_src`` / ``w2i_trg`` vocabularies. While those
    are still the auto-numbering defaultdicts, unseen words are assigned new IDs
    as a side effect of the lookup.

    Args:
        file_src: path to the source-language file (one sentence per line).
        file_trg: path to the target-language file (one sentence per line).

    Yields:
        A tuple ``(s_src, s_trg)`` of lists of word IDs. ``s_src`` ends with the
        ``</s>`` ID; ``s_trg`` starts with the ``<s>`` ID and ends with the
        ``</s>`` ID.

    Note:
        ``zip`` stops at the shorter file, so extra lines in the longer file are
        ignored without warning.
    """
    # Explicit UTF-8: the source side is Japanese text, so relying on the
    # platform's default encoding can fail or mis-decode on some systems.
    with open(file_src, "r", encoding="utf-8") as f_src, \
            open(file_trg, "r", encoding="utf-8") as f_trg:
        for l_src, l_trg in zip(f_src, f_trg):
            s_src = [w2i_src[w] for w in l_src.strip().split() + [eos_sym]]
            # The target must start with <s>: the decoder consumes sent_trg[0]
            # purely as its first input, so without the start symbol the first
            # real target word would never be predicted.
            s_trg = [w2i_trg[w] for w in [sos_sym] + l_trg.strip().split() + [eos_sym]]
            yield s_src, s_trg
            
# Materialise the whole training set in memory; reading it also grows both
# vocabularies, because unseen words get fresh IDs on lookup.
train = [pair for pair in read_data(train_src_file, train_trg_file)]

# Freeze the vocabularies: from here on an unknown word maps to the <unk> ID
# instead of being assigned a new one.
w2i_src = defaultdict(lambda: unk_src, w2i_src)
w2i_trg = defaultdict(lambda: unk_trg, w2i_trg)

# Vocabulary sizes, used to dimension the embedding and softmax parameters.
nWords_src, nWords_trg = len(w2i_src), len(w2i_trg)

In [4]:
# DyNet parameter collection plus the Adam optimiser that updates it.
# NOTE(review): `trainier` looks like a typo for `trainer`; the spelling is kept
# because the training loop refers to this exact name.
model = dy.Model()
trainier = dy.AdamTrainer(model)

# Model hyper-parameters: word-embedding size and GRU hidden-state size.
EMB_SIZE = 64
HIDDEN_SIZE = 128

# Maximum sentence length. Not referenced by the cells visible here;
# presumably a cap on generated output length — TODO confirm.
MAX_SENT_LEN = 50

# Embedding tables: one EMB_SIZE-dimensional vector per source / target word ID.
LOOK_UP_SRC = model.add_lookup_parameters((nWords_src, EMB_SIZE))
LOOK_UP_TRG = model.add_lookup_parameters((nWords_trg, EMB_SIZE))

# Single-layer GRUs over word embeddings: calc_loss uses the SRC builder to
# encode the source sentence and the TRG builder to decode the target sentence.
GRU_BUILDER_SRC = dy.GRUBuilder(1, EMB_SIZE, HIDDEN_SIZE, model) 
GRU_BUILDER_TRG = dy.GRUBuilder(1, EMB_SIZE, HIDDEN_SIZE, model)

# Output layer: affine map from the decoder hidden state to one score per
# target-vocabulary word (fed to a softmax loss in calc_loss).
W_sm_p = model.add_parameters((nWords_trg, HIDDEN_SIZE))
# Note: `(nWords_trg)` is a parenthesised int, not a 1-tuple.
b_sm_p = model.add_parameters((nWords_trg))

In [5]:
def calc_loss(sent):
    """Build the encoder-decoder loss expression for one sentence pair.

    The source sentence is run through the source GRU; its final output becomes
    the initial hidden state of the target GRU. The target GRU is then fed each
    target word in turn and scored on predicting the word that follows it.

    Args:
        sent: a pair ``(sent_src, sent_trg)`` of lists of word IDs.

    Returns:
        A DyNet expression holding the summed negative log-likelihood of
        ``sent_trg[1:]`` given the source sentence and the preceding target
        words. If the target has fewer than two tokens there is nothing to
        predict and a constant zero expression is returned.

    Note:
        This function starts a new computation graph, so expressions created
        before the call must not be used afterwards.
    """
    # Start a fresh computation graph for every example. Without this, the
    # nodes of every previously processed sentence stay alive in one
    # ever-growing graph, which steadily increases memory use and run time.
    dy.renew_cg()

    sent_src = sent[0]
    sent_trg = sent[1]

    # parameter ---> expression in the current graph
    W_sm_exp = dy.parameter(W_sm_p)
    b_sm_exp = dy.parameter(b_sm_p)

    # Encode the source sentence into a single output vector
    src_state = GRU_BUILDER_SRC.initial_state()
    for w_id_src in sent_src:
        src_state = src_state.add_input(LOOK_UP_SRC[w_id_src])
    src_output = src_state.output()

    # The decoder starts from the encoder's final output
    trg_state = GRU_BUILDER_TRG.initial_state().set_h([src_output])

    # Teacher forcing: feed the gold previous word, score the gold next word
    loss_exps = []
    for prev_word, next_word in zip(sent_trg, sent_trg[1:]):
        trg_state = trg_state.add_input(LOOK_UP_TRG[prev_word])
        scores = dy.affine_transform([b_sm_exp, W_sm_exp, trg_state.output()])
        loss_exps.append(dy.pickneglogsoftmax(scores, next_word))

    # dy.esum cannot sum an empty list; an empty sum is a loss of zero
    if not loss_exps:
        return dy.scalarInput(0.0)
    return dy.esum(loss_exps)

In [None]:
# Train for two passes over the data, one sentence pair per update.
for ITER in range(2):
    random.shuffle(train)
    train_loss = 0
    train_words = 0
    start = time.time()
    for s_id, sent in enumerate(train):
        loss_exp = calc_loss(sent)
        train_loss += loss_exp.value()
        # `sent` is a (source, target) pair, so len(sent) is always 2. The loss
        # is a sum over predicted target tokens: calc_loss predicts every
        # target token except the first, so that is the count to normalise by.
        train_words += len(sent[1]) - 1
        loss_exp.backward()
        trainier.update()
        if (s_id+1) % 100 == 0:
            print("finished {} sentences".format(s_id + 1))
            # max(..., 1) avoids a division by zero when nothing was predicted yet
            print("train_loss/train_words = {}".format(train_loss/max(train_words, 1)))
    iter_time = time.time() - start
    print("ITER = {}, train_loss/train_words = {}, time = {}".format(ITER,
                                                                     train_loss/max(train_words, 1),
                                                                     iter_time))