In [1]:
!pip install dynet
!git clone https://github.com/neubig/nn4nlp-code.git

Collecting dynet
[33m  Cache entry deserialization failed, entry ignored[0m
[?25l  Downloading https://files.pythonhosted.org/packages/1b/8c/767cc83241b2abe567d705f87589d8ad44cca321f7c78720269c45e0469f/dyNET-2.0.3-cp36-cp36m-manylinux1_x86_64.whl (27.8MB)
[K    100% |████████████████████████████████| 27.8MB 1.5MB/s 
Collecting cython (from dynet)
[33m  Cache entry deserialization failed, entry ignored[0m
[?25l  Downloading https://files.pythonhosted.org/packages/6f/79/d8e2cd00bea8156a995fb284ce7b6677c49eccd2d318f73e201a9ce560dc/Cython-0.28.3-cp36-cp36m-manylinux1_x86_64.whl (3.4MB)
[K    100% |████████████████████████████████| 3.4MB 13.8MB/s 
[?25hInstalling collected packages: cython, dynet
Successfully installed cython-0.28.3 dynet-2.0.3
fatal: destination path 'nn4nlp-code' already exists and is not an empty directory.


In [2]:
from __future__ import print_function
import time

from collections import defaultdict
import random
import math
import sys
import argparse

import dynet as dy
import numpy as np

In [3]:
# format of files: each line is "word1 word2 ..." aligned line-by-line
train_src_file = "nn4nlp-code/data/parallel/train.ja"
train_trg_file = "nn4nlp-code/data/parallel/train.en"
dev_src_file = "nn4nlp-code/data/parallel/dev.ja"
dev_trg_file = "nn4nlp-code/data/parallel/dev.en"

w2i_src = defaultdict(lambda: len(w2i_src))
w2i_trg = defaultdict(lambda: len(w2i_trg))

def read(fname_src, fname_trg):
    """
    Read parallel files where each line lines up
    """
    with open(fname_src, "r") as f_src, open(fname_trg, "r") as f_trg:
        for line_src, line_trg in zip(f_src, f_trg):
            sent_src = [w2i_src[x] for x in line_src.strip().split()]
            sent_trg = [w2i_trg[x] for x in line_trg.strip().split()]
            yield (sent_src, sent_trg)

# Read the data
train = list(read(train_src_file, train_trg_file))
unk_src = w2i_src["<unk>"]
w2i_src = defaultdict(lambda: unk_src, w2i_src)
unk_trg = w2i_trg["<unk>"]
w2i_trg = defaultdict(lambda: unk_trg, w2i_trg)
nwords_src = len(w2i_src)
nwords_trg = len(w2i_trg)
dev = list(read(dev_src_file, dev_trg_file))

In [4]:
# DyNet Starts
model = dy.Model()
trainer = dy.AdamTrainer(model)

# Model parameters
EMBED_SIZE = 64
HIDDEN_SIZE = 128
BATCH_SIZE = 16

# Lookup parameters for word embeddings
LOOKUP_SRC = model.add_lookup_parameters((nwords_src, EMBED_SIZE))
LOOKUP_TRG = model.add_lookup_parameters((nwords_trg, EMBED_SIZE))

# Word-level BiLSTMs
LSTM_SRC_FWD = dy.LSTMBuilder(1, EMBED_SIZE, HIDDEN_SIZE/2, model)
LSTM_SRC_BWD = dy.LSTMBuilder(1, EMBED_SIZE, HIDDEN_SIZE/2, model)
LSTM_TRG_FWD = dy.LSTMBuilder(1, EMBED_SIZE, HIDDEN_SIZE/2, model)
LSTM_TRG_BWD = dy.LSTMBuilder(1, EMBED_SIZE, HIDDEN_SIZE/2, model)

def encode_sents(look, fwd, bwd, sents):
    embs = [[look[x] for x in sent] for sent in sents]
    return [dy.concatenate([fwd.transduce(x)[-1], bwd.transduce(x)[-1]]) for x in embs]

# Calculate loss for one mini-batch
def calc_loss(sents):
    dy.renew_cg()

    src_fwd = LSTM_SRC_FWD.initial_state()
    src_bwd = LSTM_SRC_BWD.initial_state()
    trg_fwd = LSTM_TRG_FWD.initial_state()
    trg_bwd = LSTM_TRG_BWD.initial_state()

    # Encoding
    src_reps = encode_sents(LOOKUP_SRC, src_fwd, src_bwd, [src for src, trg in sents])
    trg_reps = encode_sents(LOOKUP_TRG, trg_fwd, trg_bwd, [trg for src, trg in sents])

    # Concatenate the sentence representations to a single matrix
    mtx_src = dy.concatenate_cols(src_reps)
    mtx_trg = dy.concatenate_cols(trg_reps)

    # Do matrix multiplication to get a matrix of dot product similarity scores
    sim_mtx = dy.transpose(mtx_src) * mtx_trg

    # Calculate the hinge loss over all dimensions 
    loss = dy.hinge_dim(sim_mtx, list(range(len(sents))), d=1)

    return dy.sum_elems(loss)

# Calculate representations for one corpus
def index_corpus(sents):
    
    # To take advantage of auto-batching, do several at a time
    for sid in range(0, len(sents), BATCH_SIZE):
        dy.renew_cg()

        src_fwd = LSTM_SRC_FWD.initial_state()
        src_bwd = LSTM_SRC_BWD.initial_state()
        trg_fwd = LSTM_TRG_FWD.initial_state()
        trg_bwd = LSTM_TRG_BWD.initial_state()
        
        # Set up the computation graph
        src_exprs = encode_sents(LOOKUP_SRC, src_fwd, src_bwd, [src for src, trg in sents[sid:min(sid+BATCH_SIZE,len(sents))]])
        trg_exprs = encode_sents(LOOKUP_TRG, trg_fwd, trg_bwd, [trg for src, trg in sents[sid:min(sid+BATCH_SIZE,len(sents))]])

        # Perform the forward pass to calculate everything at once
        trg_exprs[-1][1].forward()

        for src_expr, trg_expr in zip(src_exprs, trg_exprs):
            yield (src_expr.npvalue(), trg_expr.npvalue())

# Perform retrieval, and return both scores and ranked order of candidates
def retrieve(src, db_mtx):
    scores = np.dot(db_mtx,src)
    ranks = np.argsort(-scores)
    return ranks, scores

In [5]:
# Perform training
start = time.time()
train_mbs = all_time = dev_time = all_tagged = this_sents = this_loss = 0
for ITER in range(100):
    random.shuffle(train)
    for sid in range(0, len(train), BATCH_SIZE):
        my_size = min(BATCH_SIZE, len(train)-sid)
        train_mbs += 1
        if train_mbs % int(1000/BATCH_SIZE) == 0:
            trainer.status()
            print("loss/sent=%.4f, sent/sec=%.4f" % (this_loss / this_sents, (train_mbs * BATCH_SIZE) / (time.time() - start - dev_time)), file=sys.stderr)
            this_loss = this_sents = 0
        # train on the minibatch
        loss_exp = calc_loss(train[sid:sid+BATCH_SIZE])
        this_loss += loss_exp.scalar_value()
        this_sents += BATCH_SIZE
        loss_exp.backward()
        trainer.update()
    # Perform evaluation 
    dev_start = time.time()
    rec_at_1, rec_at_5, rec_at_10 = 0, 0, 0
    reps = list(index_corpus(dev))
    trg_mtx = np.stack([trg for src, trg in reps])
    for i, (src, trg) in enumerate(reps):
        ranks, scores = retrieve(src, trg_mtx)
        if ranks[0] == i: rec_at_1 += 1
        if i in ranks[:5]: rec_at_5 += 1
        if i in ranks[:10]: rec_at_10 += 1
    dev_time += time.time()-dev_start
    print("epoch %r: dev recall@1=%.2f%% recall@5=%.2f%% recall@10=%.2f%%" % (ITER, rec_at_1/len(dev)*100, rec_at_5/len(dev)*100, rec_at_10/len(dev)*100))

loss/sent=14.6606, sent/sec=441.1204
loss/sent=11.2567, sent/sec=457.3669
loss/sent=7.8080, sent/sec=461.1460
loss/sent=6.7649, sent/sec=465.6212
loss/sent=6.3778, sent/sec=469.7716
loss/sent=5.4452, sent/sec=469.8234
loss/sent=4.5126, sent/sec=469.8680
loss/sent=4.2842, sent/sec=472.1616
loss/sent=4.0363, sent/sec=471.7079
loss/sent=3.9009, sent/sec=470.7717


epoch 0: dev recall@1=5.40% recall@5=16.20% recall@10=27.80%


loss/sent=3.1877, sent/sec=468.5226
loss/sent=3.2156, sent/sec=466.8912
loss/sent=2.9370, sent/sec=466.5214
loss/sent=2.5584, sent/sec=465.8726
loss/sent=2.7086, sent/sec=465.0600
loss/sent=2.5970, sent/sec=463.5587
loss/sent=2.4225, sent/sec=463.5605
loss/sent=2.1756, sent/sec=463.2136
loss/sent=2.1885, sent/sec=462.8532
loss/sent=2.1517, sent/sec=461.9013


epoch 1: dev recall@1=6.60% recall@5=25.80% recall@10=41.20%


loss/sent=1.8052, sent/sec=461.0218
loss/sent=1.8584, sent/sec=460.2186
loss/sent=1.6299, sent/sec=459.9593
loss/sent=1.7708, sent/sec=460.4858
loss/sent=1.7653, sent/sec=461.4434
loss/sent=1.5822, sent/sec=461.9925
loss/sent=1.4675, sent/sec=462.2106
loss/sent=1.3749, sent/sec=462.3921
loss/sent=1.5649, sent/sec=462.1086
loss/sent=1.4729, sent/sec=462.3705


epoch 2: dev recall@1=11.40% recall@5=34.60% recall@10=48.00%


loss/sent=1.2068, sent/sec=462.0755
loss/sent=1.2322, sent/sec=462.2854
loss/sent=1.0416, sent/sec=462.4715
loss/sent=1.0904, sent/sec=462.5669
loss/sent=1.0442, sent/sec=462.6585
loss/sent=1.0140, sent/sec=462.7672
loss/sent=1.1426, sent/sec=462.7418
loss/sent=1.0948, sent/sec=462.9282
loss/sent=1.0284, sent/sec=463.7044
loss/sent=0.9869, sent/sec=464.3950


epoch 3: dev recall@1=12.40% recall@5=38.00% recall@10=53.00%


loss/sent=0.8727, sent/sec=464.3058
loss/sent=0.6859, sent/sec=464.7335
loss/sent=0.8336, sent/sec=464.7263
loss/sent=0.8684, sent/sec=464.5944
loss/sent=0.6738, sent/sec=465.2342
loss/sent=0.8120, sent/sec=465.7262
loss/sent=0.7423, sent/sec=466.0487
loss/sent=0.7483, sent/sec=465.7502
loss/sent=0.7736, sent/sec=465.8178
loss/sent=0.7661, sent/sec=465.6280


epoch 4: dev recall@1=15.20% recall@5=39.20% recall@10=53.60%


loss/sent=0.5796, sent/sec=465.5666
loss/sent=0.5099, sent/sec=465.6963
loss/sent=0.5675, sent/sec=465.7721
loss/sent=0.5710, sent/sec=465.8923
loss/sent=0.5994, sent/sec=465.7588
loss/sent=0.5259, sent/sec=465.9682
loss/sent=0.5404, sent/sec=465.9895
loss/sent=0.6262, sent/sec=466.0074
loss/sent=0.6272, sent/sec=465.7724
loss/sent=0.5949, sent/sec=465.4872


epoch 5: dev recall@1=15.20% recall@5=41.20% recall@10=53.00%


loss/sent=0.5233, sent/sec=465.1568
loss/sent=0.4605, sent/sec=465.1631
loss/sent=0.4620, sent/sec=464.9826
loss/sent=0.4666, sent/sec=464.5564
loss/sent=0.4486, sent/sec=464.3713
loss/sent=0.4447, sent/sec=464.1965
loss/sent=0.4339, sent/sec=464.0437
loss/sent=0.4961, sent/sec=463.9985
loss/sent=0.4419, sent/sec=464.0988
loss/sent=0.5555, sent/sec=463.9735


epoch 6: dev recall@1=16.40% recall@5=41.80% recall@10=56.40%


loss/sent=0.4205, sent/sec=463.8558
loss/sent=0.3751, sent/sec=464.0443
loss/sent=0.3597, sent/sec=464.1284
loss/sent=0.3337, sent/sec=464.0329
loss/sent=0.3574, sent/sec=463.9244
loss/sent=0.4258, sent/sec=464.0070
loss/sent=0.3880, sent/sec=463.7679
loss/sent=0.3906, sent/sec=463.2342
loss/sent=0.3804, sent/sec=462.8325
loss/sent=0.3811, sent/sec=462.6023


epoch 7: dev recall@1=15.20% recall@5=43.60% recall@10=58.60%


loss/sent=0.3909, sent/sec=462.3215
loss/sent=0.3162, sent/sec=462.2319
loss/sent=0.2919, sent/sec=462.3460
loss/sent=0.3303, sent/sec=462.3759
loss/sent=0.3304, sent/sec=462.6488
loss/sent=0.3081, sent/sec=462.7079
loss/sent=0.3333, sent/sec=462.9707
loss/sent=0.3355, sent/sec=463.0239
loss/sent=0.2745, sent/sec=463.0985
loss/sent=0.2993, sent/sec=463.2117


epoch 8: dev recall@1=14.60% recall@5=46.60% recall@10=60.20%


loss/sent=0.2869, sent/sec=463.1303
loss/sent=0.2926, sent/sec=463.2142
loss/sent=0.3065, sent/sec=463.4259
loss/sent=0.2493, sent/sec=463.4178
loss/sent=0.2410, sent/sec=463.4311
loss/sent=0.2488, sent/sec=463.2617
loss/sent=0.2308, sent/sec=463.0485
loss/sent=0.2652, sent/sec=462.8439
loss/sent=0.2530, sent/sec=462.6851
loss/sent=0.2636, sent/sec=462.5690


epoch 9: dev recall@1=17.60% recall@5=47.20% recall@10=61.40%


loss/sent=0.3139, sent/sec=462.4451
loss/sent=0.2406, sent/sec=462.2717
loss/sent=0.2643, sent/sec=461.9448
loss/sent=0.2135, sent/sec=461.6051
loss/sent=0.2693, sent/sec=461.4599
loss/sent=0.2126, sent/sec=461.5309
loss/sent=0.2542, sent/sec=461.3930
loss/sent=0.2667, sent/sec=461.3739
loss/sent=0.2511, sent/sec=461.3141
loss/sent=0.2479, sent/sec=461.2194


epoch 10: dev recall@1=17.40% recall@5=45.40% recall@10=63.00%


loss/sent=0.2306, sent/sec=461.0449
loss/sent=0.2121, sent/sec=460.9981
loss/sent=0.1947, sent/sec=460.8253
loss/sent=0.1790, sent/sec=460.7254
loss/sent=0.2067, sent/sec=460.7298
loss/sent=0.2547, sent/sec=460.6789
loss/sent=0.2166, sent/sec=460.7460
loss/sent=0.2326, sent/sec=460.7376
loss/sent=0.2356, sent/sec=460.5753
loss/sent=0.1917, sent/sec=460.4922


epoch 11: dev recall@1=18.80% recall@5=45.00% recall@10=61.00%


loss/sent=0.2315, sent/sec=460.5071
loss/sent=0.2031, sent/sec=460.3916
loss/sent=0.1751, sent/sec=460.4014
loss/sent=0.1869, sent/sec=460.4307
loss/sent=0.2009, sent/sec=460.3948
loss/sent=0.1761, sent/sec=460.3740
loss/sent=0.1426, sent/sec=460.2916
loss/sent=0.2083, sent/sec=460.2390
loss/sent=0.2180, sent/sec=460.2551
loss/sent=0.1750, sent/sec=460.2389
loss/sent=0.2370, sent/sec=460.4053


epoch 12: dev recall@1=18.80% recall@5=48.40% recall@10=63.00%


loss/sent=0.1904, sent/sec=460.3711
loss/sent=0.1964, sent/sec=460.3158
loss/sent=0.2206, sent/sec=460.3963
loss/sent=0.1709, sent/sec=460.4620
loss/sent=0.1704, sent/sec=460.5772
loss/sent=0.1742, sent/sec=460.5147
loss/sent=0.1954, sent/sec=460.6131
loss/sent=0.1605, sent/sec=460.6385
loss/sent=0.1553, sent/sec=460.7944
loss/sent=0.1673, sent/sec=460.7842


epoch 13: dev recall@1=21.20% recall@5=46.00% recall@10=61.80%


loss/sent=0.1975, sent/sec=460.8305
loss/sent=0.1752, sent/sec=460.8617
loss/sent=0.1532, sent/sec=460.8971
loss/sent=0.1792, sent/sec=460.9781
loss/sent=0.1749, sent/sec=461.0843
loss/sent=0.2238, sent/sec=461.1081
loss/sent=0.1463, sent/sec=461.1337
loss/sent=0.2054, sent/sec=461.0798
loss/sent=0.1594, sent/sec=461.1675
loss/sent=0.1815, sent/sec=461.2807


epoch 14: dev recall@1=21.60% recall@5=47.40% recall@10=61.20%


loss/sent=0.1659, sent/sec=461.1036
loss/sent=0.1201, sent/sec=461.1535
loss/sent=0.1639, sent/sec=461.1758
loss/sent=0.1327, sent/sec=461.1887
loss/sent=0.1236, sent/sec=461.2169
loss/sent=0.1407, sent/sec=461.2502
loss/sent=0.1486, sent/sec=461.3180
loss/sent=0.1412, sent/sec=461.4416
loss/sent=0.1111, sent/sec=461.6046
loss/sent=0.1375, sent/sec=461.7039


epoch 15: dev recall@1=19.80% recall@5=48.20% recall@10=62.00%


loss/sent=0.1506, sent/sec=461.7374
loss/sent=0.1303, sent/sec=461.8781
loss/sent=0.1334, sent/sec=461.9632
loss/sent=0.1574, sent/sec=461.9453
loss/sent=0.1580, sent/sec=462.0332
loss/sent=0.1458, sent/sec=462.0859
loss/sent=0.1497, sent/sec=462.1142
loss/sent=0.1248, sent/sec=462.2219
loss/sent=0.1324, sent/sec=462.3347
loss/sent=0.1434, sent/sec=462.4249


epoch 16: dev recall@1=20.40% recall@5=49.00% recall@10=62.20%


loss/sent=0.1786, sent/sec=462.3886
loss/sent=0.1024, sent/sec=462.4121
loss/sent=0.1271, sent/sec=462.5419
loss/sent=0.1268, sent/sec=462.6331
loss/sent=0.1217, sent/sec=462.8147
loss/sent=0.1243, sent/sec=462.8971
loss/sent=0.1349, sent/sec=462.9745
loss/sent=0.1256, sent/sec=462.9808
loss/sent=0.1230, sent/sec=463.0613
loss/sent=0.1311, sent/sec=463.1593


epoch 17: dev recall@1=22.80% recall@5=50.40% recall@10=64.20%


loss/sent=0.1253, sent/sec=463.1117
loss/sent=0.1225, sent/sec=463.1790
loss/sent=0.1038, sent/sec=463.2289
loss/sent=0.1615, sent/sec=463.3022
loss/sent=0.1029, sent/sec=463.3705
loss/sent=0.1606, sent/sec=463.4804
loss/sent=0.1045, sent/sec=463.5116
loss/sent=0.1130, sent/sec=463.5805
loss/sent=0.1206, sent/sec=463.5831
loss/sent=0.1401, sent/sec=463.5942


epoch 18: dev recall@1=22.40% recall@5=52.00% recall@10=63.60%


loss/sent=0.1299, sent/sec=463.5689
loss/sent=0.1213, sent/sec=463.6034
loss/sent=0.1153, sent/sec=463.6489
loss/sent=0.1327, sent/sec=463.6843
loss/sent=0.1129, sent/sec=463.6841
loss/sent=0.1147, sent/sec=463.7259
loss/sent=0.1122, sent/sec=463.7584
loss/sent=0.1251, sent/sec=463.7573
loss/sent=0.1319, sent/sec=463.8869
loss/sent=0.1026, sent/sec=463.8548


epoch 19: dev recall@1=19.60% recall@5=50.00% recall@10=62.60%


loss/sent=0.1280, sent/sec=463.7674
loss/sent=0.0893, sent/sec=463.7865
loss/sent=0.1029, sent/sec=463.8385
loss/sent=0.1065, sent/sec=463.8780
loss/sent=0.1059, sent/sec=463.9568
loss/sent=0.1323, sent/sec=463.9803
loss/sent=0.1133, sent/sec=464.0165
loss/sent=0.1018, sent/sec=463.9902
loss/sent=0.1020, sent/sec=464.0122
loss/sent=0.0833, sent/sec=463.9881


epoch 20: dev recall@1=23.40% recall@5=52.00% recall@10=65.60%


loss/sent=0.1158, sent/sec=463.9304
loss/sent=0.1028, sent/sec=463.8018
loss/sent=0.0941, sent/sec=463.7846
loss/sent=0.1034, sent/sec=463.7507
loss/sent=0.1038, sent/sec=463.8067
loss/sent=0.1245, sent/sec=463.8448
loss/sent=0.0906, sent/sec=463.8831
loss/sent=0.0767, sent/sec=463.9684
loss/sent=0.1123, sent/sec=463.9713
loss/sent=0.1343, sent/sec=464.0444


epoch 21: dev recall@1=27.40% recall@5=51.40% recall@10=63.20%


loss/sent=0.1099, sent/sec=464.0498
loss/sent=0.0758, sent/sec=464.1214
loss/sent=0.0904, sent/sec=464.2205
loss/sent=0.0849, sent/sec=464.2714
loss/sent=0.1067, sent/sec=464.3431
loss/sent=0.1170, sent/sec=464.3760
loss/sent=0.0743, sent/sec=464.4252
loss/sent=0.0854, sent/sec=464.4555
loss/sent=0.1022, sent/sec=464.4701
loss/sent=0.1022, sent/sec=464.5257


epoch 22: dev recall@1=27.00% recall@5=49.80% recall@10=63.80%


loss/sent=0.1319, sent/sec=464.5245
loss/sent=0.0803, sent/sec=464.6211
loss/sent=0.0889, sent/sec=464.6523
loss/sent=0.0870, sent/sec=464.6998
loss/sent=0.0961, sent/sec=464.7444
loss/sent=0.0949, sent/sec=464.7759
loss/sent=0.0929, sent/sec=464.7442
loss/sent=0.1089, sent/sec=464.7752
loss/sent=0.1070, sent/sec=464.7738
loss/sent=0.0832, sent/sec=464.7523


epoch 23: dev recall@1=25.20% recall@5=51.80% recall@10=64.20%


loss/sent=0.0906, sent/sec=464.7348
loss/sent=0.0869, sent/sec=464.7387
loss/sent=0.0636, sent/sec=464.6964
loss/sent=0.0776, sent/sec=464.6738
loss/sent=0.0803, sent/sec=464.6860
loss/sent=0.0789, sent/sec=464.7011
loss/sent=0.0733, sent/sec=464.6837
loss/sent=0.1034, sent/sec=464.6617
loss/sent=0.0853, sent/sec=464.6370
loss/sent=0.0914, sent/sec=464.5987
loss/sent=0.0799, sent/sec=464.5600


epoch 24: dev recall@1=25.40% recall@5=53.00% recall@10=65.20%


loss/sent=0.0803, sent/sec=464.5498
loss/sent=0.0921, sent/sec=464.5913
loss/sent=0.0689, sent/sec=464.5780
loss/sent=0.0710, sent/sec=464.6352
loss/sent=0.0817, sent/sec=464.6634
loss/sent=0.1129, sent/sec=464.6950
loss/sent=0.0789, sent/sec=464.7270
loss/sent=0.0829, sent/sec=464.7547
loss/sent=0.0865, sent/sec=464.8056
loss/sent=0.1036, sent/sec=464.8352


epoch 25: dev recall@1=25.40% recall@5=52.20% recall@10=64.40%


loss/sent=0.0866, sent/sec=464.7275
loss/sent=0.0696, sent/sec=464.7374
loss/sent=0.0879, sent/sec=464.7533
loss/sent=0.0945, sent/sec=464.7575
loss/sent=0.0885, sent/sec=464.8062
loss/sent=0.0937, sent/sec=464.8202
loss/sent=0.0914, sent/sec=464.7955
loss/sent=0.0908, sent/sec=464.8144
loss/sent=0.0722, sent/sec=464.7596
loss/sent=0.0924, sent/sec=464.6783


epoch 26: dev recall@1=25.20% recall@5=52.20% recall@10=64.40%


loss/sent=0.0815, sent/sec=464.5443
loss/sent=0.0876, sent/sec=464.4660
loss/sent=0.0771, sent/sec=464.3999
loss/sent=0.0634, sent/sec=464.3325
loss/sent=0.0808, sent/sec=464.2353
loss/sent=0.0917, sent/sec=464.1900
loss/sent=0.0588, sent/sec=464.0859
loss/sent=0.0821, sent/sec=464.0031
loss/sent=0.0781, sent/sec=464.0041
loss/sent=0.0768, sent/sec=463.9392


epoch 27: dev recall@1=24.40% recall@5=52.60% recall@10=65.20%


loss/sent=0.1030, sent/sec=463.8617
loss/sent=0.0833, sent/sec=463.7005
loss/sent=0.0825, sent/sec=463.6737
loss/sent=0.0879, sent/sec=463.6117
loss/sent=0.0870, sent/sec=463.5074
loss/sent=0.0956, sent/sec=463.4184
loss/sent=0.0886, sent/sec=463.3714
loss/sent=0.0796, sent/sec=463.2391
loss/sent=0.0709, sent/sec=463.1648
loss/sent=0.1000, sent/sec=463.0815


epoch 28: dev recall@1=27.40% recall@5=53.00% recall@10=64.40%


loss/sent=0.0763, sent/sec=463.0130
loss/sent=0.0624, sent/sec=463.0123
loss/sent=0.0846, sent/sec=463.0099
loss/sent=0.0792, sent/sec=462.9758
loss/sent=0.0618, sent/sec=462.9131
loss/sent=0.0632, sent/sec=462.9193
loss/sent=0.0801, sent/sec=462.9420
loss/sent=0.0599, sent/sec=462.9287
loss/sent=0.0621, sent/sec=462.9553
loss/sent=0.0768, sent/sec=463.0014


epoch 29: dev recall@1=23.00% recall@5=53.00% recall@10=62.60%


loss/sent=0.0581, sent/sec=462.9551
loss/sent=0.0703, sent/sec=463.0004
loss/sent=0.0783, sent/sec=462.9571
loss/sent=0.0749, sent/sec=462.9492
loss/sent=0.0715, sent/sec=462.9422
loss/sent=0.0730, sent/sec=462.9274
loss/sent=0.0689, sent/sec=462.9291
loss/sent=0.0777, sent/sec=462.9831
loss/sent=0.0757, sent/sec=463.0102
loss/sent=0.0758, sent/sec=463.0336


epoch 30: dev recall@1=26.20% recall@5=53.40% recall@10=66.60%


loss/sent=0.0749, sent/sec=463.0394
loss/sent=0.0608, sent/sec=463.0636
loss/sent=0.0571, sent/sec=463.0944
loss/sent=0.0587, sent/sec=463.1195
loss/sent=0.0694, sent/sec=463.1451
loss/sent=0.0539, sent/sec=463.2180
loss/sent=0.0784, sent/sec=463.2540
loss/sent=0.0592, sent/sec=463.3227
loss/sent=0.0752, sent/sec=463.3367
loss/sent=0.0718, sent/sec=463.3801


epoch 31: dev recall@1=25.20% recall@5=51.60% recall@10=66.40%


loss/sent=0.0616, sent/sec=463.3474
loss/sent=0.0729, sent/sec=463.4036
loss/sent=0.0969, sent/sec=463.4229
loss/sent=0.0750, sent/sec=463.4155
loss/sent=0.0585, sent/sec=463.4370
loss/sent=0.0740, sent/sec=463.4418
loss/sent=0.0812, sent/sec=463.4857
loss/sent=0.0530, sent/sec=463.5302
loss/sent=0.0660, sent/sec=463.5977
loss/sent=0.0510, sent/sec=463.6252


epoch 32: dev recall@1=23.00% recall@5=54.20% recall@10=67.00%


loss/sent=0.0567, sent/sec=463.6227
loss/sent=0.0536, sent/sec=463.6573
loss/sent=0.0634, sent/sec=463.6833
loss/sent=0.0580, sent/sec=463.7165
loss/sent=0.0666, sent/sec=463.7874
loss/sent=0.0656, sent/sec=463.7954
loss/sent=0.0551, sent/sec=463.8216
loss/sent=0.0484, sent/sec=463.8470
loss/sent=0.0517, sent/sec=463.9002
loss/sent=0.0445, sent/sec=463.9344


epoch 33: dev recall@1=25.80% recall@5=52.60% recall@10=65.60%


loss/sent=0.0584, sent/sec=463.9485
loss/sent=0.0704, sent/sec=463.9666
loss/sent=0.0657, sent/sec=464.0196
loss/sent=0.0502, sent/sec=464.0694
loss/sent=0.0313, sent/sec=464.1123
loss/sent=0.0707, sent/sec=464.1693
loss/sent=0.0578, sent/sec=464.2068
loss/sent=0.0611, sent/sec=464.2316
loss/sent=0.0497, sent/sec=464.2846
loss/sent=0.0599, sent/sec=464.3131


epoch 34: dev recall@1=26.40% recall@5=54.40% recall@10=66.80%


loss/sent=0.0552, sent/sec=464.3136
loss/sent=0.0384, sent/sec=464.3668
loss/sent=0.0649, sent/sec=464.3580
loss/sent=0.0759, sent/sec=464.4101
loss/sent=0.0583, sent/sec=464.3929
loss/sent=0.0485, sent/sec=464.3713
loss/sent=0.0499, sent/sec=464.3781
loss/sent=0.0611, sent/sec=464.3737
loss/sent=0.0532, sent/sec=464.4065
loss/sent=0.0395, sent/sec=464.4231


epoch 35: dev recall@1=24.40% recall@5=51.80% recall@10=64.40%


loss/sent=0.0424, sent/sec=464.4146
loss/sent=0.0543, sent/sec=464.4440
loss/sent=0.0536, sent/sec=464.4934
loss/sent=0.0760, sent/sec=464.4705
loss/sent=0.0426, sent/sec=464.4805
loss/sent=0.0441, sent/sec=464.4809
loss/sent=0.0480, sent/sec=464.4905
loss/sent=0.0583, sent/sec=464.5230
loss/sent=0.0610, sent/sec=464.5100
loss/sent=0.0624, sent/sec=464.4855


epoch 36: dev recall@1=25.80% recall@5=52.00% recall@10=65.60%


loss/sent=0.0484, sent/sec=464.3330
loss/sent=0.0557, sent/sec=464.2038
loss/sent=0.0386, sent/sec=464.0929
loss/sent=0.0389, sent/sec=464.0608
loss/sent=0.0379, sent/sec=464.0223
loss/sent=0.0513, sent/sec=463.9535
loss/sent=0.0277, sent/sec=463.8320
loss/sent=0.0416, sent/sec=463.7552
loss/sent=0.0554, sent/sec=463.6665
loss/sent=0.0566, sent/sec=463.6355
loss/sent=0.0454, sent/sec=463.6236


epoch 37: dev recall@1=27.20% recall@5=54.00% recall@10=65.60%


loss/sent=0.0442, sent/sec=463.6260
loss/sent=0.0487, sent/sec=463.6268
loss/sent=0.0433, sent/sec=463.6845
loss/sent=0.0672, sent/sec=463.7136
loss/sent=0.0453, sent/sec=463.7647
loss/sent=0.0482, sent/sec=463.7387
loss/sent=0.0413, sent/sec=463.7141
loss/sent=0.0531, sent/sec=463.7284
loss/sent=0.0652, sent/sec=463.7265
loss/sent=0.0552, sent/sec=463.7195


epoch 38: dev recall@1=24.60% recall@5=53.20% recall@10=68.00%


loss/sent=0.0473, sent/sec=463.6712
loss/sent=0.0392, sent/sec=463.6678
loss/sent=0.0417, sent/sec=463.6548
loss/sent=0.0330, sent/sec=463.6720
loss/sent=0.0636, sent/sec=463.7003
loss/sent=0.0571, sent/sec=463.7565
loss/sent=0.0638, sent/sec=463.7710
loss/sent=0.0603, sent/sec=463.8234
loss/sent=0.0488, sent/sec=463.8655
loss/sent=0.0659, sent/sec=463.9401


epoch 39: dev recall@1=25.20% recall@5=51.60% recall@10=68.00%


loss/sent=0.0373, sent/sec=463.9503
loss/sent=0.0375, sent/sec=463.9879
loss/sent=0.0555, sent/sec=464.0225
loss/sent=0.0428, sent/sec=464.0738
loss/sent=0.0345, sent/sec=464.1277
loss/sent=0.0409, sent/sec=464.1594
loss/sent=0.0390, sent/sec=464.1693
loss/sent=0.0624, sent/sec=464.1833
loss/sent=0.0348, sent/sec=464.1704
loss/sent=0.0753, sent/sec=464.1724


epoch 40: dev recall@1=27.20% recall@5=52.60% recall@10=65.80%


loss/sent=0.0456, sent/sec=464.1201
loss/sent=0.0628, sent/sec=464.1128
loss/sent=0.0421, sent/sec=464.1122
loss/sent=0.0408, sent/sec=464.0958
loss/sent=0.0515, sent/sec=464.1160
loss/sent=0.0528, sent/sec=464.1196
loss/sent=0.0424, sent/sec=464.1250
loss/sent=0.0449, sent/sec=464.1458
loss/sent=0.0474, sent/sec=464.1675
loss/sent=0.0292, sent/sec=464.1794


epoch 41: dev recall@1=27.80% recall@5=54.20% recall@10=65.60%


loss/sent=0.0399, sent/sec=464.1322
loss/sent=0.0454, sent/sec=464.1006
loss/sent=0.0574, sent/sec=464.0858
loss/sent=0.0560, sent/sec=464.1095
loss/sent=0.0391, sent/sec=464.1019
loss/sent=0.0368, sent/sec=464.1597
loss/sent=0.0495, sent/sec=464.1843
loss/sent=0.0327, sent/sec=464.2141
loss/sent=0.0531, sent/sec=464.2437
loss/sent=0.0481, sent/sec=464.2716


epoch 42: dev recall@1=26.80% recall@5=50.40% recall@10=62.60%


loss/sent=0.0417, sent/sec=464.2592
loss/sent=0.0420, sent/sec=464.2980
loss/sent=0.0495, sent/sec=464.3220
loss/sent=0.0541, sent/sec=464.3591
loss/sent=0.0478, sent/sec=464.3773
loss/sent=0.0638, sent/sec=464.3900
loss/sent=0.0418, sent/sec=464.4093
loss/sent=0.0441, sent/sec=464.4545
loss/sent=0.0493, sent/sec=464.4586
loss/sent=0.0356, sent/sec=464.4756


epoch 43: dev recall@1=28.40% recall@5=55.60% recall@10=67.40%


loss/sent=0.0563, sent/sec=464.4704
loss/sent=0.0284, sent/sec=464.4926
loss/sent=0.0592, sent/sec=464.5245
loss/sent=0.0301, sent/sec=464.5204
loss/sent=0.0492, sent/sec=464.5636
loss/sent=0.0257, sent/sec=464.5919
loss/sent=0.0518, sent/sec=464.6002
loss/sent=0.0481, sent/sec=464.6201
loss/sent=0.0351, sent/sec=464.6321
loss/sent=0.0417, sent/sec=464.6471


epoch 44: dev recall@1=27.80% recall@5=55.40% recall@10=67.80%


loss/sent=0.0405, sent/sec=464.6541
loss/sent=0.0304, sent/sec=464.6771
loss/sent=0.0405, sent/sec=464.7205
loss/sent=0.0482, sent/sec=464.7344
loss/sent=0.0277, sent/sec=464.7575
loss/sent=0.0340, sent/sec=464.7856
loss/sent=0.0382, sent/sec=464.8032
loss/sent=0.0402, sent/sec=464.7955
loss/sent=0.0596, sent/sec=464.8200
loss/sent=0.0347, sent/sec=464.8715


epoch 45: dev recall@1=28.80% recall@5=54.40% recall@10=68.40%


loss/sent=0.0515, sent/sec=464.8991
loss/sent=0.0423, sent/sec=464.9323
loss/sent=0.0427, sent/sec=464.9432
loss/sent=0.0362, sent/sec=464.9708
loss/sent=0.0523, sent/sec=465.0326
loss/sent=0.0509, sent/sec=465.0718
loss/sent=0.0474, sent/sec=465.0764
loss/sent=0.0374, sent/sec=465.0781
loss/sent=0.0353, sent/sec=465.1136
loss/sent=0.0540, sent/sec=465.1576


epoch 46: dev recall@1=29.80% recall@5=55.80% recall@10=67.40%


loss/sent=0.0304, sent/sec=465.1726
loss/sent=0.0393, sent/sec=465.1473
loss/sent=0.0317, sent/sec=465.1806
loss/sent=0.0387, sent/sec=465.1749
loss/sent=0.0299, sent/sec=465.2056
loss/sent=0.0384, sent/sec=465.2228
loss/sent=0.0473, sent/sec=465.2055
loss/sent=0.0286, sent/sec=465.1691
loss/sent=0.0360, sent/sec=465.1852
loss/sent=0.0330, sent/sec=465.2291


epoch 47: dev recall@1=27.80% recall@5=56.60% recall@10=69.00%


loss/sent=0.0612, sent/sec=465.2232
loss/sent=0.0278, sent/sec=465.2337
loss/sent=0.0301, sent/sec=465.2338
loss/sent=0.0293, sent/sec=465.2178
loss/sent=0.0351, sent/sec=465.2002
loss/sent=0.0377, sent/sec=465.1739
loss/sent=0.0417, sent/sec=465.0856
loss/sent=0.0404, sent/sec=464.9939
loss/sent=0.0342, sent/sec=464.8884
loss/sent=0.0394, sent/sec=464.7700


epoch 48: dev recall@1=26.60% recall@5=57.00% recall@10=69.60%


loss/sent=0.0409, sent/sec=464.6379
loss/sent=0.0499, sent/sec=464.5586
loss/sent=0.0448, sent/sec=464.5657
loss/sent=0.0343, sent/sec=464.5825
loss/sent=0.0418, sent/sec=464.5661
loss/sent=0.0499, sent/sec=464.5484
loss/sent=0.0321, sent/sec=464.5014
loss/sent=0.0382, sent/sec=464.4814
loss/sent=0.0334, sent/sec=464.4722
loss/sent=0.0458, sent/sec=464.4375
loss/sent=0.0430, sent/sec=464.4364


epoch 49: dev recall@1=31.00% recall@5=56.20% recall@10=68.60%


loss/sent=0.0359, sent/sec=464.3911
loss/sent=0.0400, sent/sec=464.3995
loss/sent=0.0399, sent/sec=464.3874
loss/sent=0.0421, sent/sec=464.3506
loss/sent=0.0471, sent/sec=464.3516
loss/sent=0.0376, sent/sec=464.3504
loss/sent=0.0499, sent/sec=464.3659
loss/sent=0.0392, sent/sec=464.3631
loss/sent=0.0348, sent/sec=464.3726
loss/sent=0.0303, sent/sec=464.3781


epoch 50: dev recall@1=28.40% recall@5=57.40% recall@10=69.40%


loss/sent=0.0398, sent/sec=464.3803
loss/sent=0.0277, sent/sec=464.3993
loss/sent=0.0329, sent/sec=464.4263
loss/sent=0.0340, sent/sec=464.4613
loss/sent=0.0240, sent/sec=464.4750
loss/sent=0.0354, sent/sec=464.4880
loss/sent=0.0311, sent/sec=464.4825
loss/sent=0.0343, sent/sec=464.4363
loss/sent=0.0351, sent/sec=464.4303
loss/sent=0.0241, sent/sec=464.4080


epoch 51: dev recall@1=30.40% recall@5=58.80% recall@10=69.00%


loss/sent=0.0350, sent/sec=464.3688
loss/sent=0.0278, sent/sec=464.3724
loss/sent=0.0328, sent/sec=464.3705
loss/sent=0.0180, sent/sec=464.3822
loss/sent=0.0355, sent/sec=464.4327
loss/sent=0.0429, sent/sec=464.4469
loss/sent=0.0421, sent/sec=464.4870
loss/sent=0.0398, sent/sec=464.5263
loss/sent=0.0379, sent/sec=464.5617
loss/sent=0.0269, sent/sec=464.5739


epoch 52: dev recall@1=28.80% recall@5=58.00% recall@10=68.60%


loss/sent=0.0375, sent/sec=464.5724
loss/sent=0.0247, sent/sec=464.6136
loss/sent=0.0462, sent/sec=464.6373
loss/sent=0.0410, sent/sec=464.6422
loss/sent=0.0438, sent/sec=464.6557
loss/sent=0.0466, sent/sec=464.6591
loss/sent=0.0259, sent/sec=464.7170
loss/sent=0.0323, sent/sec=464.7322
loss/sent=0.0291, sent/sec=464.7488
loss/sent=0.0268, sent/sec=464.7730


epoch 53: dev recall@1=30.40% recall@5=57.60% recall@10=67.40%


loss/sent=0.0297, sent/sec=464.7577
loss/sent=0.0428, sent/sec=464.7902
loss/sent=0.0281, sent/sec=464.8037
loss/sent=0.0485, sent/sec=464.8347
loss/sent=0.0488, sent/sec=464.8597
loss/sent=0.0287, sent/sec=464.8760
loss/sent=0.0141, sent/sec=464.9200
loss/sent=0.0319, sent/sec=464.9306
loss/sent=0.0223, sent/sec=464.9598
loss/sent=0.0322, sent/sec=464.9788


epoch 54: dev recall@1=31.20% recall@5=59.80% recall@10=68.80%


loss/sent=0.0227, sent/sec=464.9709
loss/sent=0.0198, sent/sec=465.0033
loss/sent=0.0450, sent/sec=465.0428
loss/sent=0.0380, sent/sec=465.0626
loss/sent=0.0290, sent/sec=465.0745
loss/sent=0.0308, sent/sec=465.0815
loss/sent=0.0283, sent/sec=465.0859
loss/sent=0.0429, sent/sec=465.0772
loss/sent=0.0327, sent/sec=465.0681
loss/sent=0.0346, sent/sec=465.0641


epoch 55: dev recall@1=31.20% recall@5=57.80% recall@10=70.80%


loss/sent=0.0396, sent/sec=465.0392
loss/sent=0.0211, sent/sec=465.0529
loss/sent=0.0314, sent/sec=465.0581
loss/sent=0.0271, sent/sec=465.0630
loss/sent=0.0305, sent/sec=465.0839
loss/sent=0.0368, sent/sec=465.0983
loss/sent=0.0253, sent/sec=465.1182
loss/sent=0.0332, sent/sec=465.1349
loss/sent=0.0356, sent/sec=465.1555
loss/sent=0.0316, sent/sec=465.1936


epoch 56: dev recall@1=30.60% recall@5=58.40% recall@10=70.00%


loss/sent=0.0334, sent/sec=465.1716
loss/sent=0.0302, sent/sec=465.1933
loss/sent=0.0189, sent/sec=465.2025
loss/sent=0.0251, sent/sec=465.2134
loss/sent=0.0200, sent/sec=465.2306
loss/sent=0.0297, sent/sec=465.2381
loss/sent=0.0266, sent/sec=465.2724
loss/sent=0.0485, sent/sec=465.2885
loss/sent=0.0308, sent/sec=465.3151
loss/sent=0.0156, sent/sec=465.3176


epoch 57: dev recall@1=31.60% recall@5=60.00% recall@10=71.00%


loss/sent=0.0262, sent/sec=465.3118
loss/sent=0.0286, sent/sec=465.3004
loss/sent=0.0358, sent/sec=465.3066
loss/sent=0.0330, sent/sec=465.3290
loss/sent=0.0341, sent/sec=465.3425
loss/sent=0.0318, sent/sec=465.3735
loss/sent=0.0260, sent/sec=465.4182
loss/sent=0.0297, sent/sec=465.4318
loss/sent=0.0312, sent/sec=465.4680
loss/sent=0.0248, sent/sec=465.4993


epoch 58: dev recall@1=28.60% recall@5=55.40% recall@10=66.60%


loss/sent=0.0291, sent/sec=465.5265
loss/sent=0.0351, sent/sec=465.5617
loss/sent=0.0229, sent/sec=465.6085
loss/sent=0.0309, sent/sec=465.6427
loss/sent=0.0268, sent/sec=465.6719
loss/sent=0.0236, sent/sec=465.6810
loss/sent=0.0222, sent/sec=465.7019
loss/sent=0.0409, sent/sec=465.7374
loss/sent=0.0175, sent/sec=465.7885
loss/sent=0.0276, sent/sec=465.8097


epoch 59: dev recall@1=31.00% recall@5=58.60% recall@10=71.00%


loss/sent=0.0286, sent/sec=465.8217
loss/sent=0.0327, sent/sec=465.8465
loss/sent=0.0272, sent/sec=465.8610
loss/sent=0.0383, sent/sec=465.8883
loss/sent=0.0225, sent/sec=465.8821
loss/sent=0.0185, sent/sec=465.8619
loss/sent=0.0307, sent/sec=465.8544
loss/sent=0.0310, sent/sec=465.8328
loss/sent=0.0230, sent/sec=465.8290
loss/sent=0.0340, sent/sec=465.8279


epoch 60: dev recall@1=32.20% recall@5=58.60% recall@10=71.40%


loss/sent=0.0305, sent/sec=465.8256
loss/sent=0.0180, sent/sec=465.8204
loss/sent=0.0385, sent/sec=465.7969
loss/sent=0.0367, sent/sec=465.7771
loss/sent=0.0300, sent/sec=465.7907
loss/sent=0.0276, sent/sec=465.7711
loss/sent=0.0322, sent/sec=465.7499
loss/sent=0.0328, sent/sec=465.7450
loss/sent=0.0272, sent/sec=465.7233
loss/sent=0.0319, sent/sec=465.7232
loss/sent=0.0244, sent/sec=465.7277


epoch 61: dev recall@1=31.80% recall@5=56.80% recall@10=70.60%


loss/sent=0.0362, sent/sec=465.7239
loss/sent=0.0275, sent/sec=465.7371
loss/sent=0.0367, sent/sec=465.7274
loss/sent=0.0360, sent/sec=465.7080
loss/sent=0.0244, sent/sec=465.7089
loss/sent=0.0302, sent/sec=465.7267
loss/sent=0.0294, sent/sec=465.7126
loss/sent=0.0213, sent/sec=465.6931
loss/sent=0.0327, sent/sec=465.6886
loss/sent=0.0370, sent/sec=465.6788


epoch 62: dev recall@1=32.40% recall@5=58.80% recall@10=69.60%


loss/sent=0.0184, sent/sec=465.6729
loss/sent=0.0283, sent/sec=465.6751
loss/sent=0.0322, sent/sec=465.6724
loss/sent=0.0238, sent/sec=465.6701
loss/sent=0.0345, sent/sec=465.6669
loss/sent=0.0356, sent/sec=465.6347
loss/sent=0.0265, sent/sec=465.6332
loss/sent=0.0168, sent/sec=465.6277
loss/sent=0.0264, sent/sec=465.6052
loss/sent=0.0301, sent/sec=465.5993


epoch 63: dev recall@1=32.60% recall@5=59.20% recall@10=69.00%


loss/sent=0.0249, sent/sec=465.5648
loss/sent=0.0183, sent/sec=465.5454
loss/sent=0.0195, sent/sec=465.5229
loss/sent=0.0288, sent/sec=465.4985
loss/sent=0.0339, sent/sec=465.4944
loss/sent=0.0242, sent/sec=465.4776
loss/sent=0.0238, sent/sec=465.4341
loss/sent=0.0270, sent/sec=465.4112
loss/sent=0.0309, sent/sec=465.3760
loss/sent=0.0255, sent/sec=465.3653


epoch 64: dev recall@1=31.00% recall@5=58.00% recall@10=71.00%


loss/sent=0.0283, sent/sec=465.3307
loss/sent=0.0169, sent/sec=465.3247
loss/sent=0.0213, sent/sec=465.3202
loss/sent=0.0276, sent/sec=465.3165
loss/sent=0.0223, sent/sec=465.3142
loss/sent=0.0176, sent/sec=465.3284
loss/sent=0.0397, sent/sec=465.3111
loss/sent=0.0439, sent/sec=465.2893
loss/sent=0.0251, sent/sec=465.2775
loss/sent=0.0364, sent/sec=465.2483


epoch 65: dev recall@1=30.80% recall@5=57.20% recall@10=71.00%


loss/sent=0.0246, sent/sec=465.2033
loss/sent=0.0268, sent/sec=465.2137
loss/sent=0.0314, sent/sec=465.2388
loss/sent=0.0430, sent/sec=465.2563
loss/sent=0.0304, sent/sec=465.2544
loss/sent=0.0229, sent/sec=465.2562
loss/sent=0.0362, sent/sec=465.2468
loss/sent=0.0271, sent/sec=465.2633
loss/sent=0.0283, sent/sec=465.2624
loss/sent=0.0233, sent/sec=465.2671


epoch 66: dev recall@1=30.80% recall@5=58.80% recall@10=73.00%


loss/sent=0.0238, sent/sec=465.2426
loss/sent=0.0268, sent/sec=465.2582
loss/sent=0.0306, sent/sec=465.2596
loss/sent=0.0272, sent/sec=465.2636
loss/sent=0.0192, sent/sec=465.2517
loss/sent=0.0345, sent/sec=465.2382
loss/sent=0.0164, sent/sec=465.2456
loss/sent=0.0300, sent/sec=465.2487
loss/sent=0.0247, sent/sec=465.2067
loss/sent=0.0218, sent/sec=465.1833


epoch 67: dev recall@1=31.60% recall@5=60.20% recall@10=72.20%


loss/sent=0.0314, sent/sec=465.1628
loss/sent=0.0198, sent/sec=465.1521
loss/sent=0.0339, sent/sec=465.1586
loss/sent=0.0376, sent/sec=465.1339
loss/sent=0.0248, sent/sec=465.1182
loss/sent=0.0242, sent/sec=465.1003
loss/sent=0.0248, sent/sec=465.0950
loss/sent=0.0211, sent/sec=465.0732
loss/sent=0.0279, sent/sec=465.0730
loss/sent=0.0231, sent/sec=465.0333


epoch 68: dev recall@1=30.20% recall@5=60.00% recall@10=69.20%


loss/sent=0.0228, sent/sec=465.0262
loss/sent=0.0337, sent/sec=465.0115
loss/sent=0.0334, sent/sec=465.0010
loss/sent=0.0292, sent/sec=465.0026
loss/sent=0.0235, sent/sec=464.9831
loss/sent=0.0235, sent/sec=464.9718
loss/sent=0.0139, sent/sec=464.9350
loss/sent=0.0254, sent/sec=464.9305
loss/sent=0.0226, sent/sec=464.9597
loss/sent=0.0243, sent/sec=464.9720


epoch 69: dev recall@1=30.60% recall@5=60.40% recall@10=70.00%


loss/sent=0.0326, sent/sec=464.9477
loss/sent=0.0190, sent/sec=464.9312
loss/sent=0.0225, sent/sec=464.9321
loss/sent=0.0171, sent/sec=464.9526
loss/sent=0.0222, sent/sec=464.9559
loss/sent=0.0244, sent/sec=464.9285
loss/sent=0.0224, sent/sec=464.9272
loss/sent=0.0352, sent/sec=464.9237
loss/sent=0.0261, sent/sec=464.9165
loss/sent=0.0417, sent/sec=464.8678


epoch 70: dev recall@1=32.20% recall@5=59.20% recall@10=70.40%


loss/sent=0.0260, sent/sec=464.8415
loss/sent=0.0313, sent/sec=464.8231
loss/sent=0.0228, sent/sec=464.8208
loss/sent=0.0221, sent/sec=464.8224
loss/sent=0.0202, sent/sec=464.8413
loss/sent=0.0337, sent/sec=464.8593
loss/sent=0.0217, sent/sec=464.8710
loss/sent=0.0234, sent/sec=464.8375
loss/sent=0.0288, sent/sec=464.8097
loss/sent=0.0308, sent/sec=464.7967


epoch 71: dev recall@1=30.60% recall@5=59.40% recall@10=70.60%


loss/sent=0.0207, sent/sec=464.7676
loss/sent=0.0269, sent/sec=464.7643
loss/sent=0.0250, sent/sec=464.7372
loss/sent=0.0165, sent/sec=464.7104
loss/sent=0.0196, sent/sec=464.6753
loss/sent=0.0162, sent/sec=464.6694
loss/sent=0.0117, sent/sec=464.6457
loss/sent=0.0185, sent/sec=464.6235
loss/sent=0.0233, sent/sec=464.6112
loss/sent=0.0205, sent/sec=464.6081


epoch 72: dev recall@1=32.40% recall@5=58.40% recall@10=68.00%


loss/sent=0.0416, sent/sec=464.5837
loss/sent=0.0082, sent/sec=464.5668
loss/sent=0.0175, sent/sec=464.5425
loss/sent=0.0208, sent/sec=464.5072
loss/sent=0.0275, sent/sec=464.4735
loss/sent=0.0297, sent/sec=464.4441
loss/sent=0.0212, sent/sec=464.4231
loss/sent=0.0227, sent/sec=464.4104
loss/sent=0.0250, sent/sec=464.4243
loss/sent=0.0195, sent/sec=464.4046


epoch 73: dev recall@1=32.60% recall@5=56.60% recall@10=67.40%


loss/sent=0.0165, sent/sec=464.3979
loss/sent=0.0222, sent/sec=464.3864
loss/sent=0.0240, sent/sec=464.3639
loss/sent=0.0222, sent/sec=464.3413
loss/sent=0.0249, sent/sec=464.3154
loss/sent=0.0192, sent/sec=464.3176
loss/sent=0.0293, sent/sec=464.3219
loss/sent=0.0170, sent/sec=464.3132
loss/sent=0.0248, sent/sec=464.3127
loss/sent=0.0232, sent/sec=464.3011
loss/sent=0.0251, sent/sec=464.2941


epoch 74: dev recall@1=33.80% recall@5=58.00% recall@10=68.80%


loss/sent=0.0178, sent/sec=464.2663
loss/sent=0.0215, sent/sec=464.2600
loss/sent=0.0363, sent/sec=464.2409
loss/sent=0.0379, sent/sec=464.2147
loss/sent=0.0259, sent/sec=464.2060
loss/sent=0.0189, sent/sec=464.1965
loss/sent=0.0300, sent/sec=464.1532
loss/sent=0.0212, sent/sec=464.1404
loss/sent=0.0206, sent/sec=464.0989
loss/sent=0.0198, sent/sec=464.0656


epoch 75: dev recall@1=32.20% recall@5=58.40% recall@10=70.60%


loss/sent=0.0169, sent/sec=464.0176
loss/sent=0.0220, sent/sec=463.9653
loss/sent=0.0419, sent/sec=463.9404
loss/sent=0.0259, sent/sec=463.9049
loss/sent=0.0259, sent/sec=463.8696
loss/sent=0.0235, sent/sec=463.8420
loss/sent=0.0196, sent/sec=463.7981
loss/sent=0.0085, sent/sec=463.7498
loss/sent=0.0175, sent/sec=463.7350
loss/sent=0.0153, sent/sec=463.6743


epoch 76: dev recall@1=34.00% recall@5=59.20% recall@10=72.00%


loss/sent=0.0162, sent/sec=463.6427
loss/sent=0.0241, sent/sec=463.6372
loss/sent=0.0164, sent/sec=463.6150
loss/sent=0.0245, sent/sec=463.5937
loss/sent=0.0276, sent/sec=463.5765
loss/sent=0.0152, sent/sec=463.5748
loss/sent=0.0146, sent/sec=463.5348
loss/sent=0.0228, sent/sec=463.4922
loss/sent=0.0177, sent/sec=463.4583
loss/sent=0.0291, sent/sec=463.4150


epoch 77: dev recall@1=34.60% recall@5=61.00% recall@10=71.80%


loss/sent=0.0255, sent/sec=463.3652
loss/sent=0.0206, sent/sec=463.3031
loss/sent=0.0178, sent/sec=463.2689
loss/sent=0.0203, sent/sec=463.1914
loss/sent=0.0167, sent/sec=463.1471
loss/sent=0.0172, sent/sec=463.0916
loss/sent=0.0192, sent/sec=463.0491
loss/sent=0.0200, sent/sec=463.0332
loss/sent=0.0198, sent/sec=463.0222
loss/sent=0.0220, sent/sec=463.0023


epoch 78: dev recall@1=33.00% recall@5=61.40% recall@10=74.40%


loss/sent=0.0129, sent/sec=462.9641
loss/sent=0.0227, sent/sec=462.9392
loss/sent=0.0199, sent/sec=462.8830
loss/sent=0.0271, sent/sec=462.8447
loss/sent=0.0179, sent/sec=462.7901
loss/sent=0.0360, sent/sec=462.7299
loss/sent=0.0276, sent/sec=462.6961
loss/sent=0.0296, sent/sec=462.6861
loss/sent=0.0303, sent/sec=462.6656
loss/sent=0.0260, sent/sec=462.6438


epoch 79: dev recall@1=33.00% recall@5=60.80% recall@10=72.80%


loss/sent=0.0159, sent/sec=462.5877
loss/sent=0.0226, sent/sec=462.5453
loss/sent=0.0166, sent/sec=462.4993
loss/sent=0.0211, sent/sec=462.4758
loss/sent=0.0161, sent/sec=462.4416
loss/sent=0.0173, sent/sec=462.3862
loss/sent=0.0297, sent/sec=462.3358
loss/sent=0.0221, sent/sec=462.2896
loss/sent=0.0242, sent/sec=462.2439
loss/sent=0.0210, sent/sec=462.2040


epoch 80: dev recall@1=33.40% recall@5=60.60% recall@10=70.00%


loss/sent=0.0327, sent/sec=462.1473
loss/sent=0.0192, sent/sec=462.1287
loss/sent=0.0173, sent/sec=462.1169
loss/sent=0.0148, sent/sec=462.1199
loss/sent=0.0146, sent/sec=462.1326
loss/sent=0.0150, sent/sec=462.1383
loss/sent=0.0199, sent/sec=462.1249
loss/sent=0.0228, sent/sec=462.1126
loss/sent=0.0185, sent/sec=462.0977
loss/sent=0.0155, sent/sec=462.1123


epoch 81: dev recall@1=33.60% recall@5=61.40% recall@10=71.00%


loss/sent=0.0241, sent/sec=462.1082
loss/sent=0.0185, sent/sec=462.1195
loss/sent=0.0186, sent/sec=462.1425
loss/sent=0.0089, sent/sec=462.1374
loss/sent=0.0284, sent/sec=462.1551
loss/sent=0.0260, sent/sec=462.1724
loss/sent=0.0317, sent/sec=462.1586
loss/sent=0.0214, sent/sec=462.1519
loss/sent=0.0237, sent/sec=462.1780
loss/sent=0.0139, sent/sec=462.1918


epoch 82: dev recall@1=33.60% recall@5=61.40% recall@10=71.80%


loss/sent=0.0215, sent/sec=462.1983
loss/sent=0.0219, sent/sec=462.2172
loss/sent=0.0226, sent/sec=462.2350
loss/sent=0.0167, sent/sec=462.2526
loss/sent=0.0173, sent/sec=462.2789
loss/sent=0.0157, sent/sec=462.3008
loss/sent=0.0146, sent/sec=462.3201
loss/sent=0.0250, sent/sec=462.3246
loss/sent=0.0305, sent/sec=462.3469
loss/sent=0.0149, sent/sec=462.3537


epoch 83: dev recall@1=33.00% recall@5=59.60% recall@10=70.00%


loss/sent=0.0324, sent/sec=462.3503
loss/sent=0.0250, sent/sec=462.3463
loss/sent=0.0162, sent/sec=462.3431
loss/sent=0.0133, sent/sec=462.3516
loss/sent=0.0324, sent/sec=462.3806
loss/sent=0.0285, sent/sec=462.3928
loss/sent=0.0180, sent/sec=462.3983
loss/sent=0.0206, sent/sec=462.3948
loss/sent=0.0248, sent/sec=462.3944
loss/sent=0.0327, sent/sec=462.3882


epoch 84: dev recall@1=34.60% recall@5=61.40% recall@10=70.60%


loss/sent=0.0116, sent/sec=462.3754
loss/sent=0.0231, sent/sec=462.3783
loss/sent=0.0076, sent/sec=462.3837
loss/sent=0.0388, sent/sec=462.3761
loss/sent=0.0188, sent/sec=462.3656
loss/sent=0.0233, sent/sec=462.3393
loss/sent=0.0200, sent/sec=462.3275
loss/sent=0.0191, sent/sec=462.2941
loss/sent=0.0173, sent/sec=462.2750
loss/sent=0.0268, sent/sec=462.2704


epoch 85: dev recall@1=33.20% recall@5=60.00% recall@10=72.00%


loss/sent=0.0242, sent/sec=462.2594
loss/sent=0.0230, sent/sec=462.2404
loss/sent=0.0212, sent/sec=462.2489
loss/sent=0.0184, sent/sec=462.2478
loss/sent=0.0137, sent/sec=462.2341
loss/sent=0.0084, sent/sec=462.2511
loss/sent=0.0142, sent/sec=462.2797
loss/sent=0.0166, sent/sec=462.2802
loss/sent=0.0090, sent/sec=462.2968
loss/sent=0.0132, sent/sec=462.3240
loss/sent=0.0190, sent/sec=462.3520


epoch 86: dev recall@1=31.80% recall@5=61.00% recall@10=72.20%


loss/sent=0.0122, sent/sec=462.3567
loss/sent=0.0141, sent/sec=462.3686
loss/sent=0.0109, sent/sec=462.3830
loss/sent=0.0204, sent/sec=462.3971
loss/sent=0.0196, sent/sec=462.4006
loss/sent=0.0329, sent/sec=462.4068
loss/sent=0.0213, sent/sec=462.4032
loss/sent=0.0189, sent/sec=462.4038
loss/sent=0.0166, sent/sec=462.4093
loss/sent=0.0070, sent/sec=462.4047


epoch 87: dev recall@1=33.40% recall@5=59.80% recall@10=71.40%


loss/sent=0.0120, sent/sec=462.3747
loss/sent=0.0172, sent/sec=462.3844
loss/sent=0.0186, sent/sec=462.3863
loss/sent=0.0184, sent/sec=462.3922
loss/sent=0.0194, sent/sec=462.3874
loss/sent=0.0148, sent/sec=462.3836
loss/sent=0.0172, sent/sec=462.3742
loss/sent=0.0229, sent/sec=462.3781
loss/sent=0.0250, sent/sec=462.3941
loss/sent=0.0315, sent/sec=462.4175


epoch 88: dev recall@1=32.60% recall@5=61.00% recall@10=71.00%


loss/sent=0.0264, sent/sec=462.4083
loss/sent=0.0172, sent/sec=462.4050
loss/sent=0.0209, sent/sec=462.4022
loss/sent=0.0151, sent/sec=462.3927
loss/sent=0.0199, sent/sec=462.3372
loss/sent=0.0304, sent/sec=462.2794
loss/sent=0.0197, sent/sec=462.2270
loss/sent=0.0076, sent/sec=462.1692
loss/sent=0.0163, sent/sec=462.1161
loss/sent=0.0143, sent/sec=462.0982


epoch 89: dev recall@1=32.20% recall@5=60.80% recall@10=72.00%


loss/sent=0.0177, sent/sec=462.0843
loss/sent=0.0087, sent/sec=462.0789
loss/sent=0.0148, sent/sec=462.0711
loss/sent=0.0154, sent/sec=462.0491
loss/sent=0.0261, sent/sec=462.0384
loss/sent=0.0128, sent/sec=462.0340
loss/sent=0.0084, sent/sec=462.0319
loss/sent=0.0187, sent/sec=462.0187
loss/sent=0.0209, sent/sec=462.0119
loss/sent=0.0179, sent/sec=461.9898


epoch 90: dev recall@1=31.60% recall@5=58.40% recall@10=70.00%


loss/sent=0.0128, sent/sec=461.9647
loss/sent=0.0148, sent/sec=461.9159
loss/sent=0.0165, sent/sec=461.8407
loss/sent=0.0266, sent/sec=461.7964
loss/sent=0.0148, sent/sec=461.7374
loss/sent=0.0146, sent/sec=461.7001
loss/sent=0.0254, sent/sec=461.6718
loss/sent=0.0288, sent/sec=461.6303
loss/sent=0.0205, sent/sec=461.6243
loss/sent=0.0196, sent/sec=461.5989


epoch 91: dev recall@1=32.60% recall@5=61.40% recall@10=71.60%


loss/sent=0.0081, sent/sec=461.4830
loss/sent=0.0172, sent/sec=461.4165
loss/sent=0.0170, sent/sec=461.3226
loss/sent=0.0310, sent/sec=461.2721
loss/sent=0.0128, sent/sec=461.1831
loss/sent=0.0175, sent/sec=461.1146
loss/sent=0.0169, sent/sec=461.0521
loss/sent=0.0213, sent/sec=460.9852
loss/sent=0.0114, sent/sec=460.9202
loss/sent=0.0185, sent/sec=460.8912


epoch 92: dev recall@1=33.60% recall@5=60.80% recall@10=72.00%


loss/sent=0.0099, sent/sec=460.8043
loss/sent=0.0219, sent/sec=460.7385
loss/sent=0.0131, sent/sec=460.6880
loss/sent=0.0148, sent/sec=460.6104
loss/sent=0.0096, sent/sec=460.5402
loss/sent=0.0081, sent/sec=460.4522
loss/sent=0.0159, sent/sec=460.3299
loss/sent=0.0158, sent/sec=460.2649
loss/sent=0.0140, sent/sec=460.1514
loss/sent=0.0120, sent/sec=460.0679


epoch 93: dev recall@1=31.80% recall@5=60.20% recall@10=72.20%


loss/sent=0.0160, sent/sec=459.9958
loss/sent=0.0214, sent/sec=459.9303
loss/sent=0.0122, sent/sec=459.8722
loss/sent=0.0233, sent/sec=459.8309
loss/sent=0.0198, sent/sec=459.7795
loss/sent=0.0154, sent/sec=459.7252
loss/sent=0.0159, sent/sec=459.7024
loss/sent=0.0157, sent/sec=459.6480
loss/sent=0.0200, sent/sec=459.6215
loss/sent=0.0149, sent/sec=459.5835


epoch 94: dev recall@1=31.20% recall@5=59.00% recall@10=70.40%


loss/sent=0.0239, sent/sec=459.5272
loss/sent=0.0117, sent/sec=459.4846
loss/sent=0.0173, sent/sec=459.4692
loss/sent=0.0100, sent/sec=459.4274
loss/sent=0.0149, sent/sec=459.3944
loss/sent=0.0208, sent/sec=459.3635
loss/sent=0.0214, sent/sec=459.3456
loss/sent=0.0108, sent/sec=459.3253
loss/sent=0.0220, sent/sec=459.3149
loss/sent=0.0087, sent/sec=459.2950


epoch 95: dev recall@1=33.40% recall@5=61.00% recall@10=72.60%


loss/sent=0.0177, sent/sec=459.2756
loss/sent=0.0143, sent/sec=459.2564
loss/sent=0.0097, sent/sec=459.1686
loss/sent=0.0084, sent/sec=459.0996
loss/sent=0.0168, sent/sec=459.0051
loss/sent=0.0162, sent/sec=458.8910
loss/sent=0.0151, sent/sec=458.8057
loss/sent=0.0158, sent/sec=458.7732
loss/sent=0.0153, sent/sec=458.7292
loss/sent=0.0114, sent/sec=458.6641


epoch 96: dev recall@1=31.80% recall@5=60.20% recall@10=70.60%


loss/sent=0.0189, sent/sec=458.5893
loss/sent=0.0111, sent/sec=458.5002
loss/sent=0.0103, sent/sec=458.4000
loss/sent=0.0102, sent/sec=458.3046
loss/sent=0.0225, sent/sec=458.1979
loss/sent=0.0065, sent/sec=458.1306
loss/sent=0.0191, sent/sec=458.0937
loss/sent=0.0092, sent/sec=458.0555
loss/sent=0.0144, sent/sec=457.9917
loss/sent=0.0066, sent/sec=457.9703


epoch 97: dev recall@1=29.40% recall@5=56.80% recall@10=72.20%


loss/sent=0.0177, sent/sec=457.9248
loss/sent=0.0150, sent/sec=457.8678
loss/sent=0.0210, sent/sec=457.8113
loss/sent=0.0236, sent/sec=457.7284
loss/sent=0.0168, sent/sec=457.6812
loss/sent=0.0057, sent/sec=457.6104
loss/sent=0.0277, sent/sec=457.5545
loss/sent=0.0172, sent/sec=457.4835
loss/sent=0.0221, sent/sec=457.3812
loss/sent=0.0221, sent/sec=457.2928


epoch 98: dev recall@1=34.00% recall@5=62.20% recall@10=72.80%


loss/sent=0.0190, sent/sec=457.1725
loss/sent=0.0142, sent/sec=457.0568
loss/sent=0.0092, sent/sec=456.9892
loss/sent=0.0154, sent/sec=456.9230
loss/sent=0.0235, sent/sec=456.8271
loss/sent=0.0155, sent/sec=456.7083
loss/sent=0.0183, sent/sec=456.6017
loss/sent=0.0202, sent/sec=456.5004
loss/sent=0.0184, sent/sec=456.3781
loss/sent=0.0162, sent/sec=456.2576
loss/sent=0.0168, sent/sec=456.1513


epoch 99: dev recall@1=33.00% recall@5=61.00% recall@10=72.00%
