### Baseline for DROP dataset

In [1]:
import torch
import numpy as np
import importlib
import allennlp


from allennlp.models.reading_comprehension.naqanet import NumericallyAugmentedQaNet
from allennlp.data.dataset_readers.reading_comprehension.drop import DropReader
from allennlp.models.encoder_decoders.simple_seq2seq import SimpleSeq2Seq
from allennlp.data.vocabulary import Vocabulary
from allennlp.modules.matrix_attention.bilinear_matrix_attention import BilinearMatrixAttention
from allennlp.data.iterators import BucketIterator
from allennlp.modules.text_field_embedders import TextFieldEmbedder, BasicTextFieldEmbedder
from allennlp.modules.token_embedders import Embedding
from allennlp.modules.seq2seq_encoders import Seq2SeqEncoder, PytorchSeq2SeqWrapper
from allennlp.nn.util import get_text_field_mask, sequence_cross_entropy_with_logits
from allennlp.training.trainer import Trainer

In [2]:
reader = DropReader()

In [4]:
train = reader._read("../data/drop_dataset/drop_dataset_dev.json")
dev = reader._read("../data/drop_dataset/drop_dataset_dummy.json")

In [5]:
vocab = Vocabulary.from_instances(train + dev)

100%|██████████| 10077/10077 [00:01<00:00, 8208.27it/s]


In [6]:
EMBEDDING_DIM = 6
HIDDEN_DIM = 6

In [7]:
token_embedding = Embedding(num_embeddings=vocab.get_vocab_size('tokens'),
                            embedding_dim=EMBEDDING_DIM)
word_embeddings = BasicTextFieldEmbedder({"tokens": token_embedding})

In [8]:
lstm1 = PytorchSeq2SeqWrapper(torch.nn.LSTM(EMBEDDING_DIM, HIDDEN_DIM, batch_first=True))
lstm2 = PytorchSeq2SeqWrapper(torch.nn.LSTM(EMBEDDING_DIM, HIDDEN_DIM, batch_first=True))
attn = BilinearMatrixAttention(EMBEDDING_DIM, HIDDEN_DIM)

In [9]:
model = NumericallyAugmentedQaNet(vocab, word_embeddings, 2, lstm1, attn, lstm2 )

In [10]:
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
iterator = BucketIterator(batch_size=2, sorting_keys=[("passage", "num_tokens")])
iterator.index_with(vocab)

In [11]:
trainer = Trainer(model=model,
                  optimizer=optimizer,
                  iterator=iterator,
                  train_dataset=train,
                  validation_dataset=dev,
                  patience=10,
                  num_epochs=1000,
                  cuda_device=-1)

In [12]:
trainer.train()

em: 0.0124, f1: 0.0211, loss: 1960995.6096 ||: 100%|██████████| 4768/4768 [18:27<00:00,  4.32it/s]
em: 0.0259, f1: 0.0321, loss: 1125467.3066 ||: 100%|██████████| 271/271 [00:13<00:00,  6.79it/s]
em: 0.0459, f1: 0.0843, loss: 1960994.8242 ||: 100%|██████████| 4768/4768 [18:10<00:00,  4.23it/s]
em: 0.0795, f1: 0.1119, loss: 1125466.4940 ||: 100%|██████████| 271/271 [00:13<00:00,  7.14it/s]
em: 0.0500, f1: 0.0839, loss: 1960994.6929 ||: 100%|██████████| 4768/4768 [18:24<00:00,  4.85it/s]
em: 0.0277, f1: 0.0503, loss: 1125466.3285 ||: 100%|██████████| 271/271 [00:13<00:00,  6.88it/s]
em: 0.0545, f1: 0.0952, loss: 1960994.4897 ||: 100%|██████████| 4768/4768 [18:00<00:00,  3.62it/s]
em: 0.0277, f1: 0.0612, loss: 1125466.2227 ||: 100%|██████████| 271/271 [00:13<00:00,  7.28it/s]
em: 0.0536, f1: 0.0989, loss: 1998671.9102 ||:  31%|███▏      | 1501/4768 [05:25<14:12,  3.83it/s]

KeyboardInterrupt: 