### Baseline for DROP dataset

In [1]:
import importlib
import random

import numpy as np
import torch

import allennlp
from allennlp.data.dataset_readers.reading_comprehension.drop import DropReader
from allennlp.data.iterators import BucketIterator
from allennlp.data.vocabulary import Vocabulary
from allennlp.models.encoder_decoders.simple_seq2seq import SimpleSeq2Seq
from allennlp.models.reading_comprehension.naqanet import NumericallyAugmentedQaNet
from allennlp.modules.matrix_attention.bilinear_matrix_attention import BilinearMatrixAttention
from allennlp.modules.seq2seq_encoders import Seq2SeqEncoder, PytorchSeq2SeqWrapper
from allennlp.modules.text_field_embedders import TextFieldEmbedder, BasicTextFieldEmbedder
from allennlp.modules.token_embedders import Embedding
from allennlp.nn.util import get_text_field_mask, sequence_cross_entropy_with_logits
from allennlp.training.trainer import Trainer

In [2]:
# Dataset reader for the DROP JSON files, built with its default settings
# (no arguments are overridden here).
reader = DropReader()

In [3]:
# Load the instances through the reader's public `read()` entry point rather
# than the private `_read()` hook, so the reader's own handling (lazy mode,
# caching, materialising the instances) is applied consistently.
#
# NOTE(review): `train` is loaded from the *dev* split file and `dev` from a
# "dummy" file. The recorded training output shows the same number of batches
# (4768) for the training and validation passes, so the two files appear to be
# the same size -- confirm this split is intentional before trusting the
# validation metrics.
train = reader.read("../data/drop_dataset/drop_dataset_dev.json")
dev = reader.read("../data/drop_dataset/drop_dataset_dummy.json")

In [4]:
# Build the vocabulary over every instance from both splits.
all_instances = train + dev
vocab = Vocabulary.from_instances(all_instances)

100%|██████████| 19072/19072 [00:02<00:00, 8150.79it/s]


In [5]:
# Model sizes. Both are deliberately tiny so the baseline trains on CPU.
EMBEDDING_DIM = 6
HIDDEN_DIM = 6

# Fix the random seeds before any weights are created so that a
# Restart-and-Run-All reproduces the same initialisation. Python's `random`
# is seeded as well because data iterators may shuffle with it.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

In [None]:
# Embedding table for the 'tokens' vocabulary namespace; no pretrained
# weights are passed in, only the table size and the embedding width.
vocab_size = vocab.get_vocab_size('tokens')
token_embedding = Embedding(num_embeddings=vocab_size, embedding_dim=EMBEDDING_DIM)

# Wrap the single token embedder so it can embed text fields indexed under "tokens".
word_embeddings = BasicTextFieldEmbedder(token_embedders={"tokens": token_embedding})

In [None]:
# Phrase layer: encodes the embedded question and passage tokens.
lstm1 = PytorchSeq2SeqWrapper(torch.nn.LSTM(EMBEDDING_DIM, HIDDEN_DIM, batch_first=True))

# Modeling layer: NAQANet applies this encoder repeatedly to its own output,
# so its input size must equal its output size (HIDDEN_DIM on both sides).
# The previous EMBEDDING_DIM input only worked because the two constants
# happen to be equal.
# NOTE(review): based on the AllenNLP NAQANet implementation -- confirm
# against the installed version.
lstm2 = PytorchSeq2SeqWrapper(torch.nn.LSTM(HIDDEN_DIM, HIDDEN_DIM, batch_first=True))

# Passage-question similarity: both operands are phrase-layer outputs, so
# both matrix dimensions are HIDDEN_DIM.
attn = BilinearMatrixAttention(HIDDEN_DIM, HIDDEN_DIM)

In [None]:
# Assemble the NAQANet model from the components built above, with every
# argument passed by keyword so each role is explicit.
model = NumericallyAugmentedQaNet(
    vocab=vocab,
    text_field_embedder=word_embeddings,
    num_highway_layers=2,
    phrase_layer=lstm1,
    matrix_attention_layer=attn,
    modeling_layer=lstm2,
)

In [None]:
# Batches of two instances, bucketed by passage token count, indexed with
# the vocabulary built earlier.
iterator = BucketIterator(sorting_keys=[("passage", "num_tokens")], batch_size=2)
iterator.index_with(vocab)

# Plain SGD over all model parameters.
optimizer = torch.optim.SGD(params=model.parameters(), lr=0.1)

In [None]:
# Training driver for the baseline.
#
# Early stopping (`patience`) and best-epoch tracking follow validation F1
# instead of the trainer's default of validation loss. In the recorded runs
# the loss sits at about 1.96e6 and changes only in the last digits, while
# em/f1 are the metrics this baseline is actually judged on.
# `cuda_device=-1` keeps training on the CPU.
trainer = Trainer(model=model,
                  optimizer=optimizer,
                  iterator=iterator,
                  train_dataset=train,
                  validation_dataset=dev,
                  validation_metric="+f1",
                  patience=10,
                  num_epochs=1000,
                  cuda_device=-1)

In [None]:
# Run the training loop configured above. The progress lines recorded below
# report em, f1 and loss for each pass over the data.
trainer.train()

em: 0.0084, f1: 0.0163, loss: 1960995.6629 ||: 100%|██████████| 4768/4768 [18:34<00:00,  4.78it/s]
em: 0.0090, f1: 0.0220, loss: 1960995.3151 ||: 100%|██████████| 4768/4768 [03:52<00:00,  3.92it/s]
em: 0.0381, f1: 0.0714, loss: 1960995.0116 ||: 100%|██████████| 4768/4768 [18:06<00:00,  5.60it/s]
em: 0.0428, f1: 0.1006, loss: 1960994.3583 ||: 100%|██████████| 4768/4768 [03:44<00:00,  4.14it/s]
em: 0.0549, f1: 0.0933, loss: 1960994.6337 ||: 100%|██████████| 4768/4768 [17:30<00:00,  4.54it/s]
em: 0.0521, f1: 0.1126, loss: 1960994.2154 ||: 100%|██████████| 4768/4768 [03:34<00:00,  4.25it/s]
em: 0.0673, f1: 0.1102, loss: 1960994.3408 ||: 100%|██████████| 4768/4768 [17:06<00:00,  4.98it/s]
em: 0.0788, f1: 0.1218, loss: 1960993.9575 ||: 100%|██████████| 4768/4768 [03:38<00:00,  4.33it/s]
em: 0.0749, f1: 0.1182, loss: 1960994.2264 ||: 100%|██████████| 4768/4768 [17:16<00:00,  4.71it/s]
em: 0.0236, f1: 0.0551, loss: 1960995.3339 ||: 100%|██████████| 4768/4768 [03:39<00:00,  4.21it/s]
em: 0.0239