## Parameters

In [None]:
# Path prefix prepended to every data/ and save/ location used in this notebook.
ROOT_DIR = "../"

# Data loading: passed to DropReader as both the passage and the question length limit.
TEXT_LENGTH_LIMIT = 512

# Vocabulary: passed as the min_count for the 'tokens' namespace.
MIN_COUNT = 2

# Model: token embedding size and self-attention encoder settings.
EMBEDDING_DIM = 256  # embedding_dim of the token embedding and input_dim of the encoder
HIDDEN_DIM = 256  # used for the encoder's hidden, projection and feedforward dims
NUM_ATTENTION_HEADS = 8
NUM_LAYERS = 2

# Training: iterator batch size, Adam learning rate, and Trainer epoch/patience settings.
BATCH_SIZE = 8
LR = 1e-4
EPOCHS = 100  # passed to the Trainer as num_epochs
PATIENCE = 4  # passed to the Trainer as patience

## Packages

In [None]:
import itertools
from overrides import overrides

import numpy as np
import pandas as pd

from typing import *

import torch
import torch.nn as nn
import torch.optim as optim

## DropReader

In [None]:
from allennlp.data.dataset_readers.reading_comprehension.drop import DropReader

# The same length limit is applied to passages and to questions.
reader = DropReader(
    passage_length_limit=TEXT_LENGTH_LIMIT,
    question_length_limit=TEXT_LENGTH_LIMIT,
)

In [None]:
# Locations of the DROP train and dev splits, relative to ROOT_DIR.
train_path = ROOT_DIR + "data/drop_dataset/drop_dataset_train.json"
dev_path = ROOT_DIR + "data/drop_dataset/drop_dataset_dev.json"

# Read both splits with the same reader.
train_dataset = reader.read(train_path)
dev_dataset = reader.read(dev_path)

In [None]:
#tmp = next(iter(train_dataset))
#vars(tmp)

## Model

In [None]:
from allennlp.data.vocabulary import Vocabulary

# Build the vocabulary from the training instances only, applying MIN_COUNT
# to the 'tokens' namespace.
token_min_counts = {'tokens': MIN_COUNT}
vocab = Vocabulary.from_instances(train_dataset, min_count=token_min_counts)
vocab

In [None]:
from allennlp.modules.token_embedders import Embedding
from allennlp.modules.text_field_embedders import BasicTextFieldEmbedder

# Size the embedding table from the 'tokens' namespace of the vocabulary.
token_vocab_size = vocab.get_vocab_size('tokens')
embedding = Embedding(num_embeddings=token_vocab_size, embedding_dim=EMBEDDING_DIM)

# Register the embedding under the "tokens" key of the text field embedder.
source_embedder = BasicTextFieldEmbedder({"tokens": embedding})

In [None]:
from allennlp.modules.seq2seq_encoders import StackedSelfAttentionEncoder

# Self-attention encoder: takes EMBEDDING_DIM-sized inputs and uses HIDDEN_DIM
# for its hidden, projection and feedforward sizes.
encoder = StackedSelfAttentionEncoder(
    input_dim=EMBEDDING_DIM,
    hidden_dim=HIDDEN_DIM,
    projection_dim=HIDDEN_DIM,
    feedforward_hidden_dim=HIDDEN_DIM,
    num_layers=NUM_LAYERS,
    num_attention_heads=NUM_ATTENTION_HEADS,
)

In [None]:
from allennlp.models.reading_comprehension.naqanet import NumericallyAugmentedQaNet
from allennlp.modules.matrix_attention.dot_product_matrix_attention import DotProductMatrixAttention

# NOTE(review): the same `encoder` object is passed as both phrase_layer and
# modeling_layer, so the two layers share one set of weights -- confirm that
# this tying is intended rather than two separate encoders.
model = NumericallyAugmentedQaNet(
    vocab=vocab,
    text_field_embedder=source_embedder,
    num_highway_layers=2,
    phrase_layer=encoder,
    matrix_attention_layer=DotProductMatrixAttention(),
    modeling_layer=encoder,
)



In [None]:
from allennlp.data.iterators import BucketIterator

# Adam over all model parameters.
optimizer = optim.Adam(model.parameters(), lr=LR)

# Bucket batches by the number of passage tokens.
passage_length_key = [("passage", "num_tokens")]
iterator = BucketIterator(batch_size=BATCH_SIZE, sorting_keys=passage_length_key)

# The iterator needs the vocabulary to index instances.
iterator.index_with(vocab)

In [None]:
# Pick the device once: GPU 0 when CUDA is available, otherwise -1 for CPU.
gpu_available = torch.cuda.is_available()
print("cuda" if gpu_available else "cpu")
cuda_device = 0 if gpu_available else -1

# Only move the model when a GPU was selected.
if gpu_available:
    model = model.cuda(cuda_device)

## Training

In [None]:
from allennlp.training.trainer import Trainer

# Train on the training split and validate on the dev split, with PATIENCE
# and EPOCHS passed through as the Trainer's patience and num_epochs.
trainer = Trainer(
    model=model,
    optimizer=optimizer,
    iterator=iterator,
    train_dataset=train_dataset,
    validation_dataset=dev_dataset,
    patience=PATIENCE,
    num_epochs=EPOCHS,
    cuda_device=cuda_device,
)

In [None]:
# Run training with the Trainer configured in the previous cell.
trainer.train()

## Saving

In [None]:
import os

# Create the output directory up front so that a missing "save/" folder
# cannot abort the save after training has already finished.
model_path = ROOT_DIR + "save/naqanet_model.th"
os.makedirs(os.path.dirname(model_path), exist_ok=True)

# Persist only the weights (state_dict); the model has to be rebuilt with the
# same architecture before they can be loaded back.
with open(model_path, 'wb') as f:
    torch.save(model.state_dict(), f)

In [None]:
# Write the vocabulary to disk so it can be restored later with Vocabulary.from_files.
vocab.save_to_files(ROOT_DIR + "save/naqanet_vocabulary")

## Results

In [None]:
# Restore the vocabulary that was written next to the model weights.
vocab2 = Vocabulary.from_files(ROOT_DIR + "save/naqanet_vocabulary")

# Rebuild the architecture with the same settings as the trained model.
# NOTE(review): `source_embedder` and `encoder` are the very module objects
# already used by `model`, so `model2` shares those submodules with it rather
# than owning fresh copies -- confirm this is intended.
model2 = NumericallyAugmentedQaNet(
    vocab=vocab2,
    text_field_embedder=source_embedder,
    num_highway_layers=2,
    phrase_layer=encoder,
    matrix_attention_layer=DotProductMatrixAttention(),
    modeling_layer=encoder,
)

# map_location="cpu" lets a checkpoint saved from a GPU be loaded on a
# machine without one; load_state_dict then copies the tensors into the
# model's own parameters.
with open(ROOT_DIR + "save/naqanet_model.th", 'rb') as f:
    model2.load_state_dict(torch.load(f, map_location="cpu"))

# A freshly constructed module starts in training mode, and (in the AllenNLP
# 0.x API used here) wrapping it in a Predictor directly does not appear to
# change that. Switch to eval mode so dropout is disabled at prediction time.
model2.eval()

if cuda_device > -1:
    model2.cuda(cuda_device)

In [None]:
from allennlp.predictors.predictor import Predictor

# Pair the reloaded model with the dataset reader for instance-level prediction.
predictor = Predictor(model=model2, dataset_reader=reader)

In [None]:
# Show gold vs. predicted answers for the first 20 dev instances.
for instance in itertools.islice(dev_dataset, 20):
    predicted_answer = predictor.predict_instance(instance)['answer']

    # Only predictions that carry a 'value' entry are printed.
    if 'value' not in predicted_answer:
        continue

    print('Passage:', instance.fields['passage'].tokens)
    print('Question:', instance.fields['question'].tokens)
    print('GOLD:', instance.fields['metadata'].metadata['answer_texts'])
    print('PRED:', predicted_answer['value'])
    print("=" * 10)
    print()

In [None]:
from allennlp.training.util import evaluate

# Score the reloaded model on the dev split, reusing the training iterator.
# NOTE(review): the empty batch_weight_key presumably disables per-batch
# weighting -- confirm against the installed AllenNLP version.
metrics = evaluate(
    model2,
    dev_dataset,
    iterator,
    cuda_device,
    batch_weight_key="",
)

In [None]:
# Display the metrics returned by evaluate() for the dev split.
print(metrics)