<a href="https://colab.research.google.com/github/karvesaket/retroqa/blob/master/CIS_700_Final_Project.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Question Answering - Microsoft NewsQA Dataset

# Initial Setup

## Connect Google Drive (R)

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/drive


## Tensorboad setup (R)

In [0]:
try:
  # %tensorflow_version only exists in Colab.
  %tensorflow_version 2.x
except Exception:
  pass

# Load the TensorBoard notebook extension
%load_ext tensorboard

from torch.utils.tensorboard import SummaryWriter

# Data Loading and Processing  (R)

In [3]:
import torch
from torchtext import data

import nltk
nltk.download('punkt')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


True

 (R)

In [4]:
import pandas as pd

def populateDatafields(somedf, col_dict):
  datafields = []
  for col in somedf.columns:
    if col in col_dict.keys():
      datafields.append((col, col_dict[col]))
    # if(col == "story_text"):
    #   datafields.append((col, col_dict[col]))
    # elif col == "question":
    #   datafields.append((col, TEXT))
    else:
      datafields.append((col, None))
  return datafields

newsqa_df = pd.read_csv('/content/drive/Shared drives/CIS 700-1 Final Project/Data/combined-newsqa-data-json-spacy-LATEST-train.csv')
newsqa_df

Unnamed: 0,story_id,question,is_answer_absent,is_question_bad,validated_answers,story_text,char_start_index,char_end_index,char_text,word_start_index,word_end_index,word_text
0,./cnn/stories/b94ba34137ea0eea08e20b7e1580ad59...,What does the VP say?,1.000000,0.000000,,Former Vice President Dick Cheney said Sunday ...,,,,,,
1,./cnn/stories/e65cfdce945df6624f2516a9ad2f9292...,What show was Susan on?,0.000000,0.000000,,The Scottish woman who became an Internet sing...,299,322,"""Britain's Got Talent.""\n",59,66,""" Britain 's Got Talent . "" \n\n\n\n"
2,./cnn/stories/8fbcaf3abc124b7baaa278d382411f43...,what about the cubans,0.333333,0.666667,,Hurricane Gustav churned into the Gulf of Mexi...,,,,,,
3,./cnn/stories/ce137f38b5e2f793d25cee8b458a8add...,What does the book chroncile?,0.500000,0.500000,,"""Sesame Street"" may not be a real place, but t...",,,,,,
4,./cnn/stories/1d8bc084e34d2f35349c773bfcde3422...,Is there a trust fund established for her?,0.000000,0.000000,,A California woman who turned up alive 18 year...,2747,2755,has been,571,572,has been
...,...,...,...,...,...,...,...,...,...,...,...,...
82342,./cnn/stories/4776859089a777c567f574bc2145e8af...,What is a key stumbling block?,0.000000,0.000000,,Israeli government ministers Wednesday overwhe...,4742,4764,Continued construction,888,889,Continued construction
82343,./cnn/stories/94b37c8f266c3fe90e30a565ef0e4edf...,What were the protests about?,0.000000,0.000000,,France is sending four state police units to i...,753,808,low wages and living conditions in the Caribbe...,135,143,low wages and living conditions in the Caribbe...
82344,./cnn/stories/e16f0e93099b1b467bfbf8ec58426ca8...,What was the storm's name?,0.333333,0.000000,"{""10:27"": 2}",Hurricane Paloma continued to intensify Friday...,0,16,Hurricane Paloma,0,1,Hurricane Paloma
82345,./cnn/stories/7b40a01e9a18baf4506e6c06a13f3ca9...,Who wanted to bring girlfriend,0.000000,0.000000,,Walking into school Wednesday morning was not ...,55,74,Constance McMillen.,9,11,Constance McMillen .


## Split data to train and test
**Do not run this everytime**

In [0]:
from sklearn.model_selection import train_test_split
# create train and validation set 
train, val = train_test_split(newsqa_df, test_size=0.2)
train.to_csv('/content/drive/Shared drives/CIS 700-1 Final Project/Data/combined-newsqa-data-json-spacy-answerable-train.csv', index=False)
val.to_csv('/content/drive/Shared drives/CIS 700-1 Final Project/Data/combined-newsqa-data-json-spacy-answerable-val.csv', index=False)

## Load data and create iterators (R)

NOTE - Change mode to 'bert' here when running for bert

Also remember to change the train and val files to correct files

In [5]:
VERBOSE = True
RUN_MODE = 'word' # ['word', 'sentence', 'bert']

def split_start(x, y):
  idx = x[0].split(",")[0]
  if idx == 'None':
    return int(-1)
  else:
    return int(idx)

process_start = data.Pipeline(split_start)

def split_end(x, y):
  if len(x[0].split(",")) > 1:
    idx = x[0].split(",")[len(x[0].split(",")) - 1]
    if idx == 'None':
      return int(-1)
    else:
      return int(idx)
  else:
    idx = x[0].split(",")[0]
    if idx == 'None':
      return int(-1)
    else:
      return int(idx)

process_end = data.Pipeline(split_end)

import math
def floor_label(x, y):
  return math.floor(float(x[0]))

process_answerable = data.Pipeline(floor_label)

if RUN_MODE == 'bert':
  !pip install transformers
  from transformers import BertTokenizer, AlbertTokenizer

  # Load the BERT tokenizer.
  print('Loading BERT tokenizer...')
  tokenizer = BertTokenizer.from_pretrained('bert-base-uncased', do_lower_case=True)
  # tokenizer = AlbertTokenizer.from_pretrained('albert-large-v2', do_lower_case=True)

  # print("Original: ", newsqa_df['story_text'][0])
  print("Tokenized: ", tokenizer.tokenize(newsqa_df['story_text'][0]))
  # print(len(tokenizer.vocab))


  init_token_idx = tokenizer.cls_token_id
  eos_token_idx = tokenizer.sep_token_id
  pad_token_idx = tokenizer.pad_token_id
  unk_token_idx = tokenizer.unk_token_id

  max_input_length = tokenizer.max_model_input_sizes['bert-base-uncased']
  # max_input_length = tokenizer.max_model_input_sizes['albert-base-v2']

  def tokenize_and_cut(sentence):
    tokens = tokenizer.tokenize(sentence) 
    # tokens = tokens[:max_input_length-2]
    return tokens

  BERT_FIELD = data.Field(batch_first = True,
              use_vocab = False,
              tokenize = tokenize_and_cut,
              preprocessing = tokenizer.convert_tokens_to_ids,
              init_token = init_token_idx,
              eos_token = eos_token_idx,
              pad_token = pad_token_idx,
              unk_token = unk_token_idx)

from nltk.tokenize import sent_tokenize
WORD_MODE_FIELD = data.Field(sequential=True, lower=True, tokenize='spacy', include_lengths=True, batch_first=True)
WORD_FIELD = data.Field(sequential=True, lower=True, tokenize='spacy')
SENTENCE_FIELD = data.NestedField(WORD_FIELD, tokenize=sent_tokenize, include_lengths=True)
START_INDEX = data.Field(sequential=True, postprocessing=process_start, use_vocab=False)
END_INDEX = data.Field(sequential=True, postprocessing=process_end, use_vocab=False)
ANSWERABLE = data.Field(sequential=True, postprocessing=process_answerable, use_vocab=False)


if RUN_MODE == 'word':
  col_dict = {'story_text': WORD_MODE_FIELD, 'question': WORD_MODE_FIELD, 'word_start_index': START_INDEX, 'word_end_index': END_INDEX, 'is_answer_absent': ANSWERABLE}
elif RUN_MODE == 'bert':
  col_dict = {'story_text': BERT_FIELD, 'question': BERT_FIELD, 'word_start_index_1': START_INDEX, 'word_end_index_1': END_INDEX, 'is_answer_absent': ANSWERABLE}
elif RUN_MODE == 'sentence':
  col_dict = {'story_text': SENTENCE_FIELD, 'question': WORD_FIELD, 'word_start_index_1': START_INDEX, 'word_end_index_1': END_INDEX, 'is_answer_absent': ANSWERABLE}

datafields = populateDatafields(newsqa_df, col_dict)
if VERBOSE:
  print(datafields)

print("Building Dataset...")
training_data=data.TabularDataset(path = '/content/drive/Shared drives/CIS 700-1 Final Project/Data/combined-newsqa-data-json-spacy-LATEST-train.csv',\
                                  format = 'csv',\
                                  fields = datafields,\
                                  skip_header = True)

validation_data=data.TabularDataset(path = '/content/drive/Shared drives/CIS 700-1 Final Project/Data/combined-newsqa-data-json-spacy-LATEST-val.csv',\
                                  format = 'csv',\
                                  fields = datafields,\
                                  skip_header = True)

if VERBOSE:
  count = 0
  for t in training_data:
    print("*******************************")
    print("Story Text: ", len(t.story_text), t.story_text)
    print("Question: ", t.question)
    print("Start Index: ", t.word_start_index)
    print("End Index: ", t.word_end_index)
    print("Unanswerable: ", t.is_answer_absent)
    # print("Answer: ", t.story_text[int(t.word_start_index_1):int(t.word_end_index_1)])

    if count > 5:
      break
    count += 1

from torchtext.vocab import Vectors, GloVe

print("Building Vocab...")

name = '6B'
dim = 300
if RUN_MODE == 'word':
  WORD_MODE_FIELD.build_vocab(training_data, validation_data, min_freq = 3, vectors=GloVe(name = name, dim = dim))
  if VERBOSE:
    print("Length of Vocab: ", len(WORD_MODE_FIELD.vocab))
elif RUN_MODE == 'sentence':
  SENTENCE_FIELD.build_vocab(training_data, validation_data, min_freq = 3, vectors=GloVe(name = name, dim = dim))
  if VERBOSE:
    print("Length of Vocab: ", len(SENTENCE_FIELD.vocab))

if RUN_MODE == 'word':
  FIELD = WORD_MODE_FIELD
elif RUN_MODE == 'sentence':
  FIELD = SENTENCE_FIELD

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')  
BATCH_SIZE = 32

print("Initializing the iterator...")
# Define the train iterator
train_iterator = data.BucketIterator(
    training_data, 
    batch_size = BATCH_SIZE,
    sort_key = lambda x: len(x.story_text),
    sort_within_batch = True,
    repeat=False, 
    shuffle=True,
    device=device)

val_iterator = data.BucketIterator(
    validation_data, 
    batch_size = 1,
    sort_key = lambda x: len(x.story_text),
    sort_within_batch = False,
    sort=False,
    repeat=False,
    shuffle=False,
    device=device)

if VERBOSE:
  for batch in train_iterator:
    print("Story: ", batch.story_text[0].shape, batch.story_text[1].shape)
    print("Start/End: ", batch.word_start_index, batch.word_end_index, batch.is_answer_absent)
    break

[('story_id', None), ('question', <torchtext.data.field.Field object at 0x7fc42fa352e8>), ('is_answer_absent', <torchtext.data.field.Field object at 0x7fc3d6f6e4a8>), ('is_question_bad', None), ('validated_answers', None), ('story_text', <torchtext.data.field.Field object at 0x7fc42fa352e8>), ('char_start_index', None), ('char_end_index', None), ('char_text', None), ('word_start_index', <torchtext.data.field.Field object at 0x7fc3d6f6e710>), ('word_end_index', <torchtext.data.field.Field object at 0x7fc3d6f6e7b8>), ('word_text', None)]
Building Dataset...


.vector_cache/glove.6B.zip: 0.00B [00:00, ?B/s]

*******************************
Story Text:  386 ['former', 'vice', 'president', 'dick', 'cheney', 'said', 'sunday', 'he', 'supports', 'the', 'obama', 'administration', "'s", 'decision', 'to', 'repeal', 'the', '"', 'do', "n't", 'ask', ',', 'do', "n't", 'tell', '"', 'policy', 'banning', 'gays', 'and', 'lesbians', 'from', 'serving', 'openly', 'in', 'the', 'military', '--', 'a', 'move', 'that', 'was', 'staunchly', 'opposed', 'by', 'most', 'top', 'republicans', '.', '\n\n\n\n\n\n', '"', 'i', 'think', 'the', 'decision', 'that', "'s", 'been', 'made', 'with', 'respect', 'to', 'allowing', 'gays', 'to', 'serve', 'openly', 'in', 'the', 'military', 'is', 'a', 'good', 'one', '"', 'cheney', 'told', 'cnn', "'s", 'candy', 'crowley', 'on', '"', 'state', 'of', 'the', 'union', '.', '"', '"', 'it', "'s", 'the', 'right', 'thing', 'to', 'do', '.', '"', '\n\n\n\n\n\n', 'the', 'policy', ',', 'first', 'enacted', 'during', 'the', 'clinton', 'administration', ',', 'was', 'officially', 'repealed', 'on', 'septemb

.vector_cache/glove.6B.zip: 862MB [06:27, 2.22MB/s]                          
100%|█████████▉| 399779/400000 [00:50<00:00, 8116.89it/s]

Length of Vocab:  88494
Initializing the iterator...


100%|█████████▉| 399779/400000 [01:10<00:00, 8116.89it/s]

Story:  torch.Size([32, 733]) torch.Size([32])
Start/End:  tensor([ 69, 505,  96,  65, 135,  97,  19, 565,  -1,  21, 325,   1,  81,  53,
        123,  93,  -1,  54,   6,  -1,  -1,  25,  42, 614,  10,   0,   0,  -1,
         -1, 278,  49,   0], device='cuda:0') tensor([ 70, 515,  97,  73, 137,  97,  19, 569,  -1,  24, 327,   1,  90,  53,
        123, 119,  -1,  54,  11,  -1,  -1,  32,  59, 620,  11,   0,   1,  -1,
         -1, 278,  55,   1], device='cuda:0') tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0], device='cuda:0')


### Testing 

Do not run

In [0]:
print("Number of batches: ", len(train_iterator))
for batch in train_iterator:
  seq_length = batch.story_text[0].shape[1]
  start = batch.word_start_index
  end = batch.word_end_index
  smask = torch.sum((start >= seq_length).long())
  if smask != 0:
    print("Found issue in start!")
    print(seq_length, start)

  emask = torch.sum((end >= seq_length).long())
  if emask != 0:
    print("Found issue in end!")
    print(seq_length, end)

Number of batches:  644


## BERT testing

Do not run

In [0]:
from transformers import BertTokenizer

# Load the BERT tokenizer.
print('Loading BERT tokenizer...')
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased', do_lower_case=True)

# print("Original: ", newsqa_df['story_text'][0])
print("Tokenized: ", tokenizer.tokenize(newsqa_df['story_text'][0]))
print(len(tokenizer.vocab))


init_token_idx = tokenizer.cls_token_id
eos_token_idx = tokenizer.sep_token_id
pad_token_idx = tokenizer.pad_token_id
unk_token_idx = tokenizer.unk_token_id

max_input_length = tokenizer.max_model_input_sizes['bert-base-uncased']

def tokenize_and_cut(sentence):
  tokens = tokenizer.tokenize(sentence) 
  tokens = tokens[:max_input_length-2]
  return tokens

BERT_FIELD = data.Field(batch_first = True,
              use_vocab = False,
              tokenize = tokenize_and_cut,
              preprocessing = tokenizer.convert_tokens_to_ids,
              init_token = init_token_idx,
              eos_token = eos_token_idx,
              pad_token = pad_token_idx,
              unk_token = unk_token_idx)
START_INDEX = data.Field(sequential=True, postprocessing=process_start, use_vocab=False)
END_INDEX = data.Field(sequential=True, postprocessing=process_end, use_vocab=False)
ANSWERABLE = data.Field(sequential=True, postprocessing=process_answerable, use_vocab=False)

col_dict = {'story_text': BERT_FIELD, 'question': BERT_FIELD, 'word_start_index_1': START_INDEX, 'word_end_index_1': END_INDEX, 'is_answer_absent': ANSWERABLE}

datafields = populateDatafields(newsqa_df, col_dict)
if VERBOSE:
  print(datafields)

print("Building Dataset...")
training_data=data.TabularDataset(path = '/content/drive/Shared drives/CIS 700-1 Final Project/Data/mini-combined-newsqa-data-v2.csv',\
                                  format = 'csv',\
                                  fields = datafields,\
                                  skip_header = True)
if VERBOSE:
  count = 0
  for t in training_data:
    print("*******************************")
    print("Story Text: ", len(t.story_text), t.story_text)
    print("Question: ", t.question)
    print("Start Index: ", t.word_start_index_1)
    print("End Index: ", t.word_end_index_1)
    print("Unanswerable: ", t.is_answer_absent)
    # print("Answer: ", t.story_text[int(t.word_start_index_1):int(t.word_end_index_1)])

    if count > 15:
      break
    count += 1

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')  
BATCH_SIZE = 2

print("Initializing the iterator...")
# Define the train iterator
train_iterator = data.BucketIterator(
    training_data, 
    batch_size = BATCH_SIZE,
    sort_key = lambda x: len(x.story_text),
    sort_within_batch = True,
    repeat=False, 
    shuffle=True,
    device = device)

if VERBOSE:
  for batch in train_iterator:
    print("Story: ", batch.story_text[0].shape, batch.story_text[1].shape)
    print("Start/End: ", batch.word_start_index_1, batch.word_end_index_1, batch.is_answer_absent)
    break

In [0]:
from transformers import BertTokenizer, BertModel

bert = BertModel.from_pretrained('bert-base-uncased')
bert = bert.to(device)
print(bert.config.to_dict()['hidden_size'])

def bert_encoder(document, question):
  with torch.no_grad():
    splits = torch.split(document, max_input_length-2, dim=1)
    embedded_document_splits = []
    for split in splits:
      embedded_document_splits.append(bert(split)[0])
    embedded_document = torch.cat(embedded_document_splits, dim=1)
    embedded_question = bert(question)
  print(embedded_document.shape, embedded_question[0].shape)

for batch in train_iterator:
  document = batch.story_text #[B, S, W]
  # doc_lengths = batch.story_text[1]
  question = batch.question #[B, W]

  bert_encoder(document, question)

# Model Definition (R)

In [0]:
import torch
from torch import nn
import numpy as np
from torch.nn import functional as F
import torch.nn.init as init
from torch.autograd import Variable

## Input Module

### Sketchy Reading Module (R)

In [0]:
class SketchyReading(nn.Module):
  def __init__(self, mode, vocab_size, embedding_length, word_embeddings=None, bert_encoder=None):
    super(SketchyReading, self).__init__()

    if mode not in ['avg', 'recurrent', 'word', 'bert']:
      raise ValueError("Choose a mode from - avg / recurrent / word / bert")
    self.mode = mode

    self.vocab_size = vocab_size
    self.embedding_length = embedding_length
    
    # Embedding Layer
    if self.mode == 'bert':
      if bert_encoder is None:
        raise ValueError("bert_encoder cannot be None when mode = bert")
      self.embeddings = bert_encoder
    else:
      if word_embeddings is None:
        raise ValueError("word_embeddings cannot be None when mode is not bert")
      self.embeddings = nn.Embedding(self.vocab_size, self.embedding_length)
      self.embeddings.weight = nn.Parameter(word_embeddings, requires_grad=False)

    self.recurrent = nn.GRU(self.embedding_length, self.embedding_length, batch_first=True)
    

  def forward(self, document, document_lengths, question):
    if self.mode == 'bert':
      splits = torch.split(document, max_input_length-2, dim=1)
      embedded_document_splits = []
      for split in splits:
        embedded_document_splits.append(self.embeddings(split)[0])
      D = torch.cat(embedded_document_splits, dim=1)
      # D = self.embeddings(document)[0]
      Q = self.embeddings(question)[0]
    elif self.mode == 'word':
      batch_size = document.shape[0]
      document = document.view(batch_size, -1)
      D = self.embeddings(document)
      Q = self.embeddings(question)
    elif self.mode == 'avg':
      embedded_document = self.embeddings(document)
      D = torch.mean(embedded_document, dim=2)
      Q = self.embeddings(question)
    elif self.mode == 'recurrent':
      embedded_document = self.embeddings(document)
      batch_size = embedded_document.shape[0]
      sent_length = embedded_document.shape[1]
      word_length = embedded_document.shape[2]
      temp_document = embedded_document.view(-1, word_length, self.embedding_length)
      document_lengths = document_lengths.view(-1)
      document_lengths[(document_lengths == 0)] = 1
      # print(temp_document.shape, document_lengths)
      packed_document = nn.utils.rnn.pack_padded_sequence(temp_document, document_lengths, batch_first=True, enforce_sorted=False)
      _, D = self.recurrent(packed_document)
      # print(D.shape)
      D = D.reshape(batch_size, sent_length, self.embedding_length)

      Q = self.embeddings(question)

    return D, Q

### Attention Module (R)

In [0]:
class Attention(nn.Module):
  def __init__(self, mode):
    super(Attention, self).__init__()
    if mode not in ['similarity', 'additive']:
      raise ValueError("Choose a mode from - avg / recurrent")
    self.mode = mode

  def forward(self, D, Q):
    # print(D.shape, Q.shape)
    if self.mode == 'similarity':
      QT = Q.permute(0, 2, 1)
      S = torch.bmm(D, QT)
    elif self.mode == 'additive':
      raise NotImplementedError
    return S

### Intensive Reading Module (R)

In [0]:
class IntensiveReading(nn.Module):
  def __init__(self, mode):
    super(IntensiveReading, self).__init__()

    self.attention = Attention(mode)

  def forward(self, D, Q):
    S = self.attention(D, Q)
    
    AQ = F.softmax(S, dim=2)
    D_q = torch.bmm(AQ, Q)
    DPrime = torch.cat((D, D_q), dim=2)

    AD = F.softmax(S, dim=1)
    AD = AD.permute(0, 2, 1)
    Q_d = torch.bmm(AD, D)
    QPrime = torch.cat((Q, Q_d), dim=2)
    
    return DPrime, QPrime

### Final Input Module (R)

In [0]:
class InputModule(nn.Module):
  def __init__(self, sketchy_mode, intensive_mode, vocab_size, embedding_length, word_embeddings=None, bert_encoder=None):
    super(InputModule, self).__init__()

    self.sketchy_reader = SketchyReading(sketchy_mode, vocab_size, embedding_length, word_embeddings, bert_encoder)
    self.sketchy_reader = self.sketchy_reader.to(device)

    self.intensive_reader = IntensiveReading(intensive_mode)
    self.intensive_reader = self.intensive_reader.to(device)
    
  def forward(self, document, document_lengths, question):
    D, Q = self.sketchy_reader(document, document_lengths, question)
    DPrime, QPrime = self.intensive_reader(D, Q)
    return D, Q, DPrime, QPrime

### Test

In [0]:
sketchy_mode = 'word'

intensive_mode = 'similarity'
if sketchy_mode == 'bert':
  from transformers import BertModel

  bert_encoder = BertModel.from_pretrained('bert-base-uncased')
  bert_encoder = bert_encoder.to(device)
  vocab_size = len(tokenizer.vocab)
  embedding_length = bert_encoder.config.to_dict()['hidden_size']
  word_embeddings = None
else:
  vocab_size = len(FIELD.vocab)
  embedding_length = 300
  word_embeddings = FIELD.vocab.vectors
  bert_encoder = None


input_model = InputModule(sketchy_mode, intensive_mode, vocab_size, embedding_length, word_embeddings, bert_encoder)
input_model = input_model.to(device)

for batch in train_iterator:
   #[B, S, W]
  if RUN_MODE == 'word':
    document = batch.story_text[0]
    doc_lengths = batch.story_text[1]
    question = batch.question[0]
  elif RUN_MODE == 'bert':
    document = batch.story_text
    question = batch.question
    doc_lengths = None
  elif RUN_MODE == 'sentence':
    document = batch.story_text[0]
    doc_lengths = batch.story_text[2]
    question = batch.question[0] #[B, W]
  D, Q, DPrime, QPrime = input_model(document, doc_lengths, question)
  print("DP", DPrime.shape, "QP", QPrime.shape)


## Episodic Memory Module

### Modified Attention GRU (R)

In [0]:
class AttnGRUCell(nn.Module):
  def __init__(self, input_size, hidden_size):
    super(AttnGRUCell, self).__init__()
    self.input_size = input_size
    self.hidden_size = hidden_size
    self.Wr = nn.Linear(input_size, hidden_size)
    self.Ur = nn.Linear(hidden_size, hidden_size)
    self.W = nn.Linear(input_size, hidden_size)
    self.U = nn.Linear(hidden_size, hidden_size)

    init.xavier_normal_(self.Wr.state_dict()['weight'])
    init.xavier_normal_(self.Ur.state_dict()['weight'])
    init.xavier_normal_(self.W.state_dict()['weight'])
    init.xavier_normal_(self.U.state_dict()['weight'])

  def forward(self, fact, hi_1, g):
    # fact is the final output of InputModule for each sentence and fact.size() = (batch_size, embedding_length)
    # hi_1.size() = (batch_size, embedding_length=hidden_size)
    # g.size() = (batch_size, )

    r_i = torch.sigmoid(self.Wr(fact) + self.Ur(hi_1))
    h_tilda = torch.tanh(self.W(fact) + r_i*self.U(hi_1))
    g = g.unsqueeze(1)
    hi = g*h_tilda + (1 - g)*hi_1

    return hi # Returning the next hidden state considering the first fact and so on.


class AttnGRU(nn.Module):
  def __init__(self, input_size, hidden_size):
    super(AttnGRU, self).__init__()
    self.input_size = input_size
    self.hidden_size = hidden_size
    self.AttnGRUCell = AttnGRUCell(input_size, hidden_size)

  def forward(self, D, G):
    # D.size() = (batch_size, num_sentences, embedding_length)
    # fact.size() = (batch_size, embedding_length=hidden_size)
    # G.size() = (batch_size, num_sentences)
    # g.size() = (batch_size, )

    h_0 = Variable(torch.zeros(self.hidden_size)).cuda()

    hs = []

    for sen in range(D.size()[1]):
      sentence = D[:, sen, :]
      g = G[:, sen]
      if sen == 0: # Initialization for first sentence only 
        hi_1 = h_0.unsqueeze(0).expand_as(sentence)
      hi_1 = self.AttnGRUCell(sentence, hi_1, g)
      hs.append(hi_1.unsqueeze(1))
    
    hs = torch.cat(hs, dim=1)
    C = hi_1 # Final hidden vector as the contextual vector used for updating memory

    return C, hs

### Memory Module (R)

In [0]:
class MemoryModule(nn.Module): # Takes Document sentences, question and prev_mem as its and output next_mem
  def __init__(self, hidden_size):
    super(MemoryModule, self).__init__()
    self.hidden_size = hidden_size
    self.AttnGRU = AttnGRU(hidden_size, hidden_size)
    self.W1 = nn.Linear(4*hidden_size, hidden_size)
    self.W2 = nn.Linear(hidden_size, 1)
    self.W_mem = nn.Linear(3*hidden_size, hidden_size)
    self.dropout = nn.Dropout(0.2)

    init.xavier_normal_(self.W1.state_dict()['weight'])
    init.xavier_normal_(self.W2.state_dict()['weight'])
    init.xavier_normal_(self.W_mem.state_dict()['weight'])

  def gateMatrix(self, D, Q, prev_mem):
    # D.size() = (batch_size, num_sentences, embedding_length=hidden_size)
    # Q.size() = (batch_size, 1, embedding_length)
    # prev_mem.size() = (batch_size, 1, embedding_length)
    # z.size() = (batch_size, num_sentences, 4*embedding_length)
    # G.size() = (batch_size, num_sentences)

    Q = Q.expand_as(D)
    prev_mem = prev_mem.expand_as(D)
    embedding_length = D.shape[2]
    batch_size = D.shape[0]
    z = torch.cat([D*Q, D*prev_mem, torch.abs(D - Q), torch.abs(D - prev_mem)], dim=2)
    # z.size() = (batch_size, num_sentences, 4*embedding_length)
    z = z.view(-1, 4*embedding_length)
    Z = self.W2(torch.tanh(self.W1(z)))
    Z = Z.view(batch_size, -1)
    G = F.softmax(Z, dim=1)

    return G

  def forward(self, D, Q, prev_mem):
    # Q = Q.unsqueeze(1)
    # prev_mem = prev_mem.unsqueeze(1)
    G = self.gateMatrix(D, Q, prev_mem)
    # print("G: ", G.shape)
    C, hs = self.AttnGRU(D, G)
    # Now considering prev_mem, C and question, we will update the memory state as follows
    concat = torch.cat([prev_mem.squeeze(1), C, Q.squeeze(1)], dim=1)
    concat = self.dropout(concat)
    next_mem = F.relu(self.W_mem(concat))
    next_mem = next_mem.unsqueeze(1)

    return next_mem, hs, G

### Final Episodic Memory Module (R)

In [0]:
class EpisodicMemoryModule(nn.Module):
  def __init__(self, embedding_length, hidden_size, num_passes):
    super(EpisodicMemoryModule, self).__init__()

    self.embedding_length = embedding_length
    self.hidden_size = hidden_size
    self.num_passes = num_passes

    self.recurrent = nn.LSTM(self.embedding_length, self.hidden_size, batch_first=True)
    self.fc = nn.Linear(self.embedding_length, self.hidden_size)

    self.memory = MemoryModule(self.hidden_size)

  def forward(self, D, Q, question_lengths):
    #D.size()= (batch_size, num_sentences, embedding_length) 
    #Q.size() = (batch_size, num_words, embedding_length)

    D = F.relu(self.fc(D))
    if question_lengths is None:
      QPacked = Q
    else:
      QPacked = nn.utils.rnn.pack_padded_sequence(Q, question_lengths, batch_first=True, enforce_sorted=False)
    _, (Q, _) = self.recurrent(QPacked)
    Q = Q.permute(1, 0, 2)
    all_g = []
    hs = D
    M = Q
    for passes in range(self.num_passes):
        M, hs, G = self.memory(hs, Q, M)
        all_g.append(G.unsqueeze(1))
    return M, hs, torch.cat(all_g, dim=1)

### Test

In [0]:
sketchy_mode = 'word'
intensive_mode = 'similarity'
if sketchy_mode == 'bert':
  from transformers import BertModel

  bert_encoder = BertModel.from_pretrained('bert-base-uncased')
  bert_encoder = bert_encoder.to(device)
  vocab_size = len(tokenizer.vocab)
  embedding_length = bert_encoder.config.to_dict()['hidden_size']
  hidden_size = embedding_length
  word_embeddings = None
else:
  vocab_size = len(FIELD.vocab)
  embedding_length = 300
  hidden_size = embedding_length
  word_embeddings = FIELD.vocab.vectors
  bert_encoder = None

num_passes = 3


input_model = InputModule(sketchy_mode, intensive_mode, vocab_size, embedding_length, word_embeddings, bert_encoder)
input_model = input_model.to(device)

memory_model = EpisodicMemoryModule(2*embedding_length, hidden_size, num_passes)
memory_model = memory_model.to(device)

for batch in train_iterator:
   #[B, S, W]
  if RUN_MODE == 'word':
    document = batch.story_text[0]
    doc_lengths = batch.story_text[1]
    question = batch.question[0]
    question_lengths = batch.question[1]
  elif RUN_MODE == 'bert':
    document = batch.story_text
    question = batch.question
    doc_lengths = None
    question_lengths = None
  elif RUN_MODE == 'sentence':
    document = batch.story_text[0]
    doc_lengths = batch.story_text[2]
    question = batch.question[0] #[B, W]
    question_lengths = batch.question[1]

  D, Q, DPrime, QPrime = input_model(document, doc_lengths, question)
  print("DP", DPrime.shape, "QP", QPrime.shape)

  M, hs, all_g = memory_model(DPrime, QPrime, question_lengths)
  print("M", M.shape, " HS: ", hs.shape, "all_g": all_g.shape)

## Verification Module (R)

In [0]:
class VerificationModule(nn.Module):
  def __init__(self, mode, pool_mode, hidden_size):
    super(VerificationModule, self).__init__()

    if mode not in ['external', 'internal']:
      raise ValueError("Choose a mode from - external / internal")
    self.mode = mode

    if pool_mode not in ['max', 'avg']:
      raise ValueError("Choose a mode from - max / avg")
    self.pool_mode = pool_mode
    
    self.hidden_size = hidden_size
    self.fc = nn.Linear(self.hidden_size, 1)

  def forward(self, D, Q=None):
    if self.mode == 'external':
      if Q is None:
        raise ValueError("Q cannot be None when mode = external")
      DQ = torch.cat((D, Q), dim=1)
    else:
      DQ = D

    if self.pool_mode == 'max':
      DQ, _ = torch.max(DQ, dim=1)
    elif self.pool_mode == 'avg':
      DQ = torch.mean(DQ, dim=1)
    
    verifier_score = F.relu(self.fc(DQ))
    return verifier_score

## Output Module (R)

In [0]:
class OutputModule(nn.Module):
  def __init__(self, hidden_size):
    super(OutputModule, self).__init__()

    self.hidden_size = hidden_size

    self.dropout = nn.Dropout(0.2)
    self.fc_start = nn.Linear(self.hidden_size, 1)
    self.fc_end = nn.Linear(self.hidden_size, 1)

  def forward(self, M):
    M = self.dropout(M)
    start = F.relu(self.fc_start(M))
    end = F.relu(self.fc_end(M))
    return start, end

## Complete Architecture (R)

In [0]:
class Model(nn.Module):
  def __init__(self, sketchy_mode, intensive_mode, pool_mode, vocab_size, hidden_size, embedding_length, word_embeddings, bert_encoder=None, num_passes=3, skip_memory = False):
    super(Model, self).__init__()

    self.sketchy_mode = sketchy_mode
    self.intensive_mode = intensive_mode
    self.pool_mode = pool_mode
    self.skip_memory = skip_memory

    self.input_model = InputModule(sketchy_mode, intensive_mode, vocab_size, embedding_length, word_embeddings, bert_encoder=bert_encoder)
    self.input_model = self.input_model.to(device)

    self.memory_model = EpisodicMemoryModule(2*embedding_length, hidden_size, num_passes)
    self.memory_model = self.memory_model.to(device)

    self.external_verifier = VerificationModule('external', pool_mode, hidden_size)
    self.external_verifier = self.external_verifier.to(device)

    self.internal_verifier = VerificationModule('internal', pool_mode, hidden_size)
    self.internal_verifier = self.internal_verifier.to(device)

    self.output_model = OutputModule(hidden_size)
    self.output_model = self.output_model.to(device)

    self.fc = nn.Linear(2*embedding_length, embedding_length)

  def forward(self, document, doc_lengths, question, question_lengths):
    D, Q, DPrime, QPrime = self.input_model(document, doc_lengths, question)
    # print("DP", DPrime.shape, "QP", QPrime.shape)

    external_verifier_score = self.external_verifier(D, Q)
    
    all_g = None
    if self.skip_memory:
      all_hidden = F.relu(self.fc(DPrime))
      M = None
    else:
      M, all_hidden, all_g = self.memory_model(DPrime, QPrime, question_lengths)
      seq_length = DPrime.shape[1]
      MPrime = M.repeat(1, seq_length, 1)
      DPrime = F.relu(self.fc(DPrime))
      I = torch.cat((DPrime, MPrime), dim=2)
      all_hidden = F.relu(self.fc(I))

    internal_verifier_score = self.internal_verifier(all_hidden)

    if self.sketchy_mode in ['word', 'bert']:
      start, end = self.output_model(all_hidden)
      start = start.squeeze(2)
      end = end.squeeze(2)
    else:
      M = M.squeeze(1)
      start, end = self.output_model(M)
    return start, end, external_verifier_score, internal_verifier_score, all_g



### Test

In [0]:
sketchy_mode = 'word'
intensive_mode = 'similarity'
pool_mode = 'max'
if sketchy_mode == 'bert':
  from transformers import BertModel
  bert_encoder = BertModel.from_pretrained('bert-base-uncased')
  bert_encoder = bert_encoder.to(device)
  vocab_size = len(tokenizer.vocab)
  embedding_length = bert_encoder.config.to_dict()['hidden_size']
  hidden_size = embedding_length
  word_embeddings = None
else:
  vocab_size = len(FIELD.vocab)
  embedding_length = 300
  hidden_size = embedding_length
  word_embeddings = FIELD.vocab.vectors
  bert_encoder = None
num_passes = 3

model = Model(sketchy_mode, intensive_mode, pool_mode, vocab_size, hidden_size, embedding_length, word_embeddings, bert_encoder, num_passes)
model = model.to(device)

for batch in train_iterator:
  #[B, S, W]
  if RUN_MODE == 'word':
    document = batch.story_text[0]
    doc_lengths = batch.story_text[1]
    question = batch.question[0]
    question_lengths = batch.question[1]
  elif RUN_MODE == 'bert':
    document = batch.story_text
    question = batch.question
    doc_lengths = None
    question_lengths = None
  elif RUN_MODE == 'sentence':
    document = batch.story_text[0]
    doc_lengths = batch.story_text[2]
    question = batch.question[0] #[B, W]
    question_lengths = batch.question[1]
  
  start, end, ext_score, int_score = model(document, doc_lengths, question, question_lengths)

  if RUN_MODE == 'bert':
    bert_tokenizer = tokenizer
    vocab_itos = None
  else:
    vocab_itos = FIELD.vocab.itos
    bert_tokenizer = None
  EvaluationMetrics.formulate_answer_as_string(sketchy_mode, start, end, document, vocab_itos, bert_tokenizer)
  print("Start: ", start.shape, " End: ", end.shape, " ext_score: ", ext_score.shape, " int_score: ", int_score.shape)

# Training and evaluation

### Evaluation Metrics (R)

In [0]:
from collections import Counter
import string
import re

class EvaluationMetrics():

  def __init__(self):
    return 0

  @staticmethod
  def formulate_answer_as_string(mode, start_index, end_index, document, vocab_itos=None, bert_tokenizer=None):
    batch_size = document.shape[0]
    document = document.view(batch_size, -1)
    all_text = []
    # print(start_index.shape, end_index.shape)
    for idx in range(batch_size):
      doc = document[idx, :]
      if mode == 'word' or mode == 'bert':
        s = torch.argmax(start_index[idx, :])
        e = torch.argmax(end_index[idx, :])
      elif mode == 'true':
        s = start_index[idx, :]
        e = end_index[idx, :]
      elif mode == 'test':
        s = start_index[idx]
        e = end_index[idx]
      else:
        s = torch.floor(start_index[idx, :]).long()
        e = torch.floor(end_index[idx, :]).long()
      
      if s < 0 and e < 0:
        text = ''
      else:
        if vocab_itos is None:
          text = ' '.join(tokenizer.convert_ids_to_tokens(doc)[s:e+1])
        else:
          if e >= len(doc) or s >= len(doc):
            text = ''
          else:
            text = ' '.join([vocab_itos[doc[i]] for i in range(s, e+1)])
      all_text.append(text)

    # print(all_text)
    return all_text

  @staticmethod
  def f1_score(prediction, ground_truth):
    f1 = 0
    result = []
    for idx in range(len(prediction)):
      prediction_tokens = EvaluationMetrics.normalize_answer(prediction[idx]).split()
      ground_truth_tokens = EvaluationMetrics.normalize_answer(ground_truth[idx]).split()
      common = Counter(prediction_tokens) & Counter(ground_truth_tokens)
      num_same = sum(common.values())
      if num_same == 0:
          f1 += 0
          result.append(0)
          continue
      precision = 1.0 * num_same / len(prediction_tokens)
      recall = 1.0 * num_same / len(ground_truth_tokens)
      curr_f1 = (2 * precision * recall) / (precision + recall)
      f1 += curr_f1
      result.append(curr_f1)
    return f1/len(prediction), result

  @staticmethod
  def normalize_answer(s):
    """Lower text and remove punctuation, articles and extra whitespace."""
    def remove_articles(text):
      return re.sub(r'\b(a|an|the)\b', ' ', text)

    def white_space_fix(text):
      return ' '.join(text.split())

    def remove_punc(text):
      exclude = set(string.punctuation)
      return ''.join(ch for ch in text if ch not in exclude)

    def lower(text):
      return text.lower()

    return white_space_fix(remove_articles(remove_punc(lower(s))))

  @staticmethod
  def exact_match_score(prediction, ground_truth):
    em = 0
    result = []
    for idx in range(len(prediction)):
      curr_em = EvaluationMetrics.normalize_answer(prediction[idx]) == EvaluationMetrics.normalize_answer(ground_truth[idx])
      if curr_em == 1:
        result.append(1)
      else:
        result.append(0)
      em += (curr_em)
    return em/len(prediction), result

In [0]:
p = ["hello how are you", "hi hello", "bye"]
t = ["hello how are you", "dog hello", "bye hi"]
f1 = EvaluationMetrics.f1_score(p, t)
em = EvaluationMetrics.exact_match_score(p, t)
print(f1, em)

0.7222222222222222 0.3333333333333333


### Loss Function (R)

In [0]:
# true_labels --> {'start_index': val, 'end_index': val, 'unanswerable_ext': 1/0, 'unanswerable_int': 1/0}
# predictions --> {'start_index': val, 'end_index': val, 'unanswerable_ext': 1/0, 'unanswerable_int': 1/0}
def retrospective_loss(true_labels, predictions, mode='word', alpha1=0.5, alpha2=0.5):
  ans_loss = nn.BCEWithLogitsLoss()
  if mode in ['word', 'bert']:
    span_loss = nn.CrossEntropyLoss(ignore_index=-1)
  else:
    span_loss = nn.MSELoss()
  # print(true_labels['start_index'].dtype)
  loss_span = (span_loss(predictions['start_index'], true_labels['start_index']) \
               + span_loss(predictions['end_index'], true_labels['end_index']))/2.0
  loss_ans = (ans_loss(predictions['unanswerable_ext'], true_labels['unanswerable_ext'].float()) \
              + ans_loss(predictions['unanswerable_int'], true_labels['unanswerable_int'].float()))/2.0

  return (alpha1 * loss_span + alpha2 * loss_ans), None

# true_labels --> {'start_index': val, 'end_index': val, 'unanswerable_ext': 1/0, 'unanswerable_int': 1/0}
# predictions --> {'start_index': val, 'end_index': val, 'unanswerable_ext': 1/0, 'unanswerable_int': 1/0}
def retrospective_parallel_loss(true_labels, predictions, mode='word', alpha1=0.1, alpha2=1):
  batch_size = len(true_labels['unanswerable_int'])
  with torch.no_grad():
    positives = torch.sum(true_labels['unanswerable_int'], dim=0)
    pos_weight = (batch_size - positives) / (positives + 1e-4)
  
  ans_loss = nn.BCEWithLogitsLoss(pos_weight=pos_weight)
  if mode in ['word', 'bert']:
    span_loss = nn.CrossEntropyLoss(ignore_index=-1)
  else:
    span_loss = nn.MSELoss()
  # print(true_labels['start_index'].dtype)
  loss_span = (span_loss(predictions['start_index'], true_labels['start_index']) \
               + span_loss(predictions['end_index'], true_labels['end_index']))
  # loss_ans = (ans_loss(predictions['unanswerable_ext'], true_labels['unanswerable_ext'].float()) \
  #             + ans_loss(predictions['unanswerable_int'], true_labels['unanswerable_int'].float()))/2.0
  loss_ans_int = ans_loss(predictions['unanswerable_int'], true_labels['unanswerable_int'].float())
  loss_ans_ext = ans_loss(predictions['unanswerable_ext'], true_labels['unanswerable_ext'].float())

  # print("Span: ", loss_span, " int: ", loss_ans_int, " ext: ", loss_ans_ext)
  return (alpha1 * loss_span + alpha2 * loss_ans_int), loss_ans_ext

# true_labels --> {'start_index': val, 'end_index': val, 'unanswerable_ext': 1/0, 'unanswerable_int': 1/0}
# predictions --> {'start_index': val, 'end_index': val, 'unanswerable_ext': 1/0, 'unanswerable_int': 1/0}
def retrospective_loss_span(true_labels, predictions, mode='word', alpha1=0.5, alpha2=0.5):
  ans_loss = nn.BCEWithLogitsLoss()
  if mode in ['word', 'bert']:
    span_loss = nn.CrossEntropyLoss(ignore_index=-1)
  else:
    span_loss = nn.MSELoss()
  # print(true_labels['start_index'].dtype)
  loss_span = (span_loss(predictions['start_index'], true_labels['start_index']) \
               + span_loss(predictions['end_index'], true_labels['end_index']))
  # loss_ans = (ans_loss(predictions['unanswerable_ext'], true_labels['unanswerable_ext'].float()) \
  #             + ans_loss(predictions['unanswerable_int'], true_labels['unanswerable_int'].float()))/2.0

  return loss_span, None


### Trainer and Evaluator (R)

In [0]:
class Experiment():
  def __init__(self, hyperparameters, log_dir="runs", verbose=True, print_every=100):
    self.hyperparameters = hyperparameters
    self.log_dir = log_dir
    self.verbose = verbose
    self.print_every = print_every

    self.model = Model(self.hyperparameters['sketchy_mode'], \
                       self.hyperparameters['intensive_mode'],\
                       self.hyperparameters['pool_mode'],\
                       self.hyperparameters['vocab_size'],\
                       self.hyperparameters['hidden_size'],\
                       self.hyperparameters['embedding_length'],\
                       self.hyperparameters['word_embeddings'],\
                       self.hyperparameters['bert_encoder'],\
                       self.hyperparameters['num_passes'],\
                       self.hyperparameters['skip_memory'])
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    self.model = self.model.to(device)

  def train_classifier(self, model, dataset_iterator, loss_function, optimizer, vocab_itos, \
                     num_epochs = 10, log = "runs", verbose = True, print_every = 100, expt_name = "default", start_epoch = 0):
    #tensorboard writer
    writer = SummaryWriter(log_dir=log)
    model.train()
    step = 0
    parallel = False
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    for epoch in range(start_epoch, num_epochs):
      total = 0
      total_loss = 0
      total_ext_loss = 0
      total_f1 = 0
      total_em = 0
      correct_start = 0
      correct_end = 0
      correct_both = 0
      epoch_step = 0
      all_true_ans = []
      all_pred_ans = []
      all_result_em = []
      all_result_f1 = []
      for batch in dataset_iterator:
        if RUN_MODE == 'word':
          document = batch.story_text[0]
          doc_lengths = batch.story_text[1]
          question = batch.question[0]
          question_lengths = batch.question[1]
        elif RUN_MODE == 'bert':
          document = batch.story_text
          question = batch.question
          doc_lengths = None
          question_lengths = None
        elif RUN_MODE == 'sentence':
          document = batch.story_text[0]
          doc_lengths = batch.story_text[2]
          question = batch.question[0] #[B, W]
          question_lengths = batch.question[1]
        # true_start_index = batch.word_start_index_1
        # true_end_index = batch.word_end_index_1
        true_start_index = batch.word_start_index
        true_end_index = batch.word_end_index
        unanswerable = batch.is_answer_absent

        optimizer.zero_grad()

        pred_start_index, pred_end_index, pred_ext_score, pred_int_score = model(document, doc_lengths, question, question_lengths)
        # print(pred_start_index, pred_end_index)
        # print(true_start_index, true_end_index)
        predictions = {'start_index': pred_start_index, 'end_index': pred_end_index, 'unanswerable_ext': pred_ext_score.squeeze(1), 'unanswerable_int': pred_int_score.squeeze(1)}
        if model.sketchy_mode in ['word', 'bert']:
          true_labels = {'start_index': true_start_index, 'end_index': true_end_index, 'unanswerable_ext': unanswerable, 'unanswerable_int': unanswerable}
        else:
          true_labels = {'start_index': true_start_index.float(), 'end_index': true_end_index.float(), 'unanswerable_ext': unanswerable, 'unanswerable_int': unanswerable}
        
        loss = loss_function(true_labels, predictions, model.sketchy_mode)
        # print(loss)
        if loss[1] is not None:
          # Parallel training mode
          parallel = True
          ext_loss = loss[1]
          final_loss = loss[0]
          ext_loss.backward()
          final_loss.backward()
          total_ext_loss += ext_loss.item()
          total_loss += final_loss.item()
        else:
          # print("l: ", loss)
          loss[0].backward()
          total_loss += loss[0].item()
        nn.utils.clip_grad_norm_(model.parameters(), 0.5)
        
        optimizer.step()

        # print(model.sketchy_mode, pred_start_index, pred_end_index, document)
        all_pred_text = EvaluationMetrics.formulate_answer_as_string(model.sketchy_mode, pred_start_index, pred_end_index, document, self.hyperparameters['vocab_itos'], self.hyperparameters['bert_tokenizer'])
        all_true_text = EvaluationMetrics.formulate_answer_as_string("true", true_start_index.unsqueeze(1), true_end_index.unsqueeze(1), document, self.hyperparameters['vocab_itos'], self.hyperparameters['bert_tokenizer'])
        
        f1, result_f1 = EvaluationMetrics.f1_score(all_pred_text, all_true_text)
        total_f1 += f1
        all_result_f1 += result_f1
        
        em, result_em = EvaluationMetrics.exact_match_score(all_pred_text, all_true_text)
        total_em += em
        all_result_em += result_em

        all_true_ans += all_true_text
        all_pred_ans += all_pred_text
        

        start_pred = torch.argmax(pred_start_index, dim=1)
        correct_start += (torch.sum(start_pred == true_start_index)).item()
        # start_acc = (correct_start / len(true_start_index))

        end_pred = torch.argmax(pred_end_index, dim=1)
        correct_end += (torch.sum(end_pred == true_end_index)).item()
        # end_acc += (correct_end / len(true_end_index))

        correct_both += (torch.sum((start_pred == true_start_index) & (end_pred == true_end_index))).item()

        epoch_step += 1

        # batch_size = document.shape[0]
        # seq_length = document.shape[1]
        # s = [0 for _ in range(batch_size)]
        # e = [seq_length-1 for _ in range(batch_size)]
        # doc_text = EvaluationMetrics.formulate_answer_as_string("test", s, e, document, self.hyperparameters['vocab_itos'], None)
        # print("Doc: ", doc_text)
        # print("True indices: ", true_start_index, true_end_index, "True: ", all_true_text)
        # print("Pred indices: ", pred_start_index, pred_end_index,"Pred: ", all_pred_text)

        total += len(true_start_index)
        

        if ((step % print_every) == 0):
          if parallel:
            writer.add_scalar("External Loss/train", total_ext_loss/epoch_step, step)
            writer.add_scalar("Final Loss/train", total_loss/epoch_step, step)
          else:
            writer.add_scalar("Loss/train", total_loss/epoch_step, step)
          writer.add_scalar("F1/train", total_f1/epoch_step, step)
          writer.add_scalar("EM/train", total_em/epoch_step, step)
          writer.add_scalar("Start Acc/train", correct_start/total, step)
          writer.add_scalar("End Acc/train", correct_end/total, step)
          writer.add_scalar("Acc/train", correct_both/total, step)
          
          writer.add_text("Predictions", '\n'.join(all_pred_text), step)
          writer.add_text("True", '\n'.join(all_true_text), step)
          if verbose:
            if parallel:
              print("--- Step: %s Ext Loss: %s Final Loss: %s Start Acc: %s End Acc: %s Acc: %s F1: %s EM: %s" %(step, total_ext_loss/epoch_step, total_loss/epoch_step, (correct_start*100)/total, (correct_end*100)/total, (correct_both*100)/total, (total_f1 * 100)/epoch_step, (total_em * 100)/epoch_step))
            else:
              print("--- Step: %s Loss: %s Start Acc: %s End Acc: %s Acc: %s F1: %s EM: %s" %(step, total_loss/epoch_step,  (correct_start*100)/total, (correct_end*100)/total, (correct_both*100)/total, (total_f1 * 100)/epoch_step, (total_em * 100)/epoch_step))
        step = step+1

        torch.cuda.empty_cache()

        # Move everything back to cpu
        # document = document.to('cpu')
        # question = question.to('cpu')
        # true_start_index = true_start_index.to('cpu')
        # true_end_index = true_end_index.to('cpu')
        # unanswerable = unanswerable.to('cpu')


      print("Saving model...")
      state_to_save = {'epoch': epoch,\
                       'model_state_dict': model.state_dict(),\
                       'optimizer_state_dict': optimizer.state_dict()}
      torch.save(state_to_save, '/content/drive/Shared drives/CIS 700-1 Final Project/models/' + expt_name + '_' + str(epoch) + '.pt')

      print("Saving outputs...")
      output_dict = {'true_answer': all_true_ans,\
                     'pred_answer': all_pred_ans,\
                     'EM': all_result_em,\
                     'F1': all_result_f1}
      output_df = pd.DataFrame(output_dict)
      output_df.to_csv('/content/drive/Shared drives/CIS 700-1 Final Project/models/' + expt_name + '_' + str(epoch) + '.csv')
      
      if parallel:
        print("Epoch: %s External Loss: %s Final Loss: %s Start Acc: %s End Acc: %s Acc: %s F1: %s EM: %s"%(epoch+1, total_ext_loss/epoch_step, total_loss/epoch_step, (correct_start*100)/total, (correct_end*100)/total, (correct_both*100)/total, (total_f1 * 100)/epoch_step, (total_em * 100)/epoch_step))
      else:
        print("Epoch: %s Loss: %s Start Acc: %s End Acc: %s Acc: %s F1: %s EM: %s"%(epoch+1, total_loss/epoch_step,  (correct_start*100)/total, (correct_end*100)/total, (correct_both*100)/total, (total_f1 * 100)/epoch_step, (total_em * 100)/epoch_step))

      self.model = model


  def train(self, train_iterator, expt_name="default", load_model=False):
    loss_function = self.hyperparameters['loss_function']
    optimizer = torch.optim.Adam(self.model.parameters(), lr=self.hyperparameters['learning_rate'])
    if load_model:
      print("Loading model...")
      checkpoint = torch.load('/content/drive/Shared drives/CIS 700-1 Final Project/models/' + expt_name + '.pt')
      self.model.load_state_dict(checkpoint['model_state_dict'])
      optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
      start_epoch = checkpoint['epoch']
    else:
      start_epoch = 0
    
    self.train_classifier(self.model,\
                     train_iterator,\
                     loss_function,\
                     optimizer,\
                     self.hyperparameters['vocab_itos'],\
                     self.hyperparameters['num_epochs'],\
                     self.log_dir,\
                     self.verbose,\
                     self.print_every,\
                     expt_name,\
                     start_epoch)
    
    return self.model

  # predictions --> {'start_index': val, 'end_index': val, 'ext_score': 1/0, 'int_score': 1/0}
  # parameters --> {'beta1': val, 'beta2': val, 'lambda1': val, 'lambda2': val, 'delta': val}
  def rear_verification(self, predictions, parameters):
    all_start = []
    all_end = []

    batch_size = predictions['start_index'].shape[0]
    seq_length = predictions['start_index'].shape[1]
    for idx in range(batch_size):
      s = predictions['start_index'][idx, :]
      e = predictions['end_index'][idx, :]

      grid_s, grid_e = torch.meshgrid(s.squeeze(), e.squeeze())
      sums = torch.triu(grid_s + grid_e)
      # print(sums.shape)
      score_has = torch.max(sums)
      indices = torch.argmax(sums)
      final_start = int(indices/seq_length)
      final_end = int(indices%seq_length)
      
      pooled_indices = torch.max(s) + torch.max(e)
      v = parameters['beta1']*predictions['unanswerable_ext'][idx] + parameters['beta2']*predictions['unanswerable_int'][idx]
      score_na = parameters['lambda1']*pooled_indices + parameters['lambda2']*v

      final_score = torch.abs(score_has - score_na)
      if final_score >= parameters['delta']:
        all_start.append(final_start)
        all_end.append(final_end)
      else:
        all_start.append(-1)
        all_end.append(-1)
    return torch.tensor(all_start), torch.tensor(all_end)

  def evaluate_classifier(self, model, dataset_iterator, evaluation_parameters, vocab_itos, expt_name = "default"):
    #tensorboard writer
    model.eval()
    step = 0
    total_f1 = 0
    total_em = 0
    total = 0
    correct_start = 0
    correct_end = 0
    correct_both = 0
    # all_true_ans = []
    # all_pred_ans = []
    all_start = []
    all_end = []
    # all_true_start = []
    # all_true_end = []
    # all_doc_text = []
    # all_question_text = []
    all_output = []
    for batch in dataset_iterator:
      if RUN_MODE == 'word':
        document = batch.story_text[0]
        doc_lengths = batch.story_text[1]
        question = batch.question[0]
        question_lengths = batch.question[1]
      elif RUN_MODE == 'bert':
        document = batch.story_text
        question = batch.question
        doc_lengths = None
        question_lengths = None
      elif RUN_MODE == 'sentence':
        document = batch.story_text[0]
        doc_lengths = batch.story_text[2]
        question = batch.question[0] #[B, W]
        question_lengths = batch.question[1]
      true_start_index = batch.word_start_index
      true_end_index = batch.word_end_index
      unanswerable = batch.is_answer_absent

      with torch.no_grad():
        pred_start_index, pred_end_index, pred_ext_score, pred_int_score, attention_scores = model(document, doc_lengths, question, question_lengths)

        predictions = {'start_index': pred_start_index, 'end_index': pred_end_index, 'unanswerable_ext': pred_ext_score.squeeze(1), 'unanswerable_int': pred_int_score.squeeze(1)}
        final_pred_start, final_pred_end = self.rear_verification(predictions, evaluation_parameters)
        all_pred_text = EvaluationMetrics.formulate_answer_as_string("test", final_pred_start.data, final_pred_end.data, document.data, self.hyperparameters['vocab_itos'], self.hyperparameters['bert_tokenizer'])

        if model.sketchy_mode in ['word', 'bert']:
          true_labels = {'start_index': true_start_index.data, 'end_index': true_end_index.data, 'unanswerable_ext': unanswerable.data, 'unanswerable_int': unanswerable.daya}
        else:
          true_labels = {'start_index': true_start_index.data.float(), 'end_index': true_end_index.data.float(), 'unanswerable_ext': unanswerable.data, 'unanswerable_int': unanswerable.data}
        all_true_text = EvaluationMetrics.formulate_answer_as_string("true", true_start_index.data.unsqueeze(1), true_end_index.data.unsqueeze(1), document.data, self.hyperparameters['vocab_itos'], self.hyperparameters['bert_tokenizer'])

        # f1, result_f1 = EvaluationMetrics.f1_score(all_pred_text, all_true_text)
        # total_f1 += f1
        # # all_result_f1 += result_f1
        
        # em, result_em = EvaluationMetrics.exact_match_score(all_pred_text, all_true_text)
        # total_em += em
        # # all_result_em += result_em

        # all_true_ans += all_true_text
        # all_pred_ans += all_pred_text

        # all_true_start += true_start_index.data.tolist()
        # all_true_end += true_end_index.data.tolist()
        # print("TS: ", all_true_start)
        # print("TE: ", all_true_end)

        batch_size = document.shape[0]
        seq_length = document.shape[1]
        s = [0 for _ in range(batch_size)]
        e = [seq_length-1 for _ in range(batch_size)]
        all_doc_text = EvaluationMetrics.formulate_answer_as_string("test", s, e, document.data, self.hyperparameters['vocab_itos'], None)
        # print("D: ", all_doc_text)

        # seq_length = question.shape[1]
        # s = [0 for _ in range(batch_size)]
        # e = [seq_length-1 for _ in range(batch_size)]
        # all_question_text += EvaluationMetrics.formulate_answer_as_string("test", s, e, question.data, self.hyperparameters['vocab_itos'], None)
        # print("Q: ", all_question_text)
        

        start_pred = torch.argmax(pred_start_index, dim=1)
        all_start += start_pred
        correct_start += (torch.sum(start_pred == true_start_index)).item()
        # start_acc = (correct_start / len(true_start_index))

        end_pred = torch.argmax(pred_end_index, dim=1)
        all_end += end_pred
        correct_end += (torch.sum(end_pred == true_end_index)).item()
        # end_acc += (correct_end / len(true_end_index))

        correct_both += (torch.sum((start_pred == true_start_index) & (end_pred == true_end_index))).item()

        total += len(true_start_index)
        step = step+1

        # Move everything back to cpu
        # document = document.to('cpu')
        # question = question.to('cpu')
        # true_start_index = true_start_index.to('cpu')
        # true_end_index = true_end_index.to('cpu')
        # unanswerable = unanswerable.to('cpu')

        torch.cuda.empty_cache()
    
        
        output_dict = {'document': all_doc_text,\
                      'attention': attention_scores,\
                       'true_answer': all_true_text,\
                       'predicted_answer': all_pred_text,\
                       'accuracy': correct_both/total}
        all_output.append(output_dict)
    
    print("Saving outputs...")
    output_df = pd.DataFrame(all_output)
    output_df.to_csv('/content/drive/Shared drives/CIS 700-1 Final Project/models/' + expt_name + '.csv')
    # print("VALIDATION --> Start Acc: %s End Acc: %s Acc: %s F1: %s EM: %s" %((correct_start*100)/total, (correct_end*100)/total, (correct_both*100)/total, (total_f1 * 100)step, (total_em * 100)/step))
    
  def evaluate(self, test_iterator, expt_name = "default", load_model=False):
    if load_model:
      print("Loading model...")
      checkpoint = torch.load('/content/drive/Shared drives/CIS 700-1 Final Project/models/' + expt_name + '.pt')
      self.model.load_state_dict(checkpoint['model_state_dict'])
    evaluation_parameters = {'beta1': self.hyperparameters['beta1'],\
                             'beta2': self.hyperparameters['beta2'],\
                             'lambda1': self.hyperparameters['lambda1'],\
                             'lambda2': self.hyperparameters['lambda2'],\
                             'delta': self.hyperparameters['delta']}
    self.evaluate_classifier(self.model,\
                     test_iterator,\
                     evaluation_parameters,\
                     self.hyperparameters['vocab_itos'],\
                     expt_name)
  

In [0]:
torch.cuda.empty_cache()

# Experiments

## Expt 1 - Glove embeddings

In [0]:
log_dir = "/content/drive/Shared\ drives/CIS\ 700-1\ Final\ Project/runs"
%tensorboard --logdir /content/drive/Shared\ drives/CIS\ 700-1\ Final\ Project/runs

In [0]:
hyperparameters1 = {'vocab_size': len(FIELD.vocab),\
                   'embedding_length': 300,\
                   'word_embeddings': FIELD.vocab.vectors,\
                   'vocab_itos':  FIELD.vocab.itos,\
                   'hidden_size': 300,\
                   'num_passes': 2,\
                   'sketchy_mode': 'word',\
                   'intensive_mode': 'similarity',\
                   'pool_mode': 'max',\
                   'skip_memory': False,\
                   'num_epochs': 3,\
                   'learning_rate': 1e-2,\
                   'loss_function': retrospective_parallel_loss,\
                   'bert_encoder': None,\
                   'bert_tokenizer': None,\
                   'beta1': 0.5,\
                   'beta2': 0.5,\
                   'lambda1': 0.5,\
                   'lambda2': 0.5,\
                   'delta': 0}

expt1 = Experiment(hyperparameters1, log_dir='/content/drive/Shared drives/CIS 700-1 Final Project/runs/expt1_glove_5')
trained_model_glove = expt1.train(train_iterator)
expt1.evaluate(val_iterator)

## Expt 2 - BERT embeddings

In [0]:
from transformers import BertModel, AlbertModel
bert_encoder = BertModel.from_pretrained('bert-base-uncased')
bert_encoder = bert_encoder.to(device)
# bert_encoder = AlbertModel.from_pretrained('albert-base-v2')
# bert_encoder = bert_encoder.to(device)

hyperparameters2 = {'vocab_size': len(tokenizer.vocab),\
                   'embedding_length': bert_encoder.config.to_dict()['hidden_size'],\
                   'word_embeddings': None,\
                   'vocab_itos':  None,\
                   'hidden_size': bert_encoder.config.to_dict()['hidden_size'],\
                   'num_passes': 3,\
                   'sketchy_mode': 'bert',\
                   'intensive_mode': 'similarity',\
                   'pool_mode': 'max',\
                   'skip_memory': False,\
                   'num_epochs': 3,\
                   'learning_rate': 1e-4,\
                   'loss_function': retrospective_parallel_loss,\
                   'bert_encoder': bert_encoder,\
                   'bert_tokenizer': tokenizer,\
                   'beta1': 0.5,\
                   'beta2': 0.5,\
                   'lambda1': 0.5,\
                   'lambda2': 0.5,\
                   'delta': 0.5}

expt2 = Experiment(hyperparameters2, print_every = 20)
trained_model_bert = expt2.train(train_iterator)
expt2.evaluate(val_iterator)

## Expt 3

In [0]:
%tensorboard --logdir /content/drive/Shared\ drives/CIS\ 700-1\ Final\ Project/runs

In [0]:
hyperparameters3 = {'vocab_size': len(FIELD.vocab),\
                   'embedding_length': 300,\
                   'word_embeddings': FIELD.vocab.vectors,\
                   'vocab_itos':  FIELD.vocab.itos,\
                   'hidden_size': 300,\
                   'num_passes': 2,\
                   'sketchy_mode': 'word',\
                   'intensive_mode': 'similarity',\
                   'pool_mode': 'max',\
                   'skip_memory': False,\
                   'num_epochs': 3,\
                   'learning_rate': 1e-2,\
                   'loss_function': retrospective_parallel_loss,\
                   'bert_encoder': None,\
                   'bert_tokenizer': None,\
                   'beta1': 0.5,\
                   'beta2': 0.5,\
                   'lambda1': 0.5,\
                   'lambda2': 0.5,\
                   'delta': 0}

expt3 = Experiment(hyperparameters3, log_dir='/content/drive/Shared drives/CIS 700-1 Final Project/runs/expt3_glove2')
trained_model_glove = expt3.train(train_iterator)
expt3.evaluate(val_iterator)

--- Step: 0 Ext Loss: 0.6964178681373596 Final Loss: 3.656561851501465 Start Acc: 0.0 End Acc: 0.0 Acc: 0.0 F1: 3.3196556173026304 EM: 3.125
--- Step: 100 Ext Loss: 0.6931795654910626 Final Loss: 3.498922921643399 Start Acc: 1.8873762376237624 End Acc: 1.021039603960396 Acc: 0.09282178217821782 F1: 2.546528802970907 EM: 5.538366336633663
--- Step: 200 Ext Loss: 0.6931634545326233 Final Loss: 3.436397770744058 Start Acc: 1.3215174129353233 End Acc: 0.792910447761194 Acc: 0.07773631840796019 F1: 2.517580535961532 EM: 4.912935323383085
--- Step: 300 Ext Loss: 0.6931580485299577 Final Loss: 3.4063127991369 Start Acc: 1.173172757475083 End Acc: 0.8305647840531561 Acc: 0.07267441860465117 F1: 2.5262184987421823 EM: 4.900332225913621
--- Step: 400 Ext Loss: 0.6931553387879731 Final Loss: 3.385046112269832 Start Acc: 1.0910224438902743 End Acc: 0.8338528678304239 Acc: 0.07793017456359103 F1: 2.5535902238985027 EM: 4.800498753117207
--- Step: 500 Ext Loss: 0.6931537107793156 Final Loss: 3.37301

KeyboardInterrupt: ignored

## Expt 4 - Glove + Skip Memory

In [0]:
hyperparameters4 = {'vocab_size': len(FIELD.vocab),\
                   'embedding_length': 300,\
                   'word_embeddings': FIELD.vocab.vectors,\
                   'vocab_itos':  FIELD.vocab.itos,\
                   'hidden_size': 300,\
                   'num_passes': 2,\
                   'sketchy_mode': 'word',\
                   'intensive_mode': 'similarity',\
                   'pool_mode': 'max',\
                   'skip_memory': True,\
                   'num_epochs': 10,\
                   'learning_rate': 1e-4,\
                   'loss_function': retrospective_parallel_loss,\
                   'bert_encoder': None,\
                   'bert_tokenizer': None,\
                   'beta1': 0.5,\
                   'beta2': 0.5,\
                   'lambda1': 0.5,\
                   'lambda2': 0.5,\
                   'delta': 0}

expt4 = Experiment(hyperparameters4, log_dir='/content/drive/Shared drives/CIS 700-1 Final Project/runs/expt4_glove3')
trained_model_glove = expt4.train(train_iterator)
expt4.evaluate(val_iterator)

--- Step: 0 Ext Loss: 0.6937507390975952 Final Loss: 3.936500072479248 Start Acc: 0.0 End Acc: 3.125 Acc: 0.0 F1: 0.178216211108942 EM: 12.5
--- Step: 100 Ext Loss: 0.6932281934388793 Final Loss: 3.5481256376398673 Start Acc: 0.8044554455445545 End Acc: 0.5878712871287128 Acc: 0.03094059405940594 F1: 1.264611654507865 EM: 9.993811881188119
--- Step: 200 Ext Loss: 0.693187889471576 Final Loss: 3.501665121287256 Start Acc: 0.9172885572139303 End Acc: 0.6840796019900498 Acc: 0.07773631840796019 F1: 1.3063344971131807 EM: 8.53544776119403
--- Step: 300 Ext Loss: 0.6931743655489925 Final Loss: 3.4648907794508825 Start Acc: 1.1524086378737541 End Acc: 0.7994186046511628 Acc: 0.1972591362126246 F1: 1.4795119880420928 EM: 7.879983388704319
--- Step: 400 Ext Loss: 0.6931675867249544 Final Loss: 3.435728432828946 Start Acc: 1.4027431421446384 End Acc: 0.8104738154613467 Acc: 0.24937655860349128 F1: 1.6025694419016165 EM: 7.769638403990025
--- Step: 500 Ext Loss: 0.6931635140182967 Final Loss: 3.

AttributeError: ignored

## Expt 5 - Updated memory module

In [0]:
%tensorboard --logdir /content/drive/Shared\ drives/CIS\ 700-1\ Final\ Project/runs

In [0]:
hyperparameters5 = {'vocab_size': len(FIELD.vocab),\
                   'embedding_length': 300,\
                   'word_embeddings': FIELD.vocab.vectors,\
                   'vocab_itos':  FIELD.vocab.itos,\
                   'hidden_size': 300,\
                   'num_passes': 2,\
                   'sketchy_mode': 'word',\
                   'intensive_mode': 'similarity',\
                   'pool_mode': 'max',\
                   'skip_memory': False,\
                   'num_epochs': 10,\
                   'learning_rate': 1e-4,\
                   'loss_function': retrospective_parallel_loss,\
                   'bert_encoder': None,\
                   'bert_tokenizer': None,\
                   'beta1': 0.5,\
                   'beta2': 0.5,\
                   'lambda1': 0.5,\
                   'lambda2': 0.5,\
                   'delta': 0}

expt5 = Experiment(hyperparameters5, log_dir='/content/drive/Shared drives/CIS 700-1 Final Project/runs/expt5_glove')
trained_model_glove = expt5.train(train_iterator, expt_name='expt5_glove', load_model=True)
expt5.evaluate(val_iterator, expt_name='expt5_glove', load_model=True)

Loading model...




RuntimeError: ignored

In [0]:
expt5 = Experiment(hyperparameters5, log_dir='/content/drive/Shared drives/CIS 700-1 Final Project/runs/expt5_glove')
expt5.evaluate(val_iterator, expt_name='expt5_glove', load_model=True)

## Expt 6 - Without unanswerable

In [0]:
hyperparameters6 = {'vocab_size': len(FIELD.vocab),\
                   'embedding_length': 300,\
                   'word_embeddings': FIELD.vocab.vectors,\
                   'vocab_itos':  FIELD.vocab.itos,\
                   'hidden_size': 300,\
                   'num_passes': 2,\
                   'sketchy_mode': 'word',\
                   'intensive_mode': 'similarity',\
                   'pool_mode': 'max',\
                   'skip_memory': False,\
                   'num_epochs': 10,\
                   'learning_rate': 1e-4,\
                   'loss_function': retrospective_loss_span,\
                   'bert_encoder': None,\
                   'bert_tokenizer': None,\
                   'beta1': 0.5,\
                   'beta2': 0.5,\
                   'lambda1': 0.5,\
                   'lambda2': 0.5,\
                   'delta': 0}

expt6 = Experiment(hyperparameters6, log_dir='/content/drive/Shared drives/CIS 700-1 Final Project/runs/expt6_glove')
trained_model_glove = expt6.train(train_iterator, expt_name='expt6_glove_5', load_model=True)
expt6.evaluate(val_iterator, expt_name='expt6_glove', load_model=True)

Loading model...
--- Step: 0 Loss: 9.596662521362305 Start Acc: 12.5 End Acc: 9.375 Acc: 3.125 F1: 10.155237234566538 EM: 3.125
--- Step: 100 Loss: 9.748134258950111 Start Acc: 8.818069306930694 End Acc: 5.816831683168317 Acc: 2.258663366336634 F1: 8.33831979339612 EM: 4.672029702970297
--- Step: 200 Loss: 9.700473332286474 Start Acc: 8.61318407960199 End Acc: 5.892412935323383 Acc: 2.347636815920398 F1: 8.113869690568356 EM: 4.353233830845771
--- Step: 300 Loss: 9.673300700330259 Start Acc: 8.731312292358805 End Acc: 5.928156146179402 Acc: 2.3151993355481726 F1: 8.124684863249463 EM: 4.45390365448505
--- Step: 400 Loss: 9.657380678409947 Start Acc: 8.829488778054863 End Acc: 5.953865336658354 Acc: 2.3924563591022445 F1: 8.205733174057672 EM: 4.51215710723192
--- Step: 500 Loss: 9.66882743188245 Start Acc: 8.813622754491018 End Acc: 5.906936127744511 Acc: 2.4451097804391217 F1: 8.270117556207767 EM: 4.572105788423154
--- Step: 600 Loss: 9.67645449804982 Start Acc: 8.782237936772047 End

FileNotFoundError: ignored

## Expt 7 - Skip Memory + Verification

In [0]:
hyperparameters7 = {'vocab_size': len(FIELD.vocab),\
                   'embedding_length': 300,\
                   'word_embeddings': FIELD.vocab.vectors,\
                   'vocab_itos':  FIELD.vocab.itos,\
                   'hidden_size': 300,\
                   'num_passes': 3,\
                   'sketchy_mode': 'word',\
                   'intensive_mode': 'similarity',\
                   'pool_mode': 'max',\
                   'skip_memory': True,\
                   'num_epochs': 10,\
                   'learning_rate': 2e-3,\
                   'loss_function': retrospective_loss_span,\
                   'bert_encoder': None,\
                   'bert_tokenizer': None,\
                   'beta1': 0.5,\
                   'beta2': 0.5,\
                   'lambda1': 0.5,\
                   'lambda2': 0.5,\
                   'delta': 0}

expt7 = Experiment(hyperparameters7, log_dir='/content/drive/Shared drives/CIS 700-1 Final Project/runs/expt7_glove')
trained_model_glove = expt7.train(train_iterator, expt_name='expt7_1_glove', load_model=False)
expt7.evaluate(val_iterator, expt_name='expt7_glove', load_model=True)

--- Step: 0 Loss: 13.97938346862793 Start Acc: 0.0 End Acc: 0.0 Acc: 0.0 F1: 0.457353366416592 EM: 0.0
--- Step: 100 Loss: 12.043832977219383 Start Acc: 1.7017326732673268 End Acc: 1.4232673267326732 Acc: 0.40222772277227725 F1: 2.7393515052951787 EM: 0.9900990099009901
--- Step: 200 Loss: 11.777081128969714 Start Acc: 2.347636815920398 End Acc: 1.6480099502487562 Acc: 0.5130597014925373 F1: 2.9907315830741528 EM: 1.0883084577114428
--- Step: 300 Loss: 11.650936963154232 Start Acc: 2.8654485049833887 End Acc: 1.7753322259136213 Acc: 0.550249169435216 F1: 3.079303979584413 EM: 1.058970099667774
--- Step: 400 Loss: 11.55719116857819 Start Acc: 2.984725685785536 End Acc: 1.9560473815461346 Acc: 0.5610972568578554 F1: 3.104485228801713 EM: 1.0832294264339153
--- Step: 500 Loss: 11.48647401432791 Start Acc: 3.2559880239520957 End Acc: 2.089570858283433 Acc: 0.592564870259481 F1: 3.217511413795408 EM: 1.1477045908183632
--- Step: 600 Loss: 11.430453905051639 Start Acc: 3.3953826955074877 End

FileNotFoundError: ignored

## Expt 8 - Reweighted loss

In [0]:
hyperparameters8 = {'vocab_size': len(FIELD.vocab),\
                   'embedding_length': 300,\
                   'word_embeddings': FIELD.vocab.vectors,\
                   'vocab_itos':  FIELD.vocab.itos,\
                   'hidden_size': 300,\
                   'num_passes': 2,\
                   'sketchy_mode': 'word',\
                   'intensive_mode': 'similarity',\
                   'pool_mode': 'max',\
                   'skip_memory': False,\
                   'num_epochs': 10,\
                   'learning_rate': 1e-4,\
                   'loss_function': retrospective_parallel_loss,\
                   'bert_encoder': None,\
                   'bert_tokenizer': None,\
                   'beta1': 0.5,\
                   'beta2': 0.5,\
                   'lambda1': 0.5,\
                   'lambda2': 0.5,\
                   'delta': 0}

expt8 = Experiment(hyperparameters8, log_dir='/content/drive/Shared drives/CIS 700-1 Final Project/runs/expt8_glove_saket_1')
trained_model_glove = expt8.train(train_iterator, expt_name='expt8_glove_saket_6', load_model=True)
expt8.evaluate(val_iterator, expt_name='expt8_glove_saket_9', load_model=True)

Loading model...
--- Step: 0 Ext Loss: 0.6931471824645996 Final Loss: 1.7458192110061646 Start Acc: 0.0 End Acc: 0.0 Acc: 0.0 F1: 2.1151714564281243 EM: 9.375
--- Step: 100 Ext Loss: 1.1392034917774767 Final Loss: 2.0794503712418058 Start Acc: 8.756188118811881 End Acc: 5.631188118811881 Acc: 2.4133663366336635 F1: 6.705342052717596 EM: 12.623762376237623
--- Step: 200 Ext Loss: 1.1774140471842751 Final Loss: 2.1195582495399967 Start Acc: 8.255597014925373 End Acc: 5.301616915422885 Acc: 2.2388059701492535 F1: 6.734809265754014 EM: 12.873134328358208
--- Step: 300 Ext Loss: 1.1722455191057781 Final Loss: 2.1208479638115514 Start Acc: 7.8903654485049834 End Acc: 5.4194352159468435 Acc: 2.1802325581395348 F1: 6.605015166811358 EM: 12.925664451827242
--- Step: 400 Ext Loss: 1.1778647168318828 Final Loss: 2.1283392380003323 Start Acc: 7.761845386533666 End Acc: 5.3849750623441395 Acc: 2.158665835411471 F1: 6.498738810828334 EM: 13.037718204488778
--- Step: 500 Ext Loss: 1.179770901769459 F

In [0]:
for i in range(5, 10):
  print(i)

5
6
7
8
9


# Run as Python script

In [0]:
!rm -rf retroqa
!git clone https://github.com/karvesaket/retroqa.git

In [0]:
!pip install transformers

In [0]:
!python3 /content/retroqa/run.py