<a href="https://colab.research.google.com/github/lkwate/neural-question-generation/blob/master/Question_generation_Transformer_Bert.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [33]:
#install package
!pip install torch torchvision
!pip install pytorch-transformers
!pip install pytorch-pretrained-bert



In [34]:
#Download the SQUAD train and dev dataset
!wget https://rajpurkar.github.io/SQuAD-explorer/dataset/train-v2.0.json
!wget https://rajpurkar.github.io/SQuAD-explorer/dataset/dev-v2.0.json

--2020-02-10 15:15:36--  https://rajpurkar.github.io/SQuAD-explorer/dataset/train-v2.0.json
Resolving rajpurkar.github.io (rajpurkar.github.io)... 185.199.110.153, 185.199.108.153, 185.199.109.153, ...
Connecting to rajpurkar.github.io (rajpurkar.github.io)|185.199.110.153|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 42123633 (40M) [application/json]
Saving to: ‘train-v2.0.json.1’


2020-02-10 15:15:37 (178 MB/s) - ‘train-v2.0.json.1’ saved [42123633/42123633]

--2020-02-10 15:15:39--  https://rajpurkar.github.io/SQuAD-explorer/dataset/dev-v2.0.json
Resolving rajpurkar.github.io (rajpurkar.github.io)... 185.199.110.153, 185.199.108.153, 185.199.109.153, ...
Connecting to rajpurkar.github.io (rajpurkar.github.io)|185.199.110.153|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 4370528 (4.2M) [application/json]
Saving to: ‘dev-v2.0.json.1’


2020-02-10 15:15:39 (38.7 MB/s) - ‘dev-v2.0.json.1’ saved [4370528/4370528]



In [35]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load in 

import json
import random
from tqdm import tqdm
from pytorch_transformers import BertTokenizer
#from transformers import BertTokenizer
from nltk.tokenize import sent_tokenize # to tokenize paragraph in sentence
import nltk # to compute BLEU score of outputs question

## import about transformer model
import torch
import torch.nn as nn 
import torch.nn.functional as F
import math 
from torch.nn.init import xavier_uniform_
from torch import optim
from pytorch_pretrained_bert import BertModel

from torch.nn import Transformer
nltk.download('punkt')


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [0]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

## definition of constant

In [0]:
constant = {
    'd_model' : 512,  ## dimension of simple model's features
    'nhead' : 8,   ## number of head in multihead attention
    'max_question_length' : 30,  ## max length of question, 
    'number_layer' : 6,  ## depth of stack of layers
    'learning_rate' : 5e-5,  ## learning rate of optimizer
    'vocab_size' : 30522,  ## vocabulary size
    'dropout' :  0.1,  ## dropout hyperparameter for regularization
    'd_emb' : 768, ## dimension of word embeddings provide by bertModel-base-uncased
    'start_answer_token' : 1,  ## token follow by the answer spanned in the context 
    'end_answer_token' : 2,  ## the end token of the answer spanned in the context
    'pad' : 0,  ## pad token
    'cls' : 101,  ## cls token, begin token of sequence
    'sep' : 102,  ## separate token
    'mask' : 103,  ## mask token
    'batch_size' : 4, ## batch size
    'epoch' : 2 ## number of times training will be repeat
}

# Tokenizer

In [0]:
class Tokenizer():
    
    def __init__(self):
        self.tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
    
    def tokenize(self, input):
        return self.tokenizer.tokenize(input)
    
    def processContextAnswer(self, context, start_answer, end_answer): 
        before_answer = self.tokenizer.encode(context[:start_answer], add_special_tokens = False)
        answer = self.tokenizer.encode(context[start_answer : end_answer], add_special_tokens = False)
        after_answer = self.tokenizer.encode(context[end_answer:], add_special_tokens = False)
        indexed_tokens = [constant['cls']] + before_answer + [constant['start_answer_token']] + answer + [constant['end_answer_token']] + after_answer + [constant['sep']]
        segments_tokens = [1] * len(indexed_tokens)
        
        return indexed_tokens, segments_tokens
    
    def processQuestion(self, question):
        question_tokens = [constant['cls']] + self.tokenizer.encode(question, add_special_tokens = False) + [constant['sep']]
        question_segments = [1] * len(question_tokens)
        
        return question_tokens, question_segments

    def decode(self, input_ids):
        output = self.tokenizer.convert_ids_to_tokens(input_ids)
        output = self.tokenizer.convert_tokens_to_string(output)
        return output

# Position encoder layer

In [0]:
class PositionEncoder(nn.Module): 
    
    def __init__(self, d_model, dropout=0.1, max_len=2500):
        super(PositionEncoder, self).__init__()
        
        self.dropout = nn.Dropout(p=dropout).cuda(device)
        pe = torch.zeros(max_len,d_model).to(device)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1).to(device)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))
        div_term = div_term.to(device)
        pe[:, 0::2] = torch.sin(position * div_term).to(device)
        pe[:, 1::2] = torch.cos(position * div_term).to(device)
        pe = pe.unsqueeze(0).transpose(0, 1)
        self.register_buffer('pe', pe)
        
    def forward(self, x):
        x = x + self.pe[:x.size(0), :]
        #shape of x : (x.size(0), d_model)
        return self.dropout(x).to(device)

# Embedding layer

In [0]:
class Embedding(nn.Module):
    
    def __init__(self, d_model = 512):
        super(Embedding, self).__init__()
        
        self.d_model = d_model
        #layer to embed tokens with BERT 
        self.bertModel = BertModel.from_pretrained('bert-base-uncased').cuda(device)
        #layer to projectize output of BERT on space of input of Transformer
        self.linear = nn.Linear(constant['d_emb'], d_model).cuda(device)
        
    def forward(self, indexed_tokens, segments_ids):
        
        #get words embedding
        encoders_layers, _ = self.bertModel(indexed_tokens, segments_ids)
        #get the output of the last hidden layers of bertModel
        encoders_layers = encoders_layers[-1].squeeze(0).to(device)
        #projectize the result on dimension of model
        encoders_layers = self.linear(encoders_layers)
        #mutiply by the square root of the d_model dimension of model
        encoders_layers = encoders_layers * torch.sqrt(torch.FloatTensor([constant['d_model']]).to(device))
        #transpose (0, 1) to change the shape to (length_sequence, batch, 512)
        return encoders_layers
        

In [0]:
class TransformerModel(nn.Module):
    
    def __init__(self, d_model = 512, vocab_size = 30522):
        super(TransformerModel, self).__init__()
        
        self.transformer = Transformer(d_model = d_model, nhead=constant['nhead'], dropout=constant['dropout'], 
                                       num_encoder_layers=constant['number_layer'], 
                                       num_decoder_layers=constant['number_layer']).cuda(device)
        #position encoder
        self.position_encoder = PositionEncoder(d_model).cuda(device)
        #embedding layer for vector representation of the combination of context and answer
        self.embedding = Embedding(d_model).cuda(device)
        # layer for the projection of the output of transformer decoder on the space which the dimension is the size of vocabulary
        self.linear = nn.Linear(d_model, vocab_size).cuda(device)
        #compute log of the probality distribution
        self.log_softmax = nn.LogSoftmax(dim = -1).cuda(device)
        
    def forward(self, indexed_tokens_context = None, segments_ids_context = None, indexed_tokens_question = None, segments_ids_question = None, memory = None, indexed_tokens = None, segments_ids = None, decode = False):
        
        if decode : 
            indexed_tokens = torch.tensor([indexed_tokens]).to(device)
            segments_ids = torch.tensor([segments_ids]).to(device)
            tgt = self.embedding(indexed_tokens, segments_ids)
            tgt = tgt.unsqueeze(1)
            tgt = self.position_encoder(tgt)
            #generate mask 
            tgt_mask = self.transformer.generate_square_subsequent_mask(tgt.shape[0]).to(device)
            output = self.transformer.decoder(tgt, memory, tgt_mask = tgt_mask)
            output = output.squeeze(1)
            output = self.linear(output)
            output = self.log_softmax(output)

            del indexed_tokens, segments_ids
            return output
        else :
            #convert to indices and segment to tensor
            indexed_tokens_context = torch.tensor([indexed_tokens_context]).to(device)
            segments_ids_context = torch.tensor([segments_ids_context]).to(device)
            indexed_tokens_question = torch.tensor([indexed_tokens_question]).to(device)
            segments_ids_question = torch.tensor([segments_ids_question]).to(device)

            # get embedding representation of context and answer combination
            src = self.embedding(indexed_tokens_context, segments_ids_context)

            src = src.unsqueeze(1)
            # add position features
            src = self.position_encoder(src)

            #get embedding representation of question
            tgt = self.embedding(indexed_tokens_question, segments_ids_question)

            tgt = tgt.unsqueeze(1)
            # add position features
            tgt = self.position_encoder(tgt)
            #generate mask
            tgt_mask = self.transformer.generate_square_subsequent_mask(tgt.shape[0]).to(device)
            #compute memory 
            memory = self.transformer.encoder(src)
            #output
            output = self.transformer.decoder(tgt, memory, tgt_mask = tgt_mask)
            output = output.squeeze(1)
            output = self.linear(output)
            output = self.log_softmax(output)

            del indexed_tokens_context, indexed_tokens_question, segments_ids_context, segments_ids_question
            # return memory, and log of probabilities distribution of next tokens
            return memory, output

## Building of dataset based on SQUAD 2.0

In [0]:
#dataset
class Dataset(): 
    '''
        rule of validation of data in dataset: 
            context cannot be null
            answer a question regarding the context cannot be impossible
    '''
    def __init__(self, path_to_dataset): 
        self.dataset = []
        with open(path_to_dataset) as json_file: 

            data = json.load(json_file)
            for batch_data in data['data']: 
            
                for paragraph in batch_data['paragraphs']:
                    if paragraph['context']: 
                        context = paragraph['context']
                        
                        # loop over question and answers for a given context
                        if len(paragraph['qas']) != 0:
                            for qas in paragraph['qas']: 
                                if not qas['is_impossible'] : 
                                    question = qas['question']
                                    
                                    #ignore question which length is less than 3
                                    if len(question) <= 2 :
                                        continue
                                        
                                    # loop over answers
                                    length_answer = -1
                                    start_answer = 10e4
                                    end_answer = None
                                    for ans in qas['answers']: 
                                        if ans['answer_start'] < start_answer:
                                            start_answer = ans['answer_start']
                                        if length_answer <= len(ans['text']): 
                                            end_answer = start_answer + len(ans['text'])
                                        response = context[start_answer : end_answer]
                                        
                                    if response == '':
                                        continue
                                        
                                    index = 0
                                    for sentence in sent_tokenize(context):
                                        j = sentence.find(response)
                                        if j != -1 and index <= start_answer and (index + len(sentence))>= end_answer: 
                                            self.dataset.append((sentence, question, j, j + len(response)))
                                            break
                                        index += len(sentence)
        #shuffle item in dataset
        random.shuffle(self.dataset)
        

## train

In [0]:
def trainStep(model, optimizer, criterion, tokenizer, batch):
    
    optimizer.zero_grad()
    loss = 0
    for batch_item in batch:
        context = batch_item[0]
        question = batch_item[1]
        start_answer = batch_item[2]
        end_answer = batch_item[3]
        
        question_predicted = []
        indexed_tokens_question, segments_ids_question = tokenizer.processQuestion(question)
        indexed_tokens_context, segments_ids_context = tokenizer.processContextAnswer(context, start_answer, end_answer)
        
        memory, output = model(indexed_tokens_context=indexed_tokens_context, segments_ids_context=segments_ids_context, indexed_tokens_question=[indexed_tokens_question[0]], segments_ids_question=[segments_ids_question[0]])
        
        for qi in range(1, len(indexed_tokens_question) - 1):
            target_index = torch.argmax(output[-1, :]).item()
            question_predicted.append(target_index)
            loss += criterion(output[-1, :].unsqueeze(0), torch.LongTensor([indexed_tokens_question[qi]]).to(device))
            output = model(memory = memory, indexed_tokens=indexed_tokens_question[: qi + 1], segments_ids=segments_ids_question[: qi + 1], decode = True)
        #last tokens
        loss += criterion(output[-1, :].unsqueeze(0), torch.LongTensor([indexed_tokens_question[-1]]).to(device))
        target_index = torch.argmax(output[-1, :]).item()
        question_predicted.append(target_index)
        
    #compute gradient of loss function
    loss.backward()
    
    #update parameter
    optimizer.step()
    
    del indexed_tokens_context, indexed_tokens_question, output
    
    return loss, question_predicted, question

## train Iter

In [0]:
def trainTBertIter(dataset, model, optimizer, criterion, tokenizer, num_epoch = 10, period_display = 17): 
    """
        loop over dataset and train on each sample.
    """
    iter = 1
    plot_losses = []
    plot_losses_epoch = []
    plot_total_loss = 0
    plot_total_loss_epoch = 0
    
    #number of iteration in one batch
    batch_size = constant['batch_size']
    niter = len(dataset) // batch_size
    remain = len(dataset) == niter * batch_size
    
    for ep in tqdm(range(constant['epoch'])): 
        for j in tqdm(range(niter)):
            batch = dataset[j * batch_size : (j + 1) * batch_size]
            loss, last_predicted, last_truth = trainStep(model, optimizer, criterion, tokenizer, batch)
            plot_total_loss += loss.item()
            plot_total_loss_epoch += loss.item()
            
            if j % period_display == 1:
                avg_loss = plot_total_loss / (2 * batch_size)
                plot_losses.append(avg_loss)
                last_predicted = tokenizer.decode(last_predicted)
                print("loss = %.7f " %(avg_loss))
                print("predicted : ", last_predicted)
                print("truth : ", last_truth)
                plot_total_loss = 0
        if remain :
            batch = dataset[niter * batch_size :]
            loss, last_predicted, last_truth = trainStep(model, optimizer, criterion, tokenizer, batch)
            plot_total_loss = loss.item()
            plot_total_loss_epoch += loss.item()
            avg_loss = plot_total_loss / (len(dataset) - niter * batch_size)
            plot_losses.append(avg_loss)
            plot_total_loss = 0
            
            
        plot_losses_epoch.append(plot_total_loss_epoch)
        plot_total_loss_epoch = 0
        
        #save model
        pathCheckpoint = './checkpointModel.pth'   
        checkPointModel = {
            'model' : model, 
            'optimizer' : optimizer, 
            'model_state_dict' : model.state_dict(), 
            'optimizer_state_dict' : optimizer.state_dict(),
            'epoch' : ep, 
            'plot_losses' : plot_losses, 
            'plot_losses_epoch' : plot_losses_epoch
        }
        torch.save(checkPointModel, pathCheckpoint)
    return plot_losses, 

## evaluate

In [0]:
def evaluate(model, context, start_answer, end_answer, tokenizer, max_length = constant['max_question_length']):
    question_predicted = []
    with torch.no_grad():
        indexed_tokens_context, segments_ids_context = tokenizer.processContextAnswer(context, start_answer, end_answer)
        indexed_tokens_question = [constant['cls']]
        segments_ids_question  = [1]
        memory, output = model(indexed_tokens_context=indexed_tokens_context, segments_ids_context=segments_ids_context, indexed_tokens_question=indexed_tokes_question, segments_ids_question=segments_ids_question)
        for i in range(max_length):
            target_index = torch.argmax(output[-1, :]).item()
            question_predicted.append(target_index)
            if target_index == 102:
                break
            indexed_tokens_question.append(target_index)
            segments_ids_question.append(1)
            output = model(memory = memory, indexed_tokens=indexed_tokens_question, segments_ids=segments_ids_question, decode = True)

        question_predicted = tokenizer.tokenizer.decode(question_predicted)
    return question_predicted

In [0]:
model = TransformerModel()
#dataParallel
model = nn.DataParallel(model)
model = model.to(device)

In [0]:
adamOptimizer = optim.Adam(model.parameters(), lr=constant['learning_rate'])

In [0]:
criterion = nn.CrossEntropyLoss()

In [0]:
tokenizer = Tokenizer()

In [0]:
# load dataset 
dataset = Dataset('./train-v2.0.json')

In [0]:
#trainIter
trainTBertIter(dataset.dataset, model, adamOptimizer, criterion, tokenizer)





  0%|          | 0/2 [00:00<?, ?it/s][A[A[A[A




  0%|          | 0/19417 [00:00<?, ?it/s][A[A[A[A[A




  0%|          | 1/19417 [00:03<17:35:22,  3.26s/it][A[A[A[A[A




  0%|          | 2/19417 [00:06<17:27:14,  3.24s/it][A[A[A[A[A

loss = 114.5551643 
predicted :  [SEP] [SEP] [SEP] [SEP] [SEP] [SEP] [SEP] [SEP] [SEP] [SEP] [SEP]
truth :  Where can you obtain information about the Sunshine Act?







  0%|          | 3/19417 [00:10<18:32:03,  3.44s/it][A[A[A[A[A




  0%|          | 4/19417 [00:13<18:26:21,  3.42s/it][A[A[A[A[A




  0%|          | 5/19417 [00:17<18:27:31,  3.42s/it][A[A[A[A[A




  0%|          | 6/19417 [00:20<18:56:30,  3.51s/it][A[A[A[A[A




  0%|          | 7/19417 [00:24<19:28:38,  3.61s/it][A[A[A[A[A




  0%|          | 8/19417 [00:28<20:03:59,  3.72s/it][A[A[A[A[A




  0%|          | 9/19417 [00:31<18:58:34,  3.52s/it][A[A[A[A[A




  0%|          | 10/19417 [00:34<18:04:27,  3.35s/it][A[A[A[A[A




  0%|          | 11/19417 [00:38<19:20:20,  3.59s/it][A[A[A[A[A




  0%|          | 12/19417 [00:41<18:08:17,  3.36s/it][A[A[A[A[A




  0%|          | 13/19417 [00:44<17:52:01,  3.31s/it][A[A[A[A[A




  0%|          | 14/19417 [00:47<17:27:06,  3.24s/it][A[A[A[A[A




  0%|          | 15/19417 [00:51<17:16:24,  3.21s/it][A[A[A[A[A




  0%|          | 16/19417 [00:54<17:16:11,  3.20s/it]

loss = 989.1640053 
predicted :  [SEP] [SEP] [SEP] [SEP] ? [SEP] [SEP] ? [SEP] [SEP] [SEP] [SEP] [SEP] [SEP] [SEP] [SEP] [SEP] [SEP] [SEP] [SEP] ? ? [SEP] [SEP] [SEP] ?
truth :  Along with the PS Slim's value, quietness, and lower power usage, what superficial quality did PC World praise?







  0%|          | 20/19417 [01:09<19:49:01,  3.68s/it][A[A[A[A[A




  0%|          | 21/19417 [01:13<19:50:58,  3.68s/it][A[A[A[A[A




  0%|          | 22/19417 [01:16<19:25:25,  3.61s/it][A[A[A[A[A




  0%|          | 23/19417 [01:20<19:12:34,  3.57s/it][A[A[A[A[A




  0%|          | 24/19417 [01:24<19:54:21,  3.70s/it][A[A[A[A[A




  0%|          | 25/19417 [01:28<21:07:12,  3.92s/it][A[A[A[A[A




  0%|          | 26/19417 [01:33<22:10:21,  4.12s/it][A[A[A[A[A




  0%|          | 27/19417 [01:36<21:28:24,  3.99s/it][A[A[A[A[A




  0%|          | 28/19417 [01:40<20:04:24,  3.73s/it][A[A[A[A[A




  0%|          | 29/19417 [01:44<20:35:52,  3.82s/it][A[A[A[A[A




  0%|          | 30/19417 [01:46<19:00:19,  3.53s/it][A[A[A[A[A




  0%|          | 31/19417 [01:50<19:18:36,  3.59s/it][A[A[A[A[A




  0%|          | 32/19417 [01:54<19:04:02,  3.54s/it][A[A[A[A[A




  0%|          | 33/19417 [01:58<19:49:34,  3.

loss = 940.4951820 
predicted :  [SEP] [SEP] [SEP] [SEP] [SEP] [SEP] [SEP] [SEP] [SEP] [SEP]
truth :  What patent holding company sued Apple in 2007?







  0%|          | 37/19417 [02:10<16:48:22,  3.12s/it][A[A[A[A[A




  0%|          | 38/19417 [02:13<17:25:24,  3.24s/it][A[A[A[A[A




  0%|          | 39/19417 [02:17<17:24:20,  3.23s/it][A[A[A[A[A




  0%|          | 40/19417 [02:21<18:38:56,  3.46s/it][A[A[A[A[A




  0%|          | 41/19417 [02:24<18:41:16,  3.47s/it][A[A[A[A[A




  0%|          | 42/19417 [02:27<17:31:19,  3.26s/it][A[A[A[A[A




  0%|          | 43/19417 [02:30<16:51:01,  3.13s/it][A[A[A[A[A




  0%|          | 44/19417 [02:33<17:13:31,  3.20s/it][A[A[A[A[A




  0%|          | 45/19417 [02:37<19:00:51,  3.53s/it][A[A[A[A[A




  0%|          | 46/19417 [02:42<20:38:54,  3.84s/it][A[A[A[A[A




  0%|          | 47/19417 [02:46<20:30:31,  3.81s/it][A[A[A[A[A




  0%|          | 48/19417 [02:49<20:27:10,  3.80s/it][A[A[A[A[A




  0%|          | 49/19417 [02:53<20:18:23,  3.77s/it][A[A[A[A[A




  0%|          | 50/19417 [02:57<20:07:47,  3.

loss = 902.8892937 
predicted :  what ? the ? [SEP] [SEP] [SEP] [SEP] [SEP] ? ? [SEP] [SEP] [SEP]
truth :  When did the nationwide strike by air traffic controlers union happen?







  0%|          | 54/19417 [03:10<18:27:46,  3.43s/it][A[A[A[A[A




  0%|          | 55/19417 [03:13<18:04:45,  3.36s/it][A[A[A[A[A




  0%|          | 56/19417 [03:16<17:54:05,  3.33s/it][A[A[A[A[A




  0%|          | 57/19417 [03:19<17:21:10,  3.23s/it][A[A[A[A[A




  0%|          | 58/19417 [03:22<17:10:59,  3.20s/it][A[A[A[A[A




  0%|          | 59/19417 [03:26<17:17:02,  3.21s/it][A[A[A[A[A




  0%|          | 60/19417 [03:29<17:46:13,  3.30s/it][A[A[A[A[A




  0%|          | 61/19417 [03:33<18:00:27,  3.35s/it][A[A[A[A[A




  0%|          | 62/19417 [03:36<17:40:32,  3.29s/it][A[A[A[A[A




  0%|          | 63/19417 [03:41<21:00:31,  3.91s/it][A[A[A[A[A




  0%|          | 64/19417 [03:44<19:37:52,  3.65s/it][A[A[A[A[A




  0%|          | 65/19417 [03:48<19:32:03,  3.63s/it][A[A[A[A[A




  0%|          | 66/19417 [03:51<18:59:40,  3.53s/it][A[A[A[A[A




  0%|          | 67/19417 [03:55<19:43:45,  3.

loss = 848.8500366 
predicted :  what ? ? ? ? ? ? ? ? ? ? ? ? ? [SEP]
truth :  What kind of microscope shows that q-glass grows as separate particles?







  0%|          | 71/19417 [04:10<19:50:38,  3.69s/it][A[A[A[A[A




  0%|          | 72/19417 [04:15<21:53:40,  4.07s/it][A[A[A[A[A




  0%|          | 73/19417 [04:18<20:52:12,  3.88s/it][A[A[A[A[A




  0%|          | 74/19417 [04:22<19:54:18,  3.70s/it][A[A[A[A[A




  0%|          | 75/19417 [04:26<21:02:09,  3.92s/it][A[A[A[A[A




  0%|          | 76/19417 [04:31<23:07:56,  4.31s/it][A[A[A[A[A




  0%|          | 77/19417 [04:35<23:08:31,  4.31s/it][A[A[A[A[A




  0%|          | 78/19417 [04:39<21:47:13,  4.06s/it][A[A[A[A[A




  0%|          | 79/19417 [04:42<20:38:34,  3.84s/it][A[A[A[A[A




  0%|          | 80/19417 [04:45<19:02:54,  3.55s/it][A[A[A[A[A




  0%|          | 81/19417 [04:49<18:54:44,  3.52s/it][A[A[A[A[A




  0%|          | 82/19417 [04:52<19:08:48,  3.56s/it][A[A[A[A[A




  0%|          | 83/19417 [04:55<18:09:47,  3.38s/it][A[A[A[A[A




  0%|          | 84/19417 [04:59<18:04:12,  3.

loss = 857.0459404 
predicted :  what what what what what the what ? the ? ? ? ? ? ? ? ? [SEP]
truth :  During the 1980s and 1990s, how many people were killed annually because of dog bites?







  0%|          | 88/19417 [05:12<18:41:08,  3.48s/it][A[A[A[A[A




  0%|          | 89/19417 [05:15<17:39:39,  3.29s/it][A[A[A[A[A




  0%|          | 90/19417 [05:19<18:11:27,  3.39s/it][A[A[A[A[A




  0%|          | 91/19417 [05:22<18:30:27,  3.45s/it][A[A[A[A[A




  0%|          | 92/19417 [05:26<18:41:51,  3.48s/it][A[A[A[A[A




  0%|          | 93/19417 [05:30<19:11:51,  3.58s/it][A[A[A[A[A




  0%|          | 94/19417 [05:33<18:18:24,  3.41s/it][A[A[A[A[A




  0%|          | 95/19417 [05:37<19:44:59,  3.68s/it][A[A[A[A[A




  0%|          | 96/19417 [05:41<19:38:10,  3.66s/it][A[A[A[A[A




  0%|          | 97/19417 [05:44<19:47:47,  3.69s/it][A[A[A[A[A




  1%|          | 98/19417 [05:48<19:18:57,  3.60s/it][A[A[A[A[A




  1%|          | 99/19417 [05:51<18:25:46,  3.43s/it][A[A[A[A[A




  1%|          | 100/19417 [05:54<18:40:15,  3.48s/it][A[A[A[A[A




  1%|          | 101/19417 [05:58<19:26:49,  

loss = 810.6799927 
predicted :  what the the the the the the the the the the the the the the the the [SEP]
truth :  If the brewery tap is not located in the brewery, where is it usually located?







  1%|          | 105/19417 [06:13<19:41:42,  3.67s/it][A[A[A[A[A




  1%|          | 106/19417 [06:16<18:39:15,  3.48s/it][A[A[A[A[A




  1%|          | 107/19417 [06:19<17:31:37,  3.27s/it][A[A[A[A[A




  1%|          | 108/19417 [06:22<16:30:47,  3.08s/it][A[A[A[A[A




  1%|          | 109/19417 [06:25<16:45:50,  3.13s/it][A[A[A[A[A




  1%|          | 110/19417 [06:29<18:35:09,  3.47s/it][A[A[A[A[A




  1%|          | 111/19417 [06:33<18:55:19,  3.53s/it][A[A[A[A[A




  1%|          | 112/19417 [06:37<18:54:53,  3.53s/it][A[A[A[A[A




  1%|          | 113/19417 [06:41<20:02:33,  3.74s/it][A[A[A[A[A




  1%|          | 114/19417 [06:45<20:23:41,  3.80s/it][A[A[A[A[A




  1%|          | 115/19417 [06:49<20:34:49,  3.84s/it][A[A[A[A[A




  1%|          | 116/19417 [06:53<21:24:13,  3.99s/it][A[A[A[A[A




  1%|          | 117/19417 [06:57<21:00:25,  3.92s/it][A[A[A[A[A




  1%|          | 118/19417 [07:01

loss = 800.2819080 
predicted :  what the the the the the the the the the the [SEP]
truth :  The Romans tried to affix by avoiding what?







  1%|          | 122/19417 [07:16<20:25:33,  3.81s/it][A[A[A[A[A




  1%|          | 123/19417 [07:20<19:30:32,  3.64s/it][A[A[A[A[A




  1%|          | 124/19417 [07:23<18:50:34,  3.52s/it][A[A[A[A[A




  1%|          | 125/19417 [07:26<18:12:51,  3.40s/it][A[A[A[A[A




  1%|          | 126/19417 [07:30<18:52:27,  3.52s/it][A[A[A[A[A




  1%|          | 127/19417 [07:33<18:32:07,  3.46s/it][A[A[A[A[A




  1%|          | 128/19417 [07:36<18:12:28,  3.40s/it][A[A[A[A[A




  1%|          | 129/19417 [07:40<19:02:48,  3.56s/it][A[A[A[A[A




  1%|          | 130/19417 [07:43<18:18:47,  3.42s/it][A[A[A[A[A




  1%|          | 131/19417 [07:47<18:49:19,  3.51s/it][A[A[A[A[A




  1%|          | 132/19417 [07:50<18:37:14,  3.48s/it][A[A[A[A[A




  1%|          | 133/19417 [07:54<19:18:42,  3.61s/it][A[A[A[A[A




  1%|          | 134/19417 [07:58<19:33:25,  3.65s/it][A[A[A[A[A




  1%|          | 135/19417 [08:02

loss = 731.6521645 
predicted :  what the the the the the the the the ? ? the the ? [SEP]
truth :  What denomination are the majority of Guinea-Bissau's muslims?







  1%|          | 139/19417 [08:16<19:07:21,  3.57s/it][A[A[A[A[A




  1%|          | 140/19417 [08:18<17:39:14,  3.30s/it][A[A[A[A[A




  1%|          | 141/19417 [08:21<16:50:52,  3.15s/it][A[A[A[A[A




  1%|          | 142/19417 [08:25<18:08:49,  3.39s/it][A[A[A[A[A




  1%|          | 143/19417 [08:28<17:46:52,  3.32s/it][A[A[A[A[A




  1%|          | 144/19417 [08:31<16:48:30,  3.14s/it][A[A[A[A[A




  1%|          | 145/19417 [08:34<16:58:55,  3.17s/it][A[A[A[A[A




  1%|          | 146/19417 [08:38<18:30:09,  3.46s/it][A[A[A[A[A




  1%|          | 147/19417 [08:42<18:59:15,  3.55s/it][A[A[A[A[A




  1%|          | 148/19417 [08:45<18:21:00,  3.43s/it][A[A[A[A[A




  1%|          | 149/19417 [08:50<20:55:45,  3.91s/it][A[A[A[A[A




  1%|          | 150/19417 [08:54<21:08:34,  3.95s/it][A[A[A[A[A




  1%|          | 151/19417 [08:57<19:40:37,  3.68s/it][A[A[A[A[A




  1%|          | 152/19417 [09:01

loss = 759.3106728 
predicted :  what the the the the the ? the ? ? the the the ? ? the the ? ? ? the the ? the ? the [SEP] [SEP]
truth :  during this time the relationship change between Britian and the EIC. the EIC became more of a what to the crown??







  1%|          | 156/19417 [09:16<20:27:44,  3.82s/it][A[A[A[A[A




  1%|          | 157/19417 [09:19<19:12:50,  3.59s/it][A[A[A[A[A




  1%|          | 158/19417 [09:23<19:58:34,  3.73s/it][A[A[A[A[A




  1%|          | 159/19417 [09:27<20:16:17,  3.79s/it][A[A[A[A[A




  1%|          | 160/19417 [09:31<19:40:28,  3.68s/it][A[A[A[A[A




  1%|          | 161/19417 [09:34<19:19:09,  3.61s/it][A[A[A[A[A




  1%|          | 162/19417 [09:37<18:00:16,  3.37s/it][A[A[A[A[A




  1%|          | 163/19417 [09:41<18:43:29,  3.50s/it][A[A[A[A[A




  1%|          | 164/19417 [09:44<18:39:14,  3.49s/it][A[A[A[A[A




  1%|          | 165/19417 [09:47<17:15:05,  3.23s/it][A[A[A[A[A




  1%|          | 166/19417 [09:51<19:36:38,  3.67s/it][A[A[A[A[A




  1%|          | 167/19417 [09:55<19:51:35,  3.71s/it][A[A[A[A[A




  1%|          | 168/19417 [09:59<19:35:00,  3.66s/it][A[A[A[A[A




  1%|          | 169/19417 [10:02

loss = 733.5765381 
predicted :  what the the the the the the the the the the [SEP]
truth :  Which 21st century administration denied funding to UNFPA?







  1%|          | 173/19417 [10:17<19:11:47,  3.59s/it][A[A[A[A[A




  1%|          | 174/19417 [10:20<19:18:42,  3.61s/it][A[A[A[A[A




  1%|          | 175/19417 [10:24<20:18:24,  3.80s/it][A[A[A[A[A




  1%|          | 176/19417 [10:29<20:42:07,  3.87s/it][A[A[A[A[A




  1%|          | 177/19417 [10:32<19:50:17,  3.71s/it][A[A[A[A[A




  1%|          | 178/19417 [10:36<20:04:21,  3.76s/it][A[A[A[A[A




  1%|          | 179/19417 [10:39<19:45:21,  3.70s/it][A[A[A[A[A




  1%|          | 180/19417 [10:43<19:44:32,  3.69s/it][A[A[A[A[A




  1%|          | 181/19417 [10:47<20:11:47,  3.78s/it][A[A[A[A[A




  1%|          | 182/19417 [10:51<20:10:08,  3.77s/it][A[A[A[A[A




  1%|          | 183/19417 [10:54<18:37:31,  3.49s/it][A[A[A[A[A




  1%|          | 184/19417 [10:58<20:32:06,  3.84s/it][A[A[A[A[A




  1%|          | 185/19417 [11:01<18:46:01,  3.51s/it][A[A[A[A[A




  1%|          | 186/19417 [11:06

loss = 758.0620384 
predicted :  what the the the the ? the the ?
truth :  What is the total area of the levels 







  1%|          | 190/19417 [11:21<19:41:15,  3.69s/it][A[A[A[A[A




  1%|          | 191/19417 [11:24<20:01:16,  3.75s/it][A[A[A[A[A




  1%|          | 192/19417 [11:28<20:04:10,  3.76s/it][A[A[A[A[A




  1%|          | 193/19417 [11:31<19:01:43,  3.56s/it][A[A[A[A[A




  1%|          | 194/19417 [11:35<18:35:05,  3.48s/it][A[A[A[A[A




  1%|          | 195/19417 [11:39<19:50:51,  3.72s/it][A[A[A[A[A




  1%|          | 196/19417 [11:43<20:16:13,  3.80s/it][A[A[A[A[A




  1%|          | 197/19417 [11:46<18:59:16,  3.56s/it][A[A[A[A[A




  1%|          | 198/19417 [11:49<17:45:11,  3.33s/it][A[A[A[A[A




  1%|          | 199/19417 [11:53<18:35:53,  3.48s/it][A[A[A[A[A




  1%|          | 200/19417 [11:56<17:59:34,  3.37s/it][A[A[A[A[A




  1%|          | 201/19417 [11:59<18:22:34,  3.44s/it][A[A[A[A[A




  1%|          | 202/19417 [12:02<17:35:17,  3.30s/it][A[A[A[A[A




  1%|          | 203/19417 [12:06

loss = 688.9185734 
predicted :  what the the the ? the the ? [SEP]
truth :  What was the origin of the war?







  1%|          | 207/19417 [12:19<17:41:53,  3.32s/it][A[A[A[A[A




  1%|          | 208/19417 [12:24<19:40:05,  3.69s/it][A[A[A[A[A




  1%|          | 209/19417 [12:27<18:30:58,  3.47s/it][A[A[A[A[A




  1%|          | 210/19417 [12:30<18:28:30,  3.46s/it][A[A[A[A[A




  1%|          | 211/19417 [12:33<18:02:48,  3.38s/it][A[A[A[A[A




  1%|          | 212/19417 [12:37<17:46:15,  3.33s/it][A[A[A[A[A




  1%|          | 213/19417 [12:40<18:08:31,  3.40s/it][A[A[A[A[A




  1%|          | 214/19417 [12:43<17:31:38,  3.29s/it][A[A[A[A[A




  1%|          | 215/19417 [12:47<17:25:58,  3.27s/it][A[A[A[A[A




  1%|          | 216/19417 [12:49<16:55:13,  3.17s/it][A[A[A[A[A




  1%|          | 217/19417 [12:54<19:01:32,  3.57s/it][A[A[A[A[A




  1%|          | 218/19417 [12:57<17:57:51,  3.37s/it][A[A[A[A[A




  1%|          | 219/19417 [13:00<17:47:55,  3.34s/it][A[A[A[A[A




  1%|          | 220/19417 [13:03

loss = 706.0238552 
predicted :  what what is is the the the the the the the the the the ? the ? ? the ? ? [SEP]
truth :  In which year did C.E. Raven publish the remarks on creation-evolution quoted by Popper?







  1%|          | 224/19417 [13:18<18:38:50,  3.50s/it][A[A[A[A[A




  1%|          | 225/19417 [13:21<17:51:02,  3.35s/it][A[A[A[A[A




  1%|          | 226/19417 [13:24<18:07:22,  3.40s/it][A[A[A[A[A




  1%|          | 227/19417 [13:27<17:34:46,  3.30s/it][A[A[A[A[A




  1%|          | 228/19417 [13:30<16:52:51,  3.17s/it][A[A[A[A[A




  1%|          | 229/19417 [13:33<17:07:05,  3.21s/it][A[A[A[A[A




  1%|          | 230/19417 [13:37<18:27:28,  3.46s/it][A[A[A[A[A




  1%|          | 231/19417 [13:41<18:29:40,  3.47s/it][A[A[A[A[A




  1%|          | 232/19417 [13:44<18:36:26,  3.49s/it][A[A[A[A[A




  1%|          | 233/19417 [13:48<18:32:07,  3.48s/it][A[A[A[A[A




  1%|          | 234/19417 [13:51<18:11:32,  3.41s/it][A[A[A[A[A




  1%|          | 235/19417 [13:55<18:52:27,  3.54s/it][A[A[A[A[A




  1%|          | 236/19417 [13:59<19:17:30,  3.62s/it][A[A[A[A[A




  1%|          | 237/19417 [14:02

loss = 668.4002438 
predicted :  what the ? ? the the ? ? the ? ? ? ? ? ? [SEP]
truth :  The principle facade of the East Front is made from what type of stone?







  1%|          | 241/19417 [14:15<17:34:57,  3.30s/it][A[A[A[A[A




  1%|          | 242/19417 [14:19<17:26:46,  3.28s/it][A[A[A[A[A




  1%|▏         | 243/19417 [14:22<17:42:17,  3.32s/it][A[A[A[A[A




  1%|▏         | 244/19417 [14:26<18:19:13,  3.44s/it][A[A[A[A[A




  1%|▏         | 245/19417 [14:29<17:29:19,  3.28s/it][A[A[A[A[A




  1%|▏         | 246/19417 [14:32<16:51:54,  3.17s/it][A[A[A[A[A




  1%|▏         | 247/19417 [14:35<17:27:51,  3.28s/it][A[A[A[A[A




  1%|▏         | 248/19417 [14:38<17:07:29,  3.22s/it][A[A[A[A[A




  1%|▏         | 249/19417 [14:43<19:42:41,  3.70s/it][A[A[A[A[A




  1%|▏         | 250/19417 [14:46<19:11:57,  3.61s/it][A[A[A[A[A




  1%|▏         | 251/19417 [14:50<19:13:49,  3.61s/it][A[A[A[A[A




  1%|▏         | 252/19417 [14:53<18:39:34,  3.51s/it][A[A[A[A[A




  1%|▏         | 253/19417 [14:57<18:17:16,  3.44s/it][A[A[A[A[A




  1%|▏         | 254/19417 [14:59

loss = 669.5365963 
predicted :  what did the the the the the the ? the the ? [SEP]
truth :  Who was ultimately able to claim victoy in Port Said?







  1%|▏         | 258/19417 [15:13<17:30:44,  3.29s/it][A[A[A[A[A




  1%|▏         | 259/19417 [15:17<19:13:13,  3.61s/it][A[A[A[A[A




  1%|▏         | 260/19417 [15:21<19:47:04,  3.72s/it][A[A[A[A[A




  1%|▏         | 261/19417 [15:25<20:06:36,  3.78s/it][A[A[A[A[A




  1%|▏         | 262/19417 [15:29<19:40:02,  3.70s/it][A[A[A[A[A




  1%|▏         | 263/19417 [15:32<18:54:11,  3.55s/it][A[A[A[A[A




  1%|▏         | 264/19417 [15:36<20:09:16,  3.79s/it][A[A[A[A[A




  1%|▏         | 265/19417 [15:40<20:25:38,  3.84s/it][A[A[A[A[A




  1%|▏         | 266/19417 [15:45<21:25:59,  4.03s/it][A[A[A[A[A




  1%|▏         | 267/19417 [15:48<20:18:11,  3.82s/it][A[A[A[A[A




  1%|▏         | 268/19417 [15:52<20:57:13,  3.94s/it][A[A[A[A[A




  1%|▏         | 269/19417 [15:56<20:59:04,  3.95s/it][A[A[A[A[A




  1%|▏         | 270/19417 [16:00<21:02:55,  3.96s/it][A[A[A[A[A




  1%|▏         | 271/19417 [16:04

loss = 791.8192978 
predicted :  what the was was was of ? of ? [SEP]
truth :  Did the CAFS accelerator work as planned?







  1%|▏         | 275/19417 [16:19<19:50:06,  3.73s/it][A[A[A[A[A




  1%|▏         | 276/19417 [16:22<18:26:27,  3.47s/it][A[A[A[A[A




  1%|▏         | 277/19417 [16:25<18:14:04,  3.43s/it][A[A[A[A[A




  1%|▏         | 278/19417 [16:29<18:38:43,  3.51s/it][A[A[A[A[A




  1%|▏         | 279/19417 [16:32<18:29:39,  3.48s/it][A[A[A[A[A




  1%|▏         | 280/19417 [16:35<17:52:40,  3.36s/it][A[A[A[A[A




  1%|▏         | 281/19417 [16:38<17:41:49,  3.33s/it][A[A[A[A[A




  1%|▏         | 282/19417 [16:42<17:36:27,  3.31s/it][A[A[A[A[A




  1%|▏         | 283/19417 [16:46<18:19:06,  3.45s/it][A[A[A[A[A




  1%|▏         | 284/19417 [16:49<18:04:44,  3.40s/it][A[A[A[A[A




  1%|▏         | 285/19417 [16:53<18:40:29,  3.51s/it][A[A[A[A[A




  1%|▏         | 286/19417 [16:57<19:44:29,  3.71s/it][A[A[A[A[A




  1%|▏         | 287/19417 [17:01<20:01:38,  3.77s/it][A[A[A[A[A




  1%|▏         | 288/19417 [17:04

loss = 669.3138847 
predicted :  what is is the the the the the of [SEP]
truth :  What river was adjacent to HMNB Davenport?







  2%|▏         | 292/19417 [17:18<18:17:24,  3.44s/it][A[A[A[A[A




  2%|▏         | 293/19417 [17:22<19:41:50,  3.71s/it][A[A[A[A[A




  2%|▏         | 294/19417 [17:25<18:57:36,  3.57s/it][A[A[A[A[A




  2%|▏         | 295/19417 [17:29<19:51:04,  3.74s/it][A[A[A[A[A




  2%|▏         | 296/19417 [17:33<19:09:53,  3.61s/it][A[A[A[A[A




  2%|▏         | 297/19417 [17:37<19:24:02,  3.65s/it][A[A[A[A[A




  2%|▏         | 298/19417 [17:40<18:40:47,  3.52s/it][A[A[A[A[A




  2%|▏         | 299/19417 [17:43<18:58:31,  3.57s/it][A[A[A[A[A




  2%|▏         | 300/19417 [17:47<18:57:02,  3.57s/it][A[A[A[A[A




  2%|▏         | 301/19417 [17:51<18:55:32,  3.56s/it][A[A[A[A[A




  2%|▏         | 302/19417 [17:54<19:05:26,  3.60s/it][A[A[A[A[A




  2%|▏         | 303/19417 [17:57<18:17:09,  3.44s/it][A[A[A[A[A




  2%|▏         | 304/19417 [18:01<19:02:03,  3.59s/it][A[A[A[A[A




  2%|▏         | 305/19417 [18:05

loss = 699.1593571 
predicted :  what did did was the the the the the the ? what of [SEP]
truth :  Knibb also protagonised the creation of what?







  2%|▏         | 309/19417 [18:18<17:33:09,  3.31s/it][A[A[A[A[A




  2%|▏         | 310/19417 [18:21<17:01:52,  3.21s/it][A[A[A[A[A




  2%|▏         | 311/19417 [18:24<17:47:22,  3.35s/it][A[A[A[A[A




  2%|▏         | 312/19417 [18:28<18:14:18,  3.44s/it][A[A[A[A[A




  2%|▏         | 313/19417 [18:31<17:07:32,  3.23s/it][A[A[A[A[A




  2%|▏         | 314/19417 [18:34<17:31:04,  3.30s/it][A[A[A[A[A




  2%|▏         | 315/19417 [18:38<18:47:04,  3.54s/it][A[A[A[A[A




  2%|▏         | 316/19417 [18:43<20:13:40,  3.81s/it][A[A[A[A[A




  2%|▏         | 317/19417 [18:46<18:57:44,  3.57s/it][A[A[A[A[A




  2%|▏         | 318/19417 [18:50<19:44:12,  3.72s/it][A[A[A[A[A




  2%|▏         | 319/19417 [18:53<18:15:45,  3.44s/it][A[A[A[A[A




  2%|▏         | 320/19417 [18:57<19:38:44,  3.70s/it][A[A[A[A[A




  2%|▏         | 321/19417 [19:01<19:57:47,  3.76s/it][A[A[A[A[A




  2%|▏         | 322/19417 [19:04

loss = 674.8439980 
predicted :  what was the of of of of ? ? [SEP]
truth :  When was ASCII first commercially used?







  2%|▏         | 326/19417 [19:19<19:38:09,  3.70s/it][A[A[A[A[A




  2%|▏         | 327/19417 [19:23<20:34:32,  3.88s/it][A[A[A[A[A




  2%|▏         | 328/19417 [19:27<21:27:28,  4.05s/it][A[A[A[A[A




  2%|▏         | 329/19417 [19:31<20:11:23,  3.81s/it][A[A[A[A[A




  2%|▏         | 330/19417 [19:34<19:32:15,  3.68s/it][A[A[A[A[A




  2%|▏         | 331/19417 [19:38<19:32:11,  3.68s/it][A[A[A[A[A




  2%|▏         | 332/19417 [19:42<19:42:21,  3.72s/it][A[A[A[A[A




  2%|▏         | 333/19417 [19:45<19:33:50,  3.69s/it][A[A[A[A[A




  2%|▏         | 334/19417 [19:50<21:02:12,  3.97s/it][A[A[A[A[A




  2%|▏         | 335/19417 [19:53<20:06:13,  3.79s/it][A[A[A[A[A




  2%|▏         | 336/19417 [19:57<20:42:16,  3.91s/it][A[A[A[A[A




  2%|▏         | 337/19417 [20:00<19:13:54,  3.63s/it][A[A[A[A[A




  2%|▏         | 338/19417 [20:04<19:14:32,  3.63s/it][A[A[A[A[A




  2%|▏         | 339/19417 [20:07

loss = 750.9984894 
predicted :  what was the of of ? of of ? [SEP]
truth :  Who was Yaroslav's father?







  2%|▏         | 343/19417 [20:21<17:23:29,  3.28s/it][A[A[A[A[A




  2%|▏         | 344/19417 [20:24<17:20:44,  3.27s/it][A[A[A[A[A




  2%|▏         | 345/19417 [20:28<19:01:43,  3.59s/it][A[A[A[A[A




  2%|▏         | 346/19417 [20:31<18:05:54,  3.42s/it][A[A[A[A[A




  2%|▏         | 347/19417 [20:35<17:54:43,  3.38s/it][A[A[A[A[A




  2%|▏         | 348/19417 [20:38<17:14:48,  3.26s/it][A[A[A[A[A




  2%|▏         | 349/19417 [20:41<16:59:06,  3.21s/it][A[A[A[A[A




  2%|▏         | 350/19417 [20:44<16:49:33,  3.18s/it][A[A[A[A[A




  2%|▏         | 351/19417 [20:48<17:37:21,  3.33s/it][A[A[A[A[A




  2%|▏         | 352/19417 [20:51<18:06:10,  3.42s/it][A[A[A[A[A




  2%|▏         | 353/19417 [20:54<17:28:34,  3.30s/it][A[A[A[A[A




  2%|▏         | 354/19417 [20:57<17:11:54,  3.25s/it][A[A[A[A[A




  2%|▏         | 355/19417 [21:00<16:45:00,  3.16s/it][A[A[A[A[A




  2%|▏         | 356/19417 [21:03

loss = 613.0880089 
predicted :  what what was is is was the of ? ? ? ? s ? the ? [SEP]
truth :  In what month and year did Plymouth see its highest temperature between 1971 and 2000?







  2%|▏         | 360/19417 [21:16<16:55:35,  3.20s/it][A[A[A[A[A




  2%|▏         | 361/19417 [21:20<17:30:26,  3.31s/it][A[A[A[A[A




  2%|▏         | 362/19417 [21:23<17:42:58,  3.35s/it][A[A[A[A[A




  2%|▏         | 363/19417 [21:27<18:06:48,  3.42s/it][A[A[A[A[A




  2%|▏         | 364/19417 [21:30<17:36:39,  3.33s/it][A[A[A[A[A




  2%|▏         | 365/19417 [21:33<17:55:39,  3.39s/it][A[A[A[A[A




  2%|▏         | 366/19417 [21:37<17:27:01,  3.30s/it][A[A[A[A[A




  2%|▏         | 367/19417 [21:40<18:21:18,  3.47s/it][A[A[A[A[A




  2%|▏         | 368/19417 [21:44<18:26:51,  3.49s/it][A[A[A[A[A




  2%|▏         | 369/19417 [21:47<18:20:47,  3.47s/it][A[A[A[A[A




  2%|▏         | 370/19417 [21:51<18:22:11,  3.47s/it][A[A[A[A[A




  2%|▏         | 371/19417 [21:55<19:29:53,  3.69s/it][A[A[A[A[A




  2%|▏         | 372/19417 [21:59<20:26:34,  3.86s/it][A[A[A[A[A




  2%|▏         | 373/19417 [22:03

loss = 678.1972370 
predicted :  what is the is of ? the of of ? [SEP]
truth :  What does HIma mean in Sanskirt?







  2%|▏         | 377/19417 [22:17<18:52:20,  3.57s/it][A[A[A[A[A




  2%|▏         | 378/19417 [22:20<19:05:43,  3.61s/it][A[A[A[A[A




  2%|▏         | 379/19417 [22:24<19:45:26,  3.74s/it][A[A[A[A[A




  2%|▏         | 380/19417 [22:29<20:22:27,  3.85s/it][A[A[A[A[A




  2%|▏         | 381/19417 [22:32<19:42:48,  3.73s/it][A[A[A[A[A




  2%|▏         | 382/19417 [22:36<19:30:18,  3.69s/it][A[A[A[A[A




  2%|▏         | 383/19417 [22:40<21:05:05,  3.99s/it][A[A[A[A[A




  2%|▏         | 384/19417 [22:44<20:38:23,  3.90s/it][A[A[A[A[A




  2%|▏         | 385/19417 [22:47<18:50:26,  3.56s/it][A[A[A[A[A




  2%|▏         | 386/19417 [22:51<20:18:38,  3.84s/it][A[A[A[A[A




  2%|▏         | 387/19417 [22:54<18:41:57,  3.54s/it][A[A[A[A[A




  2%|▏         | 388/19417 [22:58<19:24:36,  3.67s/it][A[A[A[A[A




  2%|▏         | 389/19417 [23:02<19:18:20,  3.65s/it][A[A[A[A[A




  2%|▏         | 390/19417 [23:06

loss = 736.4879742 
predicted :  what did the of of ? the ? the the ? [SEP]
truth :  What was the approximate population of Europe in 1347?







  2%|▏         | 394/19417 [23:21<20:10:00,  3.82s/it][A[A[A[A[A




  2%|▏         | 395/19417 [23:24<19:21:34,  3.66s/it][A[A[A[A[A




  2%|▏         | 396/19417 [23:27<18:53:02,  3.57s/it][A[A[A[A[A




  2%|▏         | 397/19417 [23:32<20:11:27,  3.82s/it][A[A[A[A[A




  2%|▏         | 398/19417 [23:35<19:50:09,  3.75s/it][A[A[A[A[A




  2%|▏         | 399/19417 [23:38<18:42:28,  3.54s/it][A[A[A[A[A




  2%|▏         | 400/19417 [23:41<17:41:23,  3.35s/it][A[A[A[A[A




  2%|▏         | 401/19417 [23:45<18:48:45,  3.56s/it][A[A[A[A[A




  2%|▏         | 402/19417 [23:48<17:22:08,  3.29s/it][A[A[A[A[A




  2%|▏         | 403/19417 [23:51<17:49:06,  3.37s/it][A[A[A[A[A




  2%|▏         | 404/19417 [23:55<18:27:34,  3.50s/it][A[A[A[A[A




  2%|▏         | 405/19417 [23:59<18:55:22,  3.58s/it][A[A[A[A[A




  2%|▏         | 406/19417 [24:02<18:29:36,  3.50s/it][A[A[A[A[A




  2%|▏         | 407/19417 [24:06

loss = 687.5768967 
predicted :  what is the s s ? [SEP]
truth :  What does the MoD defend?







  2%|▏         | 411/19417 [24:19<17:54:14,  3.39s/it][A[A[A[A[A




  2%|▏         | 412/19417 [24:23<18:05:13,  3.43s/it][A[A[A[A[A




  2%|▏         | 413/19417 [24:27<18:53:28,  3.58s/it][A[A[A[A[A




  2%|▏         | 414/19417 [24:30<19:08:59,  3.63s/it][A[A[A[A[A




  2%|▏         | 415/19417 [24:33<18:23:06,  3.48s/it][A[A[A[A[A




  2%|▏         | 416/19417 [24:37<18:39:15,  3.53s/it][A[A[A[A[A




  2%|▏         | 417/19417 [24:41<19:59:31,  3.79s/it][A[A[A[A[A




  2%|▏         | 418/19417 [24:45<19:49:45,  3.76s/it][A[A[A[A[A




  2%|▏         | 419/19417 [24:49<19:35:39,  3.71s/it][A[A[A[A[A




  2%|▏         | 420/19417 [24:53<19:43:38,  3.74s/it][A[A[A[A[A




  2%|▏         | 421/19417 [24:57<20:18:18,  3.85s/it][A[A[A[A[A




  2%|▏         | 422/19417 [25:00<19:06:39,  3.62s/it][A[A[A[A[A




  2%|▏         | 423/19417 [25:02<17:45:43,  3.37s/it][A[A[A[A[A




  2%|▏         | 424/19417 [25:06

loss = 706.7123718 
predicted :  what did s did the the ? the the the the the the s of ? [SEP]
truth :  What South African document from 1983 doesn't list Dutch as an official language?







  2%|▏         | 428/19417 [25:21<19:22:39,  3.67s/it][A[A[A[A[A




  2%|▏         | 429/19417 [25:25<20:44:48,  3.93s/it][A[A[A[A[A




  2%|▏         | 430/19417 [25:29<19:43:58,  3.74s/it][A[A[A[A[A




  2%|▏         | 431/19417 [25:32<19:01:26,  3.61s/it][A[A[A[A[A




  2%|▏         | 432/19417 [25:35<18:24:38,  3.49s/it][A[A[A[A[A




  2%|▏         | 433/19417 [25:39<18:18:57,  3.47s/it][A[A[A[A[A




  2%|▏         | 434/19417 [25:42<17:24:12,  3.30s/it][A[A[A[A[A




  2%|▏         | 435/19417 [25:44<16:16:54,  3.09s/it][A[A[A[A[A




  2%|▏         | 436/19417 [25:49<18:50:12,  3.57s/it][A[A[A[A[A




  2%|▏         | 437/19417 [25:52<18:46:19,  3.56s/it][A[A[A[A[A




  2%|▏         | 438/19417 [25:56<19:20:12,  3.67s/it][A[A[A[A[A




  2%|▏         | 439/19417 [26:00<19:23:11,  3.68s/it][A[A[A[A[A




  2%|▏         | 440/19417 [26:03<18:40:29,  3.54s/it][A[A[A[A[A




  2%|▏         | 441/19417 [26:06

loss = 641.5641632 
predicted :  what was the s of the of of ? ? [SEP]
truth :  What was the focus of English Dominican mysticism?







  2%|▏         | 445/19417 [26:21<18:15:17,  3.46s/it][A[A[A[A[A




  2%|▏         | 446/19417 [26:25<19:33:00,  3.71s/it][A[A[A[A[A




  2%|▏         | 447/19417 [26:28<18:50:11,  3.57s/it][A[A[A[A[A




  2%|▏         | 448/19417 [26:32<19:30:28,  3.70s/it][A[A[A[A[A




  2%|▏         | 449/19417 [26:36<19:30:18,  3.70s/it][A[A[A[A[A




  2%|▏         | 450/19417 [26:40<19:52:11,  3.77s/it][A[A[A[A[A




  2%|▏         | 451/19417 [26:43<18:49:16,  3.57s/it][A[A[A[A[A




  2%|▏         | 452/19417 [26:46<17:39:29,  3.35s/it][A[A[A[A[A




  2%|▏         | 453/19417 [26:49<16:44:07,  3.18s/it][A[A[A[A[A




  2%|▏         | 454/19417 [26:51<16:07:09,  3.06s/it][A[A[A[A[A




  2%|▏         | 455/19417 [26:55<16:43:47,  3.18s/it][A[A[A[A[A




  2%|▏         | 456/19417 [26:59<18:31:18,  3.52s/it][A[A[A[A[A




  2%|▏         | 457/19417 [27:02<17:46:33,  3.38s/it][A[A[A[A[A




  2%|▏         | 458/19417 [27:06

loss = 638.6514740 
predicted :  what of is the the ? the the ? [SEP]
truth :  Wha still calls Christians Notzrim?







  2%|▏         | 462/19417 [27:19<17:07:05,  3.25s/it][A[A[A[A[A




  2%|▏         | 463/19417 [27:23<18:20:31,  3.48s/it][A[A[A[A[A




  2%|▏         | 464/19417 [27:26<18:01:38,  3.42s/it][A[A[A[A[A




  2%|▏         | 465/19417 [27:30<19:36:17,  3.72s/it][A[A[A[A[A




  2%|▏         | 466/19417 [27:34<19:18:16,  3.67s/it][A[A[A[A[A




  2%|▏         | 467/19417 [27:37<19:16:19,  3.66s/it][A[A[A[A[A




  2%|▏         | 468/19417 [27:42<20:05:23,  3.82s/it][A[A[A[A[A




  2%|▏         | 469/19417 [27:45<18:47:42,  3.57s/it][A[A[A[A[A




  2%|▏         | 470/19417 [27:48<18:32:13,  3.52s/it][A[A[A[A[A




  2%|▏         | 471/19417 [27:52<18:30:16,  3.52s/it][A[A[A[A[A




  2%|▏         | 472/19417 [27:56<19:18:38,  3.67s/it][A[A[A[A[A




  2%|▏         | 473/19417 [28:00<19:44:43,  3.75s/it][A[A[A[A[A




  2%|▏         | 474/19417 [28:02<18:20:07,  3.48s/it][A[A[A[A[A




  2%|▏         | 475/19417 [28:06

loss = 684.2929211 
predicted :  what did the s s s of the s s ? the s s ? of the ? [SEP]
truth :  When was the most prior visit from a Burmese representative to the  United States prior to 2010 ? 







  2%|▏         | 479/19417 [28:21<19:53:30,  3.78s/it][A[A[A[A[A




  2%|▏         | 480/19417 [28:25<19:30:27,  3.71s/it][A[A[A[A[A




  2%|▏         | 481/19417 [28:28<17:56:37,  3.41s/it][A[A[A[A[A




  2%|▏         | 482/19417 [28:32<19:03:13,  3.62s/it][A[A[A[A[A




  2%|▏         | 483/19417 [28:35<18:24:09,  3.50s/it][A[A[A[A[A




  2%|▏         | 484/19417 [28:39<19:46:03,  3.76s/it][A[A[A[A[A




  2%|▏         | 485/19417 [28:43<18:57:51,  3.61s/it][A[A[A[A[A




  3%|▎         | 486/19417 [28:46<17:57:41,  3.42s/it][A[A[A[A[A




  3%|▎         | 487/19417 [28:49<18:00:42,  3.43s/it][A[A[A[A[A




  3%|▎         | 488/19417 [28:53<18:05:18,  3.44s/it][A[A[A[A[A




  3%|▎         | 489/19417 [28:57<19:32:39,  3.72s/it][A[A[A[A[A




  3%|▎         | 490/19417 [29:00<19:08:09,  3.64s/it][A[A[A[A[A




  3%|▎         | 491/19417 [29:04<19:22:05,  3.68s/it][A[A[A[A[A




  3%|▎         | 492/19417 [29:07

loss = 679.6963558 
predicted :  what did the s s s s ' ? the the s ? the ? ? [SEP]
truth :  When did English Chief Justice Sir Edward Coke speak in the Case of Prohibitions?







  3%|▎         | 496/19417 [29:21<17:37:28,  3.35s/it][A[A[A[A[A




  3%|▎         | 497/19417 [29:24<17:35:51,  3.35s/it][A[A[A[A[A




  3%|▎         | 498/19417 [29:28<18:52:26,  3.59s/it][A[A[A[A[A




  3%|▎         | 499/19417 [29:32<18:19:48,  3.49s/it][A[A[A[A[A




  3%|▎         | 500/19417 [29:35<17:39:57,  3.36s/it][A[A[A[A[A




  3%|▎         | 501/19417 [29:39<18:40:10,  3.55s/it][A[A[A[A[A




  3%|▎         | 502/19417 [29:42<18:45:28,  3.57s/it][A[A[A[A[A




  3%|▎         | 503/19417 [29:46<19:31:35,  3.72s/it][A[A[A[A[A




  3%|▎         | 504/19417 [29:50<19:47:27,  3.77s/it][A[A[A[A[A




  3%|▎         | 505/19417 [29:54<20:22:06,  3.88s/it][A[A[A[A[A




  3%|▎         | 506/19417 [29:59<20:43:19,  3.94s/it][A[A[A[A[A




  3%|▎         | 507/19417 [30:02<19:57:25,  3.80s/it][A[A[A[A[A




  3%|▎         | 508/19417 [30:05<19:04:17,  3.63s/it][A[A[A[A[A




  3%|▎         | 509/19417 [30:09

loss = 657.3899555 
predicted :  what did the ' ' the s s ' ? ? s ? ? [SEP]
truth :  Who said the Elizabeth was a behind the scenes force in ending apartheid?







  3%|▎         | 513/19417 [30:23<19:28:28,  3.71s/it][A[A[A[A[A




  3%|▎         | 514/19417 [30:26<18:55:36,  3.60s/it][A[A[A[A[A




  3%|▎         | 515/19417 [30:30<19:15:13,  3.67s/it][A[A[A[A[A




  3%|▎         | 516/19417 [30:34<18:40:03,  3.56s/it][A[A[A[A[A




  3%|▎         | 517/19417 [30:37<17:57:13,  3.42s/it][A[A[A[A[A




  3%|▎         | 518/19417 [30:41<19:21:26,  3.69s/it][A[A[A[A[A




  3%|▎         | 519/19417 [30:44<19:06:06,  3.64s/it][A[A[A[A[A




  3%|▎         | 520/19417 [30:48<19:23:40,  3.69s/it][A[A[A[A[A




  3%|▎         | 521/19417 [30:51<18:34:04,  3.54s/it][A[A[A[A[A




  3%|▎         | 522/19417 [30:55<18:05:41,  3.45s/it][A[A[A[A[A




  3%|▎         | 523/19417 [30:59<18:50:49,  3.59s/it][A[A[A[A[A




  3%|▎         | 524/19417 [31:02<18:27:50,  3.52s/it][A[A[A[A[A




  3%|▎         | 525/19417 [31:06<19:12:54,  3.66s/it][A[A[A[A[A




  3%|▎         | 526/19417 [31:09

loss = 642.3070831 
predicted :  what did the ' ' of the ? ? the ? [SEP]
truth :  What was the American plan to end conflict with Israel?







  3%|▎         | 530/19417 [31:22<17:12:57,  3.28s/it][A[A[A[A[A




  3%|▎         | 531/19417 [31:26<17:49:09,  3.40s/it][A[A[A[A[A




  3%|▎         | 532/19417 [31:29<17:49:33,  3.40s/it][A[A[A[A[A




  3%|▎         | 533/19417 [31:32<17:06:57,  3.26s/it][A[A[A[A[A




  3%|▎         | 534/19417 [31:36<17:46:18,  3.39s/it][A[A[A[A[A




  3%|▎         | 535/19417 [31:39<17:54:39,  3.41s/it][A[A[A[A[A




  3%|▎         | 536/19417 [31:43<18:05:03,  3.45s/it][A[A[A[A[A




  3%|▎         | 537/19417 [31:47<18:13:25,  3.47s/it][A[A[A[A[A




  3%|▎         | 538/19417 [31:51<19:27:33,  3.71s/it][A[A[A[A[A




  3%|▎         | 539/19417 [31:55<20:42:00,  3.95s/it][A[A[A[A[A




  3%|▎         | 540/19417 [31:58<19:22:00,  3.69s/it][A[A[A[A[A




  3%|▎         | 541/19417 [32:02<19:46:58,  3.77s/it][A[A[A[A[A




  3%|▎         | 542/19417 [32:06<19:47:31,  3.77s/it][A[A[A[A[A




  3%|▎         | 543/19417 [32:10

loss = 654.2286224 
predicted :  what did the of of the of the s of [SEP]
truth :  Who was the President of Mexico at the time?







  3%|▎         | 547/19417 [32:24<18:23:46,  3.51s/it][A[A[A[A[A




  3%|▎         | 548/19417 [32:27<18:58:24,  3.62s/it][A[A[A[A[A




  3%|▎         | 549/19417 [32:31<18:43:51,  3.57s/it][A[A[A[A[A




  3%|▎         | 550/19417 [32:35<19:11:04,  3.66s/it][A[A[A[A[A




  3%|▎         | 551/19417 [32:39<20:54:22,  3.99s/it][A[A[A[A[A




  3%|▎         | 552/19417 [32:43<20:04:14,  3.83s/it][A[A[A[A[A




  3%|▎         | 553/19417 [32:46<19:12:15,  3.66s/it][A[A[A[A[A




  3%|▎         | 554/19417 [32:50<19:30:58,  3.72s/it][A[A[A[A[A




  3%|▎         | 555/19417 [32:54<19:36:46,  3.74s/it][A[A[A[A[A




  3%|▎         | 556/19417 [32:57<19:06:48,  3.65s/it][A[A[A[A[A




  3%|▎         | 557/19417 [33:00<18:12:13,  3.47s/it][A[A[A[A[A




  3%|▎         | 558/19417 [33:05<19:18:16,  3.69s/it][A[A[A[A[A




  3%|▎         | 559/19417 [33:09<20:14:35,  3.86s/it][A[A[A[A[A




  3%|▎         | 560/19417 [33:13

loss = 659.9686050 
predicted :  what was was the s the the ? the s ' of ? ? [SEP]
truth :  What year did Dell come under fire from the Taiwanese Consumer Protection Commission?







  3%|▎         | 564/19417 [33:25<17:50:52,  3.41s/it][A[A[A[A[A




  3%|▎         | 565/19417 [33:29<19:19:38,  3.69s/it][A[A[A[A[A




  3%|▎         | 566/19417 [33:33<19:07:54,  3.65s/it][A[A[A[A[A




  3%|▎         | 567/19417 [33:36<17:56:34,  3.43s/it][A[A[A[A[A




  3%|▎         | 568/19417 [33:40<18:42:40,  3.57s/it][A[A[A[A[A




  3%|▎         | 569/19417 [33:43<18:34:13,  3.55s/it][A[A[A[A[A




  3%|▎         | 570/19417 [33:46<17:24:59,  3.33s/it][A[A[A[A[A




  3%|▎         | 571/19417 [33:49<16:35:29,  3.17s/it][A[A[A[A[A




  3%|▎         | 572/19417 [33:52<17:16:08,  3.30s/it][A[A[A[A[A




  3%|▎         | 573/19417 [33:56<17:46:51,  3.40s/it][A[A[A[A[A




  3%|▎         | 574/19417 [34:00<18:50:16,  3.60s/it][A[A[A[A[A




  3%|▎         | 575/19417 [34:03<17:52:14,  3.41s/it][A[A[A[A[A




  3%|▎         | 576/19417 [34:06<16:22:02,  3.13s/it][A[A[A[A[A




  3%|▎         | 577/19417 [34:10

loss = 647.5575809 
predicted :  what is the ? ? the of ? ' of ? ? [SEP]
truth :  What are the constituents of Class 1 transposable elements?







  3%|▎         | 581/19417 [34:24<18:44:03,  3.58s/it][A[A[A[A[A




  3%|▎         | 582/19417 [34:26<17:13:25,  3.29s/it][A[A[A[A[A




  3%|▎         | 583/19417 [34:30<17:59:24,  3.44s/it][A[A[A[A[A




  3%|▎         | 584/19417 [34:33<16:42:21,  3.19s/it][A[A[A[A[A




  3%|▎         | 585/19417 [34:37<17:47:59,  3.40s/it][A[A[A[A[A




  3%|▎         | 586/19417 [34:41<19:23:41,  3.71s/it][A[A[A[A[A




  3%|▎         | 587/19417 [34:46<21:07:27,  4.04s/it][A[A[A[A[A




  3%|▎         | 588/19417 [34:49<18:55:44,  3.62s/it][A[A[A[A[A




  3%|▎         | 589/19417 [34:52<18:55:43,  3.62s/it][A[A[A[A[A




  3%|▎         | 590/19417 [34:55<17:50:35,  3.41s/it][A[A[A[A[A




  3%|▎         | 591/19417 [34:59<18:16:43,  3.50s/it][A[A[A[A[A




  3%|▎         | 592/19417 [35:02<17:47:21,  3.40s/it][A[A[A[A[A




  3%|▎         | 593/19417 [35:06<18:21:11,  3.51s/it][A[A[A[A[A




  3%|▎         | 594/19417 [35:10

loss = 694.1357746 
predicted :  what s name the s did s s of of s of of ? the s ' ? ? [SEP]
truth :  The Earl of Mornington would have how many ___ guns on the EIc ships?







  3%|▎         | 598/19417 [35:27<20:54:50,  4.00s/it][A[A[A[A[A




  3%|▎         | 599/19417 [35:30<19:26:40,  3.72s/it][A[A[A[A[A




  3%|▎         | 600/19417 [35:33<19:10:35,  3.67s/it][A[A[A[A[A




  3%|▎         | 601/19417 [35:37<18:43:57,  3.58s/it][A[A[A[A[A




  3%|▎         | 602/19417 [35:41<19:41:28,  3.77s/it][A[A[A[A[A




  3%|▎         | 603/19417 [35:45<20:29:02,  3.92s/it][A[A[A[A[A




  3%|▎         | 604/19417 [35:49<20:47:10,  3.98s/it][A[A[A[A[A




  3%|▎         | 605/19417 [35:54<21:56:20,  4.20s/it][A[A[A[A[A




  3%|▎         | 606/19417 [35:57<20:21:22,  3.90s/it][A[A[A[A[A




  3%|▎         | 607/19417 [36:00<19:24:14,  3.71s/it][A[A[A[A[A




  3%|▎         | 608/19417 [36:04<19:12:25,  3.68s/it][A[A[A[A[A




  3%|▎         | 609/19417 [36:07<18:53:52,  3.62s/it][A[A[A[A[A




  3%|▎         | 610/19417 [36:11<18:46:45,  3.59s/it][A[A[A[A[A




  3%|▎         | 611/19417 [36:14

loss = 711.0727005 
predicted :  what did name name name did s was was the ? the ? [SEP]
truth :  How many number of contacts and combats were there in May?







  3%|▎         | 615/19417 [36:29<18:33:47,  3.55s/it][A[A[A[A[A




  3%|▎         | 616/19417 [36:32<17:54:43,  3.43s/it][A[A[A[A[A




  3%|▎         | 617/19417 [36:35<17:35:43,  3.37s/it][A[A[A[A[A




  3%|▎         | 618/19417 [36:39<17:48:50,  3.41s/it][A[A[A[A[A




  3%|▎         | 619/19417 [36:43<18:58:25,  3.63s/it][A[A[A[A[A




  3%|▎         | 620/19417 [36:47<19:53:30,  3.81s/it][A[A[A[A[A




  3%|▎         | 621/19417 [36:51<19:32:29,  3.74s/it][A[A[A[A[A




  3%|▎         | 622/19417 [36:53<17:58:00,  3.44s/it][A[A[A[A[A




  3%|▎         | 623/19417 [36:57<17:36:29,  3.37s/it][A[A[A[A[A




  3%|▎         | 624/19417 [37:00<17:49:51,  3.42s/it][A[A[A[A[A




  3%|▎         | 625/19417 [37:03<16:54:36,  3.24s/it][A[A[A[A[A




  3%|▎         | 626/19417 [37:06<17:07:17,  3.28s/it][A[A[A[A[A




  3%|▎         | 627/19417 [37:10<18:04:25,  3.46s/it][A[A[A[A[A




  3%|▎         | 628/19417 [37:14

loss = 637.5129700 
predicted :  what did the of ? the [SEP]
truth :  What was Sanskrit used for?







  3%|▎         | 632/19417 [37:28<18:18:06,  3.51s/it][A[A[A[A[A




  3%|▎         | 633/19417 [37:31<18:25:16,  3.53s/it][A[A[A[A[A




  3%|▎         | 634/19417 [37:35<19:09:45,  3.67s/it][A[A[A[A[A




  3%|▎         | 635/19417 [37:39<19:30:59,  3.74s/it][A[A[A[A[A




  3%|▎         | 636/19417 [37:42<17:59:18,  3.45s/it][A[A[A[A[A




  3%|▎         | 637/19417 [37:45<17:31:43,  3.36s/it][A[A[A[A[A




  3%|▎         | 638/19417 [37:48<17:03:15,  3.27s/it][A[A[A[A[A




  3%|▎         | 639/19417 [37:52<17:20:56,  3.33s/it][A[A[A[A[A




  3%|▎         | 640/19417 [37:56<19:18:11,  3.70s/it][A[A[A[A[A




  3%|▎         | 641/19417 [38:00<18:43:30,  3.59s/it][A[A[A[A[A




  3%|▎         | 642/19417 [38:03<18:30:16,  3.55s/it][A[A[A[A[A




  3%|▎         | 643/19417 [38:07<18:20:25,  3.52s/it][A[A[A[A[A




  3%|▎         | 644/19417 [38:10<17:44:37,  3.40s/it][A[A[A[A[A




  3%|▎         | 645/19417 [38:14

loss = 671.0431290 
predicted :  what of the ? the ? the ? the ? ? ? the ? the first ? ? [SEP]
truth :  Southampton was important to transit between Winchester and what other region in England in the 11th century?







  3%|▎         | 649/19417 [38:29<20:35:11,  3.95s/it][A[A[A[A[A




  3%|▎         | 650/19417 [38:32<19:18:09,  3.70s/it][A[A[A[A[A




  3%|▎         | 651/19417 [38:36<19:12:57,  3.69s/it][A[A[A[A[A




  3%|▎         | 652/19417 [38:41<20:45:06,  3.98s/it][A[A[A[A[A




  3%|▎         | 653/19417 [38:44<19:13:32,  3.69s/it][A[A[A[A[A




  3%|▎         | 654/19417 [38:47<18:20:18,  3.52s/it][A[A[A[A[A




  3%|▎         | 655/19417 [38:50<17:58:38,  3.45s/it][A[A[A[A[A




  3%|▎         | 656/19417 [38:54<18:28:58,  3.55s/it][A[A[A[A[A




  3%|▎         | 657/19417 [38:58<18:50:37,  3.62s/it][A[A[A[A[A




  3%|▎         | 658/19417 [39:01<18:27:12,  3.54s/it][A[A[A[A[A




  3%|▎         | 659/19417 [39:05<19:24:59,  3.73s/it][A[A[A[A[A




  3%|▎         | 660/19417 [39:09<19:27:07,  3.73s/it][A[A[A[A[A




  3%|▎         | 661/19417 [39:12<19:05:24,  3.66s/it][A[A[A[A[A




  3%|▎         | 662/19417 [39:16

loss = 674.2744083 
predicted :  what is s s is the ? [SEP]
truth :  How many square miles is Oklahoma?







  3%|▎         | 666/19417 [39:31<18:40:34,  3.59s/it][A[A[A[A[A




  3%|▎         | 667/19417 [39:35<19:28:55,  3.74s/it][A[A[A[A[A




  3%|▎         | 668/19417 [39:37<17:55:37,  3.44s/it][A[A[A[A[A




  3%|▎         | 669/19417 [39:41<18:48:47,  3.61s/it][A[A[A[A[A




  3%|▎         | 670/19417 [39:44<17:37:11,  3.38s/it][A[A[A[A[A




  3%|▎         | 671/19417 [39:48<18:00:42,  3.46s/it][A[A[A[A[A




  3%|▎         | 672/19417 [39:52<19:50:41,  3.81s/it][A[A[A[A[A




  3%|▎         | 673/19417 [39:58<21:49:52,  4.19s/it][A[A[A[A[A




  3%|▎         | 674/19417 [40:02<21:27:23,  4.12s/it][A[A[A[A[A




  3%|▎         | 675/19417 [40:04<19:37:51,  3.77s/it][A[A[A[A[A




  3%|▎         | 676/19417 [40:07<17:47:04,  3.42s/it][A[A[A[A[A




  3%|▎         | 677/19417 [40:10<17:06:12,  3.29s/it][A[A[A[A[A




  3%|▎         | 678/19417 [40:14<18:10:10,  3.49s/it][A[A[A[A[A




  3%|▎         | 679/19417 [40:17

loss = 651.9264278 
predicted :  what did did the the s of the ' of ? the the the the ? ? the s ? the ? [SEP]
truth :  Who theorized that the types of media people use to communicate will offer different possibilities for the shape of society?







  4%|▎         | 683/19417 [40:33<19:55:58,  3.83s/it][A[A[A[A[A




  4%|▎         | 684/19417 [40:36<18:53:37,  3.63s/it][A[A[A[A[A




  4%|▎         | 685/19417 [40:40<19:13:59,  3.70s/it][A[A[A[A[A




  4%|▎         | 686/19417 [40:44<19:46:17,  3.80s/it][A[A[A[A[A




  4%|▎         | 687/19417 [40:48<20:10:24,  3.88s/it][A[A[A[A[A




  4%|▎         | 688/19417 [40:54<22:47:51,  4.38s/it][A[A[A[A[A




  4%|▎         | 689/19417 [40:58<22:37:46,  4.35s/it][A[A[A[A[A




  4%|▎         | 690/19417 [41:02<22:04:41,  4.24s/it][A[A[A[A[A




  4%|▎         | 691/19417 [41:06<21:20:02,  4.10s/it][A[A[A[A[A




  4%|▎         | 692/19417 [41:09<19:43:26,  3.79s/it][A[A[A[A[A




  4%|▎         | 693/19417 [41:12<19:21:40,  3.72s/it][A[A[A[A[A




  4%|▎         | 694/19417 [41:15<18:27:52,  3.55s/it][A[A[A[A[A




  4%|▎         | 695/19417 [41:18<17:29:23,  3.36s/it][A[A[A[A[A




  4%|▎         | 696/19417 [41:22

loss = 682.5982704 
predicted :  what did the ? the the ? ? the ? [SEP]
truth :  Who was responsible for supply humanitarian aid to civilians?







  4%|▎         | 700/19417 [41:37<18:42:37,  3.60s/it][A[A[A[A[A




