In [0]:
from google.colab import drive
drive.mount('/gdrive')

Drive already mounted at /gdrive; to attempt to forcibly remount, call drive.mount("/gdrive", force_remount=True).


In [0]:
project_dir = '/gdrive/My Drive/BERT/squad' #@param {type:"string"}

In [0]:
!pip install pytorch-transformers
!pip install bcolz 



In [0]:
from pytorch_transformers import *
from pytorch_transformers.tokenization_bert import whitespace_tokenize
from torch.utils.data import (DataLoader, RandomSampler, SequentialSampler, TensorDataset)
from tqdm import tqdm_notebook, trange
from sklearn.metrics import f1_score
    
import os
import pandas as pd
import torch
import torch.nn as nn
import logging
import numpy as np
import collections
import string
import re
import json
import bcolz
import pickle
import torch.optim as optim 

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

In [0]:
import sys
root_dir, project_name = os.path.split(project_dir)
sys.path.append(root_dir)

# import all custom helper classes and functions
from helpers import RobertaForForQuestionAnswering, LSTMTokenizer, getLSTM, is_whitespace, format_df, _improve_answer_span, _check_is_max_context, normalize_answer, get_tokens, compute_exact, compute_f1, get_squad_scores, getGloveData

In [0]:
# set device (gpu or cpu)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# 1 Data preprocessing

In [0]:
# download squad 2.0 dataset
!mkdir data
!wget https://rajpurkar.github.io/SQuAD-explorer/dataset/train-v2.0.json -O data/train.json
!wget https://rajpurkar.github.io/SQuAD-explorer/dataset/dev-v2.0.json -O data/test.json

mkdir: cannot create directory ‘data’: File exists
--2019-11-24 14:23:46--  https://rajpurkar.github.io/SQuAD-explorer/dataset/train-v2.0.json
Resolving rajpurkar.github.io (rajpurkar.github.io)... 185.199.109.153, 185.199.110.153, 185.199.111.153, ...
Connecting to rajpurkar.github.io (rajpurkar.github.io)|185.199.109.153|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 42123633 (40M) [application/json]
Saving to: ‘data/train.json’


2019-11-24 14:23:47 (157 MB/s) - ‘data/train.json’ saved [42123633/42123633]

--2019-11-24 14:23:49--  https://rajpurkar.github.io/SQuAD-explorer/dataset/dev-v2.0.json
Resolving rajpurkar.github.io (rajpurkar.github.io)... 185.199.109.153, 185.199.110.153, 185.199.111.153, ...
Connecting to rajpurkar.github.io (rajpurkar.github.io)|185.199.109.153|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 4370528 (4.2M) [application/json]
Saving to: ‘data/test.json’


2019-11-24 14:23:49 (36.9 MB/s) - ‘data/test.jso

In [0]:
# import data
df_train = pd.read_json("data/train.json")
df_test = pd.read_json("data/test.json")

print(df_train.shape)
df_train.head()

(442, 2)


Unnamed: 0,version,data
0,v2.0,"{'title': 'Beyoncé', 'paragraphs': [{'qas': [{..."
1,v2.0,"{'title': 'Frédéric_Chopin', 'paragraphs': [{'..."
2,v2.0,{'title': 'Sino-Tibetan_relations_during_the_M...
3,v2.0,"{'title': 'IPod', 'paragraphs': [{'qas': [{'qu..."
4,v2.0,{'title': 'The_Legend_of_Zelda:_Twilight_Princ...


In [0]:
df_train['data'][0]['paragraphs']

[{'context': 'Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ bee-YON-say) (born September 4, 1981) is an American singer, songwriter, record producer and actress. Born and raised in Houston, Texas, she performed in various singing and dancing competitions as a child, and rose to fame in the late 1990s as lead singer of R&B girl-group Destiny\'s Child. Managed by her father, Mathew Knowles, the group became one of the world\'s best-selling girl groups of all time. Their hiatus saw the release of Beyoncé\'s debut album, Dangerously in Love (2003), which established her as a solo artist worldwide, earned five Grammy Awards and featured the Billboard Hot 100 number-one singles "Crazy in Love" and "Baby Boy".',
  'qas': [{'answers': [{'answer_start': 269, 'text': 'in the late 1990s'}],
    'id': '56be85543aeaaa14008c9063',
    'is_impossible': False,
    'question': 'When did Beyonce start becoming popular?'},
   {'answers': [{'answer_start': 207, 'text': 'singing and dancing'}],
    'id': '5

In [0]:
# format data
df_train = format_df(df_train)
df_test = format_df(df_test)

In [0]:
print(df_test.shape)
print(df_train.shape)
df_train.head()

(11873, 8)
(130319, 8)


Unnamed: 0,question,doc_tokens,orig_answer_text,start_position,end_position,is_impossible,num_possible_answers,gold_answers
0,When did Beyonce start becoming popular?,"[Beyoncé, Giselle, Knowles-Carter, (/biːˈjɒnse...",in the late 1990s,39,42,False,1,[in the late 1990s]
1,What areas did Beyonce compete in when she was...,"[Beyoncé, Giselle, Knowles-Carter, (/biːˈjɒnse...",singing and dancing,28,30,False,1,[singing and dancing]
2,When did Beyonce leave Destiny's Child and bec...,"[Beyoncé, Giselle, Knowles-Carter, (/biːˈjɒnse...",2003,82,82,False,1,[2003]
3,In what city and state did Beyonce grow up?,"[Beyoncé, Giselle, Knowles-Carter, (/biːˈjɒnse...","Houston, Texas",22,23,False,1,"[Houston, Texas]"
4,In which decade did Beyonce become famous?,"[Beyoncé, Giselle, Knowles-Carter, (/biːˈjɒnse...",late 1990s,41,42,False,1,[late 1990s]


In [0]:
df_filterd = df_train[df_train['is_impossible'] == True]
impossible_percentage = round(len(df_filterd) / len(df_train) * 100, 1)
print(str(impossible_percentage) + "% of questions are impossible to answer.")

33.4% of questions are impossible to answer.


In [0]:
models = {
    'bert': {
        'features_path_train': os.path.join(project_dir, 'data', 'features_train_bert'),
        'features_path_test': os.path.join(project_dir, 'data', 'features_test_bert'),
        'classes': (BertForQuestionAnswering, BertTokenizer),
        'model_name': 'bert-base-cased',
        'output_dir': os.path.join(project_dir, 'models', 'BERT')
    },
    'roberta': {
         'features_path_train': os.path.join(project_dir, 'data', 'features_train_roberta'),
         'features_path_test': os.path.join(project_dir, 'data', 'features_test_roberta'),
         'classes': (RobertaForForQuestionAnswering, RobertaTokenizer),
         'model_name': 'roberta-base',
         'output_dir': os.path.join(project_dir, 'models', 'RoBERTa')
    },
    'xlnet': {
        'features_path_train': os.path.join(project_dir, 'data', 'features_train_xlnet'),
        'features_path_test': os.path.join(project_dir, 'data', 'features_test_xlnet'),
        'classes': (XLNetForQuestionAnswering, XLNetTokenizer),
        'model_name': 'xlnet-base-cased',
        'output_dir': os.path.join(project_dir, 'models', 'XLNet')
    },
    'distilbert': {
        'features_path_train': os.path.join(project_dir, 'data', 'features_train_distilbert'),
        'features_path_test': os.path.join(project_dir, 'data', 'features_test_distilbert'),
        'classes': (DistilBertForQuestionAnswering, DistilBertTokenizer),
        'model_name': 'distilbert-base-uncased',
        'output_dir': os.path.join(project_dir, 'models', 'DistilBERT')
    }
}

In [0]:
# each df entry needs to be converted into a InputFeatures object
class InputFeatures(object):
  def __init__(self, example_index, doc_span_index, tokens, token_to_orig_map, token_is_max_context, input_ids, input_mask, segment_ids, cls_index, p_mask, paragraph_len, start_position, end_position, is_impossible, text_length):
    self.example_index = example_index
    self.doc_span_index = doc_span_index
    self.tokens = tokens
    self.token_to_orig_map = token_to_orig_map
    self.token_is_max_context = token_is_max_context
    self.input_ids = input_ids
    self.input_mask = input_mask
    self.segment_ids = segment_ids
    self.cls_index = cls_index
    self.p_mask = p_mask
    self.paragraph_len = paragraph_len
    self.start_position = start_position
    self.end_position = end_position
    self.is_impossible = is_impossible
    self.text_length = text_length

In [0]:
# settings these values not too high speeds up training
MAX_SEQ_LENGTH = 384
MAX_QUERY_LENGTH = 64

In [0]:
def preprocess_data(df, tokenizer, max_entries=80000, xlnet=False, lstm=False):
  df = df.sample(n=max_entries, random_state=420)
  features = []
  step = 0

  for index, row in df.iterrows():
    step += 1
    query_tokens = tokenizer.tokenize(row['question'])

    if len(query_tokens) > MAX_QUERY_LENGTH:
      query_tokens = query_tokens[0:MAX_QUERY_LENGTH]
    
    tok_to_orig_index = []
    orig_to_tok_index = []
    all_doc_tokens = []
    for (i, token) in enumerate(row['doc_tokens']):
      orig_to_tok_index.append(len(all_doc_tokens))
      sub_tokens = tokenizer.tokenize(token)
      for sub_token in sub_tokens:
        tok_to_orig_index.append(i)
        all_doc_tokens.append(sub_token)

    tok_start_position = None
    tok_end_position = None
    if row['is_impossible']:
      tok_start_position = -1
      tok_end_position = -1
    else:
      tok_start_position = orig_to_tok_index[row['start_position']]
      if row['end_position'] < len(row['doc_tokens']) - 1:
        tok_end_position = orig_to_tok_index[row['end_position'] + 1] - 1
      else:
        tok_end_position = len(all_doc_tokens) - 1
        (tok_start_position, tok_end_position) = _improve_answer_span(all_doc_tokens, tok_start_position, tok_end_position, tokenizer, row['orig_answer_text'])

    # The -3 accounts for [CLS], [SEP] and [SEP]
    max_tokens_for_doc = MAX_SEQ_LENGTH - len(query_tokens) - 3

    # We can have documents that are longer than the maximum sequence length.
    # To deal with this we do a sliding window approach, where we take chunks
    # of the up to our max length with a stride of 128.
    _DocSpan = collections.namedtuple("DocSpan", ["start", "length"])
    doc_spans = []
    start_offset = 0
    while start_offset < len(all_doc_tokens):
      length = len(all_doc_tokens) - start_offset
      if length > max_tokens_for_doc:
        length = max_tokens_for_doc
      doc_spans.append(_DocSpan(start=start_offset, length=length))
      if start_offset + length == len(all_doc_tokens):
        break
      start_offset += min(length, 128)

    for (doc_span_index, doc_span) in enumerate(doc_spans):
      tokens = []
      token_to_orig_map = {}
      token_is_max_context = {}
      segment_ids = []

      # p_mask: mask with 1 for token than cannot be in the answer (0 for token which can be in an answer)
      p_mask = []

      # CLS token at the beginning
      if not xlnet:
        tokens.append(tokenizer.cls_token)
        segment_ids.append(0)
        p_mask.append(0)
        cls_index = 0

      # Query
      for token in query_tokens:
        tokens.append(token)
        segment_ids.append(0)
        p_mask.append(1)

      # SEP token
      tokens.append(tokenizer.sep_token)
      segment_ids.append(0)
      p_mask.append(1)

      # Paragraph
      for i in range(doc_span.length):
        split_token_index = doc_span.start + i
        token_to_orig_map[len(tokens)] = tok_to_orig_index[split_token_index]

        is_max_context = _check_is_max_context(doc_spans, doc_span_index, split_token_index)
        token_is_max_context[len(tokens)] = is_max_context
        tokens.append(all_doc_tokens[split_token_index])
        segment_ids.append(1)
        p_mask.append(0)
      paragraph_len = doc_span.length

      # SEP token
      tokens.append(tokenizer.sep_token)
      segment_ids.append(1)
      p_mask.append(1)

      # CLS token at the end
      if xlnet:
        tokens.append(tokenizer.cls_token)
        segment_ids.append(2)
        p_mask.append(0)
        cls_index = len(tokens) - 1

      text_length = len(tokens)

      input_ids = tokenizer.convert_tokens_to_ids(tokens)

       # apply padding
      input_mask = ([1] * len(input_ids))
      while len(input_ids) < MAX_SEQ_LENGTH:
        input_ids.append(3 if lstm else 0)
        input_mask.append(0)
        segment_ids.append(4 if xlnet else 0)
        p_mask.append(1)    

      
      span_is_impossible = row['is_impossible']
      start_position = None
      end_position = None

      if not span_is_impossible:
        # For training, if our document chunk does not contain an annotation
        # we throw it out, since there is nothing to predict.
        doc_start = doc_span.start
        doc_end = doc_span.start + doc_span.length - 1
        out_of_span = False
        if not (tok_start_position >= doc_start and tok_end_position <= doc_end):
          out_of_span = True
        if out_of_span:
          start_position = 0
          end_position = 0
          span_is_impossible = True
        else:
          doc_offset = len(query_tokens) + 2
          start_position = tok_start_position - doc_start + doc_offset
          end_position = tok_end_position - doc_start + doc_offset  
      else:
        start_position = cls_index
        end_position = cls_index  


    features.append(InputFeatures(
        example_index=index,
        doc_span_index=doc_span_index,
        tokens=tokens,
        token_to_orig_map=token_to_orig_map,
        token_is_max_context=token_is_max_context,
        input_ids=input_ids,
        input_mask=input_mask,
        segment_ids=segment_ids,
        cls_index=cls_index,
        p_mask=p_mask,
        paragraph_len=paragraph_len,
        start_position=start_position,
        end_position=end_position,
        is_impossible=span_is_impossible,
        text_length=text_length
    ))

    # log progress
    if (step % (max_entries/10)) == 0:
      print(str(step/max_entries * 100) + '%')

  return features

In [0]:
def get_dataset(features):
  # create a TensorDataset out of all features
  all_input_ids = torch.tensor([f.input_ids for f in features], dtype=torch.long)
  all_input_mask = torch.tensor([f.input_mask for f in features], dtype=torch.long)
  all_segment_ids = torch.tensor([f.segment_ids for f in features], dtype=torch.long)
  all_cls_index = torch.tensor([f.cls_index for f in features], dtype=torch.long)
  all_p_mask = torch.tensor([f.p_mask for f in features], dtype=torch.float)
  all_start_positions = torch.tensor([f.start_position for f in features], dtype=torch.long)
  all_end_positions = torch.tensor([f.end_position for f in features], dtype=torch.long)
  all_example_indexes = torch.tensor([f.example_index for f in features], dtype=torch.long)
  all_text_lengths = torch.tensor([f.text_length for f in features], dtype=torch.long)
  dataset = TensorDataset(all_input_ids, all_input_mask, all_segment_ids, all_start_positions, all_end_positions, all_cls_index, all_p_mask, all_example_indexes, all_text_lengths)
  return dataset

In [0]:
# download glove pretrained weights
!mkdir glove
!wget http://nlp.stanford.edu/data/glove.6B.zip -O glove/glove.zip
!unzip glove/glove.zip -d glove

--2019-11-24 09:04:07--  http://nlp.stanford.edu/data/glove.6B.zip
Resolving nlp.stanford.edu (nlp.stanford.edu)... 171.64.67.140
Connecting to nlp.stanford.edu (nlp.stanford.edu)|171.64.67.140|:80... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://nlp.stanford.edu/data/glove.6B.zip [following]
--2019-11-24 09:04:07--  https://nlp.stanford.edu/data/glove.6B.zip
Connecting to nlp.stanford.edu (nlp.stanford.edu)|171.64.67.140|:443... connected.
HTTP request sent, awaiting response... 301 Moved Permanently
Location: http://downloads.cs.stanford.edu/nlp/data/glove.6B.zip [following]
--2019-11-24 09:04:07--  http://downloads.cs.stanford.edu/nlp/data/glove.6B.zip
Resolving downloads.cs.stanford.edu (downloads.cs.stanford.edu)... 171.64.64.22
Connecting to downloads.cs.stanford.edu (downloads.cs.stanford.edu)|171.64.64.22|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 862182613 (822M) [application/zip]
Saving to: ‘glove/glove.zip’


201

In [0]:
glove, vectors, word2idx, words = getGloveData()

In [0]:
LSTMTokenizer = LSTMTokenizer(word2idx)

# 2 Training

In [0]:
def train(dataset, model, output_dir, model_type):
  # create DataLoader object
  train_sampler = RandomSampler(dataset)
  train_dataloader = DataLoader(dataset, sampler=train_sampler, batch_size=8)

  # init hyper parameters
  if model_type != 'lstm':
    no_decay = ['bias', 'LayerNorm.weight']
    optimizer_grouped_parameters = [
      {'params': [p for n, p in model.named_parameters() if not any(nd in n for nd in no_decay)], 'weight_decay': 0},
      {'params': [p for n, p in model.named_parameters() if any(nd in n for nd in no_decay)], 'weight_decay': 0.0}
    ]
    optimizer = AdamW(optimizer_grouped_parameters, lr=4e-5, eps=1e-8)
    scheduler = WarmupLinearSchedule(optimizer, warmup_steps=0, t_total=len(train_dataloader))
  else:
    optimizer = optim.Adam(model.parameters())

  global_step = 0
  tr_loss = 0.0
  model.zero_grad()

  # epoch iterator with progress output
  train_iterator = trange(1, desc="epoch")

  logger.info("***** Running training *****")

  for _ in train_iterator:
    epoch_iterator = tqdm_notebook(train_dataloader, desc="iteration")
    for step, batch in enumerate(epoch_iterator):
      global_step += 1
      model.train()
      batch = tuple(t.to(device) for t in batch)
      inputs = {
        'input_ids':  batch[0],
        'start_positions': batch[3],
        'end_positions': batch[4],
      }

      if model_type != 'lstm':
        inputs['attention_mask'] = batch[1]
      else:
        inputs['text_lengths'] = batch[8]

      if model_type != 'distilbert' and model_type != 'lstm':
        inputs['token_type_ids'] = batch[2] if model_type in ['bert', 'xlnet'] else None
      if model_type == 'xlnet':
        inputs.update({
            'cls_index': batch[5],
            'p_mask': batch[6]
        })

      outputs = model(**inputs)

      # model outputs are tuples
      loss = outputs[0]
      #print("\r%f" % loss, end='')

      loss.backward()
      torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
      tr_loss += loss.item()

      # update learning rate schedule
      optimizer.step()
      if model_type != 'lstm':
        scheduler.step() 
      model.zero_grad()

      if global_step % 500 == 0:
        logger.info(" global_step = %s, average loss = %s", global_step, tr_loss /global_step)
      
      if global_step % 1000 == 0:
        # save model
        drive.mount('/gdrive')
        if not os.path.exists(output_dir):
          os.makedirs(output_dir)
        if model_type == 'lstm':
          torch.save(model.state_dict(), os.path.join(output_dir, 'lstm-model.pt'))
        else:
          model.save_pretrained(output_dir)
        logger.info("model saved at global_step = %s", global_step)


  # save model
  drive.mount('/gdrive')
  if not os.path.exists(output_dir):
    os.makedirs(output_dir)
  if model_type == 'lstm':
    torch.save(model.state_dict(), os.path.join(output_dir, 'lstm-model.pt'))
  else:
    model.save_pretrained(output_dir)

## 2.1 BERT

In [0]:
model_class, tokenizer_class = models['bert']['classes']
tokenizer = tokenizer_class.from_pretrained(models['bert']['model_name'])

# load generic pretrained bert model
model = model_class.from_pretrained(models['bert']['model_name'])
model.to(device)

INFO:pytorch_transformers.file_utils:https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-cased-vocab.txt not found in cache or force_download set to True, downloading to /tmp/tmpklr21znv
100%|██████████| 213450/213450 [00:00<00:00, 399624.06B/s]
INFO:pytorch_transformers.file_utils:copying /tmp/tmpklr21znv to cache at /root/.cache/torch/pytorch_transformers/5e8a2b4893d13790ed4150ca1906be5f7a03d6c4ddf62296c383f6db42814db2.e13dbb970cb325137104fb2e5f36fe865f27746c6b526f6352861b1980eb80b1
INFO:pytorch_transformers.file_utils:creating metadata file for /root/.cache/torch/pytorch_transformers/5e8a2b4893d13790ed4150ca1906be5f7a03d6c4ddf62296c383f6db42814db2.e13dbb970cb325137104fb2e5f36fe865f27746c6b526f6352861b1980eb80b1
INFO:pytorch_transformers.file_utils:removing temp file /tmp/tmpklr21znv
INFO:pytorch_transformers.tokenization_utils:loading file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-cased-vocab.txt from cache at /root/.cache/torch/pytorch_transformers/5e8

BertForQuestionAnswering(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(28996, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_

In [0]:
# create and save train features
print('preparing train data')
features_train = preprocess_data(df_train, tokenizer)
drive.mount('/gdrive')
torch.save(features_train, models['bert']['features_path_train'])

# create and save test features
print('preparing test data')
features_test = preprocess_data(df_test, tokenizer, 8000)
drive.mount('/gdrive')
torch.save(features_test, models['bert']['features_path_test'])

# put features into a pytorch dataset
train_dataset = get_dataset(features_train)

# free memory
del features_train
del features_test

preparing train data
10.0%
20.0%
30.0%
40.0%
50.0%
60.0%
70.0%
80.0%
90.0%
100.0%
Drive already mounted at /gdrive; to attempt to forcibly remount, call drive.mount("/gdrive", force_remount=True).
preparing test data
10.0%
20.0%
30.0%
40.0%
50.0%
60.0%
70.0%
80.0%
90.0%
100.0%
Drive already mounted at /gdrive; to attempt to forcibly remount, call drive.mount("/gdrive", force_remount=True).


In [0]:
# train model & save checkpoint
train(train_dataset, model, models['bert']['output_dir'], 'bert')


epoch:   0%|          | 0/1 [00:00<?, ?it/s][AINFO:__main__:***** Running training *****


HBox(children=(IntProgress(value=0, description='iteration', max=10000, style=ProgressStyle(description_width=…

INFO:__main__: global_step = 500, average loss = 0.9321511623859405
INFO:__main__: global_step = 1000, average loss = 0.9283151290416718


Drive already mounted at /gdrive; to attempt to forcibly remount, call drive.mount("/gdrive", force_remount=True).


INFO:__main__:model saved at global_step = 1000
INFO:__main__: global_step = 1500, average loss = 0.9416839692195257
INFO:__main__: global_step = 2000, average loss = 0.9503509084656835


Drive already mounted at /gdrive; to attempt to forcibly remount, call drive.mount("/gdrive", force_remount=True).


INFO:__main__:model saved at global_step = 2000
INFO:__main__: global_step = 2500, average loss = 0.9555197073578835
INFO:__main__: global_step = 3000, average loss = 0.9555208188494047


Drive already mounted at /gdrive; to attempt to forcibly remount, call drive.mount("/gdrive", force_remount=True).


INFO:__main__:model saved at global_step = 3000
INFO:__main__: global_step = 3500, average loss = 0.9523872745037079
INFO:__main__: global_step = 4000, average loss = 0.9501787048056721


Drive already mounted at /gdrive; to attempt to forcibly remount, call drive.mount("/gdrive", force_remount=True).


INFO:__main__:model saved at global_step = 4000
INFO:__main__: global_step = 4500, average loss = 0.9494862282276153
INFO:__main__: global_step = 5000, average loss = 0.9446226977735758


Drive already mounted at /gdrive; to attempt to forcibly remount, call drive.mount("/gdrive", force_remount=True).


INFO:__main__:model saved at global_step = 5000
INFO:__main__: global_step = 5500, average loss = 0.9384774831994014
INFO:__main__: global_step = 6000, average loss = 0.9299835491677125


Drive already mounted at /gdrive; to attempt to forcibly remount, call drive.mount("/gdrive", force_remount=True).


INFO:__main__:model saved at global_step = 6000
INFO:__main__: global_step = 6500, average loss = 0.9251176882454982
INFO:__main__: global_step = 7000, average loss = 0.9176933065461261


Drive already mounted at /gdrive; to attempt to forcibly remount, call drive.mount("/gdrive", force_remount=True).


INFO:__main__:model saved at global_step = 7000
INFO:__main__: global_step = 7500, average loss = 0.9121691760321459
INFO:__main__: global_step = 8000, average loss = 0.9066506588812917


Drive already mounted at /gdrive; to attempt to forcibly remount, call drive.mount("/gdrive", force_remount=True).


INFO:__main__:model saved at global_step = 8000
INFO:__main__: global_step = 8500, average loss = 0.9031109209621654
INFO:__main__: global_step = 9000, average loss = 0.8974194722043143


Drive already mounted at /gdrive; to attempt to forcibly remount, call drive.mount("/gdrive", force_remount=True).


INFO:__main__:model saved at global_step = 9000
INFO:__main__: global_step = 9500, average loss = 0.8916772236322101
INFO:__main__: global_step = 10000, average loss = 0.886323946851492


Drive already mounted at /gdrive; to attempt to forcibly remount, call drive.mount("/gdrive", force_remount=True).


INFO:__main__:model saved at global_step = 10000

epoch: 100%|██████████| 1/1 [1:03:12<00:00, 3792.90s/it][A
[A

Drive already mounted at /gdrive; to attempt to forcibly remount, call drive.mount("/gdrive", force_remount=True).


## 2.2 RoBERTa

In [0]:
model_class, tokenizer_class = models['roberta']['classes']
tokenizer = tokenizer_class.from_pretrained(models['roberta']['model_name'])

# load generic pretrained roberta model
model = model_class.from_pretrained(models['roberta']['model_name'])
model.to(device)

INFO:pytorch_transformers.file_utils:https://s3.amazonaws.com/models.huggingface.co/bert/roberta-base-vocab.json not found in cache or force_download set to True, downloading to /tmp/tmpmp955xb8
100%|██████████| 898823/898823 [00:00<00:00, 2345412.87B/s]
INFO:pytorch_transformers.file_utils:copying /tmp/tmpmp955xb8 to cache at /root/.cache/torch/pytorch_transformers/d0c5776499adc1ded22493fae699da0971c1ee4c2587111707a4d177d20257a2.ef00af9e673c7160b4d41cfda1f48c5f4cba57d5142754525572a846a1ab1b9b
INFO:pytorch_transformers.file_utils:creating metadata file for /root/.cache/torch/pytorch_transformers/d0c5776499adc1ded22493fae699da0971c1ee4c2587111707a4d177d20257a2.ef00af9e673c7160b4d41cfda1f48c5f4cba57d5142754525572a846a1ab1b9b
INFO:pytorch_transformers.file_utils:removing temp file /tmp/tmpmp955xb8
INFO:pytorch_transformers.file_utils:https://s3.amazonaws.com/models.huggingface.co/bert/roberta-base-merges.txt not found in cache or force_download set to True, downloading to /tmp/tmpgl7859l0

RobertaForForQuestionAnswering(
  (roberta): RobertaModel(
    (embeddings): RobertaEmbeddings(
      (word_embeddings): Embedding(50265, 768, padding_idx=0)
      (position_embeddings): Embedding(514, 768)
      (token_type_embeddings): Embedding(1, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-0

In [0]:
# create and save train features
print('preparing train data')
features_train = preprocess_data(df_train, tokenizer)
drive.mount('/gdrive')
torch.save(features_train, models['roberta']['features_path_train'])

# create and save test features
print('preparing test data')
features_test = preprocess_data(df_test, tokenizer, 8000)
drive.mount('/gdrive')
torch.save(features_test, models['roberta']['features_path_test'])

# put features into a pytorch dataset
train_dataset = get_dataset(features_train)

# free memory
del features_train
del features_test

preparing train data
10.0%
20.0%
30.0%
40.0%
50.0%
60.0%
70.0%
80.0%
90.0%
100.0%
Drive already mounted at /gdrive; to attempt to forcibly remount, call drive.mount("/gdrive", force_remount=True).
preparing test data
10.0%
20.0%
30.0%
40.0%
50.0%
60.0%
70.0%
80.0%
90.0%
100.0%
Drive already mounted at /gdrive; to attempt to forcibly remount, call drive.mount("/gdrive", force_remount=True).


In [0]:
# train model & save checkpoint
train(train_dataset, model, models['roberta']['output_dir'], 'roberta')

epoch:   0%|          | 0/1 [00:00<?, ?it/s]INFO:__main__:***** Running training *****


HBox(children=(IntProgress(value=0, description='iteration', max=10000, style=ProgressStyle(description_width=…

INFO:__main__: global_step = 500, average loss = 0.8904026729464531
INFO:__main__: global_step = 1000, average loss = 0.9010241118371487


Drive already mounted at /gdrive; to attempt to forcibly remount, call drive.mount("/gdrive", force_remount=True).


INFO:__main__:model saved at global_step = 1000
INFO:__main__: global_step = 1500, average loss = 0.9079492493594686
INFO:__main__: global_step = 2000, average loss = 0.918008641043678


Drive already mounted at /gdrive; to attempt to forcibly remount, call drive.mount("/gdrive", force_remount=True).


INFO:__main__:model saved at global_step = 2000
INFO:__main__: global_step = 2500, average loss = 0.916452514950931
INFO:__main__: global_step = 3000, average loss = 0.915699507423987


Drive already mounted at /gdrive; to attempt to forcibly remount, call drive.mount("/gdrive", force_remount=True).


INFO:__main__:model saved at global_step = 3000
INFO:__main__: global_step = 3500, average loss = 0.9177884134809886
INFO:__main__: global_step = 4000, average loss = 0.9103360473392531


Drive already mounted at /gdrive; to attempt to forcibly remount, call drive.mount("/gdrive", force_remount=True).


INFO:__main__:model saved at global_step = 4000
INFO:__main__: global_step = 4500, average loss = 0.9026416219613619
INFO:__main__: global_step = 5000, average loss = 0.8945478912942112


Drive already mounted at /gdrive; to attempt to forcibly remount, call drive.mount("/gdrive", force_remount=True).


INFO:__main__:model saved at global_step = 5000
INFO:__main__: global_step = 5500, average loss = 0.8879846649190242
INFO:__main__: global_step = 6000, average loss = 0.8795909668797006


Drive already mounted at /gdrive; to attempt to forcibly remount, call drive.mount("/gdrive", force_remount=True).


INFO:__main__:model saved at global_step = 6000
INFO:__main__: global_step = 6500, average loss = 0.871379038117826
INFO:__main__: global_step = 7000, average loss = 0.8655829266368279


Drive already mounted at /gdrive; to attempt to forcibly remount, call drive.mount("/gdrive", force_remount=True).


INFO:__main__:model saved at global_step = 7000
INFO:__main__: global_step = 7500, average loss = 0.8598055814976494
INFO:__main__: global_step = 8000, average loss = 0.8541560384756886


Drive already mounted at /gdrive; to attempt to forcibly remount, call drive.mount("/gdrive", force_remount=True).


INFO:__main__:model saved at global_step = 8000
INFO:__main__: global_step = 8500, average loss = 0.8467239809829522
INFO:__main__: global_step = 9000, average loss = 0.8397055038805638


Drive already mounted at /gdrive; to attempt to forcibly remount, call drive.mount("/gdrive", force_remount=True).


INFO:__main__:model saved at global_step = 9000
INFO:__main__: global_step = 9500, average loss = 0.8327480328777118
INFO:__main__: global_step = 10000, average loss = 0.8264168753731996


Drive already mounted at /gdrive; to attempt to forcibly remount, call drive.mount("/gdrive", force_remount=True).


INFO:__main__:model saved at global_step = 10000
epoch: 100%|██████████| 1/1 [1:06:50<00:00, 4010.37s/it]



Drive already mounted at /gdrive; to attempt to forcibly remount, call drive.mount("/gdrive", force_remount=True).


## 2.3 XLNet

In [0]:
model_class, tokenizer_class = models['xlnet']['classes']
tokenizer = tokenizer_class.from_pretrained(models['xlnet']['model_name'])

# load generic pretrained xlnet model
model = model_class.from_pretrained(models['xlnet']['model_name'])
model.to(device)

INFO:pytorch_transformers.file_utils:https://s3.amazonaws.com/models.huggingface.co/bert/xlnet-base-cased-spiece.model not found in cache or force_download set to True, downloading to /tmp/tmp6hz_sirv

  0%|          | 0/798011 [00:00<?, ?B/s][A
  2%|▏         | 17408/798011 [00:00<00:07, 97898.47B/s][A
  7%|▋         | 52224/798011 [00:00<00:06, 114870.83B/s][A
 17%|█▋        | 138240/798011 [00:00<00:04, 148744.50B/s][A
 33%|███▎      | 261120/798011 [00:00<00:02, 194292.93B/s][A
 46%|████▌     | 365568/798011 [00:00<00:01, 242634.50B/s][A
 92%|█████████▏| 731136/798011 [00:01<00:00, 329681.70B/s][A
100%|██████████| 798011/798011 [00:01<00:00, 735036.40B/s][AINFO:pytorch_transformers.file_utils:copying /tmp/tmp6hz_sirv to cache at /root/.cache/torch/pytorch_transformers/dad589d582573df0293448af5109cb6981ca77239ed314e15ca63b7b8a318ddd.8b10bd978b5d01c21303cc761fc9ecd464419b3bf921864a355ba807cfbfafa8
INFO:pytorch_transformers.file_utils:creating metadata file for /root/.cache/to

XLNetForQuestionAnswering(
  (transformer): XLNetModel(
    (word_embedding): Embedding(32000, 768)
    (layer): ModuleList(
      (0): XLNetLayer(
        (rel_attn): XLNetRelativeAttention(
          (layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
          (dropout): Dropout(p=0.1, inplace=False)
        )
        (ff): XLNetFeedForward(
          (layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
          (layer_1): Linear(in_features=768, out_features=3072, bias=True)
          (layer_2): Linear(in_features=3072, out_features=768, bias=True)
          (dropout): Dropout(p=0.1, inplace=False)
        )
        (dropout): Dropout(p=0.1, inplace=False)
      )
      (1): XLNetLayer(
        (rel_attn): XLNetRelativeAttention(
          (layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
          (dropout): Dropout(p=0.1, inplace=False)
        )
        (ff): XLNetFeedForward(
          (layer_norm): LayerNorm((768,), eps=1e-12, 

In [0]:
# create and save train features
print('preparing train data')
features_train = preprocess_data(df_train, tokenizer)
drive.mount('/gdrive')
torch.save(features_train, models['xlnet']['features_path_train'])

# create and save test features
print('preparing test data')
features_test = preprocess_data(df_test, tokenizer, 8000)
drive.mount('/gdrive')
torch.save(features_test, models['xlnet']['features_path_test'])

# put features into a pytorch dataset
train_dataset = get_dataset(features_train)

# free memory
del features_train
del features_test

preparing train data
10.0%
20.0%
30.0%
40.0%
50.0%
60.0%
70.0%
80.0%
90.0%
100.0%
Drive already mounted at /gdrive; to attempt to forcibly remount, call drive.mount("/gdrive", force_remount=True).
preparing test data
10.0%
20.0%
30.0%
40.0%
50.0%
60.0%
70.0%
80.0%
90.0%
100.0%
Drive already mounted at /gdrive; to attempt to forcibly remount, call drive.mount("/gdrive", force_remount=True).


In [0]:
# train model & save checkpoint
train(train_dataset, model, models['xlnet']['output_dir'], 'xlnet')



epoch:   0%|          | 0/1 [00:00<?, ?it/s][A[AINFO:__main__:***** Running training *****


HBox(children=(IntProgress(value=0, description='iteration', max=10000, style=ProgressStyle(description_width=…

INFO:__main__: global_step = 500, average loss = 0.5728385083600879
INFO:__main__: global_step = 1000, average loss = 0.5852686773799359


Drive already mounted at /gdrive; to attempt to forcibly remount, call drive.mount("/gdrive", force_remount=True).


INFO:__main__:model saved at global_step = 1000
INFO:__main__: global_step = 1500, average loss = 0.57692375575006
INFO:__main__: global_step = 2000, average loss = 0.5754506793543697


Drive already mounted at /gdrive; to attempt to forcibly remount, call drive.mount("/gdrive", force_remount=True).


INFO:__main__:model saved at global_step = 2000
INFO:__main__: global_step = 2500, average loss = 0.5772076408438385
INFO:__main__: global_step = 3000, average loss = 0.5732541666701436


Drive already mounted at /gdrive; to attempt to forcibly remount, call drive.mount("/gdrive", force_remount=True).


INFO:__main__:model saved at global_step = 3000
INFO:__main__: global_step = 3500, average loss = 0.5705870801338128
INFO:__main__: global_step = 4000, average loss = 0.5653828579941764


Drive already mounted at /gdrive; to attempt to forcibly remount, call drive.mount("/gdrive", force_remount=True).


INFO:__main__:model saved at global_step = 4000
INFO:__main__: global_step = 4500, average loss = 0.5609012665789989
INFO:__main__: global_step = 5000, average loss = 0.5574445194035769


Drive already mounted at /gdrive; to attempt to forcibly remount, call drive.mount("/gdrive", force_remount=True).


INFO:__main__:model saved at global_step = 5000
INFO:__main__: global_step = 5500, average loss = 0.555074265291745
INFO:__main__: global_step = 6000, average loss = 0.5529647913165391


Drive already mounted at /gdrive; to attempt to forcibly remount, call drive.mount("/gdrive", force_remount=True).


INFO:__main__:model saved at global_step = 6000
INFO:__main__: global_step = 6500, average loss = 0.549640874137099
INFO:__main__: global_step = 7000, average loss = 0.5464677570488836


Drive already mounted at /gdrive; to attempt to forcibly remount, call drive.mount("/gdrive", force_remount=True).


INFO:__main__:model saved at global_step = 7000
INFO:__main__: global_step = 7500, average loss = 0.5418635860477885
INFO:__main__: global_step = 8000, average loss = 0.5377662604311481


Drive already mounted at /gdrive; to attempt to forcibly remount, call drive.mount("/gdrive", force_remount=True).


INFO:__main__:model saved at global_step = 8000
INFO:__main__: global_step = 8500, average loss = 0.534214763260063
INFO:__main__: global_step = 9000, average loss = 0.530369077494161


Drive already mounted at /gdrive; to attempt to forcibly remount, call drive.mount("/gdrive", force_remount=True).


INFO:__main__:model saved at global_step = 9000
INFO:__main__: global_step = 9500, average loss = 0.5264636959100241
INFO:__main__: global_step = 10000, average loss = 0.5234827802518383


Drive already mounted at /gdrive; to attempt to forcibly remount, call drive.mount("/gdrive", force_remount=True).


INFO:__main__:model saved at global_step = 10000


epoch: 100%|██████████| 1/1 [2:26:02<00:00, 8762.73s/it][A[A

[A[A

Drive already mounted at /gdrive; to attempt to forcibly remount, call drive.mount("/gdrive", force_remount=True).


## 2.4 DistilBERT

In [0]:
model_class, tokenizer_class = models['distilbert']['classes']
tokenizer = tokenizer_class.from_pretrained(models['distilbert']['model_name'])

# load generic pretrained distilbert model
model = model_class.from_pretrained(models['distilbert']['model_name'])
model.to(device)

INFO:pytorch_transformers.tokenization_utils:loading file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-vocab.txt from cache at /root/.cache/torch/pytorch_transformers/26bc1ad6c0ac742e9b52263248f6d0f00068293b33709fae12320c0e35ccfbbb.542ce4285a40d23a559526243235df47c5f75c197f04f37d1a0c124c32c9a084
INFO:pytorch_transformers.modeling_utils:loading configuration file /gdrive/My Drive/BERT/squad/models/DistilBERT/config.json
INFO:pytorch_transformers.modeling_utils:Model config {
  "activation": "gelu",
  "attention_dropout": 0.1,
  "dim": 768,
  "dropout": 0.1,
  "finetuning_task": null,
  "hidden_dim": 3072,
  "initializer_range": 0.02,
  "max_position_embeddings": 512,
  "n_heads": 12,
  "n_layers": 6,
  "num_labels": 2,
  "output_attentions": false,
  "output_hidden_states": false,
  "pruned_heads": {},
  "qa_dropout": 0.1,
  "seq_classif_dropout": 0.2,
  "sinusoidal_pos_embds": false,
  "tie_weights_": true,
  "torchscript": false,
  "vocab_size": 30522
}

INFO:

DistilBertForQuestionAnswering(
  (distilbert): DistilBertModel(
    (embeddings): Embeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (transformer): Transformer(
      (layer): ModuleList(
        (0): TransformerBlock(
          (dropout): Dropout(p=0.1, inplace=False)
          (attention): MultiHeadSelfAttention(
            (dropout): Dropout(p=0.1, inplace=False)
            (q_lin): Linear(in_features=768, out_features=768, bias=True)
            (k_lin): Linear(in_features=768, out_features=768, bias=True)
            (v_lin): Linear(in_features=768, out_features=768, bias=True)
            (out_lin): Linear(in_features=768, out_features=768, bias=True)
          )
          (sa_layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
          (ffn): FFN(
            (d

In [0]:
# create and save train features
print('preparing train data')
features_train = preprocess_data(df_train, tokenizer)
drive.mount('/gdrive')
torch.save(features_train, models['distilbert']['features_path_train'])

# create and save test features
print('preparing test data')
features_test = preprocess_data(df_test, tokenizer, 8000)
drive.mount('/gdrive')
torch.save(features_test, models['distilbert']['features_path_test'])

# put features into a pytorch dataset
train_dataset = get_dataset(features_train)

# free memory
del features_train
del features_test

preparing train data
10.0%
20.0%
30.0%
40.0%
50.0%
60.0%
70.0%
80.0%
90.0%
100.0%
Drive already mounted at /gdrive; to attempt to forcibly remount, call drive.mount("/gdrive", force_remount=True).
preparing test data
10.0%
20.0%
30.0%
40.0%
50.0%
60.0%
70.0%
80.0%
90.0%
100.0%
Drive already mounted at /gdrive; to attempt to forcibly remount, call drive.mount("/gdrive", force_remount=True).


In [0]:
# train model & save checkpoint
train(train_dataset, model, models['distilbert']['output_dir'], 'distilbert')

epoch:   0%|          | 0/1 [00:00<?, ?it/s]INFO:__main__:***** Running training *****


HBox(children=(IntProgress(value=0, description='iteration', max=10000, style=ProgressStyle(description_width=…

INFO:__main__: global_step = 500, average loss = 2.2581234564781187
INFO:__main__: global_step = 1000, average loss = 2.109137038230896


Drive already mounted at /gdrive; to attempt to forcibly remount, call drive.mount("/gdrive", force_remount=True).


INFO:__main__:model saved at global_step = 1000
INFO:__main__: global_step = 1500, average loss = 2.0143284160296124
INFO:__main__: global_step = 2000, average loss = 1.933571294873953


Drive already mounted at /gdrive; to attempt to forcibly remount, call drive.mount("/gdrive", force_remount=True).


INFO:__main__:model saved at global_step = 2000
INFO:__main__: global_step = 2500, average loss = 1.8818276997566223
INFO:__main__: global_step = 3000, average loss = 1.8289616846342882


Drive already mounted at /gdrive; to attempt to forcibly remount, call drive.mount("/gdrive", force_remount=True).


INFO:__main__:model saved at global_step = 3000
INFO:__main__: global_step = 3500, average loss = 1.7884427446467537
INFO:__main__: global_step = 4000, average loss = 1.7507512162402272


Drive already mounted at /gdrive; to attempt to forcibly remount, call drive.mount("/gdrive", force_remount=True).


INFO:__main__:model saved at global_step = 4000
INFO:__main__: global_step = 4500, average loss = 1.7191563635600937
INFO:__main__: global_step = 5000, average loss = 1.6891141402900218


Drive already mounted at /gdrive; to attempt to forcibly remount, call drive.mount("/gdrive", force_remount=True).


INFO:__main__:model saved at global_step = 5000
INFO:__main__: global_step = 5500, average loss = 1.6593343585214833
INFO:__main__: global_step = 6000, average loss = 1.629778693549335


Drive already mounted at /gdrive; to attempt to forcibly remount, call drive.mount("/gdrive", force_remount=True).


INFO:__main__:model saved at global_step = 6000
INFO:__main__: global_step = 6500, average loss = 1.6044979380552584
INFO:__main__: global_step = 7000, average loss = 1.5801183391703026


Drive already mounted at /gdrive; to attempt to forcibly remount, call drive.mount("/gdrive", force_remount=True).


INFO:__main__:model saved at global_step = 7000
INFO:__main__: global_step = 7500, average loss = 1.5588050935089588
INFO:__main__: global_step = 8000, average loss = 1.5415647127535195


Drive already mounted at /gdrive; to attempt to forcibly remount, call drive.mount("/gdrive", force_remount=True).


INFO:__main__:model saved at global_step = 8000
INFO:__main__: global_step = 8500, average loss = 1.5247907016154598
INFO:__main__: global_step = 9000, average loss = 1.5083564092268547


Drive already mounted at /gdrive; to attempt to forcibly remount, call drive.mount("/gdrive", force_remount=True).


INFO:__main__:model saved at global_step = 9000
INFO:__main__: global_step = 9500, average loss = 1.4934020525860159
INFO:__main__: global_step = 10000, average loss = 1.4784884032949805


Drive already mounted at /gdrive; to attempt to forcibly remount, call drive.mount("/gdrive", force_remount=True).


INFO:__main__:model saved at global_step = 10000
epoch: 100%|██████████| 1/1 [33:49<00:00, 2029.09s/it]



Drive already mounted at /gdrive; to attempt to forcibly remount, call drive.mount("/gdrive", force_remount=True).


## 2.3 Bi-LSTM

In [0]:
model = getLSTM(evice, embeddings=vectors, qa=True)

In [0]:
# create and save train features
print('preparing train data')
features_train = preprocess_data(df_train, LSTMTokenizer, lstm=True)
drive.mount('/gdrive')
torch.save(features_train, os.path.join(project_dir, 'data', 'features_train_lstm'))

# create and save test features
print('preparing test data')
features_test = preprocess_data(df_test, LSTMTokenizer, 8000, lstm=True)
drive.mount('/gdrive')
torch.save(features_test, os.path.join(project_dir, 'data', 'features_test_lstm'))

# put features into a pytorch dataset
train_dataset = get_dataset(features_train)

# free memory
del features_train
del features_test

preparing train data
10.0%
20.0%
30.0%
40.0%
50.0%
60.0%
70.0%
80.0%
90.0%
100.0%
Drive already mounted at /gdrive; to attempt to forcibly remount, call drive.mount("/gdrive", force_remount=True).
preparing test data
10.0%
20.0%
30.0%
40.0%
50.0%
60.0%
70.0%
80.0%
90.0%
100.0%
Drive already mounted at /gdrive; to attempt to forcibly remount, call drive.mount("/gdrive", force_remount=True).


In [0]:
# train model & save checkpoint
train(train_dataset, model, os.path.join(project_dir, 'models', 'LSTM'), 'lstm')

epoch:   0%|          | 0/1 [00:00<?, ?it/s]INFO:__main__:***** Running training *****


HBox(children=(IntProgress(value=0, description='iteration', max=10000, style=ProgressStyle(description_width=…

INFO:__main__: global_step = 500, average loss = 3.218540790081024
INFO:__main__: global_step = 1000, average loss = 3.1863477213382723


Drive already mounted at /gdrive; to attempt to forcibly remount, call drive.mount("/gdrive", force_remount=True).


INFO:__main__:model saved at global_step = 1000
INFO:__main__: global_step = 1500, average loss = 3.172174642562866
INFO:__main__: global_step = 2000, average loss = 3.1714648227095603


Drive already mounted at /gdrive; to attempt to forcibly remount, call drive.mount("/gdrive", force_remount=True).


INFO:__main__:model saved at global_step = 2000
INFO:__main__: global_step = 2500, average loss = 3.1669366000652315
INFO:__main__: global_step = 3000, average loss = 3.1656378345092135


Drive already mounted at /gdrive; to attempt to forcibly remount, call drive.mount("/gdrive", force_remount=True).


INFO:__main__:model saved at global_step = 3000
INFO:__main__: global_step = 3500, average loss = 3.1677348762920925
INFO:__main__: global_step = 4000, average loss = 3.16735283818841


Drive already mounted at /gdrive; to attempt to forcibly remount, call drive.mount("/gdrive", force_remount=True).


INFO:__main__:model saved at global_step = 4000
INFO:__main__: global_step = 4500, average loss = 3.16844736538993
INFO:__main__: global_step = 5000, average loss = 3.1691914393901826


Drive already mounted at /gdrive; to attempt to forcibly remount, call drive.mount("/gdrive", force_remount=True).


INFO:__main__:model saved at global_step = 5000
INFO:__main__: global_step = 5500, average loss = 3.1662756648930634
INFO:__main__: global_step = 6000, average loss = 3.169444642404715


Drive already mounted at /gdrive; to attempt to forcibly remount, call drive.mount("/gdrive", force_remount=True).


INFO:__main__:model saved at global_step = 6000
INFO:__main__: global_step = 6500, average loss = 3.167347810836939
INFO:__main__: global_step = 7000, average loss = 3.166485785927091


Drive already mounted at /gdrive; to attempt to forcibly remount, call drive.mount("/gdrive", force_remount=True).


INFO:__main__:model saved at global_step = 7000
INFO:__main__: global_step = 7500, average loss = 3.16537316532135
INFO:__main__: global_step = 8000, average loss = 3.163231661528349


Drive already mounted at /gdrive; to attempt to forcibly remount, call drive.mount("/gdrive", force_remount=True).


INFO:__main__:model saved at global_step = 8000
INFO:__main__: global_step = 8500, average loss = 3.1636465280616983
INFO:__main__: global_step = 9000, average loss = 3.1654741593731774


Drive already mounted at /gdrive; to attempt to forcibly remount, call drive.mount("/gdrive", force_remount=True).


INFO:__main__:model saved at global_step = 9000
INFO:__main__: global_step = 9500, average loss = 3.1641371753843206
INFO:__main__: global_step = 10000, average loss = 3.163286468625069


Drive already mounted at /gdrive; to attempt to forcibly remount, call drive.mount("/gdrive", force_remount=True).


INFO:__main__:model saved at global_step = 10000
epoch: 100%|██████████| 1/1 [12:45<00:00, 765.49s/it]



Drive already mounted at /gdrive; to attempt to forcibly remount, call drive.mount("/gdrive", force_remount=True).


# 3 Evaluation

In [0]:
def evaluate(model, tokenizer, features_path, model_type):
  drive.mount('/gdrive')
  features_test = torch.load(features_path)  
  test_dataset = get_dataset(features_test)

  test_sampler = SequentialSampler(test_dataset)
  test_dataloader = DataLoader(test_dataset, sampler=test_sampler, batch_size=8) 

  logger.info("***** Running evaluation *****")

  test_loss = 0.0
  test_steps = 0
  preds_start = None
  preds_end = None,
  all_gold_answers = []
  all_tokens = []
  cls_indexes = []

  for batch in tqdm_notebook(test_dataloader, desc="evaluating"):
    model.eval()
    batch = tuple(t.to(device) for t in batch)
    with torch.no_grad():
      inputs = {
        'input_ids':  batch[0]
      }

      if model_type != 'lstm':
        inputs['attention_mask'] = batch[1]
      else:
        inputs['text_lengths'] = batch[8]

      if model_type != 'distilbert' and model_type != 'lstm':
        inputs['token_type_ids'] = batch[2] if model_type in ['bert', 'xlnet'] else None
      
      if model_type == 'xlnet':
        inputs.update({
            'cls_index': batch[5],
            'p_mask': batch[6]
        })

      exapmle_indexes = batch[7]
      outputs = model(**inputs)

      if model_type == "xlnet":
        start_top_log_probs, start_top_index, end_top_log_probs, end_top_index = outputs[:4]
        if preds_start is None:
          preds_start = start_top_index.detach().cpu().numpy()
          preds_end = end_top_index.detach().cpu().numpy()
        else:
          preds_start = np.append(preds_start, start_top_index.detach().cpu().numpy(), axis=0)
          preds_end = np.append(preds_end, end_top_index.detach().cpu().numpy(), axis=0)
      else:
        start_logits, end_logits = outputs[:2]
        start_values = start_logits.detach().cpu().numpy().tolist()
        end_values = end_logits.detach().cpu().numpy().tolist()
        
        # padd with zeros if output lengths are different (lstm case)
        start_values = np.array([preds + [0] * (MAX_SEQ_LENGTH - len(preds)) for preds in start_values])
        end_values = np.array([preds + [0] * (MAX_SEQ_LENGTH - len(preds)) for preds in end_values])

        if preds_start is None:
          preds_start = start_values
          preds_end = end_values
        else:
          preds_start = np.append(preds_start, start_values, axis=0)
          preds_end = np.append(preds_end, end_values, axis=0)

      test_steps += 1
      for example_index in exapmle_indexes:
        example_index_value = example_index.item()
        all_gold_answers.append(df_test['gold_answers'][example_index_value])
        matching_feature = next((x for x in features_test if x.example_index == example_index_value))
        tokens = matching_feature.tokens
        cls_indexes.append(matching_feature.cls_index)
        all_tokens.append(matching_feature.tokens)

  if model_type != "xlnet":
    preds_start = np.argmax(preds_start, axis=1)
    preds_end = np.argmax(preds_end, axis=1)
  else:
    preds_start = preds_start[:, 0]
    preds_end = preds_end[:, 0]

  f1_score, exact_match = get_squad_scores(preds_start, preds_end, all_tokens, all_gold_answers, cls_indexes, tokenizer)

  logger.info(" f1_score = %s", f1_score)
  logger.info(" exact_match = %s", exact_match)

In [0]:
# BERT evaluation
model_class, tokenizer_class = models['bert']['classes']
tokenizer = tokenizer_class.from_pretrained(models['bert']['model_name'])
model = model_class.from_pretrained(models['bert']['output_dir'])
model.to(device)

evaluate(model, tokenizer, models['bert']['features_path_test'], 'bert')

INFO:pytorch_transformers.tokenization_utils:loading file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-cased-vocab.txt from cache at /root/.cache/torch/pytorch_transformers/5e8a2b4893d13790ed4150ca1906be5f7a03d6c4ddf62296c383f6db42814db2.e13dbb970cb325137104fb2e5f36fe865f27746c6b526f6352861b1980eb80b1
INFO:pytorch_transformers.modeling_utils:loading configuration file /gdrive/My Drive/BERT/squad/models/BERT/config.json
INFO:pytorch_transformers.modeling_utils:Model config {
  "attention_probs_dropout_prob": 0.1,
  "finetuning_task": null,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "num_labels": 2,
  "output_attentions": false,
  "output_hidden_states": false,
  "pruned_heads": {},
  "torchscript": false,
  "type_vocab_size": 2,
  "vocab_size": 28996
}

INFO:pytor

Drive already mounted at /gdrive; to attempt to forcibly remount, call drive.mount("/gdrive", force_remount=True).


INFO:__main__:***** Running evaluation *****


HBox(children=(IntProgress(value=0, description='evaluating', max=1000, style=ProgressStyle(description_width=…

INFO:__main__: f1_score = 0.6795278298431943
INFO:__main__: exact_match = 0.63325


In [0]:
# RoBERTa evaluation
model_class, tokenizer_class = models['roberta']['classes']
tokenizer = tokenizer_class.from_pretrained(models['roberta']['model_name'])
model = model_class.from_pretrained(models['roberta']['output_dir'])
model.to(device)

evaluate(model, tokenizer, models['roberta']['features_path_test'], 'roberta')

INFO:pytorch_transformers.tokenization_utils:loading file https://s3.amazonaws.com/models.huggingface.co/bert/roberta-base-vocab.json from cache at /root/.cache/torch/pytorch_transformers/d0c5776499adc1ded22493fae699da0971c1ee4c2587111707a4d177d20257a2.ef00af9e673c7160b4d41cfda1f48c5f4cba57d5142754525572a846a1ab1b9b
INFO:pytorch_transformers.tokenization_utils:loading file https://s3.amazonaws.com/models.huggingface.co/bert/roberta-base-merges.txt from cache at /root/.cache/torch/pytorch_transformers/b35e7cd126cd4229a746b5d5c29a749e8e84438b14bcdb575950584fe33207e8.70bec105b4158ed9a1747fea67a43f5dee97855c64d62b6ec3742f4cfdb5feda
INFO:pytorch_transformers.modeling_utils:loading configuration file /gdrive/My Drive/BERT/squad/models/RoBERTa/config.json
INFO:pytorch_transformers.modeling_utils:Model config {
  "attention_probs_dropout_prob": 0.1,
  "finetuning_task": null,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediat

Drive already mounted at /gdrive; to attempt to forcibly remount, call drive.mount("/gdrive", force_remount=True).


INFO:__main__:***** Running evaluation *****


HBox(children=(IntProgress(value=0, description='evaluating', max=1000, style=ProgressStyle(description_width=…




INFO:__main__: f1_score = 0.79309528561664
INFO:__main__: exact_match = 0.7605


In [0]:
# XLNet evaluation
model_class, tokenizer_class = models['xlnet']['classes']
tokenizer = tokenizer_class.from_pretrained(models['xlnet']['model_name'])
model = model_class.from_pretrained(models['xlnet']['output_dir'])
model.to(device)

evaluate(model, tokenizer, models['xlnet']['features_path_test'], 'xlnet')

INFO:pytorch_transformers.tokenization_utils:loading file https://s3.amazonaws.com/models.huggingface.co/bert/xlnet-base-cased-spiece.model from cache at /root/.cache/torch/pytorch_transformers/dad589d582573df0293448af5109cb6981ca77239ed314e15ca63b7b8a318ddd.8b10bd978b5d01c21303cc761fc9ecd464419b3bf921864a355ba807cfbfafa8
INFO:pytorch_transformers.modeling_utils:loading configuration file /gdrive/My Drive/BERT/squad/models/XLNet/config.json
INFO:pytorch_transformers.modeling_utils:Model config {
  "attn_type": "bi",
  "bi_data": false,
  "clamp_len": -1,
  "d_head": 64,
  "d_inner": 3072,
  "d_model": 768,
  "dropout": 0.1,
  "end_n_top": 5,
  "ff_activation": "gelu",
  "finetuning_task": null,
  "initializer_range": 0.02,
  "layer_norm_eps": 1e-12,
  "mem_len": null,
  "n_head": 12,
  "n_layer": 12,
  "n_token": 32000,
  "num_labels": 2,
  "output_attentions": false,
  "output_hidden_states": false,
  "pruned_heads": {},
  "reuse_len": null,
  "same_length": false,
  "start_n_top": 5,

Drive already mounted at /gdrive; to attempt to forcibly remount, call drive.mount("/gdrive", force_remount=True).


INFO:__main__:***** Running evaluation *****


HBox(children=(IntProgress(value=0, description='evaluating', max=1000, style=ProgressStyle(description_width=…

INFO:__main__: f1_score = 0.768254689945326
INFO:__main__: exact_match = 0.735375


In [0]:
# DistilBERT evaluation
model_class, tokenizer_class = models['distilbert']['classes']
tokenizer = tokenizer_class.from_pretrained(models['distilbert']['model_name'])
model = model_class.from_pretrained(models['distilbert']['output_dir'])
model.to(device)

evaluate(model, tokenizer, models['distilbert']['features_path_test'], 'distilbert')

INFO:pytorch_transformers.tokenization_utils:loading file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-vocab.txt from cache at /root/.cache/torch/pytorch_transformers/26bc1ad6c0ac742e9b52263248f6d0f00068293b33709fae12320c0e35ccfbbb.542ce4285a40d23a559526243235df47c5f75c197f04f37d1a0c124c32c9a084
INFO:pytorch_transformers.modeling_utils:loading configuration file /gdrive/My Drive/BERT/squad/models/DistilBERT/config.json
INFO:pytorch_transformers.modeling_utils:Model config {
  "activation": "gelu",
  "attention_dropout": 0.1,
  "dim": 768,
  "dropout": 0.1,
  "finetuning_task": null,
  "hidden_dim": 3072,
  "initializer_range": 0.02,
  "max_position_embeddings": 512,
  "n_heads": 12,
  "n_layers": 6,
  "num_labels": 2,
  "output_attentions": false,
  "output_hidden_states": false,
  "pruned_heads": {},
  "qa_dropout": 0.1,
  "seq_classif_dropout": 0.2,
  "sinusoidal_pos_embds": false,
  "tie_weights_": true,
  "torchscript": false,
  "vocab_size": 30522
}

INFO:

Drive already mounted at /gdrive; to attempt to forcibly remount, call drive.mount("/gdrive", force_remount=True).


INFO:__main__:***** Running evaluation *****


HBox(children=(IntProgress(value=0, description='evaluating', max=1000, style=ProgressStyle(description_width=…

INFO:__main__: f1_score = 0.6135553244673522
INFO:__main__: exact_match = 0.572875


In [0]:
# Bi-LSTM evaluation
model = getLSTM(evice, embeddings=vectors, qa=True)
model.load_state_dict(torch.load(os.path.join(project_dir, 'models', 'LSTM', 'lstm-model.pt')))
evaluate(model, LSTMTokenizer, os.path.join(project_dir, 'data', 'features_test_lstm'), 'lstm')

Drive already mounted at /gdrive; to attempt to forcibly remount, call drive.mount("/gdrive", force_remount=True).


INFO:__main__:***** Running evaluation *****


HBox(children=(IntProgress(value=0, description='evaluating', max=1000, style=ProgressStyle(description_width=…




INFO:__main__: f1_score = 0.499
INFO:__main__: exact_match = 0.499
