In [1]:
import numpy as np
import pandas as pd
import nltk
import os
import json
import transformers
import torch
import random
from torch import cuda
from tqdm import tqdm
from torch.utils.data import TensorDataset, DataLoader
from transformers import DistilBertTokenizer, DistilBertModel, DistilBertForQuestionAnswering

caused by: ['/opt/conda/lib/python3.10/site-packages/tensorflow_io/python/ops/libtensorflow_io_plugins.so: undefined symbol: _ZN3tsl6StatusC1EN10tensorflow5error4CodeESt17basic_string_viewIcSt11char_traitsIcEENS_14SourceLocationE']
caused by: ['/opt/conda/lib/python3.10/site-packages/tensorflow_io/python/ops/libtensorflow_io.so: undefined symbol: _ZTVN10tensorflow13GcsFileSystemE']


In [2]:
# Setting the random seed for consistent results
random_seed = 42
random.seed(random_seed)
np.random.seed(random_seed)
torch.manual_seed(random_seed)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(random_seed)

In [3]:
device = 'cuda' if cuda.is_available() else 'cpu'

In [4]:
device

'cuda'

### Loading the data

In [5]:
# Loading train data
train_file = open('/kaggle/input/squad-2/train-v2.0.json')
train_data = json.load(train_file)

# Loading validation data
val_file = open('/kaggle/input/squad-2/dev-v2.0.json')
val_data = json.load(val_file)

### Preprocessing the dataset

In [6]:
tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-uncased')

Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/483 [00:00<?, ?B/s]

In [7]:
def index_converter(context, context_tokenized):
    """
    Maps start str index to a tokenized index
    """
    mapper = {}
    curr = ''
    token_idx = 0
    for i, char in enumerate(context):
        if char != ' ' and char != '\n' and char != '\t' and char != '\r':
            curr += char
            if curr == context_tokenized[token_idx]:
                start = i - len(curr) + 1
                for j in range(start, i+1):
                    mapper[j] = (curr, token_idx)                
                curr = ''
                token_idx += 1
    if token_idx != len(context_tokenized):
        return None
    return mapper

def preprocess_data(dataset, is_training=True, tokenized=True):
    """
    Extracts the info from json_data object into a pandas readable data representation (list of dicts)
    """
    
    def _tokenize(seq):
        """
        Performing tokenization to minimize errors between tokenizers and encodings.
        """
        return [t.replace("``", '"').replace("''", '"') for t in seq.split()]
    
    examples = [] 
    tokenization_errors = 0
    misaligned_ans_errors = 0
    num_impossibles = 0
    num_questions = 0
    
    for article_id in tqdm(range(len(dataset['data']))): 
        paragraphs = dataset['data'][article_id]['paragraphs']
        for paragraph_id in range(len(paragraphs)):
            questions = dataset['data'][article_id]['paragraphs'][paragraph_id]['qas']
            
            context = paragraphs[paragraph_id]['context']
            context_tokenized = _tokenize(context)
                    
            for qid in range(len(questions)): 
                num_questions += 1
                
                question = questions[qid]['question']
                question_tokenized = _tokenize(question)
                qas_id = questions[qid]['id']
                
                is_impossible = questions[qid]['is_impossible']
                
                if is_impossible: 
                    num_impossibles += 1
                    examples.append({'qas_id': qas_id, 
                                     'question':question_tokenized if tokenized else question, 
                                     'context': context_tokenized if tokenized else context, 
                                     'answer':'', 
                                     'is_impossible': is_impossible,
                                     'start_pos': -1, 
                                     'end_pos':-1,
                                    'santiy_check': context_tokenized[-1:0]})
                    continue
                    
                answers = questions[qid]['answers']
                
                for ans_id in range(len(answers)): 
                    answer = answers[ans_id]['text']
                    start_pos = answers[ans_id]['answer_start'] 
                    end_pos = start_pos + len(answer) 
                          
                    if context[start_pos:end_pos] != answer:
                        misaligned_ans_errors += 1
                        continue
                        
                    if tokenized:
                        mapper = index_converter(context, context_tokenized)
                        if mapper is None:
                            tokenization_errors += 1
                            continue
                        
                        start_pos = mapper[start_pos][1]
                        end_pos = mapper[end_pos-1][1] 
                    
                    examples.append({'qas_id': qas_id, 
                                     'question':question_tokenized if tokenized else question, 
                                     'context': context_tokenized if tokenized else context, 
                                     'answer':answer, 
                                     'is_impossible': is_impossible,
                                     'start_pos': start_pos, 
                                     'end_pos':end_pos,
                                    'santiy_check': context_tokenized[start_pos:end_pos+1] if tokenized else context[start_pos:end_pos+1]})
            
                    
    print('No. of questions:{}'.format(num_questions))
    return examples

Preprocessing of Train data

In [8]:
train_processed = preprocess_data(train_data)

100%|██████████| 442/442 [00:41<00:00, 10.76it/s]

No. of questions:130319





In [9]:
train_df = pd.DataFrame(train_processed)
train_df[:5]

Unnamed: 0,qas_id,question,context,answer,is_impossible,start_pos,end_pos,santiy_check
0,56be85543aeaaa14008c9063,"[When, did, Beyonce, start, becoming, popular?]","[Beyoncé, Giselle, Knowles-Carter, (/biːˈjɒnse...",in the late 1990s,False,39,42,"[in, the, late, 1990s]"
1,56be85543aeaaa14008c9065,"[What, areas, did, Beyonce, compete, in, when,...","[Beyoncé, Giselle, Knowles-Carter, (/biːˈjɒnse...",singing and dancing,False,28,30,"[singing, and, dancing]"
2,56be85543aeaaa14008c9066,"[When, did, Beyonce, leave, Destiny's, Child, ...","[Beyoncé, Giselle, Knowles-Carter, (/biːˈjɒnse...",2003,False,82,82,"[(2003),]"
3,56bf6b0f3aeaaa14008c9601,"[In, what, city, and, state, did, Beyonce, gro...","[Beyoncé, Giselle, Knowles-Carter, (/biːˈjɒnse...","Houston, Texas",False,22,23,"[Houston,, Texas,]"
4,56bf6b0f3aeaaa14008c9602,"[In, which, decade, did, Beyonce, become, famo...","[Beyoncé, Giselle, Knowles-Carter, (/biːˈjɒnse...",late 1990s,False,41,42,"[late, 1990s]"


In [10]:
def encode_data(processed_data, tokenizer, max_len, max_query_len):
    """
    Converts examples of data into Distilbert input format tensors.
    """
    context_length_errors = 0
    encoded_data = []
    for sample in tqdm(processed_data):
        question_raw = ' '.join(sample['question'])
        context_raw = ' '.join(sample['context'])
        if len(question_raw) > max_query_len:
            question_raw = question_raw[:max_query_len]
        
        # encode the data using the tokenizer
        encoded = tokenizer.encode_plus(question_raw, context_raw,
                                        max_length=max_len,
                                        padding='max_length',
                                        truncation='only_second',
                                        return_token_type_ids=True)
        if sample['is_impossible']:
            start = -1
            end = -1
        else: 
            input_ids = encoded['input_ids']
            answer_ids = tokenizer.encode(sample['answer']) 
            start, end = 0, 0 
            for i in range(len(input_ids)):
                if input_ids[i: i+len(answer_ids[1:-1])] == answer_ids[1:-1]:
                    start = i
                    end = i + len(answer_ids[1:-1]) - 1
                    break
            
        ids = encoded['input_ids']
        token_type_ids = encoded['token_type_ids']
        mask = encoded['attention_mask']
        
        assert len(ids) == max_len
        assert len(token_type_ids) == max_len
        assert len(mask) == max_len
        
        encoded_data.append({'ids': ids,
                      'token_type_ids': token_type_ids,
                      'mask': mask,
                      'start_pos': start,
                      'end_pos': end})        
    return encoded_data

In [11]:
# Model Settings
MAX_SEQ_LEN = 512
MAX_QN_LEN = 128
NO_EPOCHS = 3
BATCH_SIZE = 8
LEARNING_RATE = 5e-05
OUT = 2

In [12]:
train_encoded = encode_data(train_processed, tokenizer, MAX_SEQ_LEN, MAX_QN_LEN)

100%|██████████| 130217/130217 [12:43<00:00, 170.45it/s]


In [13]:
# Creation of Dataloader
input_ids = torch.tensor([sample['ids'] for sample in train_encoded], dtype=torch.long)
input_masks = torch.tensor([sample['mask'] for sample in train_encoded], dtype=torch.long)
segment_ids = torch.tensor([sample['token_type_ids'] for sample in train_encoded], dtype=torch.long)

start_positions = torch.tensor([sample['start_pos'] for sample in train_encoded], dtype=torch.long)
end_positions = torch.tensor([sample['end_pos'] for sample in train_encoded], dtype=torch.long)
train_dataset = TensorDataset(input_ids, input_masks, segment_ids, start_positions, end_positions)

train_params = {'batch_size': BATCH_SIZE,
                'shuffle': True,
                'num_workers': 0
                }

training_loader = DataLoader(train_dataset, **train_params)

Preprocessing of Validation Data

In [14]:
val_processed = preprocess_data(val_data)

100%|██████████| 35/35 [00:10<00:00,  3.42it/s]

No. of questions:11873





In [15]:
val_df = pd.DataFrame(val_processed)

In [16]:
val_df[:5]

Unnamed: 0,qas_id,question,context,answer,is_impossible,start_pos,end_pos,santiy_check
0,56ddde6b9a695914005b9628,"[In, what, country, is, Normandy, located?]","[The, Normans, (Norman:, Nourmands;, French:, ...",France,False,26,26,[France.]
1,56ddde6b9a695914005b9628,"[In, what, country, is, Normandy, located?]","[The, Normans, (Norman:, Nourmands;, French:, ...",France,False,26,26,[France.]
2,56ddde6b9a695914005b9628,"[In, what, country, is, Normandy, located?]","[The, Normans, (Norman:, Nourmands;, French:, ...",France,False,26,26,[France.]
3,56ddde6b9a695914005b9628,"[In, what, country, is, Normandy, located?]","[The, Normans, (Norman:, Nourmands;, French:, ...",France,False,26,26,[France.]
4,56ddde6b9a695914005b9629,"[When, were, the, Normans, in, Normandy?]","[The, Normans, (Norman:, Nourmands;, French:, ...",10th and 11th centuries,False,14,17,"[10th, and, 11th, centuries]"


In [17]:
val_encoded = encode_data(val_processed, tokenizer, MAX_SEQ_LEN, MAX_QN_LEN)

100%|██████████| 26232/26232 [02:51<00:00, 153.28it/s]


In [18]:
# Creation of Dataloader
input_ids = torch.tensor([sample['ids'] for sample in val_encoded], dtype=torch.long)
input_mask = torch.tensor([sample['mask'] for sample in val_encoded], dtype=torch.long)
segment_ids = torch.tensor([sample['token_type_ids'] for sample in val_encoded], dtype=torch.long)

start_positions = torch.tensor([sample['start_pos'] for sample in val_encoded], dtype=torch.long)
end_positions = torch.tensor([sample['end_pos'] for sample in val_encoded], dtype=torch.long)

val_dataset = TensorDataset(input_ids, input_mask, segment_ids, start_positions, end_positions)

val_params = {'batch_size': BATCH_SIZE,
                'shuffle': True,
                'num_workers': 0
                }    

val_loader = DataLoader(val_dataset, **val_params)

Evaluation Metrics for Prediction

In [19]:
# Functions to calculate Evaluation Metrics
def normalize_text(s):
    """Removing articles, punctuation, and standardizing whitespace are all typical text processing steps."""
    import string, re
    def remove_articles(text):
        regex = re.compile(r"\b(a|an|the)\b", re.UNICODE)
        return re.sub(regex, " ", text)

    def white_space_fix(text):
        return " ".join(text.split())

    def remove_punc(text):
        exclude = set(string.punctuation)
        return "".join(ch for ch in text if ch not in exclude)

    def lower(text):
        return text.lower()

    return white_space_fix(remove_articles(remove_punc(lower(s))))

def compute_f1(prediction, truth):
    ''' Calculates the F1 score'''
    pred_tokens = normalize_text(prediction).split()
    truth_tokens = normalize_text(truth).split()

    # if either the prediction or the truth is no-answer then f1 = 1 if they agree, 0 otherwise
    if len(pred_tokens) == 0 or len(truth_tokens) == 0:
        return int(pred_tokens == truth_tokens)

    common_tokens = set(pred_tokens) & set(truth_tokens)

    # if there are no common tokens then f1 = 0
    if len(common_tokens) == 0:
        return 0

    prec = len(common_tokens) / len(pred_tokens)
    rec = len(common_tokens) / len(truth_tokens)

    return 2 * (prec * rec) / (prec + rec)

def compute_exact_match(prediction, truth):
    ''' Computes the exact match score '''
    return int(normalize_text(prediction) == normalize_text(truth))

In [20]:
def train(model, training_loader, optimizer):
    ''' Training finetunes the DISTILBERT model for Question Answering'''
    step = 0
    counter = 0
    loss_tracker = 0
    em_score = 0
    f1_score = 0
    model.zero_grad()
    model.train()
    counter = 0
    for data in tqdm(training_loader):
        data = tuple(d.to(device) for d in data)
        inputs = {'input_ids':     data[0],
                'attention_mask':  data[1],   
                'start_positions': data[3], 
                'end_positions':   data[4]}
        outputs = model(**inputs)
        loss = outputs[0]
        loss.backward() # back propagation
        optimizer.step()
        model.zero_grad()
        
        starts = outputs[1]
        ends = outputs[2]
        target_starts = data[3]
        target_ends = data[4]
        
        for i,(s,e) in enumerate(zip(starts, ends)):
            start_pred = torch.argmax(s)
            end_pred = torch.argmax(e)
            predicted_answer = tokenizer.convert_tokens_to_string(tokenizer.convert_ids_to_tokens(inputs['input_ids'][i][start_pred : end_pred+1]))
            predicted_answer = predicted_answer if predicted_answer!= '[CLS]' else ''
            actual_answer = tokenizer.convert_tokens_to_string(tokenizer.convert_ids_to_tokens(inputs['input_ids'][i][target_starts[i] : target_ends[i]+1]))
            curr_em = compute_exact_match(predicted_answer, actual_answer)
            curr_f1 = compute_f1(predicted_answer, actual_answer)
            em_score += curr_em
            f1_score += curr_f1
            counter += 1
                
        loss_tracker += loss.item()
        step += 1
        if step % 1000 == 0:
            print("Train loss: {}, Exact Match: {}, F1 Score: {}".format(loss_tracker/step, em_score/counter, f1_score/counter))

    return loss_tracker/step, em_score/counter, f1_score/counter


def validator(model, testing_loader):
    ''' Performs Prediction of the Answers'''
    print('Starting validation...')
    model.eval()
    preds = []
    targs = []
    pred_answers = []
    target_answers = []
    val_loss = 0
    step = 0
    with torch.no_grad():
        for data in tqdm(testing_loader):
            data = tuple(d.to(device) for d in data)
            inputs = {'input_ids': data[0],
                'attention_mask':  data[1], 
                'start_positions': data[3], 
                'end_positions':   data[4]}
            output = model(**inputs)
            valloss = output.loss
            val_loss += valloss.item()
            starts = output[1]
            ends = output[2]
            start_preds = []
            end_preds = []
            
            target_starts = data[3]
            target_ends = data[4]
            step += 1

            for s,e in zip(starts, ends):
                start_pred = torch.argmax(s)
                start_preds.append(start_pred)
                end_pred = torch.argmax(e)
                end_preds.append(end_pred)
            
            for i, (s,e) in enumerate(zip(start_preds, target_ends)):
                preds.append((s,e))
                predicted_answer = tokenizer.convert_tokens_to_string(tokenizer.convert_ids_to_tokens(inputs['input_ids'][i][s : e+1]))
                pred_answers.append(predicted_answer)
            
            for i, (s,e) in enumerate(zip(target_starts, target_ends)):
                targs.append((s,e))
                actual_answer = tokenizer.convert_tokens_to_string(tokenizer.convert_ids_to_tokens(inputs['input_ids'][i][s : e+1]))
                target_answers.append(actual_answer)
                
        em_score = 0
        f1_score = 0
        pred_answers = [item if item != '[CLS]' else '' for item in pred_answers]
        for predicted_ans, target_ans in zip(pred_answers, target_answers):
            em_score += compute_exact_match(predicted_ans, target_ans)
            f1_score += compute_f1(predicted_ans, target_ans)
        em_score /= len(pred_answers)
        f1_score /= len(pred_answers)
        val_loss /= step
    return pred_answers, target_answers, val_loss, em_score, f1_score

### Finetuning the DISTILBERT Model

In [22]:
model = DistilBertForQuestionAnswering.from_pretrained('distilbert-base-uncased')
model.to(device)

Downloading model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]

Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForQuestionAnswering: ['vocab_transform.weight', 'vocab_transform.bias', 'vocab_projector.bias', 'vocab_layer_norm.bias', 'vocab_layer_norm.weight']
- This IS expected if you are initializing DistilBertForQuestionAnswering from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertForQuestionAnswering from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of DistilBertForQuestionAnswering were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['qa_outputs.weight', 'qa_outputs.bias']
You should probably TRAIN this model on a down-stream task to

DistilBertForQuestionAnswering(
  (distilbert): DistilBertModel(
    (embeddings): Embeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (transformer): Transformer(
      (layer): ModuleList(
        (0-5): 6 x TransformerBlock(
          (attention): MultiHeadSelfAttention(
            (dropout): Dropout(p=0.1, inplace=False)
            (q_lin): Linear(in_features=768, out_features=768, bias=True)
            (k_lin): Linear(in_features=768, out_features=768, bias=True)
            (v_lin): Linear(in_features=768, out_features=768, bias=True)
            (out_lin): Linear(in_features=768, out_features=768, bias=True)
          )
          (sa_layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
          (ffn): FFN(
            (dropout): Dropout(p=0.1, inplace=False)
      

In [23]:
optimizer = torch.optim.Adam(params=model.parameters(), lr=LEARNING_RATE)

In [24]:
for epoch in range(NO_EPOCHS): #TRAINING
    loss, em, f1 = train(model, training_loader, optimizer)
    print(f'Epoch: {epoch}, Loss:  {loss}, Em: {em}, F1: {f1}') 
    pred_answers, target_answers, val_loss, em_score, f1_score = validator(model, val_loader)
    print(f'Loss:  {val_loss}, Em: {em_score}, F1: {f1_score}')
    torch.save(model, '/kaggle/working/fine_tuned_distilbert'+str(epoch)+'.model')

Starting training...


  6%|▌         | 1000/16278 [03:43<56:59,  4.47it/s] 

Train loss: 2.47885861492157, Exact Match: 0.32875, F1 Score: 0.3543814643564169


 12%|█▏        | 2000/16278 [07:26<52:46,  4.51it/s]

Train loss: 2.138604126483202, Exact Match: 0.3643125, F1 Score: 0.40304376611509757


 18%|█▊        | 3000/16278 [11:08<49:10,  4.50it/s]

Train loss: 1.975076231877009, Exact Match: 0.3914166666666667, F1 Score: 0.4345008868598513


 25%|██▍       | 4000/16278 [14:50<45:41,  4.48it/s]

Train loss: 1.87440377869457, Exact Match: 0.4106875, F1 Score: 0.4577861451751255


 31%|███       | 5000/16278 [18:32<42:39,  4.41it/s]

Train loss: 1.8084404752790928, Exact Match: 0.4246, F1 Score: 0.47441373419760413


 37%|███▋      | 6000/16278 [22:14<38:20,  4.47it/s]

Train loss: 1.7569679460724195, Exact Match: 0.4370833333333333, F1 Score: 0.48869126795492596


 43%|████▎     | 7000/16278 [25:56<34:17,  4.51it/s]

Train loss: 1.7136423795308386, Exact Match: 0.44876785714285716, F1 Score: 0.5019536504032713


 49%|████▉     | 8000/16278 [29:38<30:41,  4.50it/s]

Train loss: 1.6787715803384782, Exact Match: 0.457796875, F1 Score: 0.5118578195804319


 55%|█████▌    | 9000/16278 [33:20<26:49,  4.52it/s]

Train loss: 1.6485716029869186, Exact Match: 0.4664027777777778, F1 Score: 0.5216626186610254


 61%|██████▏   | 10000/16278 [37:02<23:09,  4.52it/s]

Train loss: 1.620785620805621, Exact Match: 0.47345, F1 Score: 0.5298996379774842


 68%|██████▊   | 11000/16278 [40:44<19:29,  4.51it/s]

Train loss: 1.5954905739805916, Exact Match: 0.4803068181818182, F1 Score: 0.5373220798226489


 74%|███████▎  | 12000/16278 [44:26<15:49,  4.51it/s]

Train loss: 1.5730317865634957, Exact Match: 0.48651041666666667, F1 Score: 0.5438327620299733


 80%|███████▉  | 13000/16278 [48:08<12:08,  4.50it/s]

Train loss: 1.5556007894529746, Exact Match: 0.4909807692307692, F1 Score: 0.5490829378248127


 86%|████████▌ | 14000/16278 [51:50<08:25,  4.51it/s]

Train loss: 1.5379371671240245, Exact Match: 0.49608035714285714, F1 Score: 0.5546548618724133


 92%|█████████▏| 15000/16278 [55:32<04:45,  4.48it/s]

Train loss: 1.5218353675792615, Exact Match: 0.5009583333333333, F1 Score: 0.5601634876685675


 98%|█████████▊| 16000/16278 [59:13<01:01,  4.52it/s]

Train loss: 1.5070197755070402, Exact Match: 0.5050625, F1 Score: 0.5648671178049472


100%|██████████| 16278/16278 [1:00:15<00:00,  4.50it/s]


Epoch: 0, Loss:  1.502662612375873, Em: 0.5061935077601235, F1: 0.566241410163972
Starting validation...


100%|██████████| 3279/3279 [03:55<00:00, 13.95it/s]


Loss:  1.5379856888769456, Em: 0.6741384568465996, F1: 0.7534325323656281
Starting training...


  6%|▌         | 1000/16278 [03:41<56:22,  4.52it/s] 

Train loss: 0.9917735268473625, Exact Match: 0.6435, F1 Score: 0.7100593590842931


 12%|█▏        | 2000/16278 [07:23<52:47,  4.51it/s]

Train loss: 0.990469312241301, Exact Match: 0.6441875, F1 Score: 0.7123541294387561


 18%|█▊        | 3000/16278 [11:05<48:57,  4.52it/s]

Train loss: 0.9906375086344779, Exact Match: 0.6414583333333334, F1 Score: 0.7093565363137208


 25%|██▍       | 4000/16278 [14:47<45:35,  4.49it/s]

Train loss: 0.9917639778098092, Exact Match: 0.64184375, F1 Score: 0.7108138621536444


 31%|███       | 5000/16278 [18:29<41:35,  4.52it/s]

Train loss: 0.9913638876907528, Exact Match: 0.6412, F1 Score: 0.7103188772109394


 37%|███▋      | 6000/16278 [22:10<37:53,  4.52it/s]

Train loss: 0.9966761316675693, Exact Match: 0.6406875, F1 Score: 0.7094959585473462


 43%|████▎     | 7000/16278 [25:52<34:13,  4.52it/s]

Train loss: 0.998014698362244, Exact Match: 0.64025, F1 Score: 0.7091484601823737


 49%|████▉     | 8000/16278 [29:34<30:24,  4.54it/s]

Train loss: 1.0004127969541587, Exact Match: 0.64184375, F1 Score: 0.7105230797075724


 55%|█████▌    | 9000/16278 [33:15<26:43,  4.54it/s]

Train loss: 1.0019198464308348, Exact Match: 0.6414583333333334, F1 Score: 0.7101686214968411


 61%|██████▏   | 10000/16278 [36:57<23:08,  4.52it/s]

Train loss: 1.003988309181109, Exact Match: 0.6413, F1 Score: 0.7100223479544038


 68%|██████▊   | 11000/16278 [40:39<19:30,  4.51it/s]

Train loss: 1.0074061858684502, Exact Match: 0.6401477272727273, F1 Score: 0.7089540332765625


 74%|███████▎  | 12000/16278 [44:21<15:47,  4.52it/s]

Train loss: 1.0081646627066656, Exact Match: 0.6403854166666667, F1 Score: 0.7093761627497757


 80%|███████▉  | 13000/16278 [48:02<12:08,  4.50it/s]

Train loss: 1.0098558953441679, Exact Match: 0.639673076923077, F1 Score: 0.7086960280434578


 86%|████████▌ | 14000/16278 [51:44<08:23,  4.52it/s]

Train loss: 1.0117498407728438, Exact Match: 0.6389017857142857, F1 Score: 0.7080455014684092


 92%|█████████▏| 15000/16278 [55:26<04:42,  4.53it/s]

Train loss: 1.0129491156436503, Exact Match: 0.6388, F1 Score: 0.7077987220258015


 98%|█████████▊| 16000/16278 [59:07<01:01,  4.52it/s]

Train loss: 1.0130286854512525, Exact Match: 0.6387578125, F1 Score: 0.707871448316658


100%|██████████| 16278/16278 [1:00:09<00:00,  4.51it/s]


Epoch: 1, Loss:  1.012834758408407, Em: 0.6386262930339357, F1: 0.707773121606307
Starting validation...


100%|██████████| 3279/3279 [03:57<00:00, 13.83it/s]


Loss:  1.5961362667411303, Em: 0.6404772796584325, F1: 0.7169584327268652
Starting training...


  6%|▌         | 1000/16278 [03:41<56:34,  4.50it/s] 

Train loss: 0.6959740587621928, Exact Match: 0.737375, F1 Score: 0.7997813033525141


 12%|█▏        | 2000/16278 [07:23<52:41,  4.52it/s]

Train loss: 0.7056283003166318, Exact Match: 0.73075, F1 Score: 0.7943554961063551


 18%|█▊        | 3000/16278 [11:05<49:00,  4.52it/s]

Train loss: 0.7148333846231302, Exact Match: 0.7262083333333333, F1 Score: 0.7898761909200425


 25%|██▍       | 4000/16278 [14:46<45:12,  4.53it/s]

Train loss: 0.7224421988762915, Exact Match: 0.72528125, F1 Score: 0.7896311565022041


 31%|███       | 5000/16278 [18:29<41:47,  4.50it/s]

Train loss: 0.7295769938245416, Exact Match: 0.7219, F1 Score: 0.7863598933532426


 37%|███▋      | 6000/16278 [22:11<37:59,  4.51it/s]

Train loss: 0.7363568323006232, Exact Match: 0.7212083333333333, F1 Score: 0.7857854386407235


 43%|████▎     | 7000/16278 [25:54<34:17,  4.51it/s]

Train loss: 0.7416794693980898, Exact Match: 0.7202678571428571, F1 Score: 0.7847260548352379


 49%|████▉     | 8000/16278 [29:36<30:38,  4.50it/s]

Train loss: 0.7494499186044559, Exact Match: 0.717578125, F1 Score: 0.7826117144776227


 55%|█████▌    | 9000/16278 [33:18<26:58,  4.50it/s]

Train loss: 0.7533632038310171, Exact Match: 0.7165972222222222, F1 Score: 0.7817739094828529


 61%|██████▏   | 10000/16278 [37:00<23:13,  4.50it/s]

Train loss: 0.7540947864100337, Exact Match: 0.7163875, F1 Score: 0.7813187777694294


 68%|██████▊   | 11000/16278 [40:42<19:29,  4.51it/s]

Train loss: 0.7586661421873353, Exact Match: 0.7153181818181819, F1 Score: 0.7804470665823825


 74%|███████▎  | 12000/16278 [44:24<15:48,  4.51it/s]

Train loss: 0.762160846424289, Exact Match: 0.7133541666666666, F1 Score: 0.7790454798263472


 80%|███████▉  | 13000/16278 [48:07<12:12,  4.48it/s]

Train loss: 0.7665235441163756, Exact Match: 0.7121153846153846, F1 Score: 0.7779157425367564


 86%|████████▌ | 14000/16278 [51:49<08:25,  4.51it/s]

Train loss: 0.7701578402489956, Exact Match: 0.7107678571428572, F1 Score: 0.7771033732495821


 92%|█████████▏| 15000/16278 [55:31<04:44,  4.49it/s]

Train loss: 0.7741906571793059, Exact Match: 0.70955, F1 Score: 0.7761628732477919


 98%|█████████▊| 16000/16278 [59:13<01:01,  4.52it/s]

Train loss: 0.7771379832674283, Exact Match: 0.7086875, F1 Score: 0.7752452874488994


100%|██████████| 16278/16278 [1:00:14<00:00,  4.50it/s]


Epoch: 2, Loss:  0.7773716197299813, Em: 0.7085326800648148, F1: 0.7749649202091591
Starting validation...


100%|██████████| 3279/3279 [03:56<00:00, 13.85it/s]


Loss:  1.7593336239725252, Em: 0.6723086306800854, F1: 0.7532890545030478
