In [1]:
import numpy as np
import pandas as pd
import nltk
import os
import json
import transformers
import torch
import random
from torch import cuda
from tqdm import tqdm
from torch.utils.data import TensorDataset, DataLoader
from transformers import ElectraTokenizer, ElectraModel, ElectraForQuestionAnswering

caused by: ['/opt/conda/lib/python3.10/site-packages/tensorflow_io/python/ops/libtensorflow_io_plugins.so: undefined symbol: _ZN3tsl6StatusC1EN10tensorflow5error4CodeESt17basic_string_viewIcSt11char_traitsIcEENS_14SourceLocationE']
caused by: ['/opt/conda/lib/python3.10/site-packages/tensorflow_io/python/ops/libtensorflow_io.so: undefined symbol: _ZTVN10tensorflow13GcsFileSystemE']


In [2]:
# Setting the random seed for consistent results
random_seed = 42
random.seed(random_seed)
np.random.seed(random_seed)
torch.manual_seed(random_seed)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(random_seed)

In [3]:
device = 'cuda' if cuda.is_available() else 'cpu'

In [4]:
device

'cuda'

### Loading the data

In [5]:
# Loading train data
train_file = open('/kaggle/input/squad-2/train-v2.0.json')
train_data = json.load(train_file)

# Loading validation data
val_file = open('/kaggle/input/squad-2/dev-v2.0.json')
val_data = json.load(val_file)

### Preprocessing the dataset

In [6]:
tokenizer = ElectraTokenizer.from_pretrained('google/electra-base-discriminator')

Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/27.0 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/666 [00:00<?, ?B/s]

In [7]:
def index_converter(context, context_tokenized):
    """
    Maps start str index to a tokenized index
    """
    mapper = {}
    curr = ''
    token_idx = 0
    for i, char in enumerate(context):
        if char != ' ' and char != '\n' and char != '\t' and char != '\r': # making sure current char is not whitespace
            curr += char
            if curr == context_tokenized[token_idx]:
                start = i - len(curr) + 1
                for j in range(start, i+1):
                    mapper[j] = (curr, token_idx)                
                curr = ''
                token_idx += 1
    if token_idx != len(context_tokenized): # skipping the data in case of issue with spanning
        return None
    return mapper

def preprocess_data(dataset, is_training=True, tokenized=True):
    """
    Parse the json_data object into a pandas readable data representation (list of dicts)
    """
    
    def _tokenize(seq):
        """
        Minimizes errors between tokenizers and encodings.
        Recommended in the paper BiDAF (Seo et al., 2016)
        """
        return [t.replace("``", '"').replace("''", '"') for t in seq.split()]
    
    examples = [] # store rows of data here for qa
    
    tokenization_errors = 0
    misaligned_ans_errors = 0
    num_impossibles = 0
    num_questions = 0
    
    for article_id in tqdm(range(len(dataset['data']))): # for each context
        paragraphs = dataset['data'][article_id]['paragraphs']
        for paragraph_id in range(len(paragraphs)):
            questions = dataset['data'][article_id]['paragraphs'][paragraph_id]['qas']
            
            context = paragraphs[paragraph_id]['context']
            context_tokenized = _tokenize(context)
                    
            for qid in range(len(questions)): # loop through questions
                num_questions += 1
                
                question = questions[qid]['question']
                question_tokenized = _tokenize(question)
                qas_id = questions[qid]['id']
                
                is_impossible = questions[qid]['is_impossible']
                
                if is_impossible: # check if question is impossible to answer
                    num_impossibles += 1
                    examples.append({'qas_id': qas_id, 
                                     'question':question_tokenized if tokenized else question, 
                                     'context': context_tokenized if tokenized else context, 
                                     'answer':'', 
                                     'is_impossible': is_impossible,
                                     'start_pos': -1, 
                                     'end_pos':-1,
                                    'santiy_check': context_tokenized[-1:0]})
                    continue
                    
                # question is not impossible, continue parsing
                answers = questions[qid]['answers']
                
                for ans_id in range(len(answers)): # for each answer
                    answer = answers[ans_id]['text']
                    start_pos = answers[ans_id]['answer_start'] # inclusive start index in raw context
                    end_pos = start_pos + len(answer) #exclusive end index in raw context
                          
                    if context[start_pos:end_pos] != answer:
                        misaligned_ans_errors += 1
                        continue
                        
                    if tokenized:
                        mapper = index_converter(context, context_tokenized)
                        if mapper is None:
                            tokenization_errors += 1
                            continue
                        
                        start_pos = mapper[start_pos][1]
                        end_pos = mapper[end_pos-1][1] # inclusive
                    
                    examples.append({'qas_id': qas_id, 
                                     'question':question_tokenized if tokenized else question, 
                                     'context': context_tokenized if tokenized else context, 
                                     'answer':answer, 
                                     'is_impossible': is_impossible,
                                     'start_pos': start_pos, 
                                     'end_pos':end_pos,
                                    'santiy_check': context_tokenized[start_pos:end_pos+1] if tokenized else context[start_pos:end_pos+1]})
            
                    
    print('No. of questions:{}'.format(num_questions))
    return examples

Preprocessing of Train data

In [8]:
train_processed = preprocess_data(train_data)

100%|██████████| 442/442 [00:40<00:00, 10.94it/s]

No. of questions:130319





In [9]:
train_df = pd.DataFrame(train_processed)
train_df[:5]

Unnamed: 0,qas_id,question,context,answer,is_impossible,start_pos,end_pos,santiy_check
0,56be85543aeaaa14008c9063,"[When, did, Beyonce, start, becoming, popular?]","[Beyoncé, Giselle, Knowles-Carter, (/biːˈjɒnse...",in the late 1990s,False,39,42,"[in, the, late, 1990s]"
1,56be85543aeaaa14008c9065,"[What, areas, did, Beyonce, compete, in, when,...","[Beyoncé, Giselle, Knowles-Carter, (/biːˈjɒnse...",singing and dancing,False,28,30,"[singing, and, dancing]"
2,56be85543aeaaa14008c9066,"[When, did, Beyonce, leave, Destiny's, Child, ...","[Beyoncé, Giselle, Knowles-Carter, (/biːˈjɒnse...",2003,False,82,82,"[(2003),]"
3,56bf6b0f3aeaaa14008c9601,"[In, what, city, and, state, did, Beyonce, gro...","[Beyoncé, Giselle, Knowles-Carter, (/biːˈjɒnse...","Houston, Texas",False,22,23,"[Houston,, Texas,]"
4,56bf6b0f3aeaaa14008c9602,"[In, which, decade, did, Beyonce, become, famo...","[Beyoncé, Giselle, Knowles-Carter, (/biːˈjɒnse...",late 1990s,False,41,42,"[late, 1990s]"


In [10]:
def encode_data(processed_data, tokenizer, max_len, max_query_len):
    """
    Converts examples of data into Electra input format tensors.
    """
    context_length_errors = 0
    encoded_data = []
    for sample in tqdm(processed_data):
        question_raw = ' '.join(sample['question'])
        context_raw = ' '.join(sample['context'])
        if len(question_raw) > max_query_len:
            question_raw = question_raw[:max_query_len]
        
        # encode the data using the tokenizer
        encoded = tokenizer.encode_plus(question_raw, context_raw,
                                        max_length=max_len,
                                        padding='max_length',
                                        truncation='only_second',
                                        return_token_type_ids=True)
        if sample['is_impossible']:
            start = -1
            end = -1
        else: # Adjust the start_pos and end_pos 
            input_ids = encoded['input_ids']
            answer_ids = tokenizer.encode(sample['answer']) 
            start, end = 0, 0
            for i in range(len(input_ids)):
                if input_ids[i: i+len(answer_ids[1:-1])] == answer_ids[1:-1]:
                    start = i
                    end = i + len(answer_ids[1:-1]) - 1
                    break
            
        ids = encoded['input_ids']
        token_type_ids = encoded['token_type_ids']
        mask = encoded['attention_mask']
        
        assert len(ids) == max_len
        assert len(token_type_ids) == max_len
        assert len(mask) == max_len
        
        encoded_data.append({'ids': ids,
                      'token_type_ids': token_type_ids,
                      'mask': mask,
                      'start_pos': start,
                      'end_pos': end})        
    return encoded_data

In [11]:
# Model Settings
MAX_SEQ_LEN = 512
MAX_QN_LEN = 128
NO_EPOCHS = 3
BATCH_SIZE = 8
LEARNING_RATE = 5e-05
OUT = 2

In [12]:
train_encoded = encode_data(train_processed, tokenizer, MAX_SEQ_LEN, MAX_QN_LEN)

100%|██████████| 130217/130217 [12:32<00:00, 173.03it/s]


In [13]:
# Creation of dataloader
input_ids = torch.tensor([sample['ids'] for sample in train_encoded], dtype=torch.long)
input_masks = torch.tensor([sample['mask'] for sample in train_encoded], dtype=torch.long)
segment_ids = torch.tensor([sample['token_type_ids'] for sample in train_encoded], dtype=torch.long)

start_positions = torch.tensor([sample['start_pos'] for sample in train_encoded], dtype=torch.long)
end_positions = torch.tensor([sample['end_pos'] for sample in train_encoded], dtype=torch.long)
train_dataset = TensorDataset(input_ids, input_masks, segment_ids, start_positions, end_positions)

train_params = {'batch_size': BATCH_SIZE,
                'shuffle': True,
                'num_workers': 0
                }

training_loader = DataLoader(train_dataset, **train_params)

In [14]:
val_processed = preprocess_data(val_data)

100%|██████████| 35/35 [00:10<00:00,  3.48it/s]

No. of questions:11873





Preprocessing of Validation Data

In [15]:
val_df = pd.DataFrame(val_processed)

In [16]:
val_df[:5]

Unnamed: 0,qas_id,question,context,answer,is_impossible,start_pos,end_pos,santiy_check
0,56ddde6b9a695914005b9628,"[In, what, country, is, Normandy, located?]","[The, Normans, (Norman:, Nourmands;, French:, ...",France,False,26,26,[France.]
1,56ddde6b9a695914005b9628,"[In, what, country, is, Normandy, located?]","[The, Normans, (Norman:, Nourmands;, French:, ...",France,False,26,26,[France.]
2,56ddde6b9a695914005b9628,"[In, what, country, is, Normandy, located?]","[The, Normans, (Norman:, Nourmands;, French:, ...",France,False,26,26,[France.]
3,56ddde6b9a695914005b9628,"[In, what, country, is, Normandy, located?]","[The, Normans, (Norman:, Nourmands;, French:, ...",France,False,26,26,[France.]
4,56ddde6b9a695914005b9629,"[When, were, the, Normans, in, Normandy?]","[The, Normans, (Norman:, Nourmands;, French:, ...",10th and 11th centuries,False,14,17,"[10th, and, 11th, centuries]"


In [17]:
val_encoded = encode_data(val_processed, tokenizer, MAX_SEQ_LEN, MAX_QN_LEN)

100%|██████████| 26232/26232 [02:44<00:00, 159.54it/s]


In [18]:
# Creation of Dataloader
input_ids = torch.tensor([sample['ids'] for sample in val_encoded], dtype=torch.long)
input_mask = torch.tensor([sample['mask'] for sample in val_encoded], dtype=torch.long)
segment_ids = torch.tensor([sample['token_type_ids'] for sample in val_encoded], dtype=torch.long)

start_positions = torch.tensor([sample['start_pos'] for sample in val_encoded], dtype=torch.long)
end_positions = torch.tensor([sample['end_pos'] for sample in val_encoded], dtype=torch.long)

val_dataset = TensorDataset(input_ids, input_mask, segment_ids, start_positions, end_positions)

val_params = {'batch_size': BATCH_SIZE,
                'shuffle': True,
                'num_workers': 0
                }    

val_loader = DataLoader(val_dataset, **val_params)

Evaluation Metrics for Prediction

In [19]:
# Functions to calculate Evaluation Metrics
def normalize_text(s):
    """Removing articles, punctuation, and standardizing whitespace are all typical text processing steps."""
    import string, re
    def remove_articles(text):
        regex = re.compile(r"\b(a|an|the)\b", re.UNICODE)
        return re.sub(regex, " ", text)

    def white_space_fix(text):
        return " ".join(text.split())

    def remove_punc(text):
        exclude = set(string.punctuation)
        return "".join(ch for ch in text if ch not in exclude)

    def lower(text):
        return text.lower()

    return white_space_fix(remove_articles(remove_punc(lower(s))))

def compute_f1(prediction, truth):
    ''' Calculates the F1 score'''
    pred_tokens = normalize_text(prediction).split()
    truth_tokens = normalize_text(truth).split()

    # if either the prediction or the truth is no-answer then f1 = 1 if they agree, 0 otherwise
    if len(pred_tokens) == 0 or len(truth_tokens) == 0:
        return int(pred_tokens == truth_tokens)

    common_tokens = set(pred_tokens) & set(truth_tokens)

    # if there are no common tokens then f1 = 0
    if len(common_tokens) == 0:
        return 0

    prec = len(common_tokens) / len(pred_tokens)
    rec = len(common_tokens) / len(truth_tokens)

    return 2 * (prec * rec) / (prec + rec)

def compute_exact_match(prediction, truth):
    ''' Computes the exact match score '''
    return int(normalize_text(prediction) == normalize_text(truth))

In [20]:
def train(model, training_loader, optimizer):
    ''' Training finetunes the ELECTRA model for Question Answering'''
    print('Starting training...')
    step = 0
    counter = 0
    loss_tracker = 0
    em_score = 0
    f1_score = 0
    model.zero_grad()
    model.train()
    counter = 0
    for data in tqdm(training_loader):
        data = tuple(d.to(device) for d in data)
        inputs = {'input_ids':     data[0],
                'attention_mask':  data[1], 
                'token_type_ids':  data[2],  
                'start_positions': data[3], 
                'end_positions':   data[4]}
        outputs = model(**inputs)
        loss = outputs[0]
        loss.backward() # back propagation
        optimizer.step()
        model.zero_grad()
        
        starts = outputs[1]
        ends = outputs[2]
        target_starts = data[3]
        target_ends = data[4]
        
        for i,(s,e) in enumerate(zip(starts, ends)):
            start_pred = torch.argmax(s)
            end_pred = torch.argmax(e)
            predicted_answer = tokenizer.convert_tokens_to_string(tokenizer.convert_ids_to_tokens(inputs['input_ids'][i][start_pred : end_pred+1]))
            predicted_answer = predicted_answer if predicted_answer!= '[CLS]' else ''
            actual_answer = tokenizer.convert_tokens_to_string(tokenizer.convert_ids_to_tokens(inputs['input_ids'][i][target_starts[i] : target_ends[i]+1]))
            curr_em = compute_exact_match(predicted_answer, actual_answer)
            curr_f1 = compute_f1(predicted_answer, actual_answer)
            em_score += curr_em
            f1_score += curr_f1
            counter += 1
                
        loss_tracker += loss.item()
        step += 1
        if step % 1000 == 0:
            print("Train loss: {}, Exact Match: {}, F1 Score: {}".format(loss_tracker/step, em_score/counter, f1_score/counter))

    return loss_tracker/step, em_score/counter, f1_score/counter


def validator(model, testing_loader):
    ''' Performs Prediction of the Answers '''
    print('Starting validation...')
    model.eval()
    preds = []
    targs = []
    pred_answers = []
    target_answers = []
    val_loss = 0
    step = 0
    with torch.no_grad():
        for data in tqdm(testing_loader):
            data = tuple(d.to(device) for d in data)
            inputs = {'input_ids': data[0],
                'attention_mask':  data[1], 
                'token_type_ids':  data[2],
                'start_positions': data[3], 
                'end_positions':   data[4]}
            output = model(**inputs)
            valloss = output.loss
            val_loss += valloss.item()
            starts = output[1]
            ends = output[2]
            start_preds = []
            end_preds = []
            
            target_starts = data[3]
            target_ends = data[4]
            step += 1

            for s,e in zip(starts, ends):
                start_pred = torch.argmax(s)
                start_preds.append(start_pred)
                end_pred = torch.argmax(e)
                end_preds.append(end_pred)

            for i, (s,e) in enumerate(zip(start_preds, target_ends)):
                preds.append((s,e))
                predicted_answer = tokenizer.convert_tokens_to_string(tokenizer.convert_ids_to_tokens(inputs['input_ids'][i][s : e+1]))
                pred_answers.append(predicted_answer)
            
            for i, (s,e) in enumerate(zip(target_starts, target_ends)):
                targs.append((s,e))
                actual_answer = tokenizer.convert_tokens_to_string(tokenizer.convert_ids_to_tokens(inputs['input_ids'][i][s : e+1]))
                target_answers.append(actual_answer)
                
        em_score = 0
        f1_score = 0
        pred_answers = [item if item != '[CLS]' else '' for item in pred_answers]
        for predicted_ans, target_ans in zip(pred_answers, target_answers):
            em_score += compute_exact_match(predicted_ans, target_ans)
            f1_score += compute_f1(predicted_ans, target_ans)
        em_score /= len(pred_answers)
        f1_score /= len(pred_answers)
        val_loss /= step
    return pred_answers, target_answers, val_loss, em_score, f1_score

### Finetuning the ELECTRA Model

In [22]:
model = ElectraForQuestionAnswering.from_pretrained('google/electra-base-discriminator')
model.to(device)

Downloading pytorch_model.bin:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of the model checkpoint at google/electra-base-discriminator were not used when initializing ElectraForQuestionAnswering: ['discriminator_predictions.dense_prediction.bias', 'discriminator_predictions.dense_prediction.weight', 'discriminator_predictions.dense.bias', 'discriminator_predictions.dense.weight']
- This IS expected if you are initializing ElectraForQuestionAnswering from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ElectraForQuestionAnswering from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of ElectraForQuestionAnswering were not initialized from the model checkpoint at google/electra-base-discriminator and are newly initialized: ['qa_outputs.bias', 'qa_outputs.we

ElectraForQuestionAnswering(
  (electra): ElectraModel(
    (embeddings): ElectraEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): ElectraEncoder(
      (layer): ModuleList(
        (0-11): 12 x ElectraLayer(
          (attention): ElectraAttention(
            (self): ElectraSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): ElectraSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerN

In [23]:
optimizer = torch.optim.Adam(params=model.parameters(), lr=LEARNING_RATE)

In [24]:
for epoch in range(NO_EPOCHS): #TRAINING
    loss, em, f1 = train(model, training_loader, optimizer)
    print(f'Epoch: {epoch}, Loss:  {loss}, Em: {em}, F1: {f1}') 
    pred_answers, target_answers, val_loss, em_score, f1_score = validator(model, val_loader)
    print(f'Loss:  {val_loss}, Em: {em_score}, F1: {f1_score}')
    torch.save(model, '/kaggle/working/fine_tuned_electra'+str(epoch)+'.model')

Starting training...


  6%|▌         | 1000/16278 [07:16<1:50:29,  2.30it/s]

Train loss: 2.257298290669918, Exact Match: 0.3325, F1 Score: 0.4092865794492176


 12%|█▏        | 2000/16278 [14:29<1:42:56,  2.31it/s]

Train loss: 1.9815798905193807, Exact Match: 0.3844375, F1 Score: 0.4665724131950755


 18%|█▊        | 3000/16278 [21:43<1:35:43,  2.31it/s]

Train loss: 1.8474137058754763, Exact Match: 0.4103333333333333, F1 Score: 0.4941281084666153


 25%|██▍       | 4000/16278 [28:57<1:29:42,  2.28it/s]

Train loss: 1.7649945277273655, Exact Match: 0.42790625, F1 Score: 0.5119062245614254


 31%|███       | 5000/16278 [36:10<1:21:28,  2.31it/s]

Train loss: 1.708784855657816, Exact Match: 0.439575, F1 Score: 0.5254793234234655


 37%|███▋      | 6000/16278 [43:23<1:14:33,  2.30it/s]

Train loss: 1.6672925110707681, Exact Match: 0.448, F1 Score: 0.5341009610376782


 43%|████▎     | 7000/16278 [50:36<1:06:48,  2.31it/s]

Train loss: 1.6337495823970862, Exact Match: 0.45626785714285717, F1 Score: 0.5421724972009891


 49%|████▉     | 8000/16278 [57:50<59:46,  2.31it/s]  

Train loss: 1.6059015740565956, Exact Match: 0.46234375, F1 Score: 0.5474015382132634


 55%|█████▌    | 9000/16278 [1:05:03<52:46,  2.30it/s]

Train loss: 1.579382317248318, Exact Match: 0.46768055555555554, F1 Score: 0.5531772490528508


 61%|██████▏   | 10000/16278 [1:12:16<45:11,  2.31it/s]

Train loss: 1.5599040645092725, Exact Match: 0.471925, F1 Score: 0.5576545896247355


 68%|██████▊   | 11000/16278 [1:19:30<38:13,  2.30it/s]

Train loss: 1.5442760518518361, Exact Match: 0.4753181818181818, F1 Score: 0.5609560384303799


 74%|███████▎  | 12000/16278 [1:26:43<30:45,  2.32it/s]

Train loss: 1.530880736236771, Exact Match: 0.47789583333333335, F1 Score: 0.5633761085498853


 80%|███████▉  | 13000/16278 [1:33:57<23:33,  2.32it/s]

Train loss: 1.519468747228384, Exact Match: 0.48057692307692307, F1 Score: 0.5661749456182481


 86%|████████▌ | 14000/16278 [1:41:10<16:25,  2.31it/s]

Train loss: 1.5067433749330896, Exact Match: 0.483875, F1 Score: 0.56943067560867


 92%|█████████▏| 15000/16278 [1:48:23<09:17,  2.29it/s]

Train loss: 1.4969059985856215, Exact Match: 0.485625, F1 Score: 0.5710138322838506


 98%|█████████▊| 16000/16278 [1:55:37<02:00,  2.31it/s]

Train loss: 1.4850976998731493, Exact Match: 0.4880234375, F1 Score: 0.5736287042144831


100%|██████████| 16278/16278 [1:57:37<00:00,  2.31it/s]


Epoch: 0, Loss:  1.4822412325381236, Em: 0.4887226706190436, F1: 0.5743957333285833
Starting validation...


100%|██████████| 3279/3279 [07:33<00:00,  7.22it/s]


Loss:  1.4093137712568071, Em: 0.7663921927416896, F1: 0.8419866418392424
Starting training...


  6%|▌         | 1000/16278 [07:13<1:50:18,  2.31it/s]

Train loss: 1.1052763902246951, Exact Match: 0.56775, F1 Score: 0.645183872684568


 12%|█▏        | 2000/16278 [14:26<1:42:40,  2.32it/s]

Train loss: 1.1047375701665878, Exact Match: 0.5724375, F1 Score: 0.649411246144707


 18%|█▊        | 3000/16278 [21:40<1:35:34,  2.32it/s]

Train loss: 1.1361906071454286, Exact Match: 0.5685833333333333, F1 Score: 0.6474149987688114


 25%|██▍       | 4000/16278 [28:53<1:29:07,  2.30it/s]

Train loss: 1.1322428409792482, Exact Match: 0.5703125, F1 Score: 0.6501964260225642


 31%|███       | 5000/16278 [36:07<1:21:08,  2.32it/s]

Train loss: 1.1323554707497359, Exact Match: 0.568525, F1 Score: 0.6479610750298747


 37%|███▋      | 6000/16278 [43:20<1:14:18,  2.31it/s]

Train loss: 1.1285220480983456, Exact Match: 0.5688541666666667, F1 Score: 0.6484377947147796


 43%|████▎     | 7000/16278 [50:33<1:07:04,  2.31it/s]

Train loss: 1.1284509751221963, Exact Match: 0.567375, F1 Score: 0.648084978812085


 49%|████▉     | 8000/16278 [57:46<59:54,  2.30it/s]  

Train loss: 1.1302967326249926, Exact Match: 0.5660625, F1 Score: 0.6468249753119036


 55%|█████▌    | 9000/16278 [1:05:00<52:27,  2.31it/s]

Train loss: 1.131004794943664, Exact Match: 0.5665138888888889, F1 Score: 0.6473136346329298


 61%|██████▏   | 10000/16278 [1:12:13<45:06,  2.32it/s]

Train loss: 1.1310025657832623, Exact Match: 0.565375, F1 Score: 0.6460901106845375


 68%|██████▊   | 11000/16278 [1:19:26<38:03,  2.31it/s]

Train loss: 1.1322819618745283, Exact Match: 0.5651477272727272, F1 Score: 0.6457723348756903


 74%|███████▎  | 12000/16278 [1:26:40<30:49,  2.31it/s]

Train loss: 1.13250890213872, Exact Match: 0.5649166666666666, F1 Score: 0.6457116982967629


 80%|███████▉  | 13000/16278 [1:33:53<23:35,  2.32it/s]

Train loss: 1.1322673233850644, Exact Match: 0.5647211538461538, F1 Score: 0.6456518278590764


 86%|████████▌ | 14000/16278 [1:41:07<16:29,  2.30it/s]

Train loss: 1.133531950847379, Exact Match: 0.5644017857142857, F1 Score: 0.6454200433270476


 92%|█████████▏| 15000/16278 [1:48:20<09:15,  2.30it/s]

Train loss: 1.1327028562376897, Exact Match: 0.5644333333333333, F1 Score: 0.6455324341337922


 98%|█████████▊| 16000/16278 [1:55:34<02:00,  2.32it/s]

Train loss: 1.1334482663860546, Exact Match: 0.563609375, F1 Score: 0.6447861967551006


100%|██████████| 16278/16278 [1:57:34<00:00,  2.31it/s]


Epoch: 1, Loss:  1.1339856157577555, Em: 0.5635208920494252, F1: 0.6446315090737822
Starting validation...


100%|██████████| 3279/3279 [07:34<00:00,  7.21it/s]


Loss:  1.4790602469633323, Em: 0.761741384568466, F1: 0.8359031776951983
Starting training...


  6%|▌         | 1000/16278 [07:13<1:50:28,  2.30it/s]

Train loss: 0.9146091934889555, Exact Match: 0.61925, F1 Score: 0.6927773235334815


 12%|█▏        | 2000/16278 [14:26<1:43:16,  2.30it/s]

Train loss: 0.9095034807771444, Exact Match: 0.618625, F1 Score: 0.6933275464137119


 18%|█▊        | 3000/16278 [21:40<1:35:40,  2.31it/s]

Train loss: 0.9188480047533909, Exact Match: 0.616625, F1 Score: 0.6907486916126169


 25%|██▍       | 4000/16278 [28:53<1:29:04,  2.30it/s]

Train loss: 0.925996621368453, Exact Match: 0.6133125, F1 Score: 0.6874757712481501


 31%|███       | 5000/16278 [36:07<1:21:16,  2.31it/s]

Train loss: 0.9318680788710714, Exact Match: 0.612325, F1 Score: 0.6861527671598062


 37%|███▋      | 6000/16278 [43:20<1:14:08,  2.31it/s]

Train loss: 0.9393819619653125, Exact Match: 0.6118541666666667, F1 Score: 0.6851310435379908


 43%|████▎     | 7000/16278 [50:34<1:07:03,  2.31it/s]

Train loss: 0.942881225902055, Exact Match: 0.6103392857142858, F1 Score: 0.6835775419174301


 49%|████▉     | 8000/16278 [57:47<1:00:28,  2.28it/s]

Train loss: 0.9478172356644645, Exact Match: 0.60875, F1 Score: 0.6826002224586872


 55%|█████▌    | 9000/16278 [1:05:01<52:32,  2.31it/s]

Train loss: 0.9511225775140855, Exact Match: 0.6081388888888889, F1 Score: 0.6825143447709032


 61%|██████▏   | 10000/16278 [1:12:14<45:28,  2.30it/s]

Train loss: 0.9561769568048417, Exact Match: 0.6072375, F1 Score: 0.681953966812798


 68%|██████▊   | 11000/16278 [1:19:28<38:23,  2.29it/s]

Train loss: 0.9596717871136285, Exact Match: 0.6058977272727273, F1 Score: 0.6809765786195557


 74%|███████▎  | 12000/16278 [1:26:42<30:53,  2.31it/s]

Train loss: 0.9613804509819796, Exact Match: 0.6050104166666667, F1 Score: 0.6800166796546042


 80%|███████▉  | 13000/16278 [1:33:55<23:42,  2.30it/s]

Train loss: 0.9653271642900431, Exact Match: 0.6042019230769231, F1 Score: 0.6791418387782584


 86%|████████▌ | 14000/16278 [1:41:09<16:28,  2.30it/s]

Train loss: 0.968234985084406, Exact Match: 0.6032410714285714, F1 Score: 0.6784162689062176


 92%|█████████▏| 15000/16278 [1:48:23<09:15,  2.30it/s]

Train loss: 0.9717069536139568, Exact Match: 0.6022, F1 Score: 0.6773384063470559


 98%|█████████▊| 16000/16278 [1:55:36<02:00,  2.31it/s]

Train loss: 0.9730943836579099, Exact Match: 0.6016015625, F1 Score: 0.6770966638214976


100%|██████████| 16278/16278 [1:57:36<00:00,  2.31it/s]


Epoch: 2, Loss:  0.9740563340904814, Em: 0.6014959644286076, F1: 0.6770540627827345
Starting validation...


100%|██████████| 3279/3279 [07:33<00:00,  7.23it/s]


Loss:  1.4832015226402817, Em: 0.7652485513876182, F1: 0.8431609297202848
