In [16]:
import numpy as np
import pandas as pd
import nltk
import os
import json
import transformers
import torch
import random
from torch import cuda
from tqdm import tqdm
from torch.utils.data import TensorDataset, DataLoader
from transformers import AlbertTokenizer, AlbertModel, AlbertForQuestionAnswering

In [17]:
# Setting the random seed for consistent results
random_seed = 42
random.seed(random_seed)
np.random.seed(random_seed)
torch.manual_seed(random_seed)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(random_seed)

In [18]:
device = 'cuda' if cuda.is_available() else 'cpu'

In [19]:
device

'cuda'

### Loading the data

In [20]:
# Loading train data
train_file = open('/kaggle/input/squad-2/train-v2.0.json')
train_data = json.load(train_file)

# Loading validation data
val_file = open('/kaggle/input/squad-2/dev-v2.0.json')
val_data = json.load(val_file)

### Preprocessing the dataset

In [21]:
tokenizer = AlbertTokenizer.from_pretrained('albert-base-v2')

Downloading (…)ve/main/spiece.model:   0%|          | 0.00/760k [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/684 [00:00<?, ?B/s]

In [22]:
def index_converter(context, context_tokenized):
    """
    Maps start str index to a tokenized index
    """
    mapper = {}
    curr = ''
    token_idx = 0
    for i, char in enumerate(context):
        if char != ' ' and char != '\n' and char != '\t' and char != '\r': # making sure current char is not whitespace
            curr += char
            if curr == context_tokenized[token_idx]:
                start = i - len(curr) + 1
                for j in range(start, i+1):
                    mapper[j] = (curr, token_idx)                
                curr = ''
                token_idx += 1
    if token_idx != len(context_tokenized):
        return None
    return mapper

def preprocess_data(dataset, is_training=True, tokenized=True):
    """
    Extracts the info from json_data object into a pandas readable data representation (list of dicts)
    """
    
    def _tokenize(seq):
        """
        Performing tokenization to minimize errors between tokenizers and encodings.
        """
        return [t.replace("``", '"').replace("''", '"') for t in seq.split()]
    
    examples = [] # store rows of data here for qa
    
    tokenization_errors = 0
    misaligned_ans_errors = 0
    num_impossibles = 0
    num_questions = 0
    
    for article_id in tqdm(range(len(dataset['data']))):
        paragraphs = dataset['data'][article_id]['paragraphs']
        for paragraph_id in range(len(paragraphs)):
            questions = dataset['data'][article_id]['paragraphs'][paragraph_id]['qas']
            
            context = paragraphs[paragraph_id]['context']
            context_tokenized = _tokenize(context)
                    
            for qid in range(len(questions)): 
                num_questions += 1
                
                question = questions[qid]['question']
                question_tokenized = _tokenize(question)
                qas_id = questions[qid]['id']
                
                is_impossible = questions[qid]['is_impossible']
                
                if is_impossible: 
                    num_impossibles += 1
                    examples.append({'qas_id': qas_id, 
                                     'question':question_tokenized if tokenized else question, 
                                     'context': context_tokenized if tokenized else context, 
                                     'answer':'', 
                                     'is_impossible': is_impossible,
                                     'start_pos': -1, 
                                     'end_pos':-1,
                                    'santiy_check': context_tokenized[-1:0]})
                    continue
                    
                answers = questions[qid]['answers']
                
                for ans_id in range(len(answers)): 
                    answer = answers[ans_id]['text']
                    start_pos = answers[ans_id]['answer_start'] 
                    end_pos = start_pos + len(answer)
                          
                    if context[start_pos:end_pos] != answer:
                        misaligned_ans_errors += 1
                        continue
                        
                    if tokenized:
                        mapper = index_converter(context, context_tokenized)
                        if mapper is None:
                            tokenization_errors += 1
                            continue
                        
                        start_pos = mapper[start_pos][1]
                        end_pos = mapper[end_pos-1][1] # inclusive
                    
                    examples.append({'qas_id': qas_id, 
                                     'question':question_tokenized if tokenized else question, 
                                     'context': context_tokenized if tokenized else context, 
                                     'answer':answer, 
                                     'is_impossible': is_impossible,
                                     'start_pos': start_pos, 
                                     'end_pos':end_pos,
                                    'santiy_check': context_tokenized[start_pos:end_pos+1] if tokenized else context[start_pos:end_pos+1]})
            
                    
    print('No. of questions:{}'.format(num_questions))
    return examples

Preprocessing of Train data

In [23]:
train_processed = preprocess_data(train_data)

100%|██████████| 442/442 [00:48<00:00,  9.11it/s]

No. of questions:130319





In [24]:
train_df = pd.DataFrame(train_processed)
train_df[:5]

Unnamed: 0,qas_id,question,context,answer,is_impossible,start_pos,end_pos,santiy_check
0,56be85543aeaaa14008c9063,"[When, did, Beyonce, start, becoming, popular?]","[Beyoncé, Giselle, Knowles-Carter, (/biːˈjɒnse...",in the late 1990s,False,39,42,"[in, the, late, 1990s]"
1,56be85543aeaaa14008c9065,"[What, areas, did, Beyonce, compete, in, when,...","[Beyoncé, Giselle, Knowles-Carter, (/biːˈjɒnse...",singing and dancing,False,28,30,"[singing, and, dancing]"
2,56be85543aeaaa14008c9066,"[When, did, Beyonce, leave, Destiny's, Child, ...","[Beyoncé, Giselle, Knowles-Carter, (/biːˈjɒnse...",2003,False,82,82,"[(2003),]"
3,56bf6b0f3aeaaa14008c9601,"[In, what, city, and, state, did, Beyonce, gro...","[Beyoncé, Giselle, Knowles-Carter, (/biːˈjɒnse...","Houston, Texas",False,22,23,"[Houston,, Texas,]"
4,56bf6b0f3aeaaa14008c9602,"[In, which, decade, did, Beyonce, become, famo...","[Beyoncé, Giselle, Knowles-Carter, (/biːˈjɒnse...",late 1990s,False,41,42,"[late, 1990s]"


In [26]:
def encode_data(processed_data, tokenizer, max_len, max_query_len):
    """
    Converts examples of data into ALBERT input format tensors.
    """
    context_length_errors = 0
    encoded_data = []
    for sample in tqdm(processed_data):
        question_raw = ' '.join(sample['question'])
        context_raw = ' '.join(sample['context'])
        if len(question_raw) > max_query_len:
            question_raw = question_raw[:max_query_len]
        
        # encode the data using the tokenizer
        encoded = tokenizer.encode_plus(question_raw, context_raw,
                                        max_length=max_len,
                                        padding='max_length',
                                        truncation='only_second',
                                        return_token_type_ids=True)
        if sample['is_impossible']:
            start = -1
            end = -1
        else: 
            input_ids = encoded['input_ids']
            answer_ids = tokenizer.encode(sample['answer']) 
            start, end = 0, 0 
            for i in range(len(input_ids)):
                if input_ids[i: i+len(answer_ids[1:-1])] == answer_ids[1:-1]:
                    start = i
                    end = i + len(answer_ids[1:-1]) - 1
                    break
            
        ids = encoded['input_ids']
        token_type_ids = encoded['token_type_ids']
        mask = encoded['attention_mask']
        
        assert len(ids) == max_len
        assert len(token_type_ids) == max_len
        assert len(mask) == max_len
        
        encoded_data.append({'ids': ids,
                      'token_type_ids': token_type_ids,
                      'mask': mask,
                      'start_pos': start,
                      'end_pos': end})        
    return encoded_data

Finetuning of model

In [27]:
# Model Settings
MAX_SEQ_LEN = 512
MAX_QN_LEN = 128
NO_EPOCHS = 3
BATCH_SIZE = 8
LEARNING_RATE = 5e-05
OUT = 2

In [28]:
train_encoded = encode_data(train_processed, tokenizer, MAX_SEQ_LEN, MAX_QN_LEN)

100%|██████████| 130217/130217 [05:38<00:00, 384.71it/s]


In [29]:
# Creation of Dataloader
input_ids = torch.tensor([sample['ids'] for sample in train_encoded], dtype=torch.long)
input_masks = torch.tensor([sample['mask'] for sample in train_encoded], dtype=torch.long)
segment_ids = torch.tensor([sample['token_type_ids'] for sample in train_encoded], dtype=torch.long)

start_positions = torch.tensor([sample['start_pos'] for sample in train_encoded], dtype=torch.long)
end_positions = torch.tensor([sample['end_pos'] for sample in train_encoded], dtype=torch.long)
train_dataset = TensorDataset(input_ids, input_masks, segment_ids, start_positions, end_positions)

train_params = {'batch_size': BATCH_SIZE,
                'shuffle': True,
                'num_workers': 0
                }

training_loader = DataLoader(train_dataset, **train_params)

Preprocessing of Validation Data

In [30]:
val_processed = preprocess_data(val_data)

100%|██████████| 35/35 [00:10<00:00,  3.19it/s]

No. of questions:11873





In [31]:
val_df = pd.DataFrame(val_processed)

In [32]:
val_df[:5]

Unnamed: 0,qas_id,question,context,answer,is_impossible,start_pos,end_pos,santiy_check
0,56ddde6b9a695914005b9628,"[In, what, country, is, Normandy, located?]","[The, Normans, (Norman:, Nourmands;, French:, ...",France,False,26,26,[France.]
1,56ddde6b9a695914005b9628,"[In, what, country, is, Normandy, located?]","[The, Normans, (Norman:, Nourmands;, French:, ...",France,False,26,26,[France.]
2,56ddde6b9a695914005b9628,"[In, what, country, is, Normandy, located?]","[The, Normans, (Norman:, Nourmands;, French:, ...",France,False,26,26,[France.]
3,56ddde6b9a695914005b9628,"[In, what, country, is, Normandy, located?]","[The, Normans, (Norman:, Nourmands;, French:, ...",France,False,26,26,[France.]
4,56ddde6b9a695914005b9629,"[When, were, the, Normans, in, Normandy?]","[The, Normans, (Norman:, Nourmands;, French:, ...",10th and 11th centuries,False,14,17,"[10th, and, 11th, centuries]"


In [33]:
val_encoded = encode_data(val_processed, tokenizer, MAX_SEQ_LEN, MAX_QN_LEN)

100%|██████████| 26232/26232 [01:12<00:00, 361.67it/s]


In [34]:
# Creation of Dataloader
input_ids = torch.tensor([sample['ids'] for sample in val_encoded], dtype=torch.long)
input_mask = torch.tensor([sample['mask'] for sample in val_encoded], dtype=torch.long)
segment_ids = torch.tensor([sample['token_type_ids'] for sample in val_encoded], dtype=torch.long)

start_positions = torch.tensor([sample['start_pos'] for sample in val_encoded], dtype=torch.long)
end_positions = torch.tensor([sample['end_pos'] for sample in val_encoded], dtype=torch.long)

val_dataset = TensorDataset(input_ids, input_mask, segment_ids, start_positions, end_positions)

val_params = {'batch_size': BATCH_SIZE,
                'shuffle': True,
                'num_workers': 0
                }    

val_loader = DataLoader(val_dataset, **val_params)

Evaluation Metrics for Prediction

In [35]:
# Functions to calculate Evaluation Metrics
def normalize_text(s):
    """Removing articles, punctuation, and standardizing whitespace are all typical text processing steps."""
    import string, re
    def remove_articles(text):
        regex = re.compile(r"\b(a|an|the)\b", re.UNICODE)
        return re.sub(regex, " ", text)

    def white_space_fix(text):
        return " ".join(text.split())

    def remove_punc(text):
        exclude = set(string.punctuation)
        return "".join(ch for ch in text if ch not in exclude)

    def lower(text):
        return text.lower()

    return white_space_fix(remove_articles(remove_punc(lower(s))))

def compute_f1(prediction, truth):
    ''' Calculates the F1 score'''
    pred_tokens = normalize_text(prediction).split()
    truth_tokens = normalize_text(truth).split()

    # if either the prediction or the truth is no-answer then f1 = 1 if they agree, 0 otherwise
    if len(pred_tokens) == 0 or len(truth_tokens) == 0:
        return int(pred_tokens == truth_tokens)

    common_tokens = set(pred_tokens) & set(truth_tokens)

    # if there are no common tokens then f1 = 0
    if len(common_tokens) == 0:
        return 0

    prec = len(common_tokens) / len(pred_tokens)
    rec = len(common_tokens) / len(truth_tokens)

    return 2 * (prec * rec) / (prec + rec)

def compute_exact_match(prediction, truth):
    ''' Computes the exact match score '''
    return int(normalize_text(prediction) == normalize_text(truth))

In [36]:
def train(model, training_loader, optimizer):
    ''' Training finetunes the ALBERT model for Question Answering'''
    step = 0
    counter = 0
    loss_tracker = 0
    em_score = 0
    f1_score = 0
    model.zero_grad()
    model.train()
    counter = 0
    for data in tqdm(training_loader):
        data = tuple(d.to(device) for d in data)
        inputs = {'input_ids':     data[0],
                'attention_mask':  data[1], 
                'token_type_ids':  data[2],  
                'start_positions': data[3], 
                'end_positions':   data[4]}
        outputs = model(**inputs)
        loss = outputs[0]
        loss.backward() # back propagation
        optimizer.step()
        model.zero_grad()
        
        starts = outputs[1]
        ends = outputs[2]
        target_starts = data[3]
        target_ends = data[4]
        
        for i,(s,e) in enumerate(zip(starts, ends)):
            start_pred = torch.argmax(s)
            end_pred = torch.argmax(e)
            predicted_answer = tokenizer.convert_tokens_to_string(tokenizer.convert_ids_to_tokens(inputs['input_ids'][i][start_pred : end_pred+1]))
            predicted_answer = predicted_answer if predicted_answer!= '[CLS]' else ''
            actual_answer = tokenizer.convert_tokens_to_string(tokenizer.convert_ids_to_tokens(inputs['input_ids'][i][target_starts[i] : target_ends[i]+1]))
            curr_em = compute_exact_match(predicted_answer, actual_answer)
            curr_f1 = compute_f1(predicted_answer, actual_answer)
            em_score += curr_em
            f1_score += curr_f1
            counter += 1
                
        loss_tracker += loss.item()
        step += 1
        if step % 1000 == 0:
            print("Train loss: {}, Exact Match: {}, F1 Score: {}".format(loss_tracker/step, em_score/counter, f1_score/counter))

    return loss_tracker/step, em_score/counter, f1_score/counter


def validator(model, testing_loader):
    ''' Performs Prediction of the Answers'''
    print('Starting validation...')
    model.eval()
    preds = []
    targs = []
    pred_answers = []
    target_answers = []
    val_loss = 0
    step = 0
    with torch.no_grad():
        for data in tqdm(testing_loader):
            data = tuple(d.to(device) for d in data)
            inputs = {'input_ids': data[0],
                'attention_mask':  data[1], 
                'token_type_ids':  data[2],
                'start_positions': data[3], 
                'end_positions':   data[4]}
            output = model(**inputs)
            valloss = output.loss
            val_loss += valloss.item()
            starts = output[1]
            ends = output[2]
            start_preds = []
            end_preds = []
            
            target_starts = data[3]
            target_ends = data[4]
            step += 1

            for s,e in zip(starts, ends):
                start_pred = torch.argmax(s)
                start_preds.append(start_pred)
                end_pred = torch.argmax(e)
                end_preds.append(end_pred)

            for i, (s,e) in enumerate(zip(start_preds, target_ends)):
                preds.append((s,e))
                predicted_answer = tokenizer.convert_tokens_to_string(tokenizer.convert_ids_to_tokens(inputs['input_ids'][i][s : e+1]))
                pred_answers.append(predicted_answer)
            
            for i, (s,e) in enumerate(zip(target_starts, target_ends)):
                targs.append((s,e))
                actual_answer = tokenizer.convert_tokens_to_string(tokenizer.convert_ids_to_tokens(inputs['input_ids'][i][s : e+1]))
                target_answers.append(actual_answer)
                
        em_score = 0
        f1_score = 0
        pred_answers = [item if item != '[CLS]' else '' for item in pred_answers]
        for predicted_ans, target_ans in zip(pred_answers, target_answers):
            em_score += compute_exact_match(predicted_ans, target_ans)
            f1_score += compute_f1(predicted_ans, target_ans)
        em_score /= len(pred_answers)
        f1_score /= len(pred_answers)
        val_loss /= step
    return pred_answers, target_answers, val_loss, em_score, f1_score

### Finetuning the ALBERT Model

In [38]:
model = AlbertForQuestionAnswering.from_pretrained('albert-base-v2')
model.to(device)

Downloading model.safetensors:   0%|          | 0.00/47.4M [00:00<?, ?B/s]

Some weights of the model checkpoint at albert-base-v2 were not used when initializing AlbertForQuestionAnswering: ['predictions.LayerNorm.bias', 'predictions.bias', 'predictions.decoder.bias', 'predictions.LayerNorm.weight', 'predictions.dense.weight', 'predictions.dense.bias']
- This IS expected if you are initializing AlbertForQuestionAnswering from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing AlbertForQuestionAnswering from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of AlbertForQuestionAnswering were not initialized from the model checkpoint at albert-base-v2 and are newly initialized: ['qa_outputs.bias', 'qa_outputs.weight']
You should probably TRAIN this model on a down-stream tas

AlbertForQuestionAnswering(
  (albert): AlbertModel(
    (embeddings): AlbertEmbeddings(
      (word_embeddings): Embedding(30000, 128, padding_idx=0)
      (position_embeddings): Embedding(512, 128)
      (token_type_embeddings): Embedding(2, 128)
      (LayerNorm): LayerNorm((128,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0, inplace=False)
    )
    (encoder): AlbertTransformer(
      (embedding_hidden_mapping_in): Linear(in_features=128, out_features=768, bias=True)
      (albert_layer_groups): ModuleList(
        (0): AlbertLayerGroup(
          (albert_layers): ModuleList(
            (0): AlbertLayer(
              (full_layer_layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
              (attention): AlbertAttention(
                (query): Linear(in_features=768, out_features=768, bias=True)
                (key): Linear(in_features=768, out_features=768, bias=True)
                (value): Linear(in_features=768, out_features=768, bias

In [39]:
optimizer = torch.optim.Adam(params=model.parameters(), lr=LEARNING_RATE)

In [41]:
for epoch in range(NO_EPOCHS): #TRAINING
    loss, em, f1 = train(model, training_loader, optimizer)
    print(f'Epoch: {epoch}, Loss:  {loss}, Em: {em}, F1: {f1}')
    pred_answers, target_answers, val_loss, em_score, f1_score = validator(model, val_loader)
    print(f'Loss:  {val_loss}, Em: {em_score}, F1: {f1_score}')
    torch.save(model, '/kaggle/working/fine_tuned_albert'+str(epoch)+'.model')

Starting training...


  6%|▌         | 1000/16278 [07:32<1:54:06,  2.23it/s]

Train loss: 3.8582937030792235, Exact Match: 0.227875, F1 Score: 0.28296949993513815


 12%|█▏        | 2000/16278 [14:59<1:46:00,  2.24it/s]

Train loss: 3.1481950763762, Exact Match: 0.286125, F1 Score: 0.35187516097460075


 18%|█▊        | 3000/16278 [22:38<1:44:05,  2.13it/s]

Train loss: 3.602371498843034, Exact Match: 0.25566666666666665, F1 Score: 0.31267618265266994


 25%|██▍       | 4000/16278 [30:25<1:35:01,  2.15it/s]

Train loss: 4.261346676781773, Exact Match: 0.2044375, F1 Score: 0.25536665871299863


 31%|███       | 5000/16278 [37:57<1:23:37,  2.25it/s]

Train loss: 4.366852917730808, Exact Match: 0.204675, F1 Score: 0.24799700654229445


 37%|███▋      | 6000/16278 [45:22<1:16:35,  2.24it/s]

Train loss: 4.165799406200647, Exact Match: 0.22566666666666665, F1 Score: 0.26210181778456293


 43%|████▎     | 7000/16278 [52:48<1:08:58,  2.24it/s]

Train loss: 3.8927660236018045, Exact Match: 0.24151785714285715, F1 Score: 0.2765725678133774


 49%|████▉     | 8000/16278 [1:00:16<1:02:08,  2.22it/s]

Train loss: 3.6524287201464176, Exact Match: 0.257921875, F1 Score: 0.2928763744613358


 55%|█████▌    | 9000/16278 [1:07:43<54:10,  2.24it/s]  

Train loss: 3.453640823645724, Exact Match: 0.27293055555555557, F1 Score: 0.30838798705425724


 61%|██████▏   | 10000/16278 [1:15:11<46:49,  2.23it/s]

Train loss: 3.2850401488035916, Exact Match: 0.2880875, F1 Score: 0.32502252311905083


 68%|██████▊   | 11000/16278 [1:22:38<39:14,  2.24it/s]

Train loss: 3.140764059131796, Exact Match: 0.30218181818181816, F1 Score: 0.34010027444360813


 74%|███████▎  | 12000/16278 [1:30:05<31:51,  2.24it/s]

Train loss: 3.014253853512307, Exact Match: 0.31527083333333333, F1 Score: 0.35445183607878966


 80%|███████▉  | 13000/16278 [1:37:32<24:29,  2.23it/s]

Train loss: 2.9020675299511507, Exact Match: 0.32790384615384616, F1 Score: 0.36833931564970585


 86%|████████▌ | 14000/16278 [1:44:59<16:56,  2.24it/s]

Train loss: 2.805034882115466, Exact Match: 0.3387589285714286, F1 Score: 0.38051178480488623


 92%|█████████▏| 15000/16278 [1:52:25<09:28,  2.25it/s]

Train loss: 2.7174150839567184, Exact Match: 0.3490666666666667, F1 Score: 0.39176325894791675


 98%|█████████▊| 16000/16278 [1:59:52<02:03,  2.24it/s]

Train loss: 2.639021310135722, Exact Match: 0.3585859375, F1 Score: 0.4023062725065609


100%|██████████| 16278/16278 [2:01:55<00:00,  2.23it/s]


Epoch: 0, Loss:  2.618088075270713, Em: 0.36109724536734833, F1: 0.4050529072466225
Starting validation...


100%|██████████| 3279/3279 [09:00<00:00,  6.06it/s]


Loss:  1.826110798889415, Em: 0.49012656297651724, F1: 0.5699224278970338
Starting training...


  6%|▌         | 1000/16278 [07:26<1:53:35,  2.24it/s]

Train loss: 1.286266210272908, Exact Match: 0.55325, F1 Score: 0.610800266494179


 12%|█▏        | 2000/16278 [14:53<1:46:02,  2.24it/s]

Train loss: 1.2743000508844853, Exact Match: 0.5516875, F1 Score: 0.6129627648988973


 18%|█▊        | 3000/16278 [22:19<1:38:50,  2.24it/s]

Train loss: 1.2678537793457507, Exact Match: 0.551875, F1 Score: 0.614134632708894


 25%|██▍       | 4000/16278 [29:46<1:31:09,  2.24it/s]

Train loss: 1.2593281202688813, Exact Match: 0.5568125, F1 Score: 0.6183211529882287


 31%|███       | 5000/16278 [37:12<1:23:40,  2.25it/s]

Train loss: 1.2523564333558082, Exact Match: 0.558575, F1 Score: 0.6207516249728628


 37%|███▋      | 6000/16278 [44:38<1:16:23,  2.24it/s]

Train loss: 1.2410491436769564, Exact Match: 0.5627083333333334, F1 Score: 0.6256375871495011


 43%|████▎     | 7000/16278 [52:04<1:09:02,  2.24it/s]

Train loss: 1.2356986632134233, Exact Match: 0.5634821428571428, F1 Score: 0.6271192907357137


 49%|████▉     | 8000/16278 [59:31<1:01:35,  2.24it/s]

Train loss: 1.2345239231698215, Exact Match: 0.564078125, F1 Score: 0.6282936792589987


 55%|█████▌    | 9000/16278 [1:06:57<54:03,  2.24it/s]  

Train loss: 1.230875253389279, Exact Match: 0.5648888888888889, F1 Score: 0.6295217329542654


 61%|██████▏   | 10000/16278 [1:14:24<46:39,  2.24it/s]

Train loss: 1.2279718457430602, Exact Match: 0.5660125, F1 Score: 0.6308327660424012


 68%|██████▊   | 11000/16278 [1:21:51<39:14,  2.24it/s]

Train loss: 1.2249902967648072, Exact Match: 0.5667954545454545, F1 Score: 0.6316212757909306


 74%|███████▎  | 12000/16278 [1:29:17<31:53,  2.24it/s]

Train loss: 1.2219122763251264, Exact Match: 0.56778125, F1 Score: 0.6324171979077534


 80%|███████▉  | 13000/16278 [1:36:43<24:21,  2.24it/s]

Train loss: 1.2188659395300425, Exact Match: 0.5688653846153846, F1 Score: 0.6336459566020053


 86%|████████▌ | 14000/16278 [1:44:10<16:56,  2.24it/s]

Train loss: 1.2143676198508058, Exact Match: 0.5696339285714286, F1 Score: 0.6343288790415744


 92%|█████████▏| 15000/16278 [1:51:36<09:29,  2.24it/s]

Train loss: 1.2101153595745564, Exact Match: 0.5713333333333334, F1 Score: 0.6359740964332167


 98%|█████████▊| 16000/16278 [1:59:02<02:03,  2.25it/s]

Train loss: 1.2074879784733057, Exact Match: 0.5723359375, F1 Score: 0.6370996706415042


100%|██████████| 16278/16278 [2:01:06<00:00,  2.24it/s]


Epoch: 1, Loss:  1.2065701771379689, Em: 0.5727746761175576, F1: 0.6375518762097395
Starting validation...


100%|██████████| 3279/3279 [08:47<00:00,  6.22it/s]


Loss:  1.6024635149306297, Em: 0.6264486123818237, F1: 0.7086805517176418
Starting training...


  6%|▌         | 1000/16278 [07:25<1:53:34,  2.24it/s]

Train loss: 0.8583097937777638, Exact Match: 0.67575, F1 Score: 0.742528940063563


 12%|█▏        | 2000/16278 [14:51<1:45:46,  2.25it/s]

Train loss: 0.8522205267995596, Exact Match: 0.677, F1 Score: 0.7425820672181246


 18%|█▊        | 3000/16278 [22:17<1:38:52,  2.24it/s]

Train loss: 0.8664455723414819, Exact Match: 0.670625, F1 Score: 0.7361963052506357


 25%|██▍       | 4000/16278 [29:44<1:31:24,  2.24it/s]

Train loss: 0.8746225435696542, Exact Match: 0.665, F1 Score: 0.7313048940907628


 31%|███       | 5000/16278 [37:10<1:24:24,  2.23it/s]

Train loss: 0.8797853091150523, Exact Match: 0.663075, F1 Score: 0.7299878182267108


 37%|███▋      | 6000/16278 [44:37<1:16:19,  2.24it/s]

Train loss: 0.8806936976661285, Exact Match: 0.6631458333333333, F1 Score: 0.7297685432797496


 43%|████▎     | 7000/16278 [52:03<1:09:13,  2.23it/s]

Train loss: 0.8848741483262607, Exact Match: 0.6616071428571428, F1 Score: 0.728535237062821


 49%|████▉     | 8000/16278 [59:29<1:01:29,  2.24it/s]

Train loss: 0.8859839454572648, Exact Match: 0.66134375, F1 Score: 0.7283012604798662


 55%|█████▌    | 9000/16278 [1:06:55<53:58,  2.25it/s]  

Train loss: 0.8914788280559911, Exact Match: 0.6593472222222222, F1 Score: 0.7263839690416803


 61%|██████▏   | 10000/16278 [1:14:21<46:43,  2.24it/s]

Train loss: 0.8931440785430371, Exact Match: 0.6581875, F1 Score: 0.7253356787036843


 68%|██████▊   | 11000/16278 [1:21:47<39:09,  2.25it/s]

Train loss: 0.8945847004922953, Exact Match: 0.6578068181818182, F1 Score: 0.7250951426756945


 74%|███████▎  | 12000/16278 [1:29:13<31:51,  2.24it/s]

Train loss: 0.8963199070201566, Exact Match: 0.6573125, F1 Score: 0.724529824196978


 80%|███████▉  | 13000/16278 [1:36:39<24:20,  2.24it/s]

Train loss: 0.8985265297832398, Exact Match: 0.6564326923076923, F1 Score: 0.7237525785206282


 86%|████████▌ | 14000/16278 [1:44:05<16:56,  2.24it/s]

Train loss: 0.9004119879385191, Exact Match: 0.6552946428571429, F1 Score: 0.7227918853425737


 92%|█████████▏| 15000/16278 [1:51:32<09:30,  2.24it/s]

Train loss: 0.9017574034308394, Exact Match: 0.6543916666666667, F1 Score: 0.7218895670498173


 98%|█████████▊| 16000/16278 [1:58:58<02:04,  2.24it/s]

Train loss: 0.9036970145232044, Exact Match: 0.6533515625, F1 Score: 0.7207586307711308


100%|██████████| 16278/16278 [2:01:02<00:00,  2.24it/s]


Epoch: 2, Loss:  0.9046298157515734, Em: 0.6530944500334058, F1: 0.7204258462377654
Starting validation...


100%|██████████| 3279/3279 [08:47<00:00,  6.22it/s]


Loss:  1.633917961988772, Em: 0.6252287282708143, F1: 0.7095050309025094
