# Question Answering Model 
## no trainer

- dataset
- torch
- transformers
- transformers[torch]
- evaluate

import packages

In [70]:
from datasets import load_dataset
from transformers import (
    AutoTokenizer,
    default_data_collator,
    get_scheduler,
    AutoModelForQuestionAnswering,
)
from torch.utils.data import DataLoader
from torch.optim import AdamW
import torch

import evaluate
import collections
from tqdm.auto import tqdm
import numpy as np

import os
import re
import datetime

Set cache directory.

In [71]:
model_dir = '/mount/arbeitsdaten31/studenten1/linku/models'
CACHE_DIR='/mount/arbeitsdaten31/studenten1/linku/cache'
%set_env TRANSFORMERS_CACHE $CACHE_DIR
%set_env HF_MODULES_CACHE $CACHE_DIR
%set_env HF_DATASETS_CACHE $CACHE_DIR

env: TRANSFORMERS_CACHE=/mount/arbeitsdaten31/studenten1/linku/cache
env: HF_MODULES_CACHE=/mount/arbeitsdaten31/studenten1/linku/cache
env: HF_DATASETS_CACHE=/mount/arbeitsdaten31/studenten1/linku/cache


### arguments.py

args_input.

In [72]:
args_input_ALstrategy = 'MarginSampling'
args_input_initseed = 100 # 1000
args_input_quota = 100 # 1000
args_input_batch = 35 # 128
args_input_dataset_name = 'SQuAD'
args_input_iteration = 1
args_input_model_batch = 8 # already add in arguments.py

### load dataset

In [73]:
squad = load_dataset(args_input_dataset_name.lower())
# squad["train"] = squad["train"].shuffle(42).select(range(2000))
squad["train"] = squad["train"].select(range(3000))
squad["validation"] = squad["validation"].select(range(1000))

Found cached dataset squad (/home/users1/linku/.cache/huggingface/datasets/squad/plain_text/1.0.0/d6ec3ceb99ca480ce37cdd35555d6cb2511d223b9150cce08a837ef62ffea453)
100%|██████████| 2/2 [00:00<00:00, 99.76it/s]


Next we will preprocess the dataset (training and evaluation data).

In [74]:
def preprocess_training_features(examples):
    # keep ["offset_mapping"], for compute_metrics()
    questions = [q.strip() for q in examples["question"]]
    inputs = tokenizer(
        questions,
        examples["context"],
        max_length=max_length,
        truncation="only_second",
        stride=stride,
        return_overflowing_tokens=True,
        return_offsets_mapping=True,
        padding="max_length",
    )

    offset_mapping = inputs["offset_mapping"]
    sample_map = inputs.pop("overflow_to_sample_mapping")
    answers = examples["answers"]
    start_positions = []
    end_positions = []
    example_ids = []

    for i, offset in enumerate(offset_mapping):
        sample_idx = sample_map[i]
        answer = answers[sample_idx]
        start_char = answer["answer_start"][0]
        end_char = answer["answer_start"][0] + len(answer["text"][0])
        sequence_ids = inputs.sequence_ids(i)
        
        example_ids.append(examples["id"][sample_idx]) # newly added for used in unlabel data predict

        # Find the start and end of the context
        idx = 0
        while sequence_ids[idx] != 1:
            idx += 1
        context_start = idx
        while sequence_ids[idx] == 1:
            idx += 1
        context_end = idx - 1

        # If the answer is not fully inside the context, label is (0, 0)
        if offset[context_start][0] > start_char or offset[context_end][1] < end_char:
            start_positions.append(0)
            end_positions.append(0)
        else:
            # Otherwise it's the start and end token positions
            idx = context_start
            while idx <= context_end and offset[idx][0] <= start_char:
                idx += 1
            start_positions.append(idx - 1)

            idx = context_end
            while idx >= context_start and offset[idx][1] >= end_char:
                idx -= 1
            end_positions.append(idx + 1)

    inputs["example_id"] = example_ids
    inputs["start_positions"] = start_positions
    inputs["end_positions"] = end_positions
    return inputs

In [75]:
def preprocess_training_examples(examples):
    # no ['offset_mapping'], for .train() and .eval()
    questions = [q.strip() for q in examples["question"]]
    inputs = tokenizer(
        questions,
        examples["context"],
        max_length=max_length,
        truncation="only_second",
        stride=stride,
        return_overflowing_tokens=True,
        return_offsets_mapping=True,
        padding="max_length",
    )

    offset_mapping = inputs.pop("offset_mapping")
    sample_map = inputs.pop("overflow_to_sample_mapping")
    answers = examples["answers"]
    start_positions = []
    end_positions = []
    example_ids = []

    for i, offset in enumerate(offset_mapping):
        sample_idx = sample_map[i]
        answer = answers[sample_idx]
        start_char = answer["answer_start"][0]
        end_char = answer["answer_start"][0] + len(answer["text"][0])
        sequence_ids = inputs.sequence_ids(i)
        
        example_ids.append(examples["id"][sample_idx]) # newly added for used in unlabel data predict

        # Find the start and end of the context
        idx = 0
        while sequence_ids[idx] != 1:
            idx += 1
        context_start = idx
        while sequence_ids[idx] == 1:
            idx += 1
        context_end = idx - 1

        # If the answer is not fully inside the context, label is (0, 0)
        if offset[context_start][0] > start_char or offset[context_end][1] < end_char:
            start_positions.append(0)
            end_positions.append(0)
        else:
            # Otherwise it's the start and end token positions
            idx = context_start
            while idx <= context_end and offset[idx][0] <= start_char:
                idx += 1
            start_positions.append(idx - 1)

            idx = context_end
            while idx >= context_start and offset[idx][1] >= end_char:
                idx -= 1
            end_positions.append(idx + 1)

    inputs["start_positions"] = start_positions
    inputs["end_positions"] = end_positions
    return inputs

In [76]:
def preprocess_validation_examples(examples):
    questions = [q.strip() for q in examples["question"]]
    inputs = tokenizer(
        questions,
        examples["context"],
        max_length=max_length,
        truncation="only_second",
        stride=stride,
        return_overflowing_tokens=True,
        return_offsets_mapping=True,
        padding="max_length",
    )

    sample_map = inputs.pop("overflow_to_sample_mapping")
    example_ids = []

    for i in range(len(inputs["input_ids"])):
        sample_idx = sample_map[i]
        example_ids.append(examples["id"][sample_idx])

        sequence_ids = inputs.sequence_ids(i)
        offset = inputs["offset_mapping"][i]
        inputs["offset_mapping"][i] = [
            o if sequence_ids[k] == 1 else None for k, o in enumerate(offset)
        ]

    inputs["example_id"] = example_ids
    return inputs

In [77]:
max_length = 384
stride = 128

# load tokenizer for dataset preprocessing
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
# preprocess data
train_dataset = squad["train"].map(
    preprocess_training_examples,
    batched=True,
    remove_columns=squad["train"].column_names,
)
train_features = squad["train"].map(
    preprocess_training_features,
    batched=True,
    remove_columns=squad["train"].column_names,
)
val_dataset = squad["validation"].map(
    preprocess_validation_examples,
    batched=True,
    remove_columns=squad["validation"].column_names,
)
val_features = squad["validation"].map(
    preprocess_validation_examples,
    batched=True,
    remove_columns=squad["validation"].column_names,
)

Loading cached processed dataset at /home/users1/linku/.cache/huggingface/datasets/squad/plain_text/1.0.0/d6ec3ceb99ca480ce37cdd35555d6cb2511d223b9150cce08a837ef62ffea453/cache-d10ddabde563b597.arrow
Loading cached processed dataset at /home/users1/linku/.cache/huggingface/datasets/squad/plain_text/1.0.0/d6ec3ceb99ca480ce37cdd35555d6cb2511d223b9150cce08a837ef62ffea453/cache-773e51dc4558cf67.arrow
Loading cached processed dataset at /home/users1/linku/.cache/huggingface/datasets/squad/plain_text/1.0.0/d6ec3ceb99ca480ce37cdd35555d6cb2511d223b9150cce08a837ef62ffea453/cache-0f087a92e258c27b.arrow
Loading cached processed dataset at /home/users1/linku/.cache/huggingface/datasets/squad/plain_text/1.0.0/d6ec3ceb99ca480ce37cdd35555d6cb2511d223b9150cce08a837ef62ffea453/cache-0f087a92e258c27b.arrow


In [78]:
train_dataset.set_format("torch")
train_features.set_format("torch")
val_dataset = val_dataset.remove_columns(["offset_mapping"])
val_dataset.set_format("torch")
val_features.set_format("torch")

## model.py

In [79]:
def to_train(num_train_epochs, train_dataloader, device, model, optimizer, lr_scheduler):
	print('num of train dataset', len(train_dataloader.dataset))
	for epoch in range(num_train_epochs):
		model.train()
		for step, batch in enumerate(tqdm(train_dataloader, desc="Training")):
			batch = {key: value.to(device) for key, value in batch.items()}
			outputs = model(**batch)
			loss = outputs.loss
			loss.backward()

			optimizer.step()
			lr_scheduler.step()
			optimizer.zero_grad()
    
		model_to_save = model.module if hasattr(model, 'module') else model
		model_to_save.save_pretrained(model_dir)
	print('TRAIN done!')

In [80]:
metric = evaluate.load("squad")

In [81]:
def compute_metrics(start_logits, end_logits, features, examples):
    
    example_to_features = collections.defaultdict(list)
    max_answer_length = 30
    n_best = 20
    for idx, feature in enumerate(features):
        example_to_features[feature["example_id"]].append(idx)

    predicted_answers = []
    for example in tqdm(examples, desc="Computing metrics"):
        example_id = example["id"]
        context = example["context"]
        answers = []

        # Loop through all features associated with that example
        for feature_index in example_to_features[example_id]:
            start_logit = start_logits[feature_index]
            end_logit = end_logits[feature_index]
            offsets = features[feature_index]["offset_mapping"]

            start_indexes = np.argsort(start_logit)[-1 : -n_best - 1 : -1].tolist()
            end_indexes = np.argsort(end_logit)[-1 : -n_best - 1 : -1].tolist()
            for start_index in start_indexes:
                for end_index in end_indexes:
                    # Skip answers that are not fully in the context
                    if offsets[start_index] is None or offsets[end_index] is None:
                        continue
                    # Skip answers with a length that is either < 0 or > max_answer_length
                    if (
                        end_index < start_index
                        or end_index - start_index + 1 > max_answer_length
                    ):
                        continue

                    answer = {
                        "text": context[offsets[start_index][0] : offsets[end_index][1]],
                        "logit_score": start_logit[start_index] + end_logit[end_index],
                    }
                    answers.append(answer)

        # Select the answer with the best score
        if len(answers) > 0:
            best_answer = max(answers, key=lambda x: x["logit_score"])
            predicted_answers.append(
                {"id": example_id, "prediction_text": best_answer["text"]}
            )
        else:
            predicted_answers.append({"id": example_id, "prediction_text": ""})

    theoretical_answers = [{"id": ex["id"], "answers": ex["answers"]} for ex in examples]
    return metric.compute(predictions=predicted_answers, references=theoretical_answers)

In [82]:
def get_pred(eval_dataloader, device, features, examples):
    model = AutoModelForQuestionAnswering.from_pretrained(model_dir).to(device)
    
    test_loss = []
    model.eval()
    start_logits = []
    end_logits = []
    for batch in tqdm(eval_dataloader, desc="Evaluating_pred"):
        batch = {key: value.to(device) for key, value in batch.items()}
        with torch.no_grad():
            outputs = model(**batch)
            print(outputs)
            print(outputs.loss)
            test_loss.append(outputs.loss)

        start_logits.append(outputs.start_logits.cpu().numpy())
        end_logits.append(outputs.end_logits.cpu().numpy())

    start_logits = np.concatenate(start_logits)
    end_logits = np.concatenate(end_logits)
    start_logits = start_logits[: len(features)]
    end_logits = end_logits[: len(features)]

    # if record_loss:
    #     test_loss /= len(eval_dataloader.dataset)
    print('\nTest set: Average loss: {:.4f}\n'.format(test_loss))

    return compute_metrics(start_logits, end_logits, features, examples)

In [83]:
def get_prob(eval_dataloader, device, features, examples):
    model = AutoModelForQuestionAnswering.from_pretrained(model_dir).to(device)

    model.eval()
    start_logits = []
    end_logits = []

    for batch in tqdm(eval_dataloader, desc="Evaluating_prob"):
        batch = {key: value.to(device) for key, value in batch.items()}
        with torch.no_grad():
            outputs = model(**batch)

        start_logits.append(outputs.start_logits.cpu().numpy())
        end_logits.append(outputs.end_logits.cpu().numpy())

    start_logits = np.concatenate(start_logits)
    end_logits = np.concatenate(end_logits)
    start_logits = start_logits[: len(features)]
    end_logits = end_logits[: len(features)]

    prob_dict = {}
    example_to_features = collections.defaultdict(list)
    max_answer_length = 30
    n_best = 20 # TODO: if set n_best as 5, will it effect the time??
    
    for idx, feature in enumerate(features):
        example_to_features[feature["example_id"]].append(idx)

    for example in tqdm(examples, desc="Computing metrics"):
        example_id = example["id"]
        # context = example["context"]
        answers = []

        # Loop through all features associated with that example
        for feature_index in example_to_features[example_id]:
            start_logit = start_logits[feature_index]
            end_logit = end_logits[feature_index]
            offsets = features[feature_index]["offset_mapping"]

            start_indexes = np.argsort(start_logit)[-1 : -n_best - 1 : -1].tolist()
            end_indexes = np.argsort(end_logit)[-1 : -n_best - 1 : -1].tolist()
            for start_index in start_indexes:
                for end_index in end_indexes:
                    # Skip answers that are not fully in the context
                    if offsets[start_index] is None or offsets[end_index] is None:
                        continue
                    # Skip answers with a length that is either < 0 or > max_answer_length
                    if (
                        end_index < start_index
                        or end_index - start_index + 1 > max_answer_length
                    ):
                        continue

                    answers.append(start_logit[start_index] + end_logit[end_index])
        
            if len(answers) > 1:
                prob_dict[feature_index] = softmax(answers)
            elif example_to_features[example_id] != []:
                prob_dict[feature_index] = np.array([0])
    
    return prob_dict

In [84]:
def get_prob_dropout(eval_dataloader, device, features, examples, n_drop=10):
    # deepAL+: self.clf.train()
    model = AutoModelForQuestionAnswering.from_pretrained(model_dir).to(device)
    model.train()
    # deepAL+: probs = torch.zeros([len(data), len(np.unique(data.Y))])
    # deepAL+: loader = DataLoader(data, shuffle=False, **self.params['loader_te_args'])
    prob_dict = {}
    # deepAL+: for i in range(n_drop):
    # deepAL+:     with torch.no_grad():
    # deepAL+:         for x, y, idxs in loader:
    # deepAL+:             x, y = x.to(self.device), y.to(self.device)
    # deepAL+:             out, e1 = self.clf(x)
    # deepAL+:             prob = F.softmax(out, dim=1)
    # deepAL+:             probs[idxs] += prob.cpu()
    for i in range(n_drop):
        start_logits = []
        end_logits = []
        for batch in tqdm(eval_dataloader, desc="Evaluating_prob_dropout"):
            batch = {key: value.to(device) for key, value in batch.items()}
            with torch.no_grad():
                outputs = model(**batch)

            start_logits.append(outputs.start_logits.cpu().numpy())
            end_logits.append(outputs.end_logits.cpu().numpy())

        start_logits = np.concatenate(start_logits)
        end_logits = np.concatenate(end_logits)
        start_logits = start_logits[: len(features)]
        end_logits = end_logits[: len(features)]

        example_to_features = collections.defaultdict(list)
        max_answer_length = 30
        n_best = 20
            
        for idx, feature in enumerate(features):
            example_to_features[feature["example_id"]].append(idx)

        n = 0
        for example in tqdm(examples):
            example_id = example["id"]
            answers = []

            # Loop through all features associated with that example
            for feature_index in example_to_features[example_id]:
                start_logit = start_logits[feature_index]
                end_logit = end_logits[feature_index]
                offsets = features[feature_index]["offset_mapping"]

                start_indexes = np.argsort(start_logit)[-1 : -n_best - 1 : -1].tolist()
                end_indexes = np.argsort(end_logit)[-1 : -n_best - 1 : -1].tolist()
                for start_index in start_indexes:
                    for end_index in end_indexes:
                        # Skip answers that are not fully in the context
                        if offsets[start_index] is None or offsets[end_index] is None:
                            continue
                        # Skip answers with a length that is either < 0 or > max_answer_length
                        if (
                            end_index < start_index
                            or end_index - start_index + 1 > max_answer_length
                        ):
                            continue

                        answers.append(start_logit[start_index] + end_logit[end_index])

            if 1 < len(answers) < 200: # pad to same numbers of possible answers
                zero_list = [0] * (200 - len(answers))
                answers.extend(zero_list)
            elif len(answers) >= 200:
                answers = answers[:200]

            if len(answers) > 1:
                if feature_index not in prob_dict:
                    prob_dict[feature_index] = softmax(answers)
                else:
                    prob_dict[feature_index] += softmax(answers)
            elif example_to_features[example_id] != []:
                if feature_index not in prob_dict:
                    prob_dict[feature_index] = np.array([0])   

    for key in prob_dict.keys():
        prob_dict[key] /= n_drop

    return prob_dict

In [85]:
def get_prob_dropout_split(eval_dataloader, device, features, examples, n_drop=10):
    ## use tensor to save the answers
    
    # deepAL+: self.clf.train()
    model = AutoModelForQuestionAnswering.from_pretrained(model_dir).to(device)
    model.train()
    # deepAL+: probs = torch.zeros([len(data), len(np.unique(data.Y))])
    # deepAL+: loader = DataLoader(data, shuffle=False, **self.params['loader_te_args'])
    probs = torch.zeros([n_drop, len(eval_dataloader.dataset), 200])
    for_check = []
    # deepAL+: for i in range(n_drop):
    # deepAL+:     with torch.no_grad():
    # deepAL+:         for x, y, idxs in loader:
    # deepAL+:             x, y = x.to(self.device), y.to(self.device)
    # deepAL+:             out, e1 = self.clf(x)
    # deepAL+:             prob = F.softmax(out, dim=1)
    # deepAL+:             probs[i][idxs] += F.softmax(out, dim=1).cpu()
    for i in range(n_drop):
        prob_dict = {}
        start_logits = []
        end_logits = []
        for batch in tqdm(eval_dataloader, desc="Evaluating_prob_dropout"):
            batch = {key: value.to(device) for key, value in batch.items()}
            with torch.no_grad():
                outputs = model(**batch)

            start_logits.append(outputs.start_logits.cpu().numpy())
            end_logits.append(outputs.end_logits.cpu().numpy())

        start_logits = np.concatenate(start_logits)
        end_logits = np.concatenate(end_logits)
        start_logits = start_logits[: len(features)]
        end_logits = end_logits[: len(features)]

        example_to_features = collections.defaultdict(list)
        max_answer_length = 30
        n_best = 20
            
        for idx, feature in enumerate(features):
            example_to_features[feature["example_id"]].append(idx)

        n = 0
        for example in tqdm(examples, desc="Computing metrics"):
            example_id = example["id"]
            # context = example["context"]
            answers = []

            # Loop through all features associated with that example
            for feature_index in example_to_features[example_id]:
                start_logit = start_logits[feature_index]
                end_logit = end_logits[feature_index]
                offsets = features[feature_index]["offset_mapping"]

                start_indexes = np.argsort(start_logit)[-1 : -n_best - 1 : -1].tolist()
                end_indexes = np.argsort(end_logit)[-1 : -n_best - 1 : -1].tolist()
                for start_index in start_indexes:
                    for end_index in end_indexes:
                        # Skip answers that are not fully in the context
                        if offsets[start_index] is None or offsets[end_index] is None:
                            continue
                        # Skip answers with a length that is either < 0 or > max_answer_length
                        if (
                            end_index < start_index
                            or end_index - start_index + 1 > max_answer_length
                        ):
                            continue

                        answers.append(start_logit[start_index] + end_logit[end_index])

            
                if 1 < len(answers) < 200: # pad to same numbers of possible answers
                    zero_list = [0] * (200 - len(answers))
                    answers.extend(zero_list)
                elif len(answers) >= 200:
                    answers = answers[:200]

                probs[i][feature_index] += torch.tensor(softmax(answers))

            # if n == 0 and len(softmax(answers)) > 1:
            #     for_check.append(answers[:5])
            #     n += 1 

    # return prob_dict, for_check
    return probs

## utils.py

In [86]:
def get_unlabel_data(n_pool, labeled_idxs, train_dataset):
    unlabeled_idxs = np.arange(n_pool)[~labeled_idxs]
    unlabeled_data = train_dataset.select(indices=unlabeled_idxs)
    return unlabeled_idxs, unlabeled_data

In [87]:
def softmax(x): 
    """Compute softmax values for each sets of scores in x."""
    return np.exp(x) / np.sum(np.exp(x), axis=0)

## Query

In [88]:
def random_sampling_query(labeled_idxs, n):
    return np.random.choice(np.where(labeled_idxs==0)[0], n, replace=False)

In [89]:
def margin_sampling_query(n_pool, labeled_idxs, train_dataset, train_features, examples, model, device, n):
    # deepAL+: unlabeled_idxs, unlabeled_data = self.dataset.get_unlabeled_data()
    unlabeled_idxs, unlabeled_data = get_unlabel_data(n_pool, labeled_idxs, train_dataset)
    unlabeled_features = train_features.select(unlabeled_idxs)
    unlabeled_dataloader = DataLoader(
		unlabeled_data,
		shuffle=True,
		collate_fn=default_data_collator,
		batch_size=8,
	)
    # TODO: print for recording
    print('Margin querying starts!')
    # deepAL+: probs = self.predict_prob(unlabeled_data)
    prob_dict = get_prob(unlabeled_dataloader, device, unlabeled_features, examples)
    # TODO: print for recording
    print('Got probability!')
    # deepAL+: probs_sorted, _ = probs.sort(descending=True)
    # deepAL+: uncertainties = probs_sorted[:, 0] - probs_sorted[:,1]
    uncertainties_dict = {}
    for idx, probs in prob_dict.items():
        if len(probs) > 1: # if prob_dict['probs'] is not 0
            sort_probs = np.sort(probs)[::-1] # This method returns a copy of the array, leaving the original array unchanged.
            uncertainties_dict[idx] = sort_probs[0] - sort_probs[1]
        elif idx:
            uncertainties_dict[idx] = np.array([0])

    # deepAL+: return unlabeled_idxs[uncertainties.sort()[1][:n]] 
    sorted_uncertainties_list = sorted(uncertainties_dict.items(), key=lambda x: x[1], reverse=True)
    
    return unlabeled_idxs[[idx for (idx, uncertainties) in sorted_uncertainties_list[:n]]]

In [90]:
def least_confidence_query(n_pool, labeled_idxs, train_dataset, train_features, examples, device, n):
    # deepAL+: unlabeled_idxs, unlabeled_data = self.dataset.get_unlabeled_data()
    unlabeled_idxs, unlabeled_data = get_unlabel_data(n_pool, labeled_idxs, train_dataset)
    unlabeled_features = train_features.select(unlabeled_idxs)
    unlabeled_dataloader = DataLoader(
		unlabeled_data,
		shuffle=True,
		collate_fn=default_data_collator,
		batch_size=8,
	)
    # TODO: print for recording
    print('LC querying starts!')
    # deepAL+: probs = self.predict_prob(unlabeled_data)
    prob_dict = get_prob(unlabeled_dataloader, device, unlabeled_features, examples)
    # TODO: print for recording
    print('Got probability!')
    # deepAL+: uncertainties = probs.max(1)[0]
    confidence_dict = {}
    for idx, probs in prob_dict.items():
        if len(probs) > 1: # if prob_dict['probs'] is not 0
            confidence_dict[idx] = max(probs)
        elif idx:
            confidence_dict[idx] = np.array([0])

    # deepAL+: return unlabeled_idxs[uncertainties.sort()[1][:n]]
    sorted_confidence_list = sorted(confidence_dict.items, key=lambda x: x[1])
    return unlabeled_idxs[[idx for (idx, confidence) in sorted_confidence_list[:n]]]

In [91]:
def var_ratio_query(n_pool, labeled_idxs, train_dataset, train_features, examples, device, n):
    # deepAL+: unlabeled_idxs, unlabeled_data = self.dataset.get_unlabeled_data()
    unlabeled_idxs, unlabeled_data = get_unlabel_data(n_pool, labeled_idxs, train_dataset)
    unlabeled_features = train_features.select(unlabeled_idxs)
    unlabeled_dataloader = DataLoader(
		unlabeled_data,
		shuffle=True,
		collate_fn=default_data_collator,
		batch_size=8,
	)
    # TODO: print for recording
    print('Var Ratio querying starts!')
    # deepAL+: probs = self.predict_prob(unlabeled_data)
    prob_dict = get_prob(unlabeled_dataloader, device, unlabeled_features, examples)
    # TODO: print for recording
    print('Got probability!')
    # deepAL+: preds = torch.max(probs, 1)[0]
    # deepAL+: uncertainties = 1.0 - preds
    confidence_dict = {}
    for idx, probs in prob_dict.items():
        if len(probs) > 1: # if prob_dict['probs'] is not 0
            confidence_dict[idx] = 1.0 - max(probs)
        elif idx:
            confidence_dict[idx] = np.array([0])

    # deepAL+: return unlabeled_idxs[uncertainties.sort(descending=True)[1][:n]]
    sorted_confidence_list = sorted(confidence_dict.items, key=lambda x: x[1], reverse=True)
    return unlabeled_idxs[[idx for (idx, confidence) in sorted_confidence_list[:n]]]
# comment for the same query as LC

In [92]:
def entropy_query(n_pool, labeled_idxs, train_dataset, train_features, examples, device, n):
    # deepAL+: unlabeled_idxs, unlabeled_data = self.dataset.get_unlabeled_data()
    unlabeled_idxs, unlabeled_data = get_unlabel_data(n_pool, labeled_idxs, train_dataset)
    unlabeled_features = train_features.select(unlabeled_idxs)
    unlabeled_dataloader = DataLoader(
		unlabeled_data,
		shuffle=True,
		collate_fn=default_data_collator,
		batch_size=8,
	)
    # deepAL+: probs = self.predict_prob(unlabeled_data)
    # TODO: print for recording
    print('Entropy querying starts!')
    prob_dict = get_prob(unlabeled_dataloader, device, unlabeled_features, examples)
    # TODO: print for recording
    print('Got probability!')
    # deepAL+: log_probs = torch.log(probs)
    # deepAL+: uncertainties = (probs*log_probs).sum(1)
    entropy_dict = {}
    for idx, probs in prob_dict.items():
        if len(probs) > 1: # if prob_dict['probs'] is not 0
            log_probs = np.log(probs)
            entropy_dict[idx] = (probs*log_probs).sum()
        elif idx:
            entropy_dict[idx] = np.array([0])
    # deepAL+: return unlabeled_idxs[uncertainties.sort()[1][:n]]
    sorted_entropy_list = sorted(entropy_dict.items(), key=lambda x: x[1])
    return unlabeled_idxs[[idx for (idx, entropy) in sorted_entropy_list[:n]]]

In [93]:
def margin_sampling_dropout_query(n_pool, labeled_idxs, train_dataset, train_features, examples, device, n):
    unlabeled_idxs, unlabeled_data = get_unlabel_data(n_pool, labeled_idxs, train_dataset)
    unlabeled_features = train_features.select(unlabeled_idxs)
    unlabeled_dataloader = DataLoader(
		unlabeled_data,
		shuffle=True,
		collate_fn=default_data_collator,
		batch_size=8,
	)
    # TODO: print for recording
    print('Margin dropout querying starts!')
    prob_dict = get_prob_dropout(unlabeled_dataloader, device, unlabeled_features, examples)
    # TODO: print for recording
    print('Got probability!')
    uncertainties_dict = {}
    for idx, probs in prob_dict.items():
        if len(probs) > 1: # if prob_dict['probs'] is not 0
            sort_probs = np.sort(probs)[::-1] # This method returns a copy of the array, leaving the original array unchanged.
            uncertainties_dict[idx] = sort_probs[0] - sort_probs[1]
        elif idx:
            uncertainties_dict[idx] = np.array([0])

    sorted_uncertainties_list = sorted(uncertainties_dict.items(), key=lambda x: x[1], reverse=True)
    return unlabeled_idxs[[idx for (idx, uncertainties) in sorted_uncertainties_list[:n]]]

In [94]:
def mean_std_query(n_pool, labeled_idxs, train_dataset, train_features, examples, device, n):
    # deepAL+: unlabeled_idxs, unlabeled_data = self.dataset.get_unlabeled_data()
    unlabeled_idxs, unlabeled_data = get_unlabel_data(n_pool, labeled_idxs, train_dataset)
    unlabeled_features = train_features.select(unlabeled_idxs)
    unlabeled_dataloader = DataLoader(
  		unlabeled_data,
      shuffle=True,
      collate_fn=default_data_collator,
      batch_size=8,
    )
    # TODO: print for recording
    print('Mean STD querying starts!')
    # deepAL+: probs = self.predict_prob_dropout_split(unlabeled_data, n_drop=self.n_drop).numpy()
    probs = get_prob_dropout_split(unlabeled_dataloader, device, unlabeled_features, examples).numpy()
    # TODO: print for recording
    print('Got probability!')
    # deepAL+: sigma_c = np.std(probs, axis=0)
    sigma_c = np.std(probs, axis=0)
    # deepAL+: uncertainties = torch.from_numpy(np.mean(sigma_c, axis=-1))
    uncertainties = torch.from_numpy(np.mean(sigma_c, axis=-1)) # use tensor.sort() will sort the data and produce sorted indexes
    # deepAL+: return unlabeled_idxs[uncertainties.sort(descending=True)[1][:n]] # [1]: to get sorted data's indexes
    return unlabeled_idxs[uncertainties.sort(descending=True)[1][:n]]

In [95]:
def bayesian_query(n_pool, labeled_idxs, train_dataset, train_features, examples, device, n):
    # deepAL+: unlabeled_idxs, unlabeled_data = self.dataset.get_unlabeled_data()
    unlabeled_idxs, unlabeled_data = get_unlabel_data(n_pool, labeled_idxs, train_dataset)
    unlabeled_features = train_features.select(unlabeled_idxs)
    unlabeled_dataloader = DataLoader(
      unlabeled_data,
      shuffle=True,
      collate_fn=default_data_collator,
      batch_size=8,
    )
    # deepAL+: probs = self.predict_prob_dropout_split(unlabeled_data, n_drop=self.n_drop)
    probs = get_prob_dropout_split(unlabeled_dataloader, device, unlabeled_features, examples)
    # deepAL+: pb = probs.mean(0)
    probs_mean = probs.mean(0)
    # deepAL+: entropy1 = (-pb*torch.log(pb)).sum(1)
    entropy1 = (-probs_mean*torch.log(probs_mean)).sum(1)
    # deepAL+: entropy2 = (-probs*torch.log(probs)).sum(2).mean(0)
    entropy2 = (-probs*torch.log(probs)).sum(2).mean(0)
    # deepAL+: uncertainties = entropy2 - entropy1
    uncertainties = entropy2 - entropy1
    # later on, we can use batch
    # deepAL+: return unlabeled_idxs[uncertainties.sort()[1][:n]]
    return unlabeled_idxs[uncertainties.sort()[1][:n]]

# main.py

### parameters

In [96]:
NUM_QUERY = args_input_batch
NUM_INIT_LB = args_input_initseed
NUM_ROUND = int(args_input_quota / args_input_batch)
DATA_NAME = args_input_dataset_name
STRATEGY_NAME = args_input_ALstrategy

### seed and device

In [97]:
SEED = 4666
# os.environ['TORCH_HOME']='./basicmodel'
os.environ["CUDA_VISIBLE_DEVICES"] = str(3)

# fix random seed
np.random.seed(SEED)
torch.manual_seed(SEED)
# torch.backends.cudnn.enabled  = True
# torch.backends.cudnn.benchmark= True

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

### start experiment

In [98]:
iteration = args_input_iteration
model_batch = args_input_model_batch
num_train_epochs = 3

all_acc = []
acq_time = []

# Change "fp16_training" to True to support automatic mixed precision training (fp16)	
fp16_training = False

if fp16_training:
    !pip install accelerate==0.2.0
    from accelerate import Accelerator
    accelerator = Accelerator(fp16=True)
    device = accelerator.device

In [99]:
# repeate # iteration trials
while (iteration > 0): 
	iteration = iteration - 1

	## data, network, strategy
	model = AutoModelForQuestionAnswering.from_pretrained("bert-base-uncased").to(device)
	optimizer = AdamW(model.parameters(), lr=1e-4)

	start = datetime.datetime.now()

	## generate initial labeled pool
	n_pool = len(train_dataset)
	labeled_idxs = np.zeros(n_pool, dtype=bool)

	tmp_idxs = np.arange(n_pool)
	np.random.shuffle(tmp_idxs)
	labeled_idxs[tmp_idxs[:NUM_INIT_LB]] = True

	run_0_labeled_idxs = np.arange(n_pool)[labeled_idxs]

	## record acc performance 
	acc = np.zeros(NUM_ROUND + 1) # quota/batch runs + run_0

	## load the selected train data to DataLoader
	train_dataloader = DataLoader(
		train_dataset.select(indices=run_0_labeled_idxs),
		shuffle=True,
		collate_fn=default_data_collator,
		batch_size=8,
	)

	eval_dataloader = DataLoader(
		val_dataset, 
		collate_fn=default_data_collator, 
		batch_size=8
	)

	num_update_steps_per_epoch = len(train_dataloader)
	num_training_steps = num_train_epochs * num_update_steps_per_epoch

	lr_scheduler = get_scheduler(
		"linear",
		optimizer=optimizer,
		num_warmup_steps=0,
		num_training_steps=num_training_steps,
	)

	## print info
	print(DATA_NAME)
	print(STRATEGY_NAME)
	
	## round 0 accuracy
	to_train(num_train_epochs, train_dataloader, device, model, optimizer, lr_scheduler)

	acc[0] = get_pred(eval_dataloader, device, val_features, squad['validation'])['f1']

	# print('Round 0\ntesting accuracy {}'.format(acc[0]))
	# print('\n')
	
	# ## round 1 to rd
	# for rd in range(1, NUM_ROUND+1):
	# 	print('Round {}'.format(rd))

	# 	## query
	# 	if STRATEGY_NAME == 'RandomSampling':
	# 		q_idxs = random_sampling_query(labeled_idxs, NUM_QUERY)
	# 	elif STRATEGY_NAME == 'MarginSampling':
	# 		q_idxs = margin_sampling_query(n_pool, labeled_idxs, train_dataset, train_features, squad['train'], device, NUM_QUERY)
	# 	elif STRATEGY_NAME == 'LeastConfidence':
	# 		q_idxs = least_confidence_query(n_pool, labeled_idxs, train_dataset, train_features, squad['train'], device, NUM_QUERY)
	# 	elif STRATEGY_NAME == 'EntropySampling':
	# 		q_idxs = entropy_query(n_pool, labeled_idxs, train_dataset, train_features, squad['train'], device, NUM_QUERY)
	# 	elif STRATEGY_NAME == 'MarginSamplingDropout':
	# 		q_idxs = margin_sampling_dropout_query(n_pool, labeled_idxs, train_dataset, train_features, squad['train'], device, NUM_QUERY)
	# 	# elif STRATEGY_NAME == 'LeastConfidenceDropout':
	# 	# 	q_idxs = least_confidence_dropout_query(n_pool, labeled_idxs, train_dataset, train_features, squad['train'], device, NUM_QUERY)
	# 	# elif STRATEGY_NAME == 'EntropySamplingDropout':
	# 	# 	q_idxs = entropy_dropout_query(n_pool, labeled_idxs, train_dataset, train_features, squad['train'], device, NUM_QUERY)
	# 	# elif STRATEGY_NAME == 'VarRatio':
	# 	# 	q_idxs = var_ratio_query(n_pool, labeled_idxs, train_dataset, train_features, squad['train'], device, NUM_QUERY)
	# 	# elif STRATEGY_NAME == 'KMeansSampling':
	# 	# 	q_idxs = kmeans_query()
	# 	# elif STRATEGY_NAME == 'KCenterGreedy':
	# 	# 	q_idxs = kcenter_query()
	# 	# elif STRATEGY_NAME == 'KCenterGreedyPCA': # not sure
	# 	# 	q_idxs = 
	# 	# elif STRATEGY_NAME == 'BALDDropout':
	# 	# 	q_idxs = bayesian_query()
	# 	# elif STRATEGY_NAME == 'MeanSTD':
	# 	# 	q_idxs = mean_std_query()
	# 	# elif STRATEGY_NAME == 'BadgeSampling':
	# 	# 	q_idxs = badge_query()
	# 	# elif STRATEGY_NAME == 'LossPredictionLoss':
	# 	# 	# different net!
	# 	# 	q_idxs = loss_prediction_query()
	# 	# elif STRATEGY_NAME == 'CEALSampling':
	# 	# 	# why use 'CEALSampling' in STRATEGY_NAME
	# 	# 	q_idxs = ceal_query()
	# 	else:
	# 		raise NotImplementedError

	# 	## update
	# 	labeled_idxs[q_idxs] = True
	# 	run_rd_labeled_idxs = np.arange(n_pool)[labeled_idxs]

	# 	train_dataloader_rd = DataLoader(
	# 		train_dataset.select(indices=run_rd_labeled_idxs),
	# 		shuffle=True,
	# 		collate_fn=default_data_collator,
	# 		batch_size=8,
	# 	)

	# 	num_update_steps_per_epoch_rd = len(train_dataloader_rd)
	# 	num_training_steps_rd = num_train_epochs * num_update_steps_per_epoch_rd

	# 	lr_scheduler_rd = get_scheduler(
	# 		"linear",
	# 		optimizer=optimizer,
	# 		num_warmup_steps=0,
	# 		num_training_steps=num_training_steps_rd,
	# 	)

	# 	model_rd = AutoModelForQuestionAnswering.from_pretrained(model_dir).to(device)
	# 	optimizer_rd = AdamW(model_rd.parameters(), lr=1e-4)

	# 	## train
	# 	to_train(num_train_epochs, train_dataloader_rd, device, model_rd, optimizer_rd, lr_scheduler_rd)

	# 	## round rd accuracy
	# 	acc[rd] = get_pred(eval_dataloader, device, val_features, squad['validation'])['f1']
	# 	print('testing accuracy {}'.format(acc[rd]))
	# 	print('\n')

	# 	torch.cuda.empty_cache()
	
	# ## print results
	# print('SEED {}'.format(SEED))
	# print(STRATEGY_NAME)
	# print(acc)
	# all_acc.append(acc)
	
	# ## save model and record acq time
	# timestamp = re.sub('\.[0-9]*','_',str(datetime.datetime.now())).replace(" ", "_").replace("-", "").replace(":","")
	# final_model_dir = model_dir + '/' + timestamp + DATA_NAME+ '_'  + STRATEGY_NAME + '_' + str(NUM_QUERY) + '_' + str(NUM_INIT_LB) +  '_' + str(args_input.quota)
	# os.makedirs(final_model_dir, exist_ok=True)
	# end = datetime.datetime.now()
	# acq_time.append(round(float((end-start).seconds), 3))

	# final_model = AutoModelForQuestionAnswering.from_pretrained(model_dir).to(device)
	# model_to_save = final_model.module if hasattr(final_model, 'module') else final_model 
	# model_to_save.save_pretrained(final_model_dir)

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForQuestionAnswering: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertForQuestionAnswering from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForQuestionAnswering from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForQuestionAnswering were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['qa_out

SQuAD
MarginSampling
num of train dataset 100


Training: 100%|██████████| 13/13 [00:06<00:00,  1.91it/s]
Training: 100%|██████████| 13/13 [00:06<00:00,  2.13it/s]
Training: 100%|██████████| 13/13 [00:06<00:00,  2.13it/s]


TRAIN done!


Evaluating_pred:   2%|▏         | 2/128 [00:00<00:26,  4.84it/s]

QuestionAnsweringModelOutput(loss=None, start_logits=tensor([[ 1.1609, -0.4776,  1.6267,  ..., -4.1186, -4.1159, -4.1160],
        [ 1.1367, -0.5126,  1.5342,  ..., -4.1293, -4.1263, -4.1259],
        [ 1.1774, -0.7024, -0.8416,  ..., -4.0546, -4.0568, -4.0419],
        ...,
        [ 1.0108, -1.7955, -1.2748,  ..., -4.1953, -4.1852, -4.1870],
        [ 0.7762, -0.9878,  0.0422,  ..., -3.6776, -3.6862, -3.6725],
        [ 0.9520, -1.4369, -1.1066,  ..., -3.8046, -3.7994, -3.7868]],
       device='cuda:0'), end_logits=tensor([[ 1.7034, -0.9657,  1.3177,  ..., -4.1354, -4.1366, -4.1280],
        [ 1.7113, -0.9988,  1.2208,  ..., -4.1465, -4.1472, -4.1379],
        [ 1.8976, -0.7341, -1.0309,  ..., -3.9949, -4.0025, -3.9878],
        ...,
        [ 1.6753, -1.5495, -1.5651,  ..., -4.0103, -4.0158, -4.0124],
        [ 0.6239, -0.9916, -0.3424,  ..., -3.7280, -3.7479, -3.7393],
        [ 1.0370, -1.2897, -1.1411,  ..., -3.5340, -3.5380, -3.5281]],
       device='cuda:0'), hidden_states=None

Evaluating_pred:   3%|▎         | 4/128 [00:00<00:24,  5.10it/s]

QuestionAnsweringModelOutput(loss=None, start_logits=tensor([[ 1.0231, -1.5203, -0.4496,  ..., -3.9979, -3.9958, -3.9958],
        [ 1.1955, -1.3312, -0.0063,  ..., -4.0378, -4.0368, -4.0322],
        [ 1.2068, -1.3337, -0.3685,  ..., -4.0805, -4.0791, -4.0729],
        ...,
        [ 1.0749, -1.5096, -0.5662,  ..., -4.1034, -4.0923, -4.0752],
        [ 0.9253, -0.9292, -0.0823,  ..., -3.8793, -3.8665, -3.8494],
        [ 0.8607, -0.9761, -0.1980,  ..., -3.9374, -3.9218, -3.9050]],
       device='cuda:0'), end_logits=tensor([[ 1.6802, -1.3613, -0.2522,  ..., -3.9792, -3.9796, -3.9764],
        [ 1.6838, -1.1257,  0.4486,  ..., -3.8470, -3.8603, -3.8680],
        [ 1.8472, -1.1415,  0.4749,  ..., -3.9386, -3.9526, -3.9582],
        ...,
        [ 1.7842, -1.2837, -0.2769,  ..., -4.0299, -4.0327, -4.0306],
        [ 1.0252, -1.0504, -0.5092,  ..., -3.9244, -3.9299, -3.9230],
        [ 1.0745, -1.1064, -0.4909,  ..., -3.9866, -3.9871, -3.9786]],
       device='cuda:0'), hidden_states=None

Evaluating_pred:   5%|▍         | 6/128 [00:01<00:23,  5.16it/s]

QuestionAnsweringModelOutput(loss=None, start_logits=tensor([[ 1.7266, -1.8772, -0.5023,  ..., -4.1506, -4.1459, -4.1445],
        [ 1.4136, -1.6023, -0.7316,  ..., -4.1373, -4.1385, -4.1376],
        [ 1.3988, -0.6917, -0.5914,  ..., -4.2308, -4.2282, -4.2279],
        ...,
        [ 1.4239, -0.8260, -0.8474,  ..., -4.1566, -4.1591, -4.1489],
        [ 1.2837, -1.1049, -0.3310,  ..., -4.1206, -4.1278, -4.1231],
        [ 1.7060, -1.1364, -1.1061,  ..., -4.1562, -4.1483, -4.1490]],
       device='cuda:0'), end_logits=tensor([[ 2.2755, -1.7791, -0.4209,  ..., -4.0362, -4.0406, -4.0289],
        [ 2.0662, -1.5494, -0.8040,  ..., -3.9821, -3.9736, -3.9758],
        [ 2.2089, -0.9621, -1.2714,  ..., -4.0796, -4.0827, -4.0803],
        ...,
        [ 2.0647, -1.1147, -1.9558,  ..., -3.9112, -3.9138, -3.9073],
        [ 2.0418, -1.4012, -0.1748,  ..., -3.9716, -3.9710, -3.9645],
        [ 1.9318, -1.5519, -2.1040,  ..., -4.0529, -4.0509, -4.0441]],
       device='cuda:0'), hidden_states=None

Evaluating_pred:   6%|▋         | 8/128 [00:01<00:23,  5.13it/s]

QuestionAnsweringModelOutput(loss=None, start_logits=tensor([[ 1.6303, -0.8491, -0.7187,  ..., -4.1766, -4.1884, -4.1760],
        [ 1.7680, -1.1000, -0.3518,  ..., -4.1310, -4.1281, -4.1290],
        [ 1.7943, -1.0839, -0.8509,  ..., -4.1839, -4.1840, -4.1684],
        ...,
        [ 1.5617, -0.8155, -0.7884,  ..., -4.1966, -4.1924, -4.1801],
        [ 1.8285, -1.5662, -0.3733,  ..., -4.1164, -4.1131, -4.0985],
        [ 1.4637, -1.0419, -0.6663,  ..., -4.1606, -4.1601, -4.1605]],
       device='cuda:0'), end_logits=tensor([[ 2.1600, -1.0019, -1.3164,  ..., -3.9675, -3.9796, -3.9681],
        [ 2.0034, -1.3719, -1.5715,  ..., -4.0766, -4.0783, -4.0688],
        [ 2.2054, -1.1772, -1.4286,  ..., -4.0348, -4.0352, -4.0113],
        ...,
        [ 2.1002, -0.8291, -1.2198,  ..., -3.9735, -3.9776, -3.9600],
        [ 2.3255, -1.1646, -0.2091,  ..., -3.8896, -3.8930, -3.8745],
        [ 2.1302, -1.6372, -1.6805,  ..., -4.1651, -4.1728, -4.1659]],
       device='cuda:0'), hidden_states=None

Evaluating_pred:   8%|▊         | 10/128 [00:01<00:22,  5.18it/s]

QuestionAnsweringModelOutput(loss=None, start_logits=tensor([[ 1.6487, -1.0830,  0.6461,  ..., -4.1415, -4.1371, -4.1241],
        [ 1.4993, -0.9662, -0.8319,  ..., -4.1091, -4.1202, -4.1223],
        [ 1.2881, -1.6852, -0.7713,  ..., -4.1790, -4.1829, -4.1861],
        ...,
        [ 1.3996, -1.0936, -0.4850,  ..., -4.0660, -4.0653, -4.0514],
        [ 1.7497, -1.1370,  0.8823,  ..., -4.0709, -4.0737, -4.0749],
        [ 1.6042, -1.4282, -0.0849,  ..., -4.0584, -4.0628, -4.0709]],
       device='cuda:0'), end_logits=tensor([[ 2.3810, -1.1486,  0.3432,  ..., -4.1016, -4.1000, -4.0824],
        [ 2.1746, -1.6080, -1.8489,  ..., -4.1946, -4.2062, -4.2007],
        [ 1.9815, -1.3949, -0.2817,  ..., -4.2196, -4.2193, -4.2106],
        ...,
        [ 2.1761, -1.4608, -1.1516,  ..., -4.0942, -4.0930, -4.0730],
        [ 2.3780, -0.9381,  0.2982,  ..., -4.1410, -4.1335, -4.1341],
        [ 2.2384, -1.5921, -0.2913,  ..., -4.2854, -4.2795, -4.2742]],
       device='cuda:0'), hidden_states=None

Evaluating_pred:   9%|▉         | 12/128 [00:02<00:22,  5.16it/s]

QuestionAnsweringModelOutput(loss=None, start_logits=tensor([[ 1.4962, -0.4643, -0.1347,  ..., -4.0993, -4.1014, -4.1043],
        [ 1.2020, -1.7314, -1.1630,  ..., -4.1653, -4.1706, -4.1827],
        [ 1.5070, -0.7059, -0.5864,  ..., -4.0994, -4.0934, -4.0971],
        ...,
        [ 1.3163, -0.9256, -0.6211,  ..., -4.1786, -4.1712, -4.1743],
        [ 1.0082, -1.5485, -1.3328,  ..., -4.0973, -4.1062, -4.1154],
        [ 1.5677, -1.2960,  0.5702,  ..., -4.3544, -4.3556, -4.3532]],
       device='cuda:0'), end_logits=tensor([[ 2.2065e+00, -9.2638e-01, -3.6525e-03,  ..., -4.2450e+00,
         -4.2454e+00, -4.2366e+00],
        [ 1.9428e+00, -1.4436e+00, -1.8789e+00,  ..., -4.2588e+00,
         -4.2584e+00, -4.2652e+00],
        [ 2.1868e+00, -1.1614e+00, -4.5970e-01,  ..., -4.2653e+00,
         -4.2617e+00, -4.2578e+00],
        ...,
        [ 1.9997e+00, -1.2173e+00, -1.6626e+00,  ..., -4.3071e+00,
         -4.3067e+00, -4.3005e+00],
        [ 1.6474e+00, -1.3205e+00, -1.8838e+00,  ...

Evaluating_pred:  11%|█         | 14/128 [00:02<00:21,  5.22it/s]

QuestionAnsweringModelOutput(loss=None, start_logits=tensor([[ 1.2919, -1.1101, -0.3566,  ..., -4.2432, -4.2394, -4.2379],
        [ 1.1406, -1.5099, -0.3955,  ..., -4.1586, -4.1615, -4.1624],
        [ 1.4091, -0.4999, -0.3363,  ..., -4.1647, -4.1547, -4.1583],
        ...,
        [ 1.6274, -0.7811,  3.1185,  ..., -4.2166, -4.2194, -4.2164],
        [ 1.0411, -1.3275, -1.3034,  ..., -3.8894, -3.8742, -3.9011],
        [ 1.5774, -1.3763, -0.9362,  ..., -4.3157, -4.3126, -4.2949]],
       device='cuda:0'), end_logits=tensor([[ 1.9404, -1.3265, -0.1757,  ..., -4.2671, -4.2546, -4.2445],
        [ 2.0535, -1.5340, -1.3486,  ..., -4.0840, -4.0774, -4.0730],
        [ 2.1691, -0.9940, -0.7171,  ..., -4.3046, -4.3018, -4.2991],
        ...,
        [ 2.3277, -0.9826,  0.2672,  ..., -4.2773, -4.2839, -4.2662],
        [ 2.0460, -1.0658, -1.6222,  ..., -3.8633, -3.8556, -3.8720],
        [ 2.1424, -1.6146, -0.9037,  ..., -4.2493, -4.2501, -4.2226]],
       device='cuda:0'), hidden_states=None

Evaluating_pred:  12%|█▎        | 16/128 [00:03<00:21,  5.20it/s]

QuestionAnsweringModelOutput(loss=None, start_logits=tensor([[ 1.3614, -0.1999, -1.0522,  ..., -4.0717, -4.0609, -4.0183],
        [ 1.2737, -0.6779, -0.9824,  ..., -4.1328, -4.1523, -4.1435],
        [ 1.5482,  0.0785,  0.9388,  ..., -3.9608, -3.9376, -3.9562],
        ...,
        [ 1.4623, -0.5376,  2.4518,  ..., -4.1569, -4.1604, -4.1535],
        [ 1.4810, -0.6274,  2.2687,  ..., -4.1539, -4.1569, -4.1515],
        [ 1.4387, -0.5524,  2.1713,  ..., -4.1589, -4.1616, -4.1537]],
       device='cuda:0'), end_logits=tensor([[ 1.9921, -0.8688, -1.2143,  ..., -4.0169, -4.0238, -3.9939],
        [ 1.8610, -1.0423, -1.2407,  ..., -3.9206, -3.9461, -3.9532],
        [ 2.3372, -0.7795, -0.6305,  ..., -3.7509, -3.7236, -3.7302],
        ...,
        [ 1.9836, -1.0393,  1.2105,  ..., -4.1087, -4.1088, -4.1055],
        [ 2.0119, -1.1079,  1.0848,  ..., -4.0851, -4.0848, -4.0829],
        [ 2.0263, -1.0473,  1.1317,  ..., -4.0898, -4.0891, -4.0850]],
       device='cuda:0'), hidden_states=None

Evaluating_pred:  14%|█▍        | 18/128 [00:03<00:21,  5.21it/s]

QuestionAnsweringModelOutput(loss=None, start_logits=tensor([[ 1.5995, -1.5522, -1.3766,  ..., -4.0311, -4.0293, -4.0530],
        [ 1.1124, -1.2595, -0.7269,  ..., -3.9655, -3.9680, -3.9617],
        [ 1.6006, -1.5122, -1.1031,  ..., -4.0778, -4.0852, -4.0981],
        ...,
        [ 1.8014, -1.2927, -1.2510,  ..., -4.0532, -4.0404, -4.0460],
        [ 1.7704, -0.7977, -0.7705,  ..., -4.1782, -4.1839, -4.1753],
        [ 1.1408, -1.9427, -0.5063,  ..., -4.1851, -4.1986, -4.1855]],
       device='cuda:0'), end_logits=tensor([[ 1.7549, -1.3942, -1.0682,  ..., -3.9228, -3.9147, -3.9292],
        [ 1.4539, -1.2743, -1.2260,  ..., -3.7196, -3.7294, -3.7269],
        [ 1.8016, -1.2978, -0.7349,  ..., -3.8569, -3.8519, -3.8532],
        ...,
        [ 2.4006, -1.1332, -0.9537,  ..., -3.8146, -3.7948, -3.7935],
        [ 2.5735, -0.9102, -1.6473,  ..., -4.2932, -4.2921, -4.2743],
        [ 2.1842, -1.9559, -0.5494,  ..., -4.1769, -4.1853, -4.1636]],
       device='cuda:0'), hidden_states=None

Evaluating_pred:  16%|█▌        | 20/128 [00:03<00:20,  5.20it/s]

QuestionAnsweringModelOutput(loss=None, start_logits=tensor([[ 1.4296, -1.8114, -0.3802,  ..., -4.1302, -4.1278, -4.1230],
        [ 1.5198, -1.5716, -0.8907,  ..., -4.2196, -4.2341, -4.2402],
        [ 1.9270, -0.2270,  2.2522,  ..., -4.2259, -4.2134, -4.2296],
        ...,
        [ 1.1170, -1.3164, -0.3937,  ..., -4.2823, -4.2862, -4.3011],
        [ 1.9032, -1.6150, -1.0903,  ..., -4.1612, -4.1616, -4.1621],
        [ 1.5300, -1.6810, -0.3682,  ..., -4.1817, -4.1841, -4.1695]],
       device='cuda:0'), end_logits=tensor([[ 2.3615, -1.9080, -0.6766,  ..., -4.2235, -4.2056, -4.1869],
        [ 2.5133, -1.4354, -1.3495,  ..., -4.0949, -4.0882, -4.0858],
        [ 2.8174, -1.1434,  1.9011,  ..., -4.2011, -4.1767, -4.2006],
        ...,
        [ 2.2056, -2.2327, -1.0775,  ..., -4.1858, -4.1683, -4.1654],
        [ 2.9263, -1.4617, -0.6104,  ..., -4.2222, -4.2098, -4.2088],
        [ 2.4884, -1.5535, -0.3899,  ..., -4.0926, -4.0897, -4.0831]],
       device='cuda:0'), hidden_states=None

Evaluating_pred:  17%|█▋        | 22/128 [00:04<00:20,  5.21it/s]

QuestionAnsweringModelOutput(loss=None, start_logits=tensor([[ 1.5692, -1.0526, -1.0102,  ..., -4.1850, -4.1831, -4.1845],
        [ 1.3799, -1.3659, -1.8699,  ..., -4.2320, -4.2244, -4.2246],
        [ 1.2649, -1.0396,  0.1955,  ..., -4.1887, -4.1876, -4.1942],
        ...,
        [ 1.3508, -1.7742, -0.3741,  ..., -4.2339, -4.2305, -4.2245],
        [ 1.2314, -0.3900, -0.7396,  ..., -4.1184, -4.1374, -4.1372],
        [ 1.6022, -1.1433,  0.1645,  ..., -4.1704, -4.1486, -4.1322]],
       device='cuda:0'), end_logits=tensor([[ 2.3874, -1.0781, -1.4537,  ..., -4.2305, -4.2319, -4.2225],
        [ 2.1133, -1.6180, -1.8994,  ..., -4.2505, -4.2246, -4.2343],
        [ 1.9182, -1.0007,  0.2057,  ..., -4.1222, -4.1140, -4.1114],
        ...,
        [ 2.1348, -1.6839, -0.3760,  ..., -4.1295, -4.1308, -4.1220],
        [ 1.8342, -0.8182, -1.3654,  ..., -4.2387, -4.2417, -4.2415],
        [ 2.8957, -1.0961,  0.1767,  ..., -4.0884, -4.0849, -4.0722]],
       device='cuda:0'), hidden_states=None

Evaluating_pred:  19%|█▉        | 24/128 [00:04<00:20,  5.19it/s]

QuestionAnsweringModelOutput(loss=None, start_logits=tensor([[ 1.1778, -1.1740,  0.0815,  ..., -4.1650, -4.1579, -4.1428],
        [ 1.2940, -0.3831, -0.6765,  ..., -4.1890, -4.2009, -4.1985],
        [ 1.5316, -1.2674,  0.1148,  ..., -4.1893, -4.1957, -4.1903],
        ...,
        [ 1.2210, -0.8108, -0.4361,  ..., -4.1553, -4.1621, -4.1615],
        [ 1.6387, -1.4915, -0.6217,  ..., -4.1644, -4.1566, -4.1522],
        [ 1.8046, -1.0529, -0.2042,  ..., -4.1924, -4.2044, -4.1987]],
       device='cuda:0'), end_logits=tensor([[ 1.8519, -1.0480,  0.2431,  ..., -4.0431, -4.0336, -4.0358],
        [ 2.4482, -0.7123, -1.5996,  ..., -4.1431, -4.1510, -4.1493],
        [ 2.2700, -1.4094, -0.8621,  ..., -4.2122, -4.2203, -4.2184],
        ...,
        [ 1.8407, -1.4943, -0.7337,  ..., -4.3022, -4.3044, -4.2984],
        [ 2.2175, -1.2128, -1.7597,  ..., -4.2006, -4.1881, -4.1833],
        [ 2.1918, -1.4536, -1.7719,  ..., -4.2389, -4.2395, -4.2334]],
       device='cuda:0'), hidden_states=None

Evaluating_pred:  20%|██        | 26/128 [00:05<00:19,  5.22it/s]

QuestionAnsweringModelOutput(loss=None, start_logits=tensor([[ 1.1897, -1.1879, -0.4228,  ..., -4.1612, -4.1584, -4.1419],
        [ 1.7119, -1.2889,  0.0839,  ..., -4.1939, -4.2006, -4.1854],
        [ 1.7109, -0.9949, -0.5886,  ..., -4.1368, -4.1556, -4.1535],
        ...,
        [ 1.5272, -0.6261,  2.1876,  ..., -4.1227, -4.1237, -4.1194],
        [ 1.4105, -1.1620, -0.2141,  ..., -4.0425, -4.0368, -4.0420],
        [ 1.4133, -1.3065, -1.8535,  ..., -4.1825, -4.1832, -4.1839]],
       device='cuda:0'), end_logits=tensor([[ 1.5388, -1.5686, -0.4109,  ..., -4.1941, -4.1692, -4.1550],
        [ 2.3230, -1.2675,  0.4672,  ..., -4.2354, -4.2323, -4.1986],
        [ 2.2073, -1.3695, -0.7190,  ..., -4.2566, -4.2622, -4.2594],
        ...,
        [ 2.0598, -1.2842, -0.6268,  ..., -4.1979, -4.1791, -4.1814],
        [ 2.1052, -1.5152, -0.4903,  ..., -3.9722, -3.9535, -3.9691],
        [ 2.0134, -1.8772, -1.7461,  ..., -4.1640, -4.1573, -4.1539]],
       device='cuda:0'), hidden_states=None

Evaluating_pred:  22%|██▏       | 28/128 [00:05<00:19,  5.22it/s]

QuestionAnsweringModelOutput(loss=None, start_logits=tensor([[ 1.7168, -0.8345, -0.0140,  ..., -4.0197, -4.0195, -4.0207],
        [ 1.3285, -0.1284,  0.7570,  ..., -4.1650, -4.1689, -4.1649],
        [ 1.5857, -0.6556, -1.0598,  ..., -4.1378, -4.1381, -4.1377],
        ...,
        [ 1.5963, -0.8077, -1.1754,  ..., -4.0152, -4.0125, -4.0123],
        [ 1.5381, -1.4339, -0.6605,  ..., -4.1220, -4.1235, -4.1287],
        [ 1.6004, -1.3885,  0.2965,  ..., -4.0632, -4.0616, -4.0553]],
       device='cuda:0'), end_logits=tensor([[ 1.9945, -1.0755, -0.5604,  ..., -4.1401, -4.1443, -4.1195],
        [ 1.9657, -0.9521,  0.4462,  ..., -4.1274, -4.1454, -4.1350],
        [ 2.0867, -0.9917, -1.6948,  ..., -4.1877, -4.1850, -4.1897],
        ...,
        [ 2.2217, -0.9862, -1.7127,  ..., -4.0503, -4.0368, -4.0438],
        [ 1.9500, -1.7326, -0.8252,  ..., -4.1908, -4.1938, -4.1817],
        [ 2.1086, -1.3214, -0.0483,  ..., -3.9924, -3.9720, -3.9674]],
       device='cuda:0'), hidden_states=None

Evaluating_pred:  23%|██▎       | 30/128 [00:05<00:18,  5.22it/s]

QuestionAnsweringModelOutput(loss=None, start_logits=tensor([[ 1.1133, -1.6400, -1.1718,  ..., -4.1720, -4.1700, -4.1725],
        [ 1.8811, -0.8392,  1.2173,  ..., -4.1058, -4.1052, -4.1087],
        [ 1.7447, -1.4300,  0.1278,  ..., -4.0832, -4.0877, -4.0966],
        ...,
        [ 1.9695, -1.4552,  0.3936,  ..., -4.0265, -4.0371, -4.0282],
        [ 1.6800, -1.3809,  0.3483,  ..., -4.0371, -4.0394, -4.0406],
        [ 2.1378, -1.9525, -0.3382,  ..., -4.1435, -4.1393, -4.1393]],
       device='cuda:0'), end_logits=tensor([[ 1.6092, -1.4799, -0.4147,  ..., -4.1034, -4.1045, -4.0920],
        [ 2.5813, -1.3836,  1.2491,  ..., -4.3030, -4.3051, -4.3082],
        [ 2.4664, -1.4288, -0.3360,  ..., -4.1338, -4.1334, -4.1313],
        ...,
        [ 2.4170, -1.1295,  0.7603,  ..., -3.9741, -4.0034, -3.9732],
        [ 2.2895, -1.1642,  0.7636,  ..., -4.0500, -4.0532, -4.0490],
        [ 2.5501, -1.5451,  0.0609,  ..., -4.2257, -4.2233, -4.2121]],
       device='cuda:0'), hidden_states=None

Evaluating_pred:  25%|██▌       | 32/128 [00:06<00:18,  5.22it/s]

QuestionAnsweringModelOutput(loss=None, start_logits=tensor([[ 2.0641, -1.0594, -0.1537,  ..., -4.0849, -4.0746, -4.0807],
        [ 1.9098, -1.4800,  0.5245,  ..., -4.1130, -4.1151, -4.1023],
        [ 1.7483, -1.3830, -1.0756,  ..., -4.1087, -4.1095, -4.1115],
        ...,
        [ 2.2157, -0.8330, -0.1030,  ..., -3.8584, -3.8733, -3.8750],
        [ 1.8378, -1.5299,  0.5292,  ..., -4.1065, -4.1184, -4.1016],
        [ 1.8211, -0.9520, -0.2097,  ..., -4.0939, -4.0973, -4.1040]],
       device='cuda:0'), end_logits=tensor([[ 2.5475, -1.5988, -1.0169,  ..., -4.1482, -4.1473, -4.1473],
        [ 2.4373, -1.1607,  0.5454,  ..., -4.0128, -4.0163, -3.9805],
        [ 2.5039, -1.7746, -1.6186,  ..., -4.2387, -4.2430, -4.2289],
        ...,
        [ 2.5089, -1.1057, -0.6349,  ..., -3.9221, -3.9375, -3.9231],
        [ 2.4301, -1.3017,  0.1058,  ..., -4.0893, -4.0868, -4.0517],
        [ 2.5876, -1.4308, -0.9592,  ..., -4.2877, -4.2852, -4.2770]],
       device='cuda:0'), hidden_states=None

Evaluating_pred:  27%|██▋       | 34/128 [00:06<00:18,  5.21it/s]

QuestionAnsweringModelOutput(loss=None, start_logits=tensor([[ 1.7609, -0.8100, -1.1326,  ..., -2.0984, -2.1781, -2.4248],
        [ 2.0117, -0.7147, -1.0677,  ..., -3.9519, -3.9449, -3.9622],
        [ 1.7187, -0.9298, -1.1712,  ..., -1.4685, -2.0099, -2.4220],
        ...,
        [ 2.1638, -1.5061,  0.1779,  ..., -3.9475, -3.9390, -3.9481],
        [ 1.7093, -1.0210, -1.3096,  ..., -1.5167, -1.9521, -2.4503],
        [ 1.6648, -0.8822, -1.3679,  ..., -4.0719, -4.0719, -4.0692]],
       device='cuda:0'), end_logits=tensor([[ 2.4165, -0.7701, -1.3990,  ..., -2.8130, -2.3094, -1.1129],
        [ 2.0303, -1.0575, -1.2511,  ..., -4.3711, -4.3716, -4.3688],
        [ 2.3611, -1.3593, -1.2745,  ..., -1.6362, -1.3206, -1.1137],
        ...,
        [ 2.1860, -1.6009, -0.1120,  ..., -4.1928, -4.1939, -4.1898],
        [ 2.5249, -0.9130, -0.8207,  ..., -2.7954, -1.8323, -1.1234],
        [ 1.9578, -0.7309, -0.8700,  ..., -3.8011, -3.8065, -3.8188]],
       device='cuda:0'), hidden_states=None

Evaluating_pred:  28%|██▊       | 36/128 [00:06<00:17,  5.19it/s]

QuestionAnsweringModelOutput(loss=None, start_logits=tensor([[ 1.8520, -0.7955, -0.4152,  ..., -1.8910, -2.2833, -2.4343],
        [ 2.3137, -0.6119, -0.0755,  ..., -4.0145, -4.0227, -4.0152],
        [ 1.9845, -1.5073, -0.5093,  ..., -1.9832, -2.5798, -2.4305],
        ...,
        [ 2.2322, -1.2440,  0.9544,  ..., -3.6648, -3.6757, -3.6813],
        [ 1.8752, -1.4367,  0.0060,  ..., -2.1795, -2.1850, -2.4303],
        [ 2.0687, -1.4376,  0.0888,  ..., -3.9312, -3.9292, -3.9411]],
       device='cuda:0'), end_logits=tensor([[ 2.4025, -1.3642, -1.7829,  ..., -2.1099, -2.3345, -1.1189],
        [ 2.4223, -1.3913, -1.5878,  ..., -4.1351, -4.1416, -4.1351],
        [ 2.7706, -1.6597, -0.5036,  ..., -2.3419, -2.4007, -1.1091],
        ...,
        [ 2.3549, -1.7322, -0.1529,  ..., -4.1945, -4.1888, -4.1760],
        [ 2.4860, -1.1465,  0.4550,  ..., -2.0759, -2.5366, -1.1189],
        [ 2.0265, -1.3012,  0.5729,  ..., -4.1799, -4.1797, -4.1791]],
       device='cuda:0'), hidden_states=None

Evaluating_pred:  30%|██▉       | 38/128 [00:07<00:17,  5.20it/s]

QuestionAnsweringModelOutput(loss=None, start_logits=tensor([[ 1.8807, -1.1132, -0.2810,  ..., -2.2578, -2.4762, -2.4344],
        [ 1.4902, -0.7049,  0.0489,  ..., -3.8772, -3.8835, -3.9011],
        [ 2.0180, -1.4383, -1.2212,  ..., -4.2083, -4.2095, -4.1999],
        ...,
        [ 2.3141, -0.5851, -0.3490,  ..., -4.0383, -4.0466, -4.0397],
        [ 1.9872, -1.4435,  0.5374,  ..., -4.1740, -4.1890, -4.1849],
        [ 2.1449, -1.8649, -1.2615,  ..., -4.2097, -4.2121, -4.2232]],
       device='cuda:0'), end_logits=tensor([[ 2.4933, -1.4270, -1.5095,  ..., -2.1604, -2.3920, -1.1108],
        [ 1.8358, -1.4509, -1.2741,  ..., -4.3891, -4.4095, -4.3958],
        [ 2.6763, -0.9798, -1.5520,  ..., -4.1311, -4.1309, -4.1135],
        ...,
        [ 2.4750, -1.4847, -1.4998,  ..., -4.1199, -4.1258, -4.1178],
        [ 2.6556, -1.2101,  0.6912,  ..., -4.1448, -4.1613, -4.1501],
        [ 2.7018, -1.3628, -1.8061,  ..., -4.0847, -4.0914, -4.1005]],
       device='cuda:0'), hidden_states=None

Evaluating_pred:  31%|███▏      | 40/128 [00:07<00:16,  5.21it/s]

QuestionAnsweringModelOutput(loss=None, start_logits=tensor([[ 1.8277, -1.4909, -1.2279,  ..., -4.2322, -4.2320, -4.2264],
        [ 1.9839, -1.4137,  0.6404,  ..., -4.1170, -4.1415, -4.1460],
        [ 2.3513, -1.1255,  1.9415,  ..., -4.1643, -4.1561, -4.1485],
        ...,
        [ 1.8150, -1.2150,  0.3476,  ..., -4.1013, -4.0977, -4.0959],
        [ 1.6908, -1.3520,  0.1689,  ..., -4.0755, -4.0710, -4.0725],
        [ 1.7978, -1.0448, -0.9700,  ..., -4.0883, -4.0886, -4.0833]],
       device='cuda:0'), end_logits=tensor([[ 2.4914, -1.0973, -1.6530,  ..., -4.1445, -4.1391, -4.1162],
        [ 2.5614, -1.1544,  0.6924,  ..., -3.9614, -3.9992, -4.0119],
        [ 2.4683, -1.5810,  0.5538,  ..., -4.0879, -4.0764, -4.0680],
        ...,
        [ 2.4178, -1.0595,  0.1621,  ..., -4.1131, -4.1285, -4.1103],
        [ 2.3655, -1.2756,  0.0215,  ..., -4.0094, -4.0240, -4.0263],
        [ 2.1179, -1.3149, -1.8520,  ..., -4.1424, -4.1447, -4.1375]],
       device='cuda:0'), hidden_states=None

Evaluating_pred:  33%|███▎      | 42/128 [00:08<00:16,  5.20it/s]

QuestionAnsweringModelOutput(loss=None, start_logits=tensor([[ 1.8783, -0.9645,  0.3288,  ..., -4.1027, -4.1034, -4.1057],
        [ 2.0377, -1.1598,  1.9832,  ..., -4.1724, -4.1674, -4.1588],
        [ 1.9221, -1.2370,  0.2280,  ..., -4.1774, -4.1676, -4.1684],
        ...,
        [ 1.7655, -0.9533,  0.5277,  ..., -4.1483, -4.1484, -4.1265],
        [ 1.8396, -1.3461, -0.5203,  ..., -4.0770, -4.0793, -4.0815],
        [ 1.6664, -1.1601, -0.6666,  ..., -4.1603, -4.1551, -4.1627]],
       device='cuda:0'), end_logits=tensor([[ 2.2984, -1.1458, -0.2422,  ..., -4.1696, -4.1683, -4.1647],
        [ 2.2469, -1.5249,  0.5623,  ..., -4.0900, -4.0757, -4.0657],
        [ 2.3657, -1.6714, -1.4283,  ..., -4.1373, -4.1430, -4.1327],
        ...,
        [ 2.4822, -0.9538,  0.3931,  ..., -4.1409, -4.1439, -4.1298],
        [ 2.4446, -1.3507, -0.4217,  ..., -4.1201, -4.1160, -4.1157],
        [ 2.4193, -0.8161, -1.4035,  ..., -4.1303, -4.1255, -4.1205]],
       device='cuda:0'), hidden_states=None

Evaluating_pred:  34%|███▍      | 44/128 [00:08<00:16,  5.21it/s]

QuestionAnsweringModelOutput(loss=None, start_logits=tensor([[ 1.4953, -1.0175, -1.1302,  ..., -4.2304, -4.2286, -4.2252],
        [ 1.7440, -1.0860,  0.4114,  ..., -4.1602, -4.1661, -4.1691],
        [ 1.4475, -1.0603, -0.6026,  ..., -4.1422, -4.1330, -4.1168],
        ...,
        [ 1.5311, -1.3873, -0.8687,  ..., -4.1400, -4.1405, -4.1420],
        [ 1.5699, -0.9175,  0.1225,  ..., -4.2180, -4.2161, -4.2151],
        [ 1.5854, -1.5726, -0.5617,  ..., -4.1894, -4.1891, -4.1988]],
       device='cuda:0'), end_logits=tensor([[ 2.1788, -1.0945, -1.4629,  ..., -4.1611, -4.1561, -4.1485],
        [ 2.4448, -0.8401,  0.2717,  ..., -4.0961, -4.0990, -4.1029],
        [ 1.9554, -1.0549, -1.2792,  ..., -4.1559, -4.1566, -4.1298],
        ...,
        [ 2.0664, -1.0158, -1.7491,  ..., -4.0403, -4.0439, -4.0197],
        [ 2.2198, -0.8899, -0.1627,  ..., -4.2071, -4.2089, -4.1945],
        [ 2.2145, -1.3796, -0.4891,  ..., -4.2118, -4.2059, -4.1940]],
       device='cuda:0'), hidden_states=None

Evaluating_pred:  36%|███▌      | 46/128 [00:08<00:15,  5.17it/s]

QuestionAnsweringModelOutput(loss=None, start_logits=tensor([[ 1.5303, -0.9728, -0.9727,  ..., -4.1282, -4.1263, -4.1235],
        [ 1.4771, -0.9675,  0.4157,  ..., -4.1610, -4.1589, -4.1755],
        [ 1.2172, -1.2018,  1.6884,  ..., -4.2325, -4.2371, -4.2519],
        ...,
        [ 1.3754, -1.2390,  0.2586,  ..., -4.0197, -4.0224, -4.0272],
        [ 1.6453, -1.3213, -0.1600,  ..., -4.1293, -4.1352, -4.1346],
        [ 1.3264, -1.5091, -0.0790,  ..., -4.2050, -4.2066, -4.2180]],
       device='cuda:0'), end_logits=tensor([[ 2.1718, -0.9914, -1.0597,  ..., -4.1117, -4.1123, -4.1001],
        [ 1.9545, -0.9269, -0.1432,  ..., -4.1839, -4.1803, -4.1969],
        [ 1.9541, -1.4744,  0.5105,  ..., -4.1687, -4.1644, -4.1627],
        ...,
        [ 2.1153, -1.2989, -0.1248,  ..., -4.0883, -4.0842, -4.0749],
        [ 2.1859, -1.5465, -0.0510,  ..., -4.2383, -4.2464, -4.2487],
        [ 2.2073, -1.3255, -0.5242,  ..., -4.2163, -4.2092, -4.2078]],
       device='cuda:0'), hidden_states=None

Evaluating_pred:  38%|███▊      | 48/128 [00:09<00:15,  5.15it/s]

QuestionAnsweringModelOutput(loss=None, start_logits=tensor([[ 1.3671, -0.9996,  0.9102,  ..., -4.2122, -4.2015, -4.1961],
        [ 1.0258, -1.8504, -0.6236,  ..., -4.2121, -4.2149, -4.2220],
        [ 1.3323, -0.9619, -0.6698,  ..., -4.2052, -4.2069, -4.2084],
        ...,
        [ 1.4629, -1.0610,  0.7860,  ..., -4.2270, -4.2203, -4.2221],
        [ 1.1829, -0.8814, -1.0001,  ..., -4.1043, -4.1089, -4.1046],
        [ 1.3278, -1.4843, -0.1984,  ..., -4.1439, -4.1439, -4.1406]],
       device='cuda:0'), end_logits=tensor([[ 1.7480, -1.4232, -0.2322,  ..., -4.2092, -4.2216, -4.2186],
        [ 1.5065, -1.7584, -0.6946,  ..., -4.2237, -4.2276, -4.2205],
        [ 1.7961, -1.3880, -1.7440,  ..., -4.2323, -4.2374, -4.2378],
        ...,
        [ 1.8048, -1.4899, -0.3345,  ..., -4.1715, -4.1658, -4.1667],
        [ 1.4476, -1.9978, -1.1987,  ..., -4.2523, -4.2761, -4.2767],
        [ 1.9901, -1.5226, -0.3601,  ..., -4.1553, -4.1494, -4.1530]],
       device='cuda:0'), hidden_states=None

Evaluating_pred:  39%|███▉      | 50/128 [00:09<00:14,  5.20it/s]

QuestionAnsweringModelOutput(loss=None, start_logits=tensor([[ 2.0117, -0.8801, -0.7606,  ..., -4.1589, -4.1592, -4.1613],
        [ 1.9928, -1.1139,  0.6935,  ..., -4.1529, -4.1507, -4.1336],
        [ 1.9872, -1.4166, -0.3024,  ..., -4.1721, -4.1701, -4.1707],
        ...,
        [ 2.2130, -0.3144,  2.2579,  ..., -4.1684, -4.1733, -4.1758],
        [ 1.9928, -1.1139,  0.6935,  ..., -4.1529, -4.1507, -4.1336],
        [ 1.8687, -1.3722, -0.5064,  ..., -4.2404, -4.2428, -4.2246]],
       device='cuda:0'), end_logits=tensor([[ 2.6327, -1.3157, -2.1004,  ..., -4.2557, -4.2632, -4.2668],
        [ 2.8365, -1.6346, -0.2861,  ..., -4.0854, -4.0911, -4.0723],
        [ 2.7642, -1.5984, -1.1556,  ..., -4.1691, -4.1682, -4.1738],
        ...,
        [ 2.8661, -1.3519,  2.2670,  ..., -4.0824, -4.1234, -4.1229],
        [ 2.8365, -1.6346, -0.2861,  ..., -4.0854, -4.0911, -4.0723],
        [ 2.8335, -1.4021, -0.2050,  ..., -4.1485, -4.1575, -4.1388]],
       device='cuda:0'), hidden_states=None

Evaluating_pred:  41%|████      | 52/128 [00:10<00:14,  5.23it/s]

QuestionAnsweringModelOutput(loss=None, start_logits=tensor([[ 1.1292, -1.3466, -0.6287,  ..., -4.1996, -4.1994, -4.1971],
        [ 1.0901, -0.8313, -0.1917,  ..., -4.1067, -4.0993, -4.1036],
        [ 1.1496, -1.2843, -0.6313,  ..., -4.1172, -4.1178, -4.1274],
        ...,
        [ 1.5184, -0.8280, -0.2370,  ..., -4.1784, -4.1788, -4.1820],
        [ 1.0655, -0.9441,  1.9045,  ..., -4.1504, -4.1457, -4.1478],
        [ 1.4188, -0.7761,  0.1558,  ..., -4.1424, -4.1496, -4.1390]],
       device='cuda:0'), end_logits=tensor([[ 1.9021, -1.2912, -0.3119,  ..., -4.1439, -4.1404, -4.1396],
        [ 2.0171, -1.0175, -1.5276,  ..., -4.0224, -4.0109, -4.0166],
        [ 2.1376, -1.5315, -0.8414,  ..., -4.1613, -4.1537, -4.1517],
        ...,
        [ 2.1546, -1.1249, -1.3741,  ..., -4.0722, -4.0769, -4.0725],
        [ 1.7831, -1.1261,  0.3744,  ..., -4.1295, -4.1190, -4.1221],
        [ 2.0663, -1.0998,  0.0659,  ..., -4.2281, -4.2325, -4.2260]],
       device='cuda:0'), hidden_states=None

Evaluating_pred:  42%|████▏     | 54/128 [00:10<00:14,  5.21it/s]

QuestionAnsweringModelOutput(loss=None, start_logits=tensor([[ 1.3576, -1.4040, -1.0220,  ..., -4.0357, -4.0646, -4.0618],
        [ 1.1867, -1.7416, -1.5148,  ..., -4.1658, -4.1821, -4.1795],
        [ 1.5308, -1.5108, -1.2360,  ..., -4.1334, -4.1579, -4.1652],
        ...,
        [ 1.7209, -1.3268, -0.9471,  ..., -4.1086, -4.1169, -4.1114],
        [ 2.1294, -1.1868, -0.9593,  ..., -4.2106, -4.2115, -4.2027],
        [ 1.7708, -1.0951, -0.6116,  ..., -4.2002, -4.1793, -4.1725]],
       device='cuda:0'), end_logits=tensor([[ 2.2880, -1.3429, -1.4266,  ..., -4.2599, -4.2765, -4.2719],
        [ 1.9414, -1.6990, -1.5408,  ..., -4.1735, -4.1801, -4.1654],
        [ 2.6826, -1.3900, -1.9494,  ..., -4.2502, -4.2668, -4.2620],
        ...,
        [ 2.5268, -1.5316, -2.0429,  ..., -4.2247, -4.2294, -4.2189],
        [ 2.8717, -0.8871, -1.7807,  ..., -4.1695, -4.1560, -4.1430],
        [ 2.7117, -1.4421, -0.9500,  ..., -4.1741, -4.1613, -4.1471]],
       device='cuda:0'), hidden_states=None

Evaluating_pred:  44%|████▍     | 56/128 [00:10<00:13,  5.19it/s]

QuestionAnsweringModelOutput(loss=None, start_logits=tensor([[ 1.6982, -1.3068, -0.8129,  ..., -4.2092, -4.2111, -4.2068],
        [ 1.5524, -1.5337, -1.1784,  ..., -4.2520, -4.2504, -4.2449],
        [ 1.6102, -1.5875, -0.3871,  ..., -4.2641, -4.2704, -4.2627],
        ...,
        [ 1.6851, -0.4734, -1.0122,  ..., -4.3222, -4.3068, -4.3058],
        [ 1.6539, -0.7298, -1.4216,  ..., -4.2836, -4.2736, -4.2706],
        [ 1.7349, -0.8410, -1.3816,  ..., -4.2912, -4.2799, -4.2764]],
       device='cuda:0'), end_logits=tensor([[ 2.5842, -1.4734, -1.5230,  ..., -4.2931, -4.2849, -4.2669],
        [ 2.4642, -1.2794, -1.6816,  ..., -4.2263, -4.2088, -4.1978],
        [ 2.5106, -1.5324, -0.9651,  ..., -4.1965, -4.2141, -4.2057],
        ...,
        [ 2.2721, -0.7100, -1.1358,  ..., -4.3109, -4.2852, -4.2889],
        [ 2.1904, -0.7944, -1.2246,  ..., -4.1604, -4.1419, -4.1478],
        [ 2.2626, -0.8869, -1.1873,  ..., -4.1950, -4.1719, -4.1759]],
       device='cuda:0'), hidden_states=None

Evaluating_pred:  45%|████▌     | 58/128 [00:11<00:13,  5.20it/s]

QuestionAnsweringModelOutput(loss=None, start_logits=tensor([[ 1.4862, -0.4838, -0.8937,  ..., -4.2424, -4.2298, -4.2441],
        [ 1.5941, -0.6290,  0.3945,  ..., -4.1747, -4.1956, -4.1926],
        [ 1.8211, -0.0338, -0.7112,  ..., -4.2731, -4.2769, -4.2708],
        ...,
        [ 1.0382, -1.6297, -0.9355,  ..., -4.2379, -4.2339, -4.2285],
        [ 1.6139, -1.0544, -0.9367,  ..., -4.1500, -4.1465, -4.1311],
        [ 1.3726, -1.7169, -0.6067,  ..., -4.2196, -4.2186, -4.2159]],
       device='cuda:0'), end_logits=tensor([[ 2.1845, -0.4652, -1.5336,  ..., -4.2536, -4.2382, -4.2494],
        [ 2.0596, -0.8300,  0.3120,  ..., -4.2105, -4.2321, -4.2254],
        [ 2.3266, -0.2462, -1.3331,  ..., -4.2409, -4.2328, -4.2337],
        ...,
        [ 2.2322, -1.4856, -1.6147,  ..., -4.1158, -4.1008, -4.0905],
        [ 2.0638, -1.3552, -1.3884,  ..., -4.1301, -4.1255, -4.1053],
        [ 2.3815, -1.5714, -1.4680,  ..., -4.1029, -4.0939, -4.0913]],
       device='cuda:0'), hidden_states=None

Evaluating_pred:  47%|████▋     | 60/128 [00:11<00:13,  5.14it/s]

QuestionAnsweringModelOutput(loss=None, start_logits=tensor([[ 1.3322, -0.5605,  1.0774,  ..., -4.2673, -4.2620, -4.2650],
        [ 1.0171, -0.7379, -0.8404,  ..., -4.1776, -4.1816, -4.1807],
        [ 1.3000, -1.6349, -0.4352,  ..., -4.0884, -4.0950, -4.0871],
        ...,
        [ 1.0958, -1.0083, -1.1387,  ..., -4.0708, -4.0750, -4.0763],
        [ 1.2096, -1.0669,  1.2089,  ..., -4.2745, -4.2720, -4.2499],
        [ 1.3009, -1.3280,  0.2676,  ..., -4.2615, -4.2623, -4.2651]],
       device='cuda:0'), end_logits=tensor([[ 2.3750, -1.1565,  0.8399,  ..., -4.1413, -4.1383, -4.1414],
        [ 2.2545, -0.7087, -0.8514,  ..., -4.2629, -4.2555, -4.2553],
        [ 2.6601, -1.5482, -0.8753,  ..., -4.1942, -4.1991, -4.1809],
        ...,
        [ 1.9360, -1.3973, -1.3620,  ..., -4.3865, -4.3805, -4.3760],
        [ 1.6676, -1.1258,  0.0370,  ..., -4.1145, -4.1200, -4.0954],
        [ 2.0784, -1.3530,  0.1997,  ..., -4.3167, -4.3151, -4.3178]],
       device='cuda:0'), hidden_states=None

Evaluating_pred:  48%|████▊     | 62/128 [00:11<00:12,  5.16it/s]

QuestionAnsweringModelOutput(loss=None, start_logits=tensor([[ 1.2869, -0.7609, -1.0093,  ..., -4.1859, -4.1834, -4.1713],
        [ 1.3120, -0.5612, -0.7842,  ..., -4.0839, -4.0841, -4.0737],
        [ 1.8353, -0.6192,  0.3355,  ..., -4.0720, -4.0854, -4.0858],
        ...,
        [ 1.5440, -0.7869, -1.0533,  ..., -4.1072, -4.1095, -4.1145],
        [ 1.7201, -0.9545, -0.7480,  ..., -4.1996, -4.2007, -4.1995],
        [ 1.5031, -1.4869, -0.5909,  ..., -4.2240, -4.2316, -4.2307]],
       device='cuda:0'), end_logits=tensor([[ 1.9720, -1.1443, -1.1693,  ..., -3.9853, -3.9715, -3.9719],
        [ 2.4207, -0.8205, -1.4229,  ..., -4.2297, -4.2258, -4.2134],
        [ 2.6024, -0.7584, -0.2933,  ..., -4.1693, -4.1715, -4.1655],
        ...,
        [ 2.5800, -0.6741, -1.1771,  ..., -4.2317, -4.2290, -4.2486],
        [ 1.9411, -0.8254, -1.6895,  ..., -4.3107, -4.3064, -4.2902],
        [ 1.9067, -1.2359, -0.8137,  ..., -4.1267, -4.1195, -4.0965]],
       device='cuda:0'), hidden_states=None

Evaluating_pred:  50%|█████     | 64/128 [00:12<00:12,  5.20it/s]

QuestionAnsweringModelOutput(loss=None, start_logits=tensor([[ 1.2473, -1.2428, -0.5393,  ..., -4.1451, -4.1473, -4.1398],
        [ 1.5271, -1.0341,  0.1009,  ..., -3.9407, -3.9362, -3.9426],
        [ 0.9137, -1.0034, -0.0707,  ..., -3.8399, -3.8415, -3.8388],
        ...,
        [ 1.3388, -0.9791, -0.9023,  ..., -4.0132, -4.0118, -4.0170],
        [ 1.3161, -1.2588, -0.5355,  ..., -4.1081, -4.1019, -4.1073],
        [ 1.6169, -1.4345, -1.3543,  ..., -3.9308, -3.9281, -3.9242]],
       device='cuda:0'), end_logits=tensor([[ 1.6549, -1.0923, -0.3680,  ..., -3.9700, -3.9610, -3.9524],
        [ 1.9731, -1.0964, -0.4401,  ..., -3.9429, -3.9367, -3.9487],
        [ 1.4116, -1.2473, -0.5543,  ..., -3.8334, -3.8388, -3.8301],
        ...,
        [ 1.9016, -1.2580, -1.2387,  ..., -4.1160, -4.1179, -4.1186],
        [ 1.8511, -1.4834, -1.5890,  ..., -4.1988, -4.2051, -4.1892],
        [ 1.7670, -1.2217, -1.6772,  ..., -3.8657, -3.8475, -3.8388]],
       device='cuda:0'), hidden_states=None

Evaluating_pred:  52%|█████▏    | 66/128 [00:12<00:11,  5.23it/s]

QuestionAnsweringModelOutput(loss=None, start_logits=tensor([[ 1.2542, -1.6831, -1.2794,  ..., -4.0925, -4.0982, -4.1076],
        [ 1.4499, -1.7144, -1.2584,  ..., -4.1602, -4.1579, -4.1615],
        [ 1.3602, -2.0022, -0.5378,  ..., -4.1839, -4.1772, -4.1777],
        ...,
        [ 1.4012, -1.1209, -0.7887,  ..., -4.1253, -4.1194, -4.1248],
        [ 1.1636, -1.2267, -0.2345,  ..., -4.1896, -4.2046, -4.2055],
        [ 1.4418, -1.7634, -0.6277,  ..., -4.1508, -4.1534, -4.1428]],
       device='cuda:0'), end_logits=tensor([[ 1.9101, -1.4373, -1.6824,  ..., -4.1221, -4.1194, -4.0995],
        [ 1.9875, -1.4372, -0.8528,  ..., -4.0611, -4.0573, -4.0567],
        [ 1.9140, -1.5063,  0.5265,  ..., -4.1198, -4.1225, -4.1220],
        ...,
        [ 1.9702, -0.9991, -1.6611,  ..., -4.2965, -4.2986, -4.3089],
        [ 1.5013, -1.2290,  0.4298,  ..., -4.1262, -4.1290, -4.1268],
        [ 2.1354, -1.7347, -0.1735,  ..., -4.1976, -4.1959, -4.1829]],
       device='cuda:0'), hidden_states=None

Evaluating_pred:  53%|█████▎    | 68/128 [00:13<00:11,  5.24it/s]

QuestionAnsweringModelOutput(loss=None, start_logits=tensor([[ 1.3795, -1.2218,  1.7784,  ..., -4.0829, -4.0864, -4.0791],
        [ 1.6095,  0.0454,  0.2177,  ..., -3.3621, -3.3712, -3.3739],
        [ 1.1346, -1.2643, -0.4100,  ..., -3.9583, -3.9831, -3.9712],
        ...,
        [ 1.2915, -1.8142, -1.5974,  ..., -4.2157, -4.2154, -4.2135],
        [ 0.9475, -0.4904, -0.8826,  ..., -4.1063, -4.1217, -4.1198],
        [ 1.5856, -1.6298, -0.0634,  ..., -4.2586, -4.2415, -4.2513]],
       device='cuda:0'), end_logits=tensor([[ 1.9987, -1.5654,  0.5783,  ..., -3.9379, -3.9418, -3.9340],
        [ 1.9387,  0.1996,  0.6110,  ..., -3.6553, -3.6472, -3.6568],
        [ 1.5614, -1.0721, -0.8026,  ..., -3.6740, -3.7056, -3.7053],
        ...,
        [ 1.9433, -1.4551, -1.4826,  ..., -4.0166, -4.0092, -4.0033],
        [ 1.5749, -0.8427, -0.5891,  ..., -3.9078, -3.9127, -3.9099],
        [ 2.1554, -1.4351,  0.1506,  ..., -4.2172, -4.2519, -4.2437]],
       device='cuda:0'), hidden_states=None

Evaluating_pred:  55%|█████▍    | 70/128 [00:13<00:11,  5.21it/s]

QuestionAnsweringModelOutput(loss=None, start_logits=tensor([[ 1.1476, -1.2781, -1.4063,  ..., -4.1919, -4.1916, -4.1910],
        [ 0.9957,  0.4116,  1.1618,  ..., -4.1702, -4.1730, -4.1934],
        [ 0.9467, -1.7248, -1.1287,  ..., -4.2986, -4.2949, -4.2952],
        ...,
        [ 0.9971, -1.3237, -1.2578,  ..., -4.1747, -4.1696, -4.1820],
        [ 0.9205, -1.3483, -1.0128,  ..., -4.3275, -4.3319, -4.3305],
        [ 0.7957, -0.9666, -1.1454,  ..., -4.1855, -4.1856, -4.1732]],
       device='cuda:0'), end_logits=tensor([[ 1.7939, -1.4284, -1.4097,  ..., -4.0354, -4.0329, -4.0291],
        [ 1.7015, -0.6189, -0.5297,  ..., -3.9014, -3.9101, -3.9490],
        [ 1.6925, -1.3354, -1.1791,  ..., -4.2196, -4.2110, -4.1940],
        ...,
        [ 1.6590, -1.2273, -1.3308,  ..., -4.0268, -4.0200, -4.0330],
        [ 1.5801, -1.4247, -1.0614,  ..., -4.1159, -4.1165, -4.1116],
        [ 1.4690, -1.3709, -1.4672,  ..., -4.0157, -4.0181, -4.0200]],
       device='cuda:0'), hidden_states=None

Evaluating_pred:  56%|█████▋    | 72/128 [00:13<00:10,  5.20it/s]

QuestionAnsweringModelOutput(loss=None, start_logits=tensor([[ 1.3807, -1.0543, -0.9100,  ..., -4.1664, -4.1707, -4.1731],
        [ 1.3168,  0.1921,  0.6731,  ..., -4.3225, -4.3161, -4.3280],
        [ 1.0507, -1.7135, -0.9170,  ..., -4.2658, -4.2772, -4.2292],
        ...,
        [ 1.5007, -1.1962,  0.7671,  ..., -4.1646, -4.1190, -4.1234],
        [ 1.4104, -1.0028, -0.6501,  ..., -4.2579, -4.2725, -4.2772],
        [ 1.2468, -0.8421, -0.9399,  ..., -4.2831, -4.2336, -4.2401]],
       device='cuda:0'), end_logits=tensor([[ 2.2160, -1.1277, -1.6897,  ..., -4.2374, -4.2357, -4.2268],
        [ 1.9389, -0.0451,  2.1148,  ..., -4.0730, -4.0779, -4.0890],
        [ 1.8839, -1.3587,  0.0971,  ..., -4.1775, -4.1790, -4.1339],
        ...,
        [ 2.1646, -0.9081,  0.3562,  ..., -3.9634, -3.9043, -3.9040],
        [ 2.0873, -1.4537, -0.1964,  ..., -4.1365, -4.1375, -4.1516],
        [ 2.0731, -1.3727, -2.1226,  ..., -4.1545, -4.0883, -4.0940]],
       device='cuda:0'), hidden_states=None

Evaluating_pred:  58%|█████▊    | 74/128 [00:14<00:10,  5.16it/s]

QuestionAnsweringModelOutput(loss=None, start_logits=tensor([[ 1.2162, -0.2972,  1.0324,  ..., -4.1580, -4.1668, -4.1655],
        [ 1.3772, -0.7032, -0.0076,  ..., -4.1094, -4.1301, -4.1187],
        [ 1.1201, -1.1640, -0.5743,  ..., -4.1920, -4.1941, -4.1875],
        ...,
        [ 1.2178, -1.3073, -0.7029,  ..., -4.1962, -4.1969, -4.1860],
        [ 1.2944, -1.1119, -0.0614,  ..., -4.0756, -4.0566, -4.0600],
        [ 1.4131, -1.0542, -1.5482,  ..., -4.2165, -4.2149, -4.2207]],
       device='cuda:0'), end_logits=tensor([[ 2.4032, -0.9226,  0.3972,  ..., -4.1842, -4.1940, -4.1909],
        [ 2.3526, -0.7809, -0.6288,  ..., -4.2031, -4.2152, -4.2185],
        [ 2.0277, -1.4115, -1.8315,  ..., -4.1696, -4.1734, -4.1672],
        ...,
        [ 2.3318, -1.5650, -1.9483,  ..., -4.2033, -4.1967, -4.2019],
        [ 2.0184, -1.1943,  0.7766,  ..., -4.3140, -4.3206, -4.3281],
        [ 1.9814, -1.1883, -1.2502,  ..., -4.3146, -4.3258, -4.3152]],
       device='cuda:0'), hidden_states=None

Evaluating_pred:  59%|█████▉    | 76/128 [00:14<00:10,  5.16it/s]

QuestionAnsweringModelOutput(loss=None, start_logits=tensor([[ 1.3977, -0.5947,  1.3844,  ..., -4.0711, -4.0666, -4.0694],
        [ 1.4781, -1.2937, -1.1595,  ..., -4.0297, -4.0324, -4.0292],
        [ 1.2429, -1.2010,  1.1716,  ..., -4.1034, -4.1091, -4.1168],
        ...,
        [ 1.1061, -1.0612, -0.5831,  ..., -4.2807, -4.2779, -4.2539],
        [ 1.2872, -0.7021,  2.6952,  ..., -4.2159, -4.2175, -4.2158],
        [ 1.3652, -0.7635, -0.0696,  ..., -4.2381, -4.2613, -4.2540]],
       device='cuda:0'), end_logits=tensor([[ 2.3811, -0.7246,  0.8924,  ..., -4.3038, -4.3191, -4.3128],
        [ 2.1820, -1.0439, -1.4973,  ..., -4.1906, -4.1859, -4.1860],
        [ 2.2299, -1.4459,  0.4261,  ..., -4.2012, -4.1932, -4.1847],
        ...,
        [ 1.6283, -1.2880,  0.0613,  ..., -4.2046, -4.2023, -4.1845],
        [ 1.9676, -1.1984, -0.4181,  ..., -4.2423, -4.2348, -4.2360],
        [ 2.1345, -1.4246, -0.5116,  ..., -4.2666, -4.2704, -4.2686]],
       device='cuda:0'), hidden_states=None

Evaluating_pred:  61%|██████    | 78/128 [00:15<00:09,  5.17it/s]

QuestionAnsweringModelOutput(loss=None, start_logits=tensor([[ 1.8063, -0.6969, -0.1781,  ..., -4.0188, -4.0228, -4.0260],
        [ 1.8546, -0.2819,  0.1118,  ..., -4.0186, -4.0211, -4.0360],
        [ 1.9419, -0.6013,  1.9051,  ..., -4.0703, -4.0747, -4.0783],
        ...,
        [ 1.7470, -0.9338, -0.0989,  ..., -4.1260, -4.1319, -4.1405],
        [ 1.7265, -1.3766, -0.6736,  ..., -4.0954, -4.0999, -4.1060],
        [ 1.5447, -1.2323, -0.9174,  ..., -4.1274, -4.1260, -4.1202]],
       device='cuda:0'), end_logits=tensor([[ 2.8393, -1.3415,  0.1269,  ..., -4.3213, -4.3218, -4.3168],
        [ 2.7197, -0.9532,  0.2694,  ..., -4.3397, -4.3245, -4.3295],
        [ 2.9201, -1.2247, -0.9028,  ..., -4.2661, -4.2701, -4.2716],
        ...,
        [ 2.6594, -1.3751,  0.0190,  ..., -4.2751, -4.2762, -4.2760],
        [ 2.7921, -1.5501, -0.3575,  ..., -4.3186, -4.3122, -4.3130],
        [ 2.8166, -1.3817, -0.4237,  ..., -4.2841, -4.2787, -4.2795]],
       device='cuda:0'), hidden_states=None

Evaluating_pred:  62%|██████▎   | 80/128 [00:15<00:09,  5.18it/s]

QuestionAnsweringModelOutput(loss=None, start_logits=tensor([[ 1.8864, -0.7751, -0.0477,  ..., -4.1585, -4.1607, -4.1610],
        [ 2.2420, -1.1957,  0.4221,  ..., -4.1540, -4.1498, -4.1422],
        [ 1.7990, -0.9615, -0.1932,  ..., -4.1489, -4.1445, -4.1575],
        ...,
        [ 1.1229, -0.9928,  2.3110,  ..., -4.1772, -4.1737, -4.1739],
        [ 0.9575, -0.7701, -0.8400,  ..., -4.1688, -4.1555, -4.1545],
        [ 1.0966, -1.3596, -1.1949,  ..., -4.1430, -4.1424, -4.1402]],
       device='cuda:0'), end_logits=tensor([[ 2.5055, -0.9607,  0.0417,  ..., -4.2191, -4.2182, -4.2067],
        [ 2.9437, -1.1783,  0.1926,  ..., -4.1214, -4.1155, -4.1132],
        [ 2.5135, -1.5464, -0.0106,  ..., -4.2677, -4.2587, -4.2618],
        ...,
        [ 2.1883, -1.1996, -0.5984,  ..., -4.2938, -4.2806, -4.2765],
        [ 2.1098, -1.5862, -1.2631,  ..., -4.3270, -4.3143, -4.3042],
        [ 2.0911, -1.3704, -1.6294,  ..., -4.1644, -4.1542, -4.1405]],
       device='cuda:0'), hidden_states=None

Evaluating_pred:  64%|██████▍   | 82/128 [00:15<00:08,  5.18it/s]

QuestionAnsweringModelOutput(loss=None, start_logits=tensor([[ 1.0572, -0.6211, -0.6935,  ..., -4.0601, -4.0533, -4.0588],
        [ 1.0474, -0.9237, -0.4208,  ..., -4.0550, -4.0551, -4.0535],
        [ 0.8978, -0.7559, -0.7518,  ..., -3.2540, -3.2821, -3.2893],
        ...,
        [ 0.9446, -0.8118, -0.8087,  ..., -4.0207, -4.0228, -4.0215],
        [ 0.6321, -1.0491, -0.7637,  ..., -4.1379, -4.1408, -4.1331],
        [ 0.6196, -1.1494, -1.1213,  ..., -4.1775, -4.1768, -4.1870]],
       device='cuda:0'), end_logits=tensor([[ 1.6819, -1.2362, -1.1902,  ..., -4.0643, -4.0580, -4.0558],
        [ 1.6017, -1.3802, -1.1694,  ..., -4.1352, -4.1396, -4.1394],
        [ 1.4813, -1.3946, -1.3249,  ..., -3.6239, -3.6603, -3.6586],
        ...,
        [ 1.5456, -1.2160, -0.8567,  ..., -4.0465, -4.0582, -4.0498],
        [ 1.2489, -1.2631, -0.8840,  ..., -4.1106, -4.0999, -4.1017],
        [ 1.1077, -1.3843, -1.1138,  ..., -4.0621, -4.0682, -4.0625]],
       device='cuda:0'), hidden_states=None

Evaluating_pred:  66%|██████▌   | 84/128 [00:16<00:08,  5.19it/s]

QuestionAnsweringModelOutput(loss=None, start_logits=tensor([[ 1.6556, -1.7790, -1.3147,  ..., -4.2545, -4.2575, -4.2578],
        [ 1.8857, -1.1796, -1.6728,  ..., -4.2559, -4.2578, -4.2516],
        [ 1.2072, -0.5330, -0.6531,  ..., -4.1705, -4.1739, -4.1661],
        ...,
        [ 1.5014, -0.7194, -0.2180,  ..., -4.0868, -4.0922, -4.0904],
        [ 1.1253, -0.9954, -0.4200,  ..., -4.1236, -4.1221, -4.1202],
        [ 1.8093, -1.2641,  0.1559,  ..., -4.1265, -4.1143, -4.1181]],
       device='cuda:0'), end_logits=tensor([[ 2.4873, -1.3731, -1.2686,  ..., -4.1874, -4.1759, -4.1757],
        [ 2.4094, -1.2593, -1.4069,  ..., -3.9965, -3.9973, -3.9990],
        [ 1.9715, -1.1297, -1.2972,  ..., -4.1358, -4.1320, -4.1253],
        ...,
        [ 2.3768, -1.1046, -0.1595,  ..., -4.1370, -4.1384, -4.1380],
        [ 1.9920, -1.4179, -0.9102,  ..., -4.1717, -4.1678, -4.1602],
        [ 2.9566, -1.5174, -1.1363,  ..., -4.1105, -4.1016, -4.1089]],
       device='cuda:0'), hidden_states=None

Evaluating_pred:  67%|██████▋   | 86/128 [00:16<00:08,  5.19it/s]

QuestionAnsweringModelOutput(loss=None, start_logits=tensor([[ 1.2759, -1.1821, -0.9199,  ..., -4.2943, -4.2925, -4.2927],
        [ 1.1415, -1.3831, -1.5068,  ..., -4.2039, -4.2003, -4.1866],
        [ 1.4502, -1.0479, -1.3160,  ..., -4.2717, -4.2829, -4.2767],
        ...,
        [ 1.0291, -0.7797,  0.4961,  ..., -4.2102, -4.2208, -4.2101],
        [ 1.2623, -1.4364, -0.0120,  ..., -4.1449, -4.1631, -4.1014],
        [ 1.3429, -1.4885, -0.2470,  ..., -4.1491, -4.1550, -4.1740]],
       device='cuda:0'), end_logits=tensor([[ 1.9998, -1.6751, -2.0456,  ..., -4.1803, -4.1777, -4.1843],
        [ 2.0790, -2.0364, -1.4246,  ..., -4.0384, -4.0415, -4.0342],
        [ 1.8637, -1.6049, -1.7681,  ..., -4.0976, -4.1072, -4.1077],
        ...,
        [ 1.8158, -1.3178,  0.3064,  ..., -4.0872, -4.0850, -4.0780],
        [ 1.9621, -1.3971, -0.2776,  ..., -4.0900, -4.0993, -4.0604],
        [ 2.0174, -1.4242, -0.6934,  ..., -4.0933, -4.0900, -4.1046]],
       device='cuda:0'), hidden_states=None

Evaluating_pred:  69%|██████▉   | 88/128 [00:16<00:07,  5.16it/s]

QuestionAnsweringModelOutput(loss=None, start_logits=tensor([[ 1.3358, -0.4318, -0.2196,  ..., -4.1129, -4.1105, -4.0910],
        [ 1.2787, -1.3031, -0.6392,  ..., -4.0885, -4.0769, -4.0712],
        [ 1.5637, -0.7368, -0.3147,  ..., -4.1056, -4.1124, -4.1227],
        ...,
        [ 1.7460, -1.4142, -0.3200,  ..., -4.0600, -4.0410, -4.0506],
        [ 1.5095, -0.7591, -0.7272,  ..., -4.0315, -4.0185, -4.0397],
        [ 1.2823, -1.2568, -1.9323,  ..., -4.2632, -4.2639, -4.2650]],
       device='cuda:0'), end_logits=tensor([[ 1.7684, -1.0046, -0.7619,  ..., -4.1988, -4.1987, -4.1660],
        [ 1.5691, -1.2423, -0.0555,  ..., -4.1115, -4.0885, -4.0735],
        [ 2.0570, -1.2111, -0.5219,  ..., -4.0952, -4.0932, -4.1118],
        ...,
        [ 1.9328, -1.3272, -0.5030,  ..., -4.1306, -4.1237, -4.1157],
        [ 2.1879, -1.3190, -0.8409,  ..., -4.0865, -4.0868, -4.1100],
        [ 2.3741, -1.7877, -1.7883,  ..., -4.2866, -4.2845, -4.2817]],
       device='cuda:0'), hidden_states=None

Evaluating_pred:  70%|███████   | 90/128 [00:17<00:07,  5.18it/s]

QuestionAnsweringModelOutput(loss=None, start_logits=tensor([[ 1.4991,  0.0439,  1.8048,  ..., -4.1004, -4.1026, -4.1277],
        [ 1.4354, -1.2241, -0.8337,  ..., -4.2552, -4.2604, -4.2419],
        [ 1.4783, -0.9409, -0.2896,  ..., -4.3215, -4.3284, -4.3253],
        ...,
        [ 1.7097, -0.7283, -0.1742,  ..., -4.0889, -4.0933, -4.1147],
        [ 1.5151, -0.5988,  0.0443,  ..., -4.2209, -4.2199, -4.2075],
        [ 1.9373, -0.8013,  0.1506,  ..., -4.1262, -4.1180, -4.1296]],
       device='cuda:0'), end_logits=tensor([[ 2.5106, -1.4297, -0.1607,  ..., -4.1193, -4.1303, -4.1765],
        [ 2.5854, -1.1124, -0.5414,  ..., -4.1174, -4.1193, -4.0992],
        [ 2.5037, -1.8515, -0.8765,  ..., -4.2071, -4.2047, -4.1854],
        ...,
        [ 2.5598, -1.1198, -0.9800,  ..., -4.2297, -4.2281, -4.2205],
        [ 2.0673, -1.1384, -0.3191,  ..., -4.0966, -4.0954, -4.0974],
        [ 2.2036, -1.3714, -0.5258,  ..., -4.1087, -4.1059, -4.0934]],
       device='cuda:0'), hidden_states=None

Evaluating_pred:  72%|███████▏  | 92/128 [00:17<00:06,  5.17it/s]

QuestionAnsweringModelOutput(loss=None, start_logits=tensor([[ 1.6024, -1.3970, -0.8678,  ..., -4.1534, -4.1791, -4.1632],
        [ 1.8190, -0.4686,  0.9692,  ..., -4.1459, -4.1473, -4.1450],
        [ 1.2620, -1.1446,  0.0737,  ..., -4.2438, -4.2505, -4.2461],
        ...,
        [ 1.5498, -0.8666,  1.7866,  ..., -4.2396, -4.2405, -4.2399],
        [ 1.5893, -0.8075, -0.4639,  ..., -4.0634, -4.0609, -4.0645],
        [ 1.5366, -0.7563, -0.5305,  ..., -4.1794, -4.1945, -4.2010]],
       device='cuda:0'), end_logits=tensor([[ 2.3197, -1.1237, -1.7042,  ..., -4.0611, -4.0918, -4.0659],
        [ 2.2430, -1.0067, -0.3951,  ..., -4.0979, -4.0980, -4.0890],
        [ 2.0111, -1.7674,  0.4274,  ..., -4.3229, -4.3304, -4.3323],
        ...,
        [ 2.1322, -1.0560,  0.3893,  ..., -4.2782, -4.2784, -4.2790],
        [ 2.1636, -1.3474, -1.3026,  ..., -4.2467, -4.2552, -4.2575],
        [ 2.1986, -1.6127, -0.2030,  ..., -4.3162, -4.3162, -4.3181]],
       device='cuda:0'), hidden_states=None

Evaluating_pred:  73%|███████▎  | 94/128 [00:18<00:06,  5.17it/s]

QuestionAnsweringModelOutput(loss=None, start_logits=tensor([[ 1.9283, -0.8121,  0.3726,  ..., -4.1424, -4.1751, -4.1746],
        [ 1.5855, -1.1768, -0.0285,  ..., -4.0452, -4.0356, -4.0353],
        [ 1.7166, -1.3229,  0.2787,  ..., -4.1163, -4.1263, -4.1260],
        ...,
        [ 1.5207, -0.8489, -0.5328,  ..., -4.1232, -4.1180, -4.1155],
        [ 2.0041, -1.3950, -0.0993,  ..., -4.0704, -4.0734, -4.0685],
        [ 1.1295, -1.0121, -0.5834,  ..., -4.0521, -4.0579, -4.0717]],
       device='cuda:0'), end_logits=tensor([[ 2.5364, -1.2972,  0.1092,  ..., -4.1631, -4.1910, -4.1885],
        [ 2.4588, -0.8750, -0.2466,  ..., -4.1377, -4.1300, -4.1255],
        [ 2.6441, -1.2817, -0.2144,  ..., -4.2831, -4.2988, -4.2902],
        ...,
        [ 2.3418, -1.5601, -0.6163,  ..., -4.1774, -4.1762, -4.1757],
        [ 2.5445, -1.1126, -0.2835,  ..., -4.1594, -4.1495, -4.1493],
        [ 2.0590, -1.4495, -0.8272,  ..., -4.2918, -4.2913, -4.3040]],
       device='cuda:0'), hidden_states=None

Evaluating_pred:  75%|███████▌  | 96/128 [00:18<00:06,  5.13it/s]

QuestionAnsweringModelOutput(loss=None, start_logits=tensor([[ 1.2397, -1.2041,  0.2404,  ..., -4.1505, -4.1566, -4.1581],
        [ 1.5295, -1.0628, -0.2970,  ..., -4.1343, -4.1364, -4.1345],
        [ 1.6593, -1.1725,  0.3364,  ..., -4.1111, -4.1093, -4.1011],
        ...,
        [ 1.2697, -0.9233,  0.1673,  ..., -4.1874, -4.1953, -4.1834],
        [ 1.2979, -0.9835,  0.3091,  ..., -4.0917, -4.0954, -4.1011],
        [ 1.4424, -1.1766, -0.6464,  ..., -4.0642, -4.0737, -4.0710]],
       device='cuda:0'), end_logits=tensor([[ 1.9013, -1.4079, -0.4371,  ..., -4.2602, -4.2573, -4.2497],
        [ 2.3499, -1.5944, -0.4853,  ..., -4.2655, -4.2659, -4.2442],
        [ 2.5611, -0.8821,  0.0709,  ..., -4.2436, -4.2330, -4.2231],
        ...,
        [ 2.0085, -1.6191, -0.7733,  ..., -4.1512, -4.1485, -4.1250],
        [ 2.2115, -1.3358, -0.3372,  ..., -4.3502, -4.3558, -4.3438],
        [ 2.3372, -0.7566, -1.1288,  ..., -4.1966, -4.1976, -4.1765]],
       device='cuda:0'), hidden_states=None

Evaluating_pred:  77%|███████▋  | 98/128 [00:18<00:05,  5.16it/s]

QuestionAnsweringModelOutput(loss=None, start_logits=tensor([[ 1.7032, -0.5302,  2.0005,  ..., -4.1620, -4.1589, -4.1601],
        [ 1.5208, -0.7901,  0.0664,  ..., -4.1898, -4.1916, -4.1840],
        [ 1.8008, -0.7833, -1.0878,  ..., -4.1812, -4.1941, -4.1969],
        ...,
        [ 1.7663, -1.0636,  0.2359,  ..., -4.0993, -4.1032, -4.1010],
        [ 1.5324, -0.7445,  0.4834,  ..., -4.1296, -4.1262, -4.1178],
        [ 1.3886,  1.0650,  0.6525,  ..., -3.9307, -3.9266, -3.9517]],
       device='cuda:0'), end_logits=tensor([[ 2.4281, -1.0651,  1.6176,  ..., -4.1427, -4.1333, -4.1359],
        [ 1.9405, -1.4736, -0.6784,  ..., -4.0141, -4.0121, -4.0117],
        [ 2.5862, -1.3211, -1.5875,  ..., -4.2509, -4.2702, -4.2621],
        ...,
        [ 2.5581, -1.5008, -0.5860,  ..., -4.2680, -4.2639, -4.2626],
        [ 2.1691, -1.5684, -0.5595,  ..., -4.1867, -4.1777, -4.1742],
        [ 2.3412,  0.0848,  0.6144,  ..., -4.1341, -4.1115, -4.1283]],
       device='cuda:0'), hidden_states=None

Evaluating_pred:  78%|███████▊  | 100/128 [00:19<00:05,  5.13it/s]

QuestionAnsweringModelOutput(loss=None, start_logits=tensor([[ 1.5016, -1.3071,  0.1946,  ..., -3.9317, -3.9260, -3.9263],
        [ 1.7343, -1.1271, -0.0574,  ..., -4.0903, -4.0943, -4.1001],
        [ 1.8705, -1.1380, -1.7875,  ..., -4.2029, -4.2064, -4.2069],
        ...,
        [ 2.2870, -0.6717, -0.4609,  ..., -4.0376, -4.0397, -4.0307],
        [ 1.7103, -1.1030, -0.7295,  ..., -4.1352, -4.1368, -4.1381],
        [ 1.8919, -0.9762, -0.1788,  ..., -4.1206, -4.1191, -4.1149]],
       device='cuda:0'), end_logits=tensor([[ 2.2951, -1.3086,  0.1334,  ..., -4.2314, -4.2248, -4.2374],
        [ 2.6480, -1.1570, -0.3543,  ..., -4.1317, -4.1326, -4.1512],
        [ 2.4087, -1.2436, -1.5347,  ..., -4.2599, -4.2535, -4.2469],
        ...,
        [ 2.3931, -1.0274, -0.7938,  ..., -3.8910, -3.9072, -3.9050],
        [ 2.4509, -1.4306, -1.9386,  ..., -4.1321, -4.1328, -4.1260],
        [ 2.6131, -1.6400, -0.6610,  ..., -4.2178, -4.2159, -4.2100]],
       device='cuda:0'), hidden_states=None

Evaluating_pred:  80%|███████▉  | 102/128 [00:19<00:05,  5.14it/s]

QuestionAnsweringModelOutput(loss=None, start_logits=tensor([[ 1.9737, -1.2012, -1.4152,  ..., -4.1145, -4.1101, -4.1077],
        [ 2.0131, -1.3980,  0.6539,  ..., -4.0631, -4.0767, -4.0694],
        [ 1.8019, -1.3435,  0.5225,  ..., -4.0909, -4.0914, -4.0912],
        ...,
        [ 1.9706, -0.9961,  0.4070,  ..., -3.9641, -3.9642, -3.9581],
        [ 1.7291, -1.4030,  0.7251,  ..., -4.0424, -4.0537, -4.0620],
        [ 1.7926, -1.3665,  0.5804,  ..., -4.0426, -4.0524, -4.0607]],
       device='cuda:0'), end_logits=tensor([[ 2.3471, -1.1019, -1.0434,  ..., -3.8853, -3.8833, -3.8916],
        [ 2.7110, -1.2181,  0.3558,  ..., -4.1047, -4.1223, -4.1008],
        [ 2.7629, -0.9891,  0.6389,  ..., -4.0452, -4.0533, -4.0549],
        ...,
        [ 2.5973, -0.7511,  0.6032,  ..., -3.9663, -3.9637, -3.9641],
        [ 2.6715, -1.0593,  0.6846,  ..., -4.0313, -4.0352, -4.0498],
        [ 2.7540, -1.0247,  0.7861,  ..., -4.0480, -4.0489, -4.0611]],
       device='cuda:0'), hidden_states=None

Evaluating_pred:  81%|████████▏ | 104/128 [00:20<00:04,  5.15it/s]

QuestionAnsweringModelOutput(loss=None, start_logits=tensor([[ 1.9289, -1.2692,  0.2544,  ..., -4.0233, -4.0109, -4.0032],
        [ 1.6705, -1.2682,  0.5433,  ..., -3.9768, -3.9693, -3.9679],
        [ 1.7505, -1.1797, -0.7348,  ..., -4.2037, -4.1996, -4.1934],
        ...,
        [ 2.0898, -1.2887,  0.3931,  ..., -4.0819, -4.0827, -4.0748],
        [ 1.9431, -1.4829,  0.3178,  ..., -4.1065, -4.1094, -4.0928],
        [ 1.9391, -1.3478, -1.1428,  ..., -4.1151, -4.1156, -4.1106]],
       device='cuda:0'), end_logits=tensor([[ 2.4442, -0.9965,  0.1734,  ..., -3.9950, -3.9883, -3.9769],
        [ 2.4390, -0.8987,  0.2819,  ..., -3.9664, -3.9578, -3.9515],
        [ 2.7167, -1.6513, -0.7562,  ..., -4.2724, -4.2681, -4.2483],
        ...,
        [ 2.8879, -0.9131,  0.3034,  ..., -4.1225, -4.1137, -4.0960],
        [ 2.7966, -1.0673,  0.1313,  ..., -4.1390, -4.1308, -4.1039],
        [ 2.9223, -1.7820, -2.0265,  ..., -4.2411, -4.2437, -4.2354]],
       device='cuda:0'), hidden_states=None

Evaluating_pred:  83%|████████▎ | 106/128 [00:20<00:04,  5.14it/s]

QuestionAnsweringModelOutput(loss=None, start_logits=tensor([[ 1.1974, -0.8781, -1.2038,  ..., -4.3127, -4.3163, -4.3102],
        [ 1.2750,  0.0849, -0.5410,  ..., -4.2419, -4.2434, -4.2486],
        [ 1.3312, -0.9107,  1.1684,  ..., -4.1504, -4.1500, -4.1451],
        ...,
        [ 2.3107, -1.2080, -0.1402,  ..., -4.0928, -4.1076, -4.0847],
        [ 1.7161, -1.3483,  0.0673,  ..., -2.3825, -4.0775, -4.1227],
        [ 1.3175, -1.3486, -0.5262,  ..., -4.0180, -4.0636, -4.0674]],
       device='cuda:0'), end_logits=tensor([[ 1.5064, -1.4358, -2.1693,  ..., -4.2123, -4.2041, -4.1941],
        [ 2.0497, -1.4864, -1.9871,  ..., -4.1176, -4.1219, -4.1048],
        [ 1.9795, -1.2268, -0.3120,  ..., -4.1545, -4.1434, -4.1369],
        ...,
        [ 2.9807, -1.6960,  1.8303,  ..., -4.1361, -4.1517, -4.1228],
        [ 2.4464, -1.1378, -0.0865,  ..., -1.0537, -4.1126, -4.1486],
        [ 2.1188, -1.6846,  0.5630,  ..., -4.0598, -4.1115, -4.1080]],
       device='cuda:0'), hidden_states=None

Evaluating_pred:  84%|████████▍ | 108/128 [00:20<00:03,  5.12it/s]

QuestionAnsweringModelOutput(loss=None, start_logits=tensor([[ 2.3297, -1.5745, -1.4613,  ..., -4.1754, -4.1872, -4.1783],
        [ 2.0641, -1.8073, -1.7030,  ..., -4.1310, -4.1357, -4.1373],
        [ 1.2038, -1.9163, -1.3516,  ..., -4.2200, -4.2158, -4.2185],
        ...,
        [ 0.8143, -1.7409, -0.4350,  ..., -4.0049, -4.0041, -3.9981],
        [ 1.6595, -1.5869, -1.2176,  ..., -4.1473, -4.1442, -4.1406],
        [ 1.6439, -0.2814, -0.5779,  ..., -3.9460, -3.9691, -3.9648]],
       device='cuda:0'), end_logits=tensor([[ 2.9154, -1.6865, -2.3136,  ..., -4.0731, -4.0769, -4.0725],
        [ 2.4417, -1.8358, -1.8899,  ..., -4.0984, -4.0854, -4.0699],
        [ 2.0035, -1.6310, -1.2861,  ..., -3.9053, -3.8897, -3.8843],
        ...,
        [ 1.6703, -1.4444, -0.5585,  ..., -3.9239, -3.9291, -3.9154],
        [ 2.2364, -1.3600, -1.2499,  ..., -3.9063, -3.8966, -3.8802],
        [ 2.4606, -1.0431, -1.2851,  ..., -3.9951, -4.0172, -4.0282]],
       device='cuda:0'), hidden_states=None

Evaluating_pred:  86%|████████▌ | 110/128 [00:21<00:03,  5.15it/s]

QuestionAnsweringModelOutput(loss=None, start_logits=tensor([[ 1.4627, -1.0772, -1.1016,  ..., -4.2230, -4.2299, -4.2268],
        [ 0.9534, -1.9407, -1.2994,  ..., -4.3305, -4.3267, -4.3299],
        [ 1.4967, -1.7900, -1.0957,  ..., -4.3045, -4.3021, -4.3116],
        ...,
        [ 0.6217, -1.7341, -0.6107,  ..., -4.2605, -4.2565, -4.2580],
        [ 0.7117, -1.4917, -0.7606,  ..., -4.2854, -4.2927, -4.2397],
        [ 0.6398, -0.7821, -1.0340,  ..., -4.2164, -4.2227, -4.2006]],
       device='cuda:0'), end_logits=tensor([[ 2.3131, -0.8562, -0.9073,  ..., -4.2803, -4.2890, -4.2844],
        [ 1.6045, -1.7570, -1.3482,  ..., -4.1678, -4.1571, -4.1446],
        [ 2.1547, -1.6355, -1.4639,  ..., -4.1566, -4.1527, -4.1485],
        ...,
        [ 1.7151, -1.4879, -0.4627,  ..., -4.1667, -4.1631, -4.1499],
        [ 1.5774, -1.3967, -0.8990,  ..., -3.8630, -3.8795, -3.8379],
        [ 1.4132, -0.8944, -1.2714,  ..., -3.9534, -3.9617, -3.9299]],
       device='cuda:0'), hidden_states=None

Evaluating_pred:  88%|████████▊ | 112/128 [00:21<00:03,  5.18it/s]

QuestionAnsweringModelOutput(loss=None, start_logits=tensor([[ 1.3868, -1.3836, -0.6711,  ..., -4.1782, -4.1541, -4.1594],
        [ 1.0632, -0.7442, -0.9868,  ..., -4.1789, -4.1616, -4.1575],
        [ 1.4961, -0.6884, -0.6987,  ..., -4.2367, -4.2162, -4.2354],
        ...,
        [ 1.4140, -0.8989,  0.7242,  ..., -4.2127, -4.2192, -4.2266],
        [ 1.3402,  0.7129,  0.1091,  ..., -3.3691, -3.3745, -3.4130],
        [ 1.2418, -1.5473, -0.9491,  ..., -4.2877, -4.2891, -4.2936]],
       device='cuda:0'), end_logits=tensor([[ 2.3529, -0.9869, -0.7302,  ..., -4.0889, -4.0576, -4.0625],
        [ 1.8980, -1.0068, -1.4127,  ..., -4.0700, -4.0503, -4.0656],
        [ 2.3407, -0.8127, -0.7625,  ..., -4.1357, -4.1189, -4.1304],
        ...,
        [ 1.8264, -1.0692,  0.7226,  ..., -3.8695, -3.8835, -3.9023],
        [ 1.7109,  0.4901, -0.8009,  ..., -3.0694, -3.0634, -3.0888],
        [ 1.4870, -1.3044, -1.3133,  ..., -4.0297, -4.0309, -4.0235]],
       device='cuda:0'), hidden_states=None

Evaluating_pred:  89%|████████▉ | 114/128 [00:22<00:02,  5.17it/s]

QuestionAnsweringModelOutput(loss=None, start_logits=tensor([[ 0.5289, -1.7559, -1.4572,  ..., -4.3095, -4.2978, -4.2876],
        [ 0.6655, -1.6414, -1.1430,  ..., -4.3117, -4.2996, -4.2998],
        [ 1.1449, -0.9370, -1.0281,  ..., -4.2041, -4.2216, -4.2101],
        ...,
        [ 1.2916, -1.7004, -0.3351,  ..., -4.1194, -4.1170, -4.1231],
        [ 1.0217, -0.7416, -1.1568,  ..., -4.2379, -4.2403, -4.2496],
        [ 1.4433, -1.8325, -0.5364,  ..., -4.0764, -4.0712, -4.0878]],
       device='cuda:0'), end_logits=tensor([[ 1.4089, -1.5920, -1.3628,  ..., -4.0326, -4.0118, -4.0077],
        [ 1.3561, -1.1976, -0.9505,  ..., -4.0525, -4.0219, -4.0173],
        [ 1.8748, -1.0737, -1.2798,  ..., -4.0004, -4.0144, -4.0108],
        ...,
        [ 2.0875, -1.7535, -0.2850,  ..., -4.2584, -4.2474, -4.2496],
        [ 1.8718, -0.7902, -1.3090,  ..., -4.3654, -4.3640, -4.3602],
        [ 2.4216, -1.6391, -1.5894,  ..., -4.4967, -4.4823, -4.4913]],
       device='cuda:0'), hidden_states=None

Evaluating_pred:  91%|█████████ | 116/128 [00:22<00:02,  5.14it/s]

QuestionAnsweringModelOutput(loss=None, start_logits=tensor([[ 1.4119,  0.0811,  0.8234,  ..., -4.1600, -4.1567, -4.1697],
        [ 1.6467, -1.1248, -0.9921,  ..., -4.0067, -4.0076, -4.0209],
        [ 1.1360, -0.1809, -0.5279,  ..., -4.3007, -4.3045, -4.2949],
        ...,
        [ 0.9703, -1.1176, -0.2318,  ..., -4.1942, -4.1782, -4.1783],
        [ 1.7230, -0.3517, -0.0518,  ..., -4.1410, -4.1412, -4.1424],
        [ 1.6394, -0.3936, -0.4754,  ..., -4.1203, -4.1159, -4.1141]],
       device='cuda:0'), end_logits=tensor([[ 2.2678, -0.9936,  1.4484,  ..., -4.3078, -4.2997, -4.2921],
        [ 2.3333, -0.8118, -1.1931,  ..., -4.1186, -4.1222, -4.1179],
        [ 1.9179, -0.7436, -1.5911,  ..., -4.3177, -4.3151, -4.2955],
        ...,
        [ 1.9848, -1.2792, -0.1791,  ..., -4.1597, -4.1307, -4.1339],
        [ 2.1471, -1.1663, -0.9877,  ..., -4.1506, -4.1478, -4.1433],
        [ 2.3360, -0.6761, -1.4613,  ..., -4.2784, -4.2626, -4.2644]],
       device='cuda:0'), hidden_states=None

Evaluating_pred:  92%|█████████▏| 118/128 [00:22<00:01,  5.15it/s]

QuestionAnsweringModelOutput(loss=None, start_logits=tensor([[ 1.2150, -1.7659, -1.0899,  ..., -4.1148, -4.1254, -4.1193],
        [ 1.2436, -1.7460, -1.2393,  ..., -4.2165, -4.2160, -4.2197],
        [ 1.4540, -1.7955, -1.1656,  ..., -4.2263, -4.2257, -4.2276],
        ...,
        [ 1.4575, -1.7200, -1.5151,  ..., -4.0705, -4.0848, -4.0633],
        [ 1.0172, -1.8669, -1.2839,  ..., -4.3388, -4.3305, -4.3349],
        [ 0.7185, -1.5061, -1.1576,  ..., -4.1740, -4.1759, -4.1660]],
       device='cuda:0'), end_logits=tensor([[ 1.9691, -1.8077, -1.7254,  ..., -4.3560, -4.3589, -4.3488],
        [ 1.9091, -1.6250,  0.1323,  ..., -4.0777, -4.0634, -4.0583],
        [ 2.2022, -1.6983, -1.2337,  ..., -4.0021, -3.9957, -3.9777],
        ...,
        [ 1.9955, -1.5517, -1.5583,  ..., -3.8928, -3.9048, -3.8602],
        [ 1.5286, -1.5963, -1.1049,  ..., -4.0843, -4.0829, -4.0856],
        [ 1.5352, -1.1155, -1.1191,  ..., -3.9345, -3.9411, -3.9220]],
       device='cuda:0'), hidden_states=None

Evaluating_pred:  94%|█████████▍| 120/128 [00:23<00:01,  5.16it/s]

QuestionAnsweringModelOutput(loss=None, start_logits=tensor([[ 2.1713, -1.1564, -0.9520,  ..., -4.1648, -4.1784, -4.1812],
        [ 1.6624, -0.9069, -1.0787,  ..., -4.0888, -4.1414, -4.1413],
        [ 2.0031, -1.2661, -0.8520,  ..., -4.2222, -4.2307, -4.1879],
        ...,
        [ 1.5166, -1.3566, -0.7911,  ..., -4.1232, -4.1251, -4.1195],
        [ 1.9380, -1.2222,  0.1179,  ..., -4.2744, -4.3033, -4.3063],
        [ 1.6665, -0.1223, -0.3433,  ..., -4.1327, -4.1815, -4.1950]],
       device='cuda:0'), end_logits=tensor([[ 3.0652, -1.2301, -1.5051,  ..., -4.0136, -4.0318, -4.0327],
        [ 2.7759, -1.2841, -1.7398,  ..., -4.1005, -4.1657, -4.1809],
        [ 3.1578, -1.0435, -1.6733,  ..., -3.9872, -3.9987, -3.9448],
        ...,
        [ 2.2986, -1.4164, -0.8317,  ..., -3.9819, -3.9758, -3.9759],
        [ 2.6926, -1.3669,  0.6802,  ..., -3.9861, -4.0069, -4.0079],
        [ 2.3978, -1.0901,  1.2701,  ..., -4.0900, -4.1350, -4.1508]],
       device='cuda:0'), hidden_states=None

Evaluating_pred:  95%|█████████▌| 122/128 [00:23<00:01,  5.17it/s]

QuestionAnsweringModelOutput(loss=None, start_logits=tensor([[ 2.4565, -0.7554, -0.8287,  ..., -4.1899, -4.1966, -4.1931],
        [ 2.3312, -1.2565,  0.0683,  ..., -4.1576, -4.1604, -4.1543],
        [ 2.1334, -0.7708, -0.6142,  ..., -4.0984, -4.1002, -4.1088],
        ...,
        [ 2.0481, -1.2258, -0.8601,  ..., -4.0866, -4.1037, -4.0951],
        [ 1.7852, -0.3893, -0.8082,  ..., -4.1925, -4.2105, -4.2055],
        [ 1.7242, -1.3221,  0.2854,  ..., -4.1843, -4.1881, -4.1780]],
       device='cuda:0'), end_logits=tensor([[ 3.1224, -1.1914, -1.6586,  ..., -4.1194, -4.1158, -4.1114],
        [ 2.9797, -1.6040,  0.1717,  ..., -4.1358, -4.1378, -4.1342],
        [ 3.1231, -1.2149, -1.4857,  ..., -4.3182, -4.3166, -4.3179],
        ...,
        [ 2.9523, -1.4503, -1.6899,  ..., -4.3011, -4.2933, -4.2969],
        [ 3.1657, -0.8256, -1.2519,  ..., -4.0174, -4.0363, -4.0346],
        [ 3.1416, -0.9878,  0.2173,  ..., -3.8622, -3.8681, -3.8708]],
       device='cuda:0'), hidden_states=None

Evaluating_pred:  97%|█████████▋| 124/128 [00:23<00:00,  5.15it/s]

QuestionAnsweringModelOutput(loss=None, start_logits=tensor([[ 1.5291, -0.9939, -0.6519,  ..., -3.9497, -3.9895, -4.0197],
        [ 1.5151, -0.6253, -0.7110,  ..., -4.1900, -4.2005, -4.1988],
        [ 1.4246, -1.4795, -1.3381,  ..., -4.1332, -4.1277, -4.1380],
        ...,
        [ 1.8789, -1.6474, -0.5484,  ..., -4.2296, -4.2618, -4.2610],
        [ 2.0393, -1.2340, -0.1971,  ..., -4.2445, -4.2384, -4.2387],
        [ 1.8932, -0.8178, -1.1032,  ..., -4.1810, -4.1789, -4.1848]],
       device='cuda:0'), end_logits=tensor([[ 2.3558, -1.3608, -0.4295,  ..., -4.0949, -4.1256, -4.1124],
        [ 2.3233, -0.8205, -1.5757,  ..., -4.3596, -4.3675, -4.3631],
        [ 2.0926, -1.1552, -1.5520,  ..., -4.2557, -4.2477, -4.2458],
        ...,
        [ 2.6772, -1.0716,  0.5427,  ..., -3.9552, -4.0138, -4.0112],
        [ 2.6425, -1.1293, -0.5646,  ..., -4.1166, -4.1131, -4.1146],
        [ 2.6752, -0.9189, -1.3335,  ..., -4.1634, -4.1641, -4.1737]],
       device='cuda:0'), hidden_states=None

Evaluating_pred:  98%|█████████▊| 126/128 [00:24<00:00,  5.15it/s]

QuestionAnsweringModelOutput(loss=None, start_logits=tensor([[ 1.1323, -0.9586, -0.9191,  ..., -4.2439, -4.2478, -4.2461],
        [ 1.2705, -1.5363, -0.6011,  ..., -4.2328, -4.2434, -4.2388],
        [ 1.1742, -1.7398, -1.1542,  ..., -4.0995, -4.1205, -4.0873],
        ...,
        [ 1.0028, -1.5596, -0.4446,  ..., -4.2833, -4.3003, -4.3039],
        [ 1.5239, -1.6480, -1.3093,  ..., -4.1145, -4.1351, -4.1122],
        [ 1.6956, -1.6418, -0.4814,  ..., -4.2234, -4.2228, -4.2256]],
       device='cuda:0'), end_logits=tensor([[ 1.5999, -0.9607, -0.9553,  ..., -4.2421, -4.2386, -4.2279],
        [ 1.8144, -1.4704, -0.8407,  ..., -4.2671, -4.2594, -4.2549],
        [ 1.7088, -1.5029, -1.2593,  ..., -3.9498, -3.9562, -3.9363],
        ...,
        [ 1.3536, -1.6123,  0.1127,  ..., -4.1908, -4.1835, -4.1715],
        [ 2.0443, -1.5436, -0.7047,  ..., -4.0362, -4.0464, -4.0212],
        [ 2.2845, -1.7512, -0.5911,  ..., -4.1724, -4.1653, -4.1533]],
       device='cuda:0'), hidden_states=None

Evaluating_pred: 100%|██████████| 128/128 [00:24<00:00,  5.19it/s]

QuestionAnsweringModelOutput(loss=None, start_logits=tensor([[ 1.4357, -0.5832, -0.6467,  ..., -4.1105, -4.1121, -4.1112],
        [ 1.0775,  0.7633,  2.1925,  ..., -4.0069, -4.0067, -4.0189],
        [ 1.7481, -1.4053, -0.8086,  ..., -4.1238, -4.1344, -4.1181],
        ...,
        [ 0.9596, -1.7568, -1.5455,  ..., -4.3129, -4.3169, -4.3156],
        [ 1.0692, -1.8755, -1.0048,  ..., -4.2532, -4.2454, -4.2354],
        [ 1.5928, -1.4061, -1.3555,  ..., -4.1701, -4.1636, -4.1665]],
       device='cuda:0'), end_logits=tensor([[ 1.9839, -1.1459, -1.4283,  ..., -4.1348, -4.1290, -4.1275],
        [ 1.7188, -0.5440,  0.1459,  ..., -4.1969, -4.1896, -4.1823],
        [ 1.3852, -1.4305, -1.3032,  ..., -4.1314, -4.1245, -4.1068],
        ...,
        [ 1.7514, -1.8450, -2.1032,  ..., -4.2892, -4.2622, -4.2503],
        [ 1.8324, -1.6750, -1.4759,  ..., -4.0929, -4.0933, -4.0751],
        [ 2.2064, -1.3238, -1.7339,  ..., -4.1155, -4.1012, -4.1007]],
       device='cuda:0'), hidden_states=None




TypeError: unsupported format string passed to list.__format__

In [None]:
all_acc

## query workspace

In [None]:
# get unlable data
unlabeled_idxs = np.arange(n_pool)[~labeled_idxs]
unlabeled_data = train_dataset.select(indices=unlabeled_idxs)
len(unlabeled_idxs)

### test: query 5 data from 20 unlabeled_data

In [None]:
# smaller data
unlabeled_idxs_20 = unlabeled_idxs[20:40]
unlabeled_data_20 = train_dataset.select(unlabeled_idxs_20)
unlabeled_feature_20 = train_features.select(unlabeled_idxs_20)
len(unlabeled_data_20)

In [None]:
unlabeled_idxs_20

In [None]:
unlabeled_dataloader = DataLoader(
		unlabeled_data_20,
		shuffle=True,
		collate_fn=default_data_collator,
		batch_size=8,
	)
len(unlabeled_dataloader.dataset)

In [None]:
probs_list_dict_20 = get_prob(model, unlabeled_dataloader, device, unlabeled_feature_20, squad['train'])
# len(probs_list_dict_20)

In [None]:
probs_list_dict_20

In [None]:
uncertainties_dict = {}
for idx, probs in probs_list_dict_20.items():
    if len(probs) > 1: # if prob_dict['probs'] is not 0
        sort_probs = np.sort(probs)[::-1] # This method returns a copy of the array, leaving the original array unchanged.
        uncertainties = sort_probs[0] - sort_probs[1]
        uncertainties_dict[idx] = uncertainties
    elif idx:
        uncertainties_dict[idx] = np.array([0])
print('sort_probs:\n', sort_probs)
print('uncertainties_dict:\n', uncertainties_dict)
# deepAL+: return unlabeled_idxs[uncertainties.sort()[1][:n]] 
sorted_uncertainties_list = sorted(uncertainties_dict.items(), key=lambda x: x[1], reverse=True)
    
unlabeled_idxs[[idx for (idx, uncertainties) in sorted_uncertainties_list[:5]]]

In [None]:
probs_list_dict_20_dropout = get_prob_dropout(model, unlabeled_dataloader, device, unlabeled_data_20, squad['train'])

In [None]:
for_check
# the prediction are the same

In [None]:
probs_list_dict_20_dropout

In [None]:
probs_list_dict_20

In [None]:
confidence_list_dict = []
for d in probs_list_dict_20:
    if len(d['probs']) > 1: # if prob_dict['probs'] is not 0
        confidence = max(d['probs'])
        confidence_list_dict.append(
            {'idx': d['idx'], 
                'confidence': confidence}
                )
    elif d['idx']:
        confidence_list_dict.append(
            {'idx': d['idx'], 
                'confidence': np.array([0])}
                )
# deepAL+: return unlabeled_idxs[uncertainties.sort()[1][:n]]
sorted_confidence_dict = sorted(confidence_list_dict, key=lambda d: d['confidence'])   
unlabeled_idxs_20[[confidence_dict['idx'][0] for confidence_dict in sorted_confidence_dict[:5]]]