# Installing Transformers and Torch

In [81]:
pip install transformers

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [82]:
pip install torch

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


# Installing Datasets

In [83]:
pip install datasets

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


# Loading Datasets

In [7]:
from datasets import load_dataset

dataset = load_dataset("squad", "plain_text", split="train")

Downloading builder script:   0%|          | 0.00/5.27k [00:00<?, ?B/s]

Downloading metadata:   0%|          | 0.00/2.36k [00:00<?, ?B/s]

Downloading readme:   0%|          | 0.00/7.67k [00:00<?, ?B/s]

Downloading and preparing dataset squad/plain_text to /root/.cache/huggingface/datasets/squad/plain_text/1.0.0/d6ec3ceb99ca480ce37cdd35555d6cb2511d223b9150cce08a837ef62ffea453...


Downloading data files:   0%|          | 0/2 [00:00<?, ?it/s]

Downloading data:   0%|          | 0.00/8.12M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/1.05M [00:00<?, ?B/s]

Extracting data files:   0%|          | 0/2 [00:00<?, ?it/s]

Generating train split:   0%|          | 0/87599 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/10570 [00:00<?, ? examples/s]

Dataset squad downloaded and prepared to /root/.cache/huggingface/datasets/squad/plain_text/1.0.0/d6ec3ceb99ca480ce37cdd35555d6cb2511d223b9150cce08a837ef62ffea453. Subsequent calls will reuse this data.


# Getting bert-base-uncased Model

In [8]:
from transformers import BertTokenizer, BertForQuestionAnswering, RobertaTokenizer, RobertaForQuestionAnswering
from transformers.data.processors.squad import SquadV2Processor
from transformers.data.metrics.squad_metrics import compute_predictions_logits

In [52]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load the tokenizer and model for BERT-uncased

In [53]:
# Load the tokenizer and model for BERT-uncased
bert_tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
bert_model = BertForQuestionAnswering.from_pretrained('bert-base-uncased')
bert_model.to(device)
bert_model.train()

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForQuestionAnswering: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForQuestionAnswering from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForQuestionAnswering from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForQuestionAnswering were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['qa_out

BertForQuestionAnswering(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elem

# Load the tokenizer and model for RoBERTa

In [54]:
# Load the tokenizer and model for RoBERTa
roberta_tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
roberta_model = RobertaForQuestionAnswering.from_pretrained('roberta-base')
roberta_model.to(device)
roberta_model.train()

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForQuestionAnswering: ['lm_head.layer_norm.bias', 'lm_head.dense.weight', 'lm_head.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.bias']
- This IS expected if you are initializing RobertaForQuestionAnswering from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForQuestionAnswering from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForQuestionAnswering were not initialized from the model checkpoint at roberta-base and are newly initialized: ['qa_outputs.bias', 'qa_outputs.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inf

RobertaForQuestionAnswering(
  (roberta): RobertaModel(
    (embeddings): RobertaEmbeddings(
      (word_embeddings): Embedding(50265, 768, padding_idx=1)
      (position_embeddings): Embedding(514, 768, padding_idx=1)
      (token_type_embeddings): Embedding(1, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): RobertaEncoder(
      (layer): ModuleList(
        (0-11): 12 x RobertaLayer(
          (attention): RobertaAttention(
            (self): RobertaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): RobertaSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (Lay

# Load the tokenizer and model for RoBERTa base squad2

In [89]:
# Load the tokenizer and model for RoBERTa base squad2
roberta_squad_tokenizer = RobertaTokenizer.from_pretrained('deepset/roberta-base-squad2')
roberta_squad_model = RobertaForQuestionAnswering.from_pretrained('deepset/roberta-base-squad2')
roberta_squad_model.to(device)
roberta_squad_model.train()

RobertaForQuestionAnswering(
  (roberta): RobertaModel(
    (embeddings): RobertaEmbeddings(
      (word_embeddings): Embedding(50265, 768, padding_idx=1)
      (position_embeddings): Embedding(514, 768, padding_idx=1)
      (token_type_embeddings): Embedding(1, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): RobertaEncoder(
      (layer): ModuleList(
        (0-11): 12 x RobertaLayer(
          (attention): RobertaAttention(
            (self): RobertaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): RobertaSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (Lay

In [50]:
pip install torch

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [51]:
import torch

# Generate predictions function

In [95]:
# Function to generate predictions from a model
def generate_predictions(dataset, tokenizer, model):
    predictions = []
    for example in dataset:
        inputs = tokenizer.encode_plus(example["question"], example["context"], add_special_tokens=True, return_tensors="pt")
        input_ids = inputs["input_ids"].to(model.device)
        attention_mask = inputs["attention_mask"].to(model.device)

        with torch.no_grad():
            outputs = model(input_ids=input_ids, attention_mask=attention_mask)
            start_logits = outputs.start_logits
            end_logits = outputs.end_logits

        start_index = torch.argmax(start_logits)
        end_index = torch.argmax(end_logits)

        answer = tokenizer.convert_tokens_to_string(tokenizer.convert_ids_to_tokens(input_ids[0][start_index:end_index + 1]))
        predictions.append(answer.strip())

    return predictions

# Load the SQuAD validation dataset

In [96]:
# Load the SQuAD validation dataset
datasetv = load_dataset("squad", "plain_text", split="validation[0:500]")



# Generate predictions using RoBERTa

In [59]:
# Generate predictions using RoBERTa
roberta_predictions = generate_predictions(datasetv, roberta_tokenizer, roberta_model)

# Generate predictions using Bert




In [60]:
# Generate predictions using Bert
bert_predictions = generate_predictions(datasetv, bert_tokenizer, bert_model)

# Generate predictions using roberta-base-squad-2

In [97]:
roberta_squad_predictions = generate_predictions(datasetv, roberta_squad_tokenizer, roberta_squad_model)

# true_answers in validation dataset

In [98]:
true_answers=[]
for ans in datasetv:
  true_answers.append(ans["answers"]["text"][0])
  #print(ans["answers"]["text"])

In [66]:
type(roberta_predictions)

list

In [70]:
type(true_answers)

list

# getting exact match count

In [99]:
# Assume you have a list of true answers and a list of predicted answers
predicted_answers = roberta_squad_predictions

# Prepare the examples and predictions for squad_evaluate
exact_match_count = 0
total_examples = len(true_answers)

for true_answer, predicted_answer in zip(true_answers, predicted_answers):
    if true_answer == predicted_answer:
        exact_match_count += 1

exact_match_score = exact_match_count / total_examples

# Print the exact match score
print("Exact match score:", exact_match_score)

Exact match score: 0.71


In [100]:
print(roberta_squad_predictions[:5])

['Denver Broncos', 'Denver Broncos', "Levi's Stadium in the San Francisco Bay Area at Santa Clara, California", 'Denver Broncos', 'gold']


In [101]:
print(true_answers[:5])

['Denver Broncos', 'Carolina Panthers', 'Santa Clara, California', 'Denver Broncos', 'gold']
