<a href="https://colab.research.google.com/github/steffi-priyanka/Question_Answering_Model/blob/main/Finetuned_RoBERTa_QA.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install datasets
!pip install transformers datasets
!pip install Gradio
!pip install accelerate -U
!pip install transformers[torch]
!pip install evaluate


Collecting datasets
  Downloading datasets-2.18.0-py3-none-any.whl (510 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m510.5/510.5 kB[0m [31m6.9 MB/s[0m eta [36m0:00:00[0m
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl (116 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m11.8 MB/s[0m eta [36m0:00:00[0m
Collecting xxhash (from datasets)
  Downloading xxhash-3.4.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (194 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m194.1/194.1 kB[0m [31m8.4 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting multiprocess (from datasets)
  Downloading multiprocess-0.70.16-py310-none-any.whl (134 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m134.8/134.8 kB[0m [31m12.6 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: xxhash, dill, multiprocess, datasets
Successfully installed datasets

In [5]:
from huggingface_hub import notebook_login
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [3]:
from transformers import pipeline, AutoModelForQuestionAnswering, AutoTokenizer
from transformers import TrainingArguments,Trainer
from datasets import load_dataset,load_metric
from huggingface_hub import notebook_login
from tqdm.auto import tqdm
import gradio as gr
from evaluate import load
import evaluate
import numpy as np
import collections


In [6]:
#Dataset Selection - SQuAD dataset
squad_dataset = load_dataset("squad")
##Load Model And Tokenizer
model_name="deepset/roberta-base-squad2"
model = AutoModelForQuestionAnswering.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)


In [7]:
# Preprocessing the training data
def preprocess_Training_function(examples):

    """Courtesy of https://huggingface.co/docs/transformers/tasks/question_answering"""

    questions = [q.strip() for q in examples["question"]]
    inputs = tokenizer(
        questions,
        examples["context"],
        max_length=384,
        truncation="only_second",
        stride=128,
        return_overflowing_tokens=True,
        return_offsets_mapping=True,
        padding="max_length",
    )

    offset_mapping = inputs.pop("offset_mapping")
    sample_map = inputs.pop("overflow_to_sample_mapping")
    answers = examples["answers"]
    start_positions = []
    end_positions = []

    for i, offset in enumerate(offset_mapping):
        sample_idx = sample_map[i]
        answer = answers[sample_idx]
        start_char = answer["answer_start"][0]
        end_char = answer["answer_start"][0] + len(answer["text"][0])
        sequence_ids = inputs.sequence_ids(i)

        # Find the start and end of the context
        idx = 0
        while sequence_ids[idx] != 1:
            idx += 1
        context_start = idx
        while sequence_ids[idx] == 1:
            idx += 1
        context_end = idx - 1

        # If the answer is not fully inside the context, label it (0, 0)
        if offset[context_start][0] > end_char or offset[context_end][1] < start_char:
            start_positions.append(0)
            end_positions.append(0)
        else:
            # Otherwise it's the start and end token positions
            idx = context_start
            while idx <= context_end and offset[idx][0] <= start_char:
                idx += 1
            start_positions.append(idx - 1)

            idx = context_end
            while idx >= context_start and offset[idx][1] >= end_char:
                idx -= 1
            end_positions.append(idx + 1)

    inputs["start_positions"] = start_positions
    inputs["end_positions"] = end_positions
    return inputs

# Some questions in the  SQUAD dataset have extra spaces at the beginning and at the end. Remove extra spaces

train_dataset = squad_dataset["train"].map(
    preprocess_Training_function,
    batched=True,
    remove_columns=squad_dataset["train"].column_names
)
#__________________________________________________________________________________________________

# pre-processing_validation dataset

def preprocess_validation_examples(examples):
    questions = [q.strip() for q in examples["question"]]
    inputs = tokenizer(
        questions,
        examples["context"],
        max_length=384,
        truncation="only_second",
        stride=128,
        return_overflowing_tokens=True,
        return_offsets_mapping=True,
        padding="max_length",
    )

    sample_map = inputs.pop("overflow_to_sample_mapping")
    example_ids = []

    for i in range(len(inputs["input_ids"])):
        sample_idx = sample_map[i]
        example_ids.append(examples["id"][sample_idx])

        sequence_ids = inputs.sequence_ids(i)
        offset = inputs["offset_mapping"][i]
        inputs["offset_mapping"][i] = [
            o if sequence_ids[k] == 1 else None for k, o in enumerate(offset)
        ]

    inputs["example_id"] = example_ids
    return inputs

# Some questions in the  SQUAD dataset have extra spaces at the begining and at the end. Remove extra spaces
validation_dataset = squad_dataset["validation"].map(
    preprocess_validation_examples,
    batched=True,
    remove_columns=squad_dataset["validation"].column_names
)

Map:   0%|          | 0/87599 [00:00<?, ? examples/s]

Map:   0%|          | 0/10570 [00:00<?, ? examples/s]

In [8]:
# To Compute Metrics
n_best = 20
max_answer_length = 30
squad_metric = evaluate.load("squad_v2")

def compute_metrics(start_logits, end_logits, features, examples):
    example_to_features = collections.defaultdict(list)
    for idx, feature in enumerate(features):
        example_to_features[feature["example_id"]].append(idx)

    predicted_answers = []
    for example in tqdm(examples):
        example_id = example["id"]
        context = example["context"]
        answers = []

        # Loop through all features associated with that example
        for feature_index in example_to_features[example_id]:
            start_logit = start_logits[feature_index]
            end_logit = end_logits[feature_index]
            offsets = features[feature_index]["offset_mapping"]

            start_indexes = np.argsort(start_logit)[-1 : -n_best - 1 : -1].tolist()
            end_indexes = np.argsort(end_logit)[-1 : -n_best - 1 : -1].tolist()
            for start_index in start_indexes:
                for end_index in end_indexes:
                    # Skip answers that are not fully in the context
                    if offsets[start_index] is None or offsets[end_index] is None:
                        continue
                    # Skip answers with a length that is either < 0 or > max_answer_length
                    if (
                        end_index < start_index
                        or end_index - start_index + 1 > max_answer_length
                    ):
                        continue

                    answer = {
                        "text": context[offsets[start_index][0] : offsets[end_index][1]],
                        "logit_score": start_logit[start_index] + end_logit[end_index],
                    }
                    answers.append(answer)

        # Select the answer with the best score
        if len(answers) > 0:
            best_answer = max(answers, key=lambda x: x["logit_score"])
            predicted_answers.append(
                {"id": example_id, "prediction_text": best_answer["text"],"no_answer_probability": 0.0}
            )
        else:
            predicted_answers.append({"id": example_id, "prediction_text": ""})

    theoretical_answers = [{"id": ex["id"], "answers": ex["answers"]} for ex in examples]
    return squad_metric.compute(predictions=predicted_answers, references=theoretical_answers)
#_________________________________________________________________________________________________________

Downloading builder script:   0%|          | 0.00/6.47k [00:00<?, ?B/s]

Downloading extra modules:   0%|          | 0.00/11.3k [00:00<?, ?B/s]

In [9]:
# Setting-up training arguments
training_args = TrainingArguments(
    output_dir="./fine_tuned_roberta",
    evaluation_strategy="epoch",
    learning_rate=1e-5,
    #per_device_train_batch_size=16,
    #per_device_eval_batch_size=16,
    num_train_epochs=3,
    weight_decay=0.01,
    fp16=True,     # speeds up the training on a GPU
    push_to_hub=True
)
#__________________________________________________________________________________________________

# Setup trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=validation_dataset,
    tokenizer=tokenizer,
    # data_collator=data_collator,
    # compute_metrics=compute_metrics                    #To monitor metrics during training
)

# To Train the module
trainer.train()  # took 45 mins to train in v100 GPU


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Epoch,Training Loss,Validation Loss
1,0.7079,No log
2,0.538,No log
3,0.4584,No log


TrainOutput(global_step=33213, training_loss=0.5810154149429881, metrics={'train_runtime': 4068.7791, 'train_samples_per_second': 65.302, 'train_steps_per_second': 8.163, 'total_flos': 5.207010717113395e+16, 'train_loss': 0.5810154149429881, 'epoch': 3.0})

In [11]:
# Converting Examples to Features

example_to_features = collections.defaultdict(list)
for idx, feature in enumerate(validation_dataset):
    example_to_features[feature["example_id"]].append(idx)


In [12]:
#To Save the module
trainer.save_model(trainer.save_model("Roberta_model_1"))
#To Evaluate the model after training
predictions, _, _ = trainer.predict(validation_dataset)
start_logits, end_logits= predictions
compute_metrics(start_logits, end_logits, validation_dataset, squad_dataset["validation"])


  0%|          | 0/10570 [00:00<?, ?it/s]

{'exact': 85.58183538315988,
 'f1': 92.12761875352038,
 'total': 10570,
 'HasAns_exact': 85.58183538315988,
 'HasAns_f1': 92.12761875352038,
 'HasAns_total': 10570,
 'best_exact': 85.58183538315988,
 'best_exact_thresh': 0.0,
 'best_f1': 92.12761875352038,
 'best_f1_thresh': 0.0}

In [14]:
# Check your Fine_tuned Model
qa_pipeline = pipeline("question-answering", model="Roberta_model_1")
QA_input = {
    'question': 'Why is model conversion important?',
    'context': 'The option to convert models between FARM and transformers gives freedom to the user and let people easily switch between frameworks.'
}
answer = qa_pipeline(QA_input)

In [15]:
answer

{'score': 0.4067554175853729,
 'start': 59,
 'end': 132,
 'answer': 'gives freedom to the user and let people easily switch between frameworks'}

In [16]:
# Gradio app Launch
def func(context,question):
  result=qa_pipeline(question=question,context=context)
  return result['answer']

# Creating GUI Interface
demo = gr.Interface(fn=func, inputs = ['textbox', 'text'],
                   outputs = 'textbox', title = 'Question Answering bot',
                   theme = 'default', description = 'Input context and question, then get answers!',
                    allow_flagging="manual")
# Launching the Application
demo.launch(share=True)


Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
Running on public URL: https://d64bc3d59d8190882a.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)


