In [None]:
!apt install libpoppler-cpp-dev
!pip install pdftotext
!pip install transformers
!python -m nltk.downloader punkt
!pip install sentencepiece

import pdftotext
import re

import itertools
from nltk import sent_tokenize
import torch
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

from transformers import AutoModelForQuestionAnswering

import random

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
libpoppler-cpp-dev is already the newest version (22.02.0-2ubuntu0.4).
0 upgraded, 0 newly installed, 0 to remove and 45 not upgraded.
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [None]:
class TextProcessingAndContextCreation:
    """Convert the PDF of an Act into a list of plain-text context chunks.

    ``get_context_chunks`` is the only public entry point. It reads every page
    with ``pdftotext``, drops the pages that precede the first later page whose
    opening words repeat those of page 0 (presumably a table of contents comes
    before the body of the Act -- confirm against the input PDFs), strips the
    first-page header, footnotes and all-uppercase lines, splits the text on
    numbered section markers and emits chunks of ``MIN_CONTEXT_WORDS`` to
    ``MAX_CONTEXT_WORDS`` words.
    """

    # Word-count bounds of a context chunk.
    MIN_CONTEXT_WORDS = 20
    MAX_CONTEXT_WORDS = 350

    # Lower-case roman numerals 1..60 used for "(iv)"-style sub-clause markers.
    _ROMAN_NUMERALS = [
        'i', 'ii', 'iii', 'iv', 'v', 'vi', 'vii', 'viii', 'ix', 'x',
        'xi', 'xii', 'xiii', 'xiv', 'xv', 'xvi', 'xvii', 'xviii', 'xix', 'xx',
        'xxi', 'xxii', 'xxiii', 'xxiv', 'xxv', 'xxvi', 'xxvii', 'xxviii', 'xxix', 'xxx',
        'xxxi', 'xxxii', 'xxxiii', 'xxxiv', 'xxxv', 'xxxvi', 'xxxvii', 'xxxviii', 'xxxix', 'xl',
        'xli', 'xlii', 'xliii', 'xliv', 'xlv', 'xlvi', 'xlvii', 'xlviii', 'xlix', 'l',
        'li', 'lii', 'liii', 'liv', 'lv', 'lvi', 'lvii', 'lviii', 'lix', 'lx',
    ]

    # A word that starts with "(1)", "(a)"/"(ab)", "(iv)" or "(A)".
    _LEVEL_MARKER_REGEX = re.compile(
        r"\([0-9]+\)"
        r"|\([a-z]{1,2}\)"
        r"|\((?:" + "|".join(_ROMAN_NUMERALS) + r")\)"
        r"|\([A-Z]\)"
    )

    # A marker only counts as a split point when the previous word ends with one of these.
    _SPLIT_PUNCTUATION = ",-.:;"
    # Fallback split points: words ending a sentence or clause.
    _END_MARKERS = ".;"

    @classmethod
    def get_context_chunks(cls, file_name):
        """Return the list of context strings extracted from the PDF ``file_name``.

        Prints "Error encountered" and returns ``[]`` when the first page of the
        body cannot be located.
        """
        raw_text = cls._get_raw_text_from_pdf(file_name)

        first_page_number = cls._get_the_first_page_number(raw_text)
        if first_page_number == -1:
            print("Error encountered")
            return []

        raw_text = raw_text[first_page_number:]

        # These three helpers edit the list of pages in place.
        cls._remove_header_of_first_page(raw_text)
        cls._delete_footer_notes(raw_text)
        cls._remove_captial_words(raw_text)

        text = cls._join_all_pages(raw_text)
        text = cls._split_the_text_on_bold_points(text)

        # Edits the list of sections in place.
        cls._filter_and_get_plain_text(text)

        return cls._get_the_contexts(text)

    @classmethod
    def _get_raw_text_from_pdf(cls, file_name):
        """Return the text of every page of the PDF as a list of strings."""
        with open(file_name, "rb") as f:
            pdf_text = pdftotext.PDF(f)
            return [str(page) for page in pdf_text]

    @classmethod
    def _get_the_first_page_number(cls, raw_text):
        """Return the index of the first later page that opens like page 0, or -1.

        The opening of page 0 is every word up to and including the token
        'ACT,' or, when that token is absent, the first word starting with four
        digits. -1 is returned when there are no pages, when neither anchor is
        found, or when no later page starts with the same words.
        """
        if not raw_text:
            return -1

        first_page_words = raw_text[0].split()

        index = -1
        try:
            index = first_page_words.index('ACT,')
        except ValueError:
            year_regex = r"[0-9]{4}"
            for i, word in enumerate(first_page_words):
                if re.match(year_regex, word) is not None:
                    index = i
                    break

        if index == -1:
            return -1

        opening_words = first_page_words[:index + 1]
        for i in range(1, len(raw_text)):
            if raw_text[i].split()[:index + 1] == opening_words:
                return i

        return -1

    @classmethod
    def _remove_header_of_first_page(cls, raw_text):
        """Drop, in place, everything on page 0 up to the first line containing "[...]".

        The page is left untouched when it has no bracketed span.
        """
        if not raw_text:
            return

        # [\s\S]*? lazily skips any characters (newlines included) before the bracketed span.
        header = re.match(r"[\s\S]*?\[.*\]", raw_text[0])
        if header is not None:
            raw_text[0] = raw_text[0][header.end():]

    @classmethod
    def _delete_footer_notes(cls, raw_text):
        """Strip, in place, the last two lines and any trailing footnotes of each page.

        Scanning upwards, the first line that starts with "1." is treated as the
        beginning of the footnotes and removed together with everything below
        it. A match on the very first line does not truncate the page.
        """
        footer_regex = r"1\..*"
        for i in range(len(raw_text)):
            lines = raw_text[i].split('\n')[:-2]

            footer_index = None
            for j in range(len(lines) - 1, -1, -1):
                if re.match(footer_regex, lines[j]) is not None:
                    footer_index = j
                    break

            # None (no footnote) and 0 (page starts with "1.") both keep the whole page.
            if footer_index:
                lines = lines[:footer_index]
            raw_text[i] = "\n".join(lines)

    @classmethod
    def _remove_captial_words(cls, raw_text):
        """Remove, in place, every line of every page for which ``str.isupper()`` is true."""
        for i in range(len(raw_text)):
            kept_lines = [line for line in raw_text[i].split('\n') if not line.isupper()]
            raw_text[i] = "\n".join(kept_lines)

    @classmethod
    def _join_all_pages(cls, raw_text):
        """Concatenate the pages into one string separated by newlines."""
        return "\n".join(raw_text)

    @classmethod
    def _split_the_text_on_bold_points(cls, text):
        """Split ``text`` on section numbers such as "12." or "12A." followed by a non-newline character."""
        split_regex = r"[0-9]+[A-Z]*\.[^\n]"
        return re.split(split_regex, text)

    @classmethod
    def _filter_and_get_plain_text(cls, text):
        """Normalise, in place, quotes, newlines, repeated spaces and long dashes in each section."""
        for i in range(len(text)):
            text[i] = re.sub(r"(“|”|’)", "\"", text[i])
            text[i] = re.sub(r"\n", " ", text[i])
            text[i] = re.sub(r" +", " ", text[i])
            text[i] = re.sub(r"––", "- ", text[i])
            text[i] = re.sub(r"—", "- ", text[i])

    @classmethod
    def _get_the_contexts(cls, text):
        """Turn the list of sections into the final list of context strings.

        Sections shorter than ``MIN_CONTEXT_WORDS`` words are dropped, sections
        of up to ``MAX_CONTEXT_WORDS`` words are kept as they are, longer ones
        are divided by ``_get_contexts_greater_than_max_size``.
        """
        contexts = []
        for section in text:
            word_count = len(section.split())
            if word_count < cls.MIN_CONTEXT_WORDS:
                continue
            if word_count <= cls.MAX_CONTEXT_WORDS:
                contexts.append(section)
            else:
                for chunk in cls._get_contexts_greater_than_max_size(section):
                    contexts.append(" ".join(chunk))

        return contexts

    @classmethod
    def _get_contexts_greater_than_max_size(cls, text):
        """Divide a long section into chunks of at most ``MAX_CONTEXT_WORDS`` words.

        Returns a list of chunks, each chunk being a list of words. Chunks
        shorter than ``MIN_CONTEXT_WORDS`` words are discarded. Every iteration
        consumes at least one word, so the loop always terminates.
        """
        words = text.split()
        contexts = []
        current_index = 0

        while current_index < len(words):
            window = words[current_index:current_index + cls.MAX_CONTEXT_WORDS]

            if current_index + len(window) >= len(words):
                # The remainder fits in one window: take it whole.
                chunk = window
            else:
                chunk = window[:cls._find_split_point(window)]

            current_index += len(chunk)
            if len(chunk) >= cls.MIN_CONTEXT_WORDS:
                contexts.append(chunk)

        return contexts

    @classmethod
    def _find_split_point(cls, window):
        """Return how many leading words of ``window`` form the next chunk."""
        # 1. Prefer the last sub-clause marker in the second half of the window
        #    whose previous word ends with punctuation; the marker opens the next chunk.
        for i in range(len(window) - 1, len(window) // 2, -1):
            if cls._LEVEL_MARKER_REGEX.match(window[i]) is not None \
                    and window[i - 1][-1] in cls._SPLIT_PUNCTUATION:
                return i

        # 2. Otherwise cut right after the last word that ends a sentence or clause.
        for i in range(len(window) - 1, 0, -1):
            if window[i][-1] in cls._END_MARKERS:
                return i + 1

        # 3. No natural boundary at all: hard cut so that the caller keeps advancing.
        return len(window)


In [None]:
import itertools

from nltk import sent_tokenize

import torch
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

# Hugging Face model identifiers used as the default settings of QGPipeline:
#   "model"     - checkpoint loaded as the question-generation model
#   "ans_model" - list of checkpoints loaded as answer-extraction models
PIPELINE_SETTINGS = {
    # Alternative question-generation checkpoint (currently disabled):
    #"model": "valhalla/t5-base-qg-hl",
    "model": "mrm8488/t5-base-finetuned-question-generation-ap",    # Question Generation model
    "ans_model": ["valhalla/t5-base-qa-qg-hl"]                      # List of Ans extraction models. We can add more than one.
}

In [None]:
class QGPipeline:
    """Generate question/answer pairs from a passage of text.

    Candidate answers are extracted sentence by sentence with one or more
    seq2seq answer-extraction models; a question is then generated for every
    answer with the question-generation model. Each model is always paired
    with the tokenizer loaded from the same checkpoint.
    """

    def __init__(self, pipeline_settings: dict = None, use_cuda: bool = True):
        """Load the models and tokenizers and move the models to the chosen device.

        Args:
            pipeline_settings: dict with the keys 'model' (question-generation
                checkpoint) and 'ans_model' (list of answer-extraction
                checkpoints). Defaults to the module-level PIPELINE_SETTINGS,
                looked up when the pipeline is constructed.
            use_cuda: run on the GPU when one is available.
        """
        if pipeline_settings is None:
            pipeline_settings = PIPELINE_SETTINGS

        self.model = AutoModelForSeq2SeqLM.from_pretrained(pipeline_settings['model'])
        self.tokenizer = AutoTokenizer.from_pretrained(pipeline_settings['model'], use_fast=False)

        self.ans_model = []
        self.ans_tokenizer = []
        for checkpoint in pipeline_settings['ans_model']:
            self.ans_model.append(AutoModelForSeq2SeqLM.from_pretrained(checkpoint))
            self.ans_tokenizer.append(AutoTokenizer.from_pretrained(checkpoint, use_fast=False))

        self.device = "cuda" if torch.cuda.is_available() and use_cuda else "cpu"
        self.model.to(self.device)
        for ans_model in self.ans_model:
            if ans_model is not self.model:
                ans_model.to(self.device)

    def __call__(self, text: str):
        """Return a list of {'question': ..., 'answer': ...} dicts for ``text``.

        Returns ``[]`` when the text is empty or no answer could be extracted.
        """
        input_text = " ".join(text.split())
        if not input_text:
            return []

        answers = self._extract_answers(input_text)
        if len(answers) == 0:
            return []

        questions = self._generate_questions(answers, input_text)
        return [
            {'question': question, 'answer': answer}
            for question, answer in zip(questions, answers)
        ]

    def _extract_answers(self, context):
        """Return the de-duplicated candidate answers found in ``context``.

        One input per sentence is built (that sentence highlighted) and fed to
        every answer-extraction model. The text before the first '<sep>' of
        each generated sequence is kept. Order of first appearance is
        preserved so that repeated runs give the same result.
        """
        inputs = self._prepare_inputs_for_ans_extraction(context)
        if not inputs:
            return []

        answers = []
        for ans_model, ans_tokenizer in zip(self.ans_model, self.ans_tokenizer):
            # Encode with the tokenizer that belongs to this particular model.
            encoded = self._tokenize(inputs, padding=True, truncation=True, tokenizer=ans_tokenizer)
            outs = ans_model.generate(
                input_ids=encoded['input_ids'].to(self.device),
                attention_mask=encoded['attention_mask'].to(self.device),
                max_length=64,
            )

            for ids in outs:
                # Special tokens are kept while decoding because '<sep>' delimits the answer.
                decoded = ans_tokenizer.decode(ids, skip_special_tokens=False)
                answer = decoded.split('<sep>')[0]
                answer = answer.replace("<pad>", "").replace("</s>", "").strip()
                if answer:
                    answers.append(answer)

        # dict.fromkeys removes duplicates while keeping the first-seen order.
        return list(dict.fromkeys(answers))

    def _prepare_inputs_for_ans_extraction(self, text):
        """Build one "extract answers: ..." input per sentence of ``text``.

        In the i-th input the i-th sentence is wrapped in "<hl> ... <hl>".
        """
        sents = sent_tokenize(text)
        inputs = []
        for i in range(len(sents)):
            # Append the text 'extract answers' to each sample
            source_text = "extract answers:"
            for j, sent in enumerate(sents):
                if i == j:
                    sent = "<hl> %s <hl>" % sent
                source_text = "%s %s" % (source_text, sent)
                source_text = source_text.strip()

            source_text = source_text + " </s>"
            inputs.append(source_text)

        return inputs

    def _tokenize(self, inputs, padding=True, truncation=True, add_special_tokens=True,
                  max_length=512, tokenizer=None):
        """Encode a list of strings into PyTorch tensors.

        Args:
            inputs: list of input strings.
            padding: pad every sequence to ``max_length`` when true.
            truncation: truncate sequences longer than ``max_length``.
            add_special_tokens: forwarded to the tokenizer.
            max_length: maximum (and padded) sequence length.
            tokenizer: tokenizer to encode with; defaults to the first
                answer-extraction tokenizer.
        """
        if tokenizer is None:
            tokenizer = self.ans_tokenizer[0]

        return tokenizer(
            inputs,
            max_length=max_length,
            add_special_tokens=add_special_tokens,
            truncation=truncation,
            padding="max_length" if padding else False,
            return_tensors="pt"
        )

    def _generate_questions(self, answers, context):
        """Generate one question per answer, in the same order as ``answers``."""
        questions = []
        for answer in answers:
            input_text = "answer: %s  context: %s </s>" % (answer, context)
            # The question-generation model has its own tokenizer.
            inputs = self._tokenize([input_text], padding=True, truncation=True,
                                    tokenizer=self.tokenizer)

            outs = self.model.generate(
                input_ids=inputs['input_ids'].to(self.device),
                attention_mask=inputs['attention_mask'].to(self.device),
                max_length=64,
                num_beams=4,
            )
            questions.extend(self.tokenizer.decode(ids, skip_special_tokens=True) for ids in outs)

        return [question.replace("question: ", "") for question in questions]

In [None]:
#
#-----------Question Answering Module-----------
#
import torch
import numpy as np
import transformers
from transformers import AutoModelForQuestionAnswering, AutoTokenizer


class QA():
    """Extractive question answering with a SQuAD-finetuned BERT checkpoint.

    ``answer`` returns the single best answer text over a list of
    (question, context) pairs; ``answer_batch`` returns the best answer for
    every pair of one batch.
    """

    def __init__(self):
        """Load model and fast tokenizer and move the model to GPU when available."""
        self.model_file = "bert-large-uncased-whole-word-masking-finetuned-squad"
        self.model = AutoModelForQuestionAnswering.from_pretrained(self.model_file)
        self.tokenizer = AutoTokenizer.from_pretrained(self.model_file)
        # Offset mappings and sequence ids are only provided by fast tokenizers.
        assert isinstance(self.tokenizer, transformers.PreTrainedTokenizerFast)

        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.model.to(self.device)
        self.model.eval()

    def answer_batch(self, questions, contexts, best_size=20, max_answer_length=100):
        """Return the best answer for every (question, context) pair.

        Args:
            questions: list of question strings.
            contexts: list of context strings, aligned with ``questions``.
            best_size: number of top start and top end positions combined per feature.
            max_answer_length: maximum answer length in tokens.

        Returns:
            One ``{"text": str, "score": float}`` dict per context. The text is
            "<no_answer>" (score 0.0) when no valid span was found.
        """
        if len(contexts) == 0:
            return []

        max_length = 480    #max length of input(question + context)
        doc_stride = 128    #length of overlap between consecutive features of the same example

        # Long contexts overflow into several overlapping features.
        encodings = self.tokenizer(
                questions,
                contexts,
                truncation="only_second",
                max_length=max_length,
                stride=doc_stride,
                return_overflowing_tokens=True,
                return_offsets_mapping=True,
                return_attention_mask=True,
                padding="max_length"
            )

        input_ids = torch.tensor(encodings["input_ids"], device=self.device)
        token_type_ids = torch.tensor(encodings["token_type_ids"], device=self.device)
        attention_mask = torch.tensor(encodings["attention_mask"], device=self.device)

        with torch.no_grad():
            scores = self.model(input_ids, token_type_ids=token_type_ids, attention_mask=attention_mask)
        all_start_logits = scores.start_logits.detach().cpu().numpy()
        all_end_logits = scores.end_logits.detach().cpu().numpy()

        # Release the device tensors before the CPU-side post-processing.
        del input_ids
        del token_type_ids
        del attention_mask
        del scores

        context_mapping = encodings["overflow_to_sample_mapping"]
        offset_mappings = encodings["offset_mapping"]

        # context index -> indices of the features produced from that context
        context_features = dict()
        for feature_index, context_index in enumerate(context_mapping):
            context_features.setdefault(context_index, []).append(feature_index)

        best_answers = list()
        for c, context in enumerate(contexts):
            valid_answers = []

            for feature_index in context_features.get(c, []):
                start_logits = all_start_logits[feature_index]
                end_logits = all_end_logits[feature_index]
                offset_mapping = offset_mappings[feature_index]

                # sequence id 1 marks context tokens; question tokens are 0 and
                # special/padding tokens are None. Only context tokens may
                # delimit an answer, because offsets are sliced out of ``context``.
                sequence_ids = encodings.sequence_ids(feature_index)
                token_count = min(len(offset_mapping), len(sequence_ids))

                start_indexes = [
                    index
                    for index in np.argsort(start_logits)[-1 : -best_size - 1 : -1].tolist()
                    if index < token_count and sequence_ids[index] == 1
                ]
                end_indexes = [
                    index
                    for index in np.argsort(end_logits)[-1 : -best_size - 1 : -1].tolist()
                    if index < token_count and sequence_ids[index] == 1
                ]

                for start_index in start_indexes:
                    for end_index in end_indexes:
                        if end_index < start_index or end_index - start_index + 1 > max_answer_length:
                            continue

                        start_char = offset_mapping[start_index][0]
                        end_char = offset_mapping[end_index][1]
                        valid_answers.append(
                            {
                                "score": float(start_logits[start_index] + end_logits[end_index]),
                                "text": context[start_char: end_char]
                            }
                        )

            if len(valid_answers) > 0:
                best_answer = max(valid_answers, key=lambda x: x["score"])
            else:
                best_answer = {"text": "<no_answer>", "score": 0.0}
            best_answers.append(best_answer)

        return best_answers

    def answer(self, questions, contexts):
        """Return the text of the highest-scoring answer over all pairs.

        The pairs are processed in batches of 32. Returns "<no_answer>" when
        ``contexts`` is empty.
        """
        if len(contexts) == 0:
            return "<no_answer>"

        batch_size = 32
        best_anss = []

        for batch_start in range(0, len(contexts), batch_size):
            if self.device.type == "cuda":
                torch.cuda.empty_cache()

            batch_end = batch_start + batch_size
            best_anss.extend(
                self.answer_batch(questions[batch_start:batch_end], contexts[batch_start:batch_end])
            )

        return max(best_anss, key=lambda x: x["score"])["text"]

In [None]:
class Main :
    """Glue between context extraction, question generation and question answering."""

    # Both pipelines are built once, when this class body is executed.
    qg = QGPipeline()
    qa = QA()

    @classmethod
    def get_contexts_given_the_doc(cls, doc_name) :
        """Return the context chunks extracted from the PDF ``doc_name``."""
        return TextProcessingAndContextCreation.get_context_chunks(doc_name)


    @classmethod
    def get_questions_given_the_contexts(cls, context_list) :
        """Return a ``[question, context]`` pair for every question generated from every context."""
        return [
            [generated['question'], context]
            for context in context_list
            for generated in cls.qg(context)
        ]


    @classmethod
    def get_answer_given_the_question_and_context_list(cls, question_context_list) :
        """Answer each ``[question, context]`` pair on its own; returns one answer text per pair."""
        return [
            cls.qa.answer([pair[0]], [pair[1]])
            for pair in question_context_list
        ]


    @classmethod
    def get_answer_for_single_question_given_context_list(cls, question, context_list) :
        """Ask ``question`` against every context and return the best-scoring answer text."""
        return cls.qa.answer([question] * len(context_list), context_list)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means tha

In [None]:
# Alternative input documents (uncomment exactly one doc_name to switch):
# doc_name = "/content/The Sexual Harassment of Women at Workplace Act, 2013.pdf"
#doc_name = "/content/The Aadhaar (Targeted Delivery of Financial and Other Subsidies, Benefits and Services) Act, 2016.pdf"
doc_name = "/content/The Consumer Protection Act, 2019.pdf"

# Split the document into context chunks and keep only the first five of them.
contexts = Main.get_contexts_given_the_doc(doc_name)
contexts = contexts[:5]
# Generate [question, context] pairs, then answer every question against its own context.
generated_questions_and_contexts = Main.get_questions_given_the_contexts(contexts)
answers = Main.get_answer_given_the_question_and_context_list(generated_questions_and_contexts)



In [None]:
# Show every generated question followed by the answer extracted for it.
for question_and_context, extracted_answer in zip(generated_questions_and_contexts, answers) :
    print(question_and_context[0], extracted_answer, sep="\n")

Who enacted the Act in the Seventieth Year of the Republic of India?
Parliament
An Act to provide for protection of the interests of who?
consumers
When shall the Consumer Protection Act, 2019 come into force?
on such date1
What does the Consumer Protection Act apply to?
all goods and services
What is the exception to the Consumer Protection Act, 2019?
Jammu and Kashmir
What may this act be called?
Consumer Protection Act, 2019
What does "advertisement" mean?
any audio or visual publicity, representation, endorsement or pronouncement made by means of light, sound, smoke, gas, print, electronic media, internet or website
Who means a person who knows that the goods are unsafe to the public?
trader
What does the term "commercial purpose" not include use by a person of goods bought and used by him exclusively for?
earning his livelihood
What does the expression "buys any goods" mean?
offline or online transactions through electronic means or by teleshopping or direct selling or multi-level

In [None]:
# Alternative input document:
# doc_name = "/content/The Sexual Harassment of Women at Workplace Act, 2013.pdf"
doc_name = "/content/The Consumer Protection Act, 2019.pdf"

# Alternative questions:
# question = '''What does "employee" mean without the knowledge of the principal employer?'''
# question = '''What does "advertisement" mean?'''
question = '''What is "Transitional provision"?'''

# The question is asked against every context of the document, so the PDF is
# parsed once and the list is not truncated.
contexts = Main.get_contexts_given_the_doc(doc_name)
answer = Main.get_answer_for_single_question_given_context_list(question, contexts)
print(answer)

every reference therein to the decree shall be construed as reference to the order made under this Act


In [None]:
# Questions asked against the whole document; doc_name comes from an earlier cell.
questions = ['''What is the purpose of the Consumer Protection Act, 2019?''',
             '''What is the penalty for false or misleading advertisements?''',
             '''What are the rights of a consumer under the act?''',
             '''What are the powers and functions of the Central Consumer Protection Authority?''',
             '''What is the jurisdiction of the District Consumer Disputes Redressal Commission?''',
             '''What is "Transitional provision"?'''
             ]

# Every question is matched against all contexts of the document; the best-scoring answer is printed.
contexts = Main.get_contexts_given_the_doc(doc_name)
for i in questions:
  answer = Main.get_answer_for_single_question_given_context_list(i, contexts)
  print(answer)


It extends to the whole of India except the State of Jammu and Kashmir
imprisonment
to call for the records and pass appropriate orders
Power of Central Authority to issue directions and penalties against false or misleading advertisements
each district of the State
every reference therein to the decree shall be construed as reference to the order made under this Act


In [None]:
pip install rouge

Collecting rouge
  Downloading rouge-1.0.1-py3-none-any.whl (13 kB)
Installing collected packages: rouge
Successfully installed rouge-1.0.1


In [None]:
from rouge import Rouge
def calculate_rouge_scores(hypothesis, reference):
    """Print and return the averaged ROUGE-1, ROUGE-2 and ROUGE-L scores.

    Args:
        hypothesis: list of predicted strings.
        reference: list of reference strings, aligned with ``hypothesis``.

    Returns:
        The dict produced by ``Rouge.get_scores(..., avg=True)``, with the
        keys 'rouge-1', 'rouge-2' and 'rouge-l'.

    Raises:
        AssertionError: if the two lists differ in length.
    """
    assert len(hypothesis) == len(reference), "Both lists must have the same number of elements."
    rouge = Rouge()
    scores = rouge.get_scores(hypothesis, reference, avg=True)
    print("ROUGE-1: ", scores['rouge-1'])
    print("ROUGE-2: ", scores['rouge-2'])
    print("ROUGE-L: ", scores['rouge-l'])
    # Returned as well so that callers can use the numbers instead of parsing the printout.
    return scores


predicted_answers = [
    "It extends to the whole of India except the State of Jammu and Kashmir",
    "imprisonment",
    "to call for the records and pass appropriate orders",
    "Power of Central Authority to issue directions and penalties against false or misleading advertisements",
    "each district of the State",
    "every reference therein to the decree shall be construed as reference to the order made under this Act"
]

# Ground truth answers
correct_answers = [
    "The purpose of the Consumer Protection Act, 2019 is to protect and enforce the rights of consumers, prevent violations of consumer rights, ensure no one engages in unfair trade practices or false/misleading advertisements, and provide mechanisms for redressal of consumer disputes .",
    "The penalty for false or misleading advertisements can include a penalty up to ten lakh rupees for each instance. For every subsequent contravention, the penalty can extend up to fifty lakh rupees. Additionally, the endorser can be prohibited from endorsing any product for up to one year, which may extend to three years for subsequent contraventions.",
    "The rights of a consumer under the act include protection against marketing of hazardous goods, being informed about the quality and standard of goods or services, access to a variety of goods or services at competitive prices, the right to be heard, to seek redressal against unfair trade practices, and the right to consumer awareness.",
    "The Central Consumer Protection Authority has the power to enforce consumer rights, prevent unfair trade practices and false advertisements, conduct investigations, handle consumer complaints, review consumer-related issues, promote consumer rights awareness, and issue safety notices for hazardous goods and services.",
    "The jurisdiction of the District Consumer Disputes Redressal Commission includes handling complaints where the value of goods or services does not exceed one crore rupees. They can entertain complaints within their local jurisdiction where the opposite party resides or carries on business, or where the cause of action arises.",
    "The Transitional provision refers to the conditions under which any person appointed as President or as a member of the District Commission before the commencement of this Act will continue to hold office until the completion of their term."
]

# Print the averaged ROUGE-1/2/L scores of the predicted answers against the ground-truth answers.
calculate_rouge_scores(predicted_answers, correct_answers)

ROUGE-1:  {'r': 0.09966783947047105, 'p': 0.31587301587301586, 'f': 0.1472591922375079}
ROUGE-2:  {'r': 0.012395118230358505, 'p': 0.0625, 'f': 0.01901234473866306}
ROUGE-L:  {'r': 0.08982987650750808, 'p': 0.2928571428571428, 'f': 0.13350032698928097}


In [None]:
from collections import Counter

def normalize_answer(s):
    """Normalise an answer string for token-level comparison.

    Steps, in order: lower-case, delete ASCII punctuation, replace the
    standalone words "a", "an" and "the" by a space, collapse whitespace.
    """
    import re
    import string

    lowered = s.lower()
    without_punctuation = lowered.translate(str.maketrans('', '', string.punctuation))
    without_articles = re.sub(r'\b(a|an|the)\b', ' ', without_punctuation)
    return ' '.join(without_articles.split())

def f1_score(prediction, ground_truth):
    """Token-level F1 between two strings after ``normalize_answer``.

    Returns 0 when the normalised strings share no token.
    """
    predicted_tokens = normalize_answer(prediction).split()
    reference_tokens = normalize_answer(ground_truth).split()

    # Multiset intersection: each shared token counts as often as it occurs in both.
    shared = Counter(predicted_tokens) & Counter(reference_tokens)
    overlap = sum(shared.values())
    if not overlap:
        return 0

    precision = overlap / len(predicted_tokens)
    recall = overlap / len(reference_tokens)
    return (2 * precision * recall) / (precision + recall)

# List of predicted and ground truth answers
predicted_answers = [
    "It extends to the whole of India except the State of Jammu and Kashmir",
    "imprisonment",
    "to call for the records and pass appropriate orders",
    "Power of Central Authority to issue directions and penalties against false or misleading advertisements",
    "each district of the State",
    "every reference therein to the decree shall be construed as reference to the order made under this Act"
]

correct_answers = [
    "The purpose of the Consumer Protection Act, 2019 is to protect and enforce the rights of consumers, prevent violations of consumer rights, ensure no one engages in unfair trade practices or false/misleading advertisements, and provide mechanisms for redressal of consumer disputes .",
    "The penalty for false or misleading advertisements can include a penalty up to ten lakh rupees for each instance. For every subsequent contravention, the penalty can extend up to fifty lakh rupees. Additionally, the endorser can be prohibited from endorsing any product for up to one year, which may extend to three years for subsequent contraventions.",
    "The rights of a consumer under the act include protection against marketing of hazardous goods, being informed about the quality and standard of goods or services, access to a variety of goods or services at competitive prices, the right to be heard, to seek redressal against unfair trade practices, and the right to consumer awareness.",
    "The Central Consumer Protection Authority has the power to enforce consumer rights, prevent unfair trade practices and false advertisements, conduct investigations, handle consumer complaints, review consumer-related issues, promote consumer rights awareness, and issue safety notices for hazardous goods and services.",
    "The jurisdiction of the District Consumer Disputes Redressal Commission includes handling complaints where the value of goods or services does not exceed one crore rupees. They can entertain complaints within their local jurisdiction where the opposite party resides or carries on business, or where the cause of action arises.",
    "The Transitional provision refers to the conditions under which any person appointed as President or as a member of the District Commission before the commencement of this Act will continue to hold office until the completion of their term."
]

# Token-level F1 of every predicted answer against its ground-truth answer
f1_scores = list(map(f1_score, predicted_answers, correct_answers))

# One line per QA pair, numbered from 1
for pair_number, pair_f1 in enumerate(f1_scores, start=1):
    print(f"F1 Score for QA pair {pair_number}: {pair_f1:.3f}")


F1 Score for QA pair 1: 0.160
F1 Score for QA pair 2: 0.000
F1 Score for QA pair 3: 0.071
F1 Score for QA pair 4: 0.308
F1 Score for QA pair 5: 0.083
F1 Score for QA pair 6: 0.245
