Complete question-answering pipeline with evaluation: OCR text cleaning, spaCy NER, sentence retrieval, extractive QA, and BLEU/ROUGE scoring.

In [None]:
import re
import pandas as pd
from sentence_transformers import SentenceTransformer, util
from transformers import pipeline
import spacy
import nltk
from rouge_score import rouge_scorer
from nltk.translate.bleu_score import sentence_bleu

In [None]:

# Model identifiers, named once so each loader call below reads clearly.
EMBEDDING_MODEL_NAME = 'all-MiniLM-L6-v2'
QA_MODEL_NAME = "deepset/roberta-base-squad2"
SPACY_MODEL_NAME = "en_core_web_trf"

# Sentence encoder: used to embed the questions and the candidate sentences.
sentence_model = SentenceTransformer(EMBEDDING_MODEL_NAME)

# Question-answering pipeline that produces an answer from a question and
# a context string.
qa_pipeline = pipeline("question-answering", model=QA_MODEL_NAME)

# spaCy pipeline whose detected entities (doc.ents) are used downstream.
nlp = spacy.load(SPACY_MODEL_NAME)

In [None]:
# Spreadsheet holding the questions and their reference answers.
df = pd.read_excel("questions_answers.xlsx")

# Raw OCR output that serves as the source text (placeholder content).
ocr_text = """ OCR TEXT HERE   """

# Normalise the OCR text, innermost step first:
#   1. collapse every run of whitespace into a single space,
#   2. remove every character that is not a word character, whitespace
#      or a period,
#   3. trim leading and trailing whitespace.
cleaned_text = re.sub(
    r'[^\w\s.]', '',
    re.sub(r'\s+', ' ', ocr_text),
).strip()
print("Cleaned Text:", cleaned_text)

In [None]:
# Named-entity recognition with spaCy over the cleaned OCR text.
doc = nlp(cleaned_text)

# Surface text of every detected entity, in the order spaCy reports them.
entities = [span.text for span in doc.ents]

# All entity strings joined into one space-separated string.
ner_text = " ".join(entities)
print("\nNER Extracted Text:", ner_text)

# Candidate context segments: the entity string split on ". ".
# NOTE(review): ner_text is assembled from entity spans only, so ". "
# separators may be rare and this split may produce very few segments --
# confirm that the entity string (rather than cleaned_text) is the intended
# context source.
sentences = ner_text.split(". ")

In [None]:
# Accumulator for the per-question result records (one dict per question).
results = []

# Encode every candidate sentence once, as a tensor, so that each question
# can be compared against the same embeddings without re-encoding them.
sentence_embeddings = sentence_model.encode(sentences, convert_to_tensor=True)


def calculate_bleu(reference, candidate):
    """Return the sentence-level BLEU score of *candidate* against *reference*.

    Both strings are tokenised on whitespace and scored with NLTK's
    ``sentence_bleu`` using its default (up to 4-gram) weights.

    Smoothing is applied because the answers scored here are typically only
    a few tokens long: without it, any n-gram order with zero overlap makes
    NLTK emit a warning and drives the score to (almost) zero, even for an
    exact match shorter than four tokens.

    Args:
        reference: The reference (ground-truth) answer string.
        candidate: The generated answer string to be scored.

    Returns:
        The smoothed BLEU score as returned by ``sentence_bleu``.
    """
    # Imported locally so this function is self-contained and does not
    # depend on a change to the file-level import block.
    from nltk.translate.bleu_score import SmoothingFunction

    reference_tokens = reference.split()
    candidate_tokens = candidate.split()
    return sentence_bleu(
        [reference_tokens],  # sentence_bleu expects a list of references
        candidate_tokens,
        smoothing_function=SmoothingFunction().method1,
    )


def calculate_rouge(reference, candidate):
    """Score *candidate* against *reference* with ROUGE-1, ROUGE-2 and ROUGE-L.

    A new stemming-enabled ``RougeScorer`` is built on each call.

    Args:
        reference: The reference (ground-truth) answer string.
        candidate: The generated answer string to be scored.

    Returns:
        The object returned by ``RougeScorer.score``; callers in this file
        index it by metric name (e.g. ``['rouge1']``) and read ``.fmeasure``.
    """
    rouge_variants = ["rouge1", "rouge2", "rougeL"]
    return rouge_scorer.RougeScorer(rouge_variants, use_stemmer=True).score(
        reference, candidate
    )

In [None]:
# Answer every question in the dataframe, score the generated answer against
# the reference answer, and save the results to a CSV file.

# Number of best-matching sentences used as QA context. Clamped to the number
# of available sentences because topk raises when k is larger than the
# dimension it searches; it is loop-invariant, so it is computed once here.
top_k = min(3, len(sentences))

for idx, row in df.iterrows():
    question = row['Question']
    correct_answer = row['Original Answer']

    # Compare the question embedding with every candidate sentence embedding.
    question_embedding = sentence_model.encode(question, convert_to_tensor=True)
    cosine_scores = util.cos_sim(question_embedding, sentence_embeddings)

    # Row 0 holds the scores for this single question; convert the selected
    # tensor indices to plain ints before indexing the Python list.
    top_results = cosine_scores.topk(top_k)
    top_indices = top_results.indices[0].tolist()
    relevant_sentences = [sentences[i] for i in top_indices]

    # The retrieved sentences, joined by spaces, form the QA context.
    context = " ".join(relevant_sentences)
    result = qa_pipeline(question=question, context=context)
    generated_answer = result["answer"]

    bleu_score = calculate_bleu(correct_answer, generated_answer)
    rouge_score = calculate_rouge(correct_answer, generated_answer)

    results.append({
        'Question': question,
        'Generated Answer': generated_answer,
        'BLEU Score': bleu_score,
        # Only the ROUGE-1 F-measure is recorded in the output.
        'ROUGE Score': rouge_score['rouge1'].fmeasure,
    })

results_df = pd.DataFrame(results)

results_df.to_csv("generated_answers_with_scores.csv", index=False)
