# Validation Pipeline

## Import Dependencies

In [None]:
import random
import pandas as pd

from sklearn.metrics import f1_score
from sklearn.metrics import recall_score
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score

## Method(s)

In [None]:
def calculate_mrr_from_chunks(search_results, full_answer):
    """
    Compute the reciprocal rank of the first relevant chunk for a single query.

    A chunk counts as relevant when it occurs as a substring of ``full_answer``.
    Only one query is scored here, so the "mean" in MRR is taken over a single
    reciprocal-rank value.

    :param search_results: List of text chunks returned by the semantic search;
        the 1-based position in this list is used as the rank.
    :param full_answer: The complete, correct answer (string).
    :return: ``1 / rank`` of the first relevant chunk, or ``0.0`` when no chunk
        is found inside the answer (including an empty result list).
    """
    # Rank of the earliest chunk contained in the answer; None when nothing matches.
    first_hit_rank = next(
        (
            position
            for position, candidate in enumerate(search_results, start=1)
            if candidate in full_answer
        ),
        None,
    )

    # No relevant chunk contributes a reciprocal rank of zero.
    if first_hit_rank is None:
        return 0.0

    return 1 / first_hit_rank

In [None]:
def map_found(founds, answer):
    """
    Score retrieved items against an answer using classification metrics.

    Each item in ``founds`` is labelled 1 when it occurs as a substring of
    ``answer`` and 0 otherwise. These labels are compared with a reference
    vector of all ones, i.e. every retrieved item is treated as if it were
    expected to be relevant.

    :param founds: Sized sequence of retrieved text items.
    :param answer: The reference answer (string) the items are checked against.
    :return: Tuple ``(accuracy, f1, recall, precision)``; f1 and precision use
        ``average='weighted'``, recall uses the scorer's default averaging.
    """
    # Reference labels: one "relevant" flag per retrieved item.
    expected = [1] * len(founds)
    # Predicted labels: 1 if the item appears in the answer, otherwise 0.
    predicted = [1 if candidate in answer else 0 for candidate in founds]

    return (
        accuracy_score(expected, predicted),
        f1_score(expected, predicted, average='weighted'),
        recall_score(expected, predicted),
        precision_score(expected, predicted, average='weighted'),
    )

## Load Data

In [None]:
# Relative path of the data directory; used as the prefix when building file paths.
dir_data = "../data/"

In [None]:
# Load the validation set from a records-oriented JSON file into a DataFrame.
# NOTE(review): the file is named "mqdquad.json" while the variable is "medquad" —
# confirm the file name on disk is intentional.
medquad = pd.read_json(f"{dir_data}validations/mqdquad.json", orient="records")
# Print the column/dtype summary, then show the first rows as the cell output.
medquad.info()
medquad.head()

## Live

In [None]:
# TODO
# We have to check all questions and all answers from the test dataset.

## Demo

In [None]:
# Use row 10 as the demo query together with its ground-truth answer.
question  = medquad["question"].values[10]
real_answer  = medquad["answer"].values[10]
# Build a distractor text by concatenating the answers of rows 5 and 20.
fake_answer = medquad["answer"].values[5] + medquad["answer"].values[20]

In [None]:
# Split the distractor text on "." and keep only fragments longer than two
# characters (length measured before stripping), with surrounding whitespace removed.
fake_answer_sentence = [
    sentence.strip()
    for sentence in fake_answer.split(".")
    if len(sentence) > 2
]
# Fixed seed so the shuffled order is reproducible between runs.
random.seed(10)
random.shuffle(fake_answer_sentence)
fake_answer_sentence

In [None]:
# Split the real answer on "." and keep only fragments longer than two
# characters (length measured before stripping), with surrounding whitespace removed.
real_answer_sentence = [
    sentence.strip()
    for sentence in real_answer.split(".")
    if len(sentence) > 2
]
# Fixed seed so the shuffled order is reproducible between runs.
random.seed(10)
random.shuffle(real_answer_sentence)
real_answer_sentence

In [None]:
# Simulated search output: up to five sentences from the real answer mixed with
# up to three sentences from the distractor text.
demo_found = [*real_answer_sentence[:5], *fake_answer_sentence[:3]]
# Shuffle with a fixed seed so relevant and irrelevant chunks are interleaved
# in a reproducible order.
random.seed(22)
random.shuffle(demo_found)
demo_found

In [None]:
# Score the mixed demo chunks against the real answer.
acc, f1, recall, precision = map_found(demo_found, real_answer)

# Report the classification-style metrics returned by map_found.
print("Accuracy scores: ", acc)
print("f1 scores", f1)
print("Recall scores", recall)
print("Precision scores", precision)
# Reciprocal rank of the first demo chunk that occurs inside the real answer.
print("Mean Reciprocal Rank (MRR): ", calculate_mrr_from_chunks(demo_found, real_answer))