# Validation Pipeline

## Import Dependencies

In [10]:
import random
import pandas as pd

import nltk
from nltk.tokenize import sent_tokenize
nltk.download('punkt_tab')

from sklearn.metrics import f1_score
from sklearn.metrics import recall_score
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score

from llama_index.core import Settings
from llama_index.core import StorageContext 
from llama_index.core import load_index_from_storage
from llama_index.embeddings.huggingface import HuggingFaceEmbedding

from tqdm import tqdm

[nltk_data] Downloading package punkt_tab to
[nltk_data]     C:\Users\opell\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!


In [2]:
# Retrieval-only setup: no LLM is configured (LlamaIndex then falls back to its MockLLM).
Settings.llm = None

# Sentence-transformers model used by LlamaIndex to embed queries.
embedding_model_name = "sentence-transformers/all-MiniLM-L12-v2"
Settings.embed_model = HuggingFaceEmbedding(model_name=embedding_model_name)

LLM is explicitly disabled. Using MockLLM.


## Method(s)

In [3]:
def calculate_mrr_from_chunks(search_results, full_answer):
    """
    Compute the reciprocal rank of the first relevant chunk for one query.

    A chunk is considered relevant when ``chunk in full_answer`` is true
    (a substring match when ``full_answer`` is a string).

    :param search_results: Ranked list of text chunks returned by the semantic search.
    :param full_answer: The complete, correct answer (string).
    :return: ``1 / rank`` (1-based) of the first relevant chunk, or ``0.0`` when
        no chunk is relevant. With a single query this value is also the MRR.
    """
    # 1-based position of the first chunk contained in the answer, if any.
    first_hit_rank = next(
        (
            position
            for position, chunk in enumerate(search_results, start=1)
            if chunk in full_answer
        ),
        None,
    )

    # No relevant chunk at all contributes a reciprocal rank of zero.
    if first_hit_rank is None:
        return 0.0

    return 1 / first_hit_rank

In [4]:
def map_found(founds, answer):
    """
    Score retrieved items against a reference answer.

    Every retrieved item is given the expected label 1; its predicted label is
    1 when ``item in answer`` holds and 0 otherwise.

    :param founds: Retrieved items (e.g. sentences or chunks).
    :param answer: Reference answer each item is checked against with ``in``.
    :return: Tuple ``(accuracy, f1, recall, precision)``. F1 and precision are
        computed with ``average='weighted'``; recall uses the scikit-learn default.
    """
    expected = [1] * len(founds)
    predicted = [1 if item in answer else 0 for item in founds]

    return (
        accuracy_score(expected, predicted),
        f1_score(expected, predicted, average='weighted'),
        recall_score(expected, predicted),
        precision_score(expected, predicted, average='weighted'),
    )

In [5]:
def get_response(resp):
    """
    Extract the individual context pieces from a raw response string.

    The text is cut at the dashed separator line, the segment right after the
    first separator is kept, and that segment is split on blank lines.

    :param resp: Raw response text containing at least one separator line.
    :return: List of strings found between the first and second separator.
    :raises IndexError: If ``resp`` contains no separator line.
    """
    separator = "\n---------------------\n"
    sections = resp.split(separator)
    context_section = sections[1]
    return context_section.split("\n\n")

## Load Data

In [6]:
# Base data directory; file paths are built by appending to it, so keep the trailing slash.
dir_data = "../data/"

In [7]:
# Read the validation records (one question/answer pair per record) into a DataFrame.
medquad_path = f"{dir_data}validations/mqdquad.json"
medquad = pd.read_json(medquad_path, orient="records")

# Quick sanity check: column summary first, then a preview of the first rows.
medquad.info()
medquad.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 16407 entries, 0 to 16406
Data columns (total 2 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   question  16407 non-null  object
 1   answer    16407 non-null  object
dtypes: object(2)
memory usage: 256.5+ KB


Unnamed: 0,question,answer
0,What is (are) keratoderma with woolly hair ?,Keratoderma with woolly hair is a group of rel...
1,How many people are affected by keratoderma wi...,Keratoderma with woolly hair is rare; its prev...
2,What are the genetic changes related to kerato...,"Mutations in the JUP, DSP, DSC2, and KANK2 gen..."
3,Is keratoderma with woolly hair inherited ?,Most cases of keratoderma with woolly hair hav...
4,What are the treatments for keratoderma with w...,These resources address the diagnosis or manag...


## Live

['Keratoderma with woolly hair is a group of related conditions that affect the skin and hair and in many cases increase the risk of potentially life-threatening heart problems.',
 'People with these conditions have hair that is unusually coarse, dry, fine, and tightly curled.',
 'In some cases, the hair is also sparse.',
 'The woolly hair texture typically affects only scalp hair and is present from birth.',
 'Starting early in life, affected individuals also develop palmoplantar keratoderma, a condition that causes skin on the palms of the hands and the soles of the feet to become thick, scaly, and calloused.',
 'Cardiomyopathy, which is a disease of the heart muscle, is a life-threatening health problem that can develop in people with keratoderma with woolly hair.',
 'Unlike the other features of this condition, signs and symptoms of cardiomyopathy may not appear until adolescence or later.',
 'Complications of cardiomyopathy can include an abnormal heartbeat (arrhythmia), heart fai

### Sentence Based

In [16]:
# Load the persisted sentence-based vector index. The location is derived from
# `dir_data` (like the validation file) so the data root is configured in one place.
storage_context_sb = StorageContext.from_defaults(persist_dir=f"{dir_data}vectors/sentence_based")
index_sb = load_index_from_storage(storage_context_sb)

# similarity_top_k=1: request only the single best-matching node per query.
query_engine_sb = index_sb.as_query_engine(similarity_top_k=1)

In [17]:
# Run the first validation question through the sentence-based engine and
# split the raw response text into its individual context pieces.
first_question = medquad["question"].values[0]
raw_response = query_engine_sb.query(first_question).response
resps = get_response(raw_response)
resps

['Keratoderma with woolly hair is a group of related conditions that affect the skin and hair and in many cases increase the risk of potentially life-threatening heart problems.']

In [15]:
# Compare the retrieved pieces with the sentence-tokenized reference answer
# of the same (first) question.
reference_sentences = sent_tokenize(medquad["answer"].values[0])
map_found(resps, reference_sentences)

(0.4, 0.5714285714285715, 0.4, 1.0)

## Demo

In [None]:
# Demo inputs: one question with its true answer, plus a distractor text
# made by concatenating the answers of two other records.
answers = medquad["answer"].values
question = medquad["question"].values[10]
real_answer = answers[10]
fake_answer = answers[5] + answers[20]

In [None]:
# Split the distractor text into fragments on "." and strip each one BEFORE
# applying the length filter. Filtering on the raw length let whitespace-only
# fragments (3+ characters) through, which then became "" after stripping;
# an empty string is a substring of every answer and would be scored as a hit.
fake_answer_sentence = [part.strip() for part in fake_answer.split(".")]
fake_answer_sentence = [sentence for sentence in fake_answer_sentence if len(sentence) > 2]

# Fixed seed keeps the shuffled order reproducible between runs.
random.seed(10)
random.shuffle(fake_answer_sentence)
fake_answer_sentence

In [None]:
# Split the true answer into fragments on "." and strip each one BEFORE
# applying the length filter. Filtering on the raw length let whitespace-only
# fragments (3+ characters) through, which then became "" after stripping;
# an empty string is a substring of every answer and would be scored as a hit.
real_answer_sentence = [part.strip() for part in real_answer.split(".")]
real_answer_sentence = [sentence for sentence in real_answer_sentence if len(sentence) > 2]

# Fixed seed keeps the shuffled order reproducible between runs.
random.seed(10)
random.shuffle(real_answer_sentence)
real_answer_sentence

In [None]:
# Simulated retrieval result: up to five true-answer sentences mixed with up to
# three distractor sentences, shuffled with a fixed seed for reproducibility.
demo_found = [*real_answer_sentence[:5], *fake_answer_sentence[:3]]
random.seed(22)
random.shuffle(demo_found)
demo_found

In [None]:
# Score the simulated retrieval result against the true answer and print every metric.
acc, f1, recall, precision = map_found(demo_found, real_answer)

metric_report = (
    ("Accuracy scores: ", acc),
    ("f1 scores", f1),
    ("Recall scores", recall),
    ("Precision scores", precision),
)
for label, value in metric_report:
    print(label, value)

# Reciprocal rank of the first true-answer sentence in the shuffled list.
mrr = calculate_mrr_from_chunks(demo_found, real_answer)
print("Mean Reciprocal Rank (MRR): ", mrr)