# Evaluate IR (Information Retrieval)

In [1]:
import sys
# Extend the import search path two directories up so the `src.*` imports in this
# notebook can resolve (presumably the repository root — confirm against the layout).
sys.path.append("../..")
from datasets import load_dataset
from src.service.provider import ProviderService

# Provider instance that is handed to HydeRAG further down in this notebook.
provider = ProviderService()

## Run IR

In [2]:
# Hugging Face Hub dataset repository with the question/answer rows.
QA_REPO = "BroDeadlines/QA.FQA_tu_van_hoc_duong"
qa_dataset = load_dataset(QA_REPO)

In [3]:
# Show the splits, columns and row counts that were loaded.
qa_dataset

DatasetDict({
    train: Dataset({
        features: ['question', 'answer', 'url', 'group', 'doc_id'],
        num_rows: 170
    })
    test: Dataset({
        features: ['question', 'answer', 'url', 'group', 'doc_id'],
        num_rows: 20
    })
})

In [4]:
# Dataset repository with the indexed documents; its rows carry the `doc_id` and
# `shards` columns that are read when building the shard lookup.
DATA_REPO = "BroDeadlines/TEST.basic_test_tdt_dataset"
index_dataset = load_dataset(DATA_REPO)

In [5]:
# Only documents whose `url` equals this address are kept.
NEWS_URL = "https://tuvanhocduong.tdtu.edu.vn/News"

qa_test_set = index_dataset.filter(lambda doc: doc['url'] == NEWS_URL)
qa_test_set

DatasetDict({
    train: Dataset({
        features: ['content', 'url', 'doc_id', 'shards', 'splits'],
        num_rows: 20
    })
})

In [6]:
# Lookup from a document's `doc_id` to its `shards` value.
data = {}

def build_shards(row):
    """Store ``row['shards']`` in the module-level ``data`` dict under ``row['doc_id']``.

    Returns None; the only effect is the entry written to ``data``.
    """
    data[row['doc_id']] = row['shards']

# Fill the lookup with a plain loop instead of `qa_test_set.map(build_shards)`:
# `map` can hand back a cached result without calling the function again, which
# would leave `data` empty when the notebook is re-run.
for split in qa_test_set.values():
    for row in split:
        build_shards(row)
len(data)

Map:   0%|          | 0/20 [00:00<?, ? examples/s]

20

In [28]:
import json

def _with_shard_metadata(example):
    """Return ``example`` plus a ``metadata`` JSON string holding its document's shards."""
    shards = data[example['doc_id']]
    return {**example, "metadata": json.dumps({'shards': shards})}

# NOTE(review): the load output earlier in this notebook lists the splits `train`
# and `test`; confirm that this split key exists when running from a fresh kernel.
qa_dataset['TEST.basic_test_tdt_dataset'] = qa_dataset['TEST.basic_test_tdt_dataset'].map(_with_shard_metadata)

Map:   0%|          | 0/20 [00:00<?, ? examples/s]

In [29]:

# Show the dataset again to check its splits and columns.
qa_dataset

DatasetDict({
    train: Dataset({
        features: ['question', 'answer', 'url', 'group', 'doc_id', 'metadata'],
        num_rows: 170
    })
    TEST.basic_test_tdt_dataset: Dataset({
        features: ['question', 'answer', 'url', 'group', 'doc_id', 'metadata'],
        num_rows: 20
    })
})

In [31]:
def _with_empty_metadata(example):
    """Return ``example`` with a ``metadata`` field set to the empty string."""
    return {**example, "metadata": ""}

qa_dataset['train'] = qa_dataset['train'].map(_with_empty_metadata)

Map:   0%|          | 0/170 [00:00<?, ? examples/s]

In [24]:
# Drop a leftover `shards` column only when it is present: `remove_columns`
# raises if asked to remove a column the dataset does not have, which would make
# this cell fail on a run where that column was never added.
test_split = qa_dataset['TEST.basic_test_tdt_dataset']
if 'shards' in test_split.column_names:
    qa_dataset['TEST.basic_test_tdt_dataset'] = test_split.remove_columns(['shards'])

In [32]:
# Upload the updated splits to the repository named by QA_REPO, the same
# repository the dataset was loaded from.
qa_dataset.push_to_hub(QA_REPO)

Uploading the dataset shards:   0%|          | 0/1 [00:00<?, ?it/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Uploading the dataset shards:   0%|          | 0/1 [00:00<?, ?it/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

README.md:   0%|          | 0.00/529 [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/datasets/BroDeadlines/QA.FQA_tu_van_hoc_duong/commit/c2e94ef7977fb20e5b69ae47e42a4ac2f811a40c', commit_message='Upload dataset', commit_description='', oid='c2e94ef7977fb20e5b69ae47e42a4ac2f811a40c', pr_url=None, pr_revision=None, pr_num=None)

In [9]:
from src.rag.hyde_rag import HydeRAG
# Retriever used for the IR run; built from the shared provider and the index name below.
rag = HydeRAG(provider=provider, index="test-basic_test_tdt_dataset")

In [17]:
def eval(row):
    """Attach the retrieval hits for ``row['question']`` to the row.

    Each hit returned by ``rag.search`` is read as ``hit[0]`` (an object exposing
    ``page_content`` and ``metadata['doc_id']``) and ``hit[1]`` (stored as the score).
    The list of hits is written to ``row['evaluation']`` and the same row is returned.

    NOTE(review): this name shadows the built-in ``eval`` for the rest of the notebook.
    """
    hits = rag.search(question=row['question'])
    evaluation = []
    for hit in hits:
        document = hit[0]
        evaluation.append({
            'content': document.page_content,
            "doc_id": document.metadata['doc_id'],
            "score": hit[1],
        })
    row['evaluation'] = evaluation
    return row

# Run retrieval for every row of the `test` split; each row gains an `evaluation` field.
test_ds = qa_dataset['test'].map(eval)



Map:   0%|          | 0/20 [00:00<?, ? examples/s]

In [21]:
# Upload the retrieval results as the named split of the evaluation repository.
test_ds.push_to_hub("BroDeadlines/EVAL.IR_evaluation", split="TEST.basic_test_tdt_dataset")

Uploading the dataset shards:   0%|          | 0/1 [00:00<?, ?it/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

README.md:   0%|          | 0.00/554 [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/datasets/BroDeadlines/EVAL.IR_evaluation/commit/79b98f300a74e6220d413e0b2f890dfdc1faef08', commit_message='Upload dataset', commit_description='', oid='79b98f300a74e6220d413e0b2f890dfdc1faef08', pr_url=None, pr_revision=None, pr_num=None)

## IR Evaluation

In [2]:
# Repository and split that hold the stored retrieval results.
EVAL_REPO = "BroDeadlines/EVAL.IR_evaluation"
SPLIT = "TEST.basic_test_tdt_dataset"

# Load only that split rather than the whole repository.
eval_dataset = load_dataset(EVAL_REPO, split=SPLIT)

In [3]:
# Number of rows in the loaded evaluation split.
len(eval_dataset)

20

In [3]:
from src.utils.eval_utils import evaluate_IR

# Score the stored retrieval results; the displayed result is a dict with
# `precision`, `recall` and `map_score` entries.
res = evaluate_IR(eval_dataset=eval_dataset)
res

{'precision': 0.625, 'recall': 0.5, 'map_score': 0.35416666666666663}