# Evaluate IR (Information Retrieval)

In [1]:
import sys
sys.path.append("../..")
from datasets import load_dataset
from src.service.provider import ProviderService

provider = ProviderService()

QA_REPO = "BroDeadlines/QA.FQA_tu_van_hoc_duong"
SPLIT = "INDEX.medium_index_TDT"

## Run IR

In [2]:
QA_REPO = "BroDeadlines/QA.FQA_tu_van_hoc_duong"
SPLIT = "INDEX.medium_index_TDT"
qa_dataset = load_dataset(QA_REPO, split=SPLIT)

In [3]:
qa_dataset

Dataset({
    features: ['question', 'answer', 'url', 'group', 'doc_id', 'metadata'],
    num_rows: 144
})

In [4]:
qa_dataset[0]['metadata']

'{"shards": 1}'

In [2]:
DATA_REPO = "BroDeadlines/TEST.basic_test_tdt_dataset"
index_dataset = load_dataset(DATA_REPO, split=SPLIT)

In [6]:
index_dataset

Dataset({
    features: ['content', 'url', 'doc_id', 'shards', 'splits'],
    num_rows: 344
})

## Update shards

In [25]:
ids = {}

def build_ids(row):
    ids[row['doc_id']] = 1
    return
    
qa_dataset.map(build_ids)
len(ids)

Map:   0%|          | 0/144 [00:00<?, ? examples/s]

0

In [26]:
shards = {}

def build_shards(row):
    if row['doc_id'] not in ids:
        return
    data[row['doc_id']] = row['shards']
    return
    
index_dataset.map(build_shards)
len(data)

Map:   0%|          | 0/344 [00:00<?, ? examples/s]

144

In [29]:
import json

qa_dataset_updated = qa_dataset.map(lambda e: {**e, "metadata": json.dumps({'shards': data[e['doc_id']]})})

Map:   0%|          | 0/144 [00:00<?, ? examples/s]

In [33]:
metas = {}

def build_meta(row):
    metas[row['doc_id']] = row['metadata']
    pass

qa_dataset_updated.map(build_meta)
len(metas)

Map:   0%|          | 0/144 [00:00<?, ? examples/s]

144

In [32]:

qa_dataset_updated.push_to_hub(QA_REPO, split=SPLIT)

Uploading the dataset shards:   0%|          | 0/1 [00:00<?, ?it/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

README.md:   0%|          | 0.00/785 [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/datasets/BroDeadlines/QA.FQA_tu_van_hoc_duong/commit/293b6e18f98f99f46239da08961939357d5f88c1', commit_message='Upload dataset', commit_description='', oid='293b6e18f98f99f46239da08961939357d5f88c1', pr_url=None, pr_revision=None, pr_num=None)

In [31]:
qa_dataset['train'] = qa_dataset['train'].map(lambda e: {**e, "metadata": ""})

Map:   0%|          | 0/170 [00:00<?, ? examples/s]

In [24]:
qa_dataset['TEST.basic_test_tdt_dataset'] = qa_dataset['TEST.basic_test_tdt_dataset'].remove_columns(['shards'])
# dir(a)

In [32]:
qa_dataset.push_to_hub("BroDeadlines/QA.FQA_tu_van_hoc_duong")

Uploading the dataset shards:   0%|          | 0/1 [00:00<?, ?it/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Uploading the dataset shards:   0%|          | 0/1 [00:00<?, ?it/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

README.md:   0%|          | 0.00/529 [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/datasets/BroDeadlines/QA.FQA_tu_van_hoc_duong/commit/c2e94ef7977fb20e5b69ae47e42a4ac2f811a40c', commit_message='Upload dataset', commit_description='', oid='c2e94ef7977fb20e5b69ae47e42a4ac2f811a40c', pr_url=None, pr_revision=None, pr_num=None)

## Run RAG

In [7]:
from src.rag.hyde_rag import HydeRAG, HydeHybridSearchRAG
from src.utils.config_utils import get_gemini_hyde_config

# rag = HydeRAG(provider=provider, index="test-basic_test_tdt_dataset")
config = get_gemini_hyde_config()
config.vector_index = "vec-index.medium_index_tdt"
config.text_index = "text-index.medium_index_tdt"

rag = HydeRAG(provider=provider, index=config.vector_index)
# rag = HydeHybridSearchRAG(provider=provider,config=config, k=4)

In [5]:
dcs = rag.search("ngành CNTT")

In [6]:
len(dcs)

8

In [9]:
def eval(row):
    docs = rag.search(question=row['question'])
    if (docs == None):
        row['evaluation'] = []
        return row
    result = [{'content': d[0].page_content, "doc_id": d[0].metadata['doc_id'], "score": d[1]} for d in docs]
    row['evaluation'] = result
    return row

def eval_no_score(row):
    docs = rag.search(question=row['question'])
    if (docs == None):
        row['evaluation'] = []
        return row
    result = [{'content': d.page_content, "doc_id": d.metadata['doc_id'], "score": 0} for d in docs]
    row['evaluation'] = result
    return row

test_ds = qa_dataset.map(eval)

Map:   0%|          | 0/144 [00:00<?, ? examples/s]

In [36]:
len(metas)

144

In [37]:
def update_meta(row):
    row['metadata'] = metas[row['doc_id']]
    return row
    
test_ds_updated = test_ds.map(update_meta)

Map:   0%|          | 0/144 [00:00<?, ? examples/s]

In [54]:
test_ds_updated

Dataset({
    features: ['question', 'answer', 'url', 'group', 'doc_id', 'metadata', 'evaluation'],
    num_rows: 144
})

In [58]:
def update_score(row):
    tmp = row['evaluation']
    n_tmp = [{**i, "score": -1.1} for i in tmp]
    row['evaluation'] = n_tmp
    return row

test_ds_updated_one = test_ds_updated.map(update_score)

Map:   0%|          | 0/144 [00:00<?, ? examples/s]

In [10]:
SPLIT

'INDEX.medium_index_TDT'

In [13]:
test_ds.push_to_hub("BroDeadlines/EVAL.IR_evaluation", split=f"{SPLIT}.hyde_vector")

Uploading the dataset shards:   0%|          | 0/1 [00:00<?, ?it/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

README.md:   0%|          | 0.00/1.32k [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/datasets/BroDeadlines/EVAL.IR_evaluation/commit/76a4692e2a248cd2970f17f8942d992f0bdfc890', commit_message='Upload dataset', commit_description='', oid='76a4692e2a248cd2970f17f8942d992f0bdfc890', pr_url=None, pr_revision=None, pr_num=None)

In [60]:
test_ds

Dataset({
    features: ['question', 'answer', 'url', 'group', 'doc_id', 'metadata', 'evaluation'],
    num_rows: 144
})

In [19]:
test_ds[6]['metadata']

''

# Evaluation IR

In [14]:
EVAL_REPO = "BroDeadlines/EVAL.IR_evaluation"
split = f"{SPLIT}.hyde_vector"
eval_dataset = load_dataset(EVAL_REPO, split=split)

Downloading readme:   0%|          | 0.00/1.51k [00:00<?, ?B/s]

Downloading data: 100%|████████████████████████████████████████| 40.7k/40.7k [00:00<00:00, 50.2kB/s]
Downloading data: 100%|███████████████████████████████████████████| 632k/632k [00:00<00:00, 849kB/s]
Downloading data: 100%|███████████████████████████████████████████| 129k/129k [00:01<00:00, 124kB/s]


Generating TEST.basic_test_tdt_dataset split:   0%|          | 0/20 [00:00<?, ? examples/s]

Generating INDEX.medium_index_TDT split:   0%|          | 0/144 [00:00<?, ? examples/s]

Generating INDEX.medium_index_TDT.hyde_vector split:   0%|          | 0/144 [00:00<?, ? examples/s]

In [15]:
print(SPLIT)
len(eval_dataset)

INDEX.medium_index_TDT


144

In [16]:
from src.utils.eval_utils import evaluate_IR

res = evaluate_IR(eval_dataset=eval_dataset)
res

nothing to evaluate


{'relevant': 0.16783216783216784,
 'precision': 0.2823529411764706,
 'recall': 0.16783216783216784,
 'map_score': 0.09790209790209793}