# Evaluate

In [6]:
import json

In [105]:
# Locations of the JSON datasets loaded for evaluation, relative to the
# directory the notebook is run from.
TRAIN_DATASET_FPATH = './data/train_dataset.json'
VAL_DATASET_FPATH = './data/val_dataset.json'

In [91]:
# The datasets are only read, so open them read-only: 'r+' additionally
# requires write permission and fails on read-only files or mounts.
# JSON text is UTF-8, so decode it explicitly instead of relying on the
# platform's default locale encoding.
with open(TRAIN_DATASET_FPATH, 'r', encoding='utf-8') as f:
    train_dataset = json.load(f)

with open(VAL_DATASET_FPATH, 'r', encoding='utf-8') as f:
    val_dataset = json.load(f)

In [92]:
from tqdm.notebook import tqdm
import pandas as pd

from llama_index import ServiceContext, VectorStoreIndex
from llama_index.schema import TextNode
from llama_index.embeddings import OpenAIEmbedding

In [95]:
def evaluate(
    dataset,
    embed_model,
    top_k=5,
    verbose=False,
):
    """Score how often ``embed_model`` retrieves a relevant document per query.

    Every corpus entry is wrapped in a ``TextNode`` and indexed in a
    ``VectorStoreIndex``. Each query then retrieves its ``top_k`` most similar
    nodes and counts as a hit when any of its relevant document ids is among
    the retrieved node ids.

    Args:
        dataset: Mapping with the keys ``'corpus'`` (doc id -> text),
            ``'queries'`` (query id -> query text) and ``'relevant_docs'``
            (query id -> sequence of relevant doc ids).
        embed_model: Forwarded unchanged to ``ServiceContext.from_defaults``.
        top_k: Number of nodes retrieved per query.
        verbose: Currently unused; kept so existing callers keep working.

    Returns:
        A list with one dict per query, in the iteration order of
        ``dataset['queries']``, holding:
            ``'is_hit'``: whether any relevant doc id was retrieved,
            ``'retrieved'``: the retrieved node ids,
            ``'expected'``: the first relevant doc id for the query,
            ``'query'``: the query id (not the query text).

    Raises:
        KeyError: If a query id has no entry in ``dataset['relevant_docs']``.
        IndexError: If a query's list of relevant doc ids is empty.
    """
    corpus = dataset['corpus']
    queries = dataset['queries']
    relevant_docs = dataset['relevant_docs']

    service_context = ServiceContext.from_defaults(embed_model=embed_model)
    # Reuse the corpus ids as node ids so retrieved ids can be compared
    # directly with the ids listed in relevant_docs.
    nodes = [TextNode(id_=id_, text=text) for id_, text in corpus.items()]
    index = VectorStoreIndex(
        nodes,
        service_context=service_context,
        show_progress=True,
    )
    retriever = index.as_retriever(similarity_top_k=top_k)

    eval_results = []
    for query_id, query in tqdm(queries.items()):
        retrieved_nodes = retriever.retrieve(query)
        retrieved_ids = [node.node.node_id for node in retrieved_nodes]

        expected_ids = relevant_docs[query_id]
        # Reported for backward compatibility: the first relevant doc id.
        expected_id = expected_ids[0]
        # A hit is any relevant doc among the retrieved ones; with a single
        # relevant doc this is exactly `expected_id in retrieved_ids`.
        is_hit = any(doc_id in retrieved_ids for doc_id in expected_ids)

        eval_results.append({
            'is_hit': is_hit,
            'retrieved': retrieved_ids,
            'expected': expected_id,
            'query': query_id,
        })
    return eval_results

### OpenAI

In [98]:
# Baseline run: OpenAI embeddings constructed with the library defaults,
# evaluated on the training dataset with evaluate()'s default top_k.
ada = OpenAIEmbedding()
train_results = evaluate(dataset=train_dataset, embed_model=ada)

Generating embeddings:   0%|          | 0/334 [00:00<?, ?it/s]

  0%|          | 0/668 [00:00<?, ?it/s]

In [99]:
# One row per evaluated query; columns are the keys of each result dict
# returned by evaluate() (is_hit, retrieved, expected, query).
df = pd.DataFrame(train_results)

In [100]:
# Hit rate: the mean of the boolean is_hit column, i.e. the fraction of
# queries that evaluate() marked as a hit.
hit_rate = df['is_hit'].mean()
hit_rate

0.8817365269461078

### BAAI/bge-small-en

In [102]:
# Comparison run: the embedding model is given as a string identifier that
# evaluate() forwards to ServiceContext.from_defaults.
# NOTE(review): presumably the "local:" prefix makes llama_index load the
# model locally instead of calling a hosted API - confirm.
bge = "local:BAAI/bge-small-en"
train_results = evaluate(dataset=train_dataset, embed_model=bge)

Generating embeddings:   0%|          | 0/334 [00:00<?, ?it/s]

  0%|          | 0/668 [00:00<?, ?it/s]

In [103]:
# One row per evaluated query for the most recent evaluate() run; this
# rebinds the `df` name used for the previous model's results.
df = pd.DataFrame(train_results)

In [104]:
# Hit rate for the results currently held in `df`: the mean of the boolean
# is_hit column, i.e. the fraction of queries marked as a hit.
hit_rate = df['is_hit'].mean()
hit_rate

0.8008982035928144

## Results

### Default chunk size + top-k=10
- OpenAI: 192
- local: 161
- local:BAAI/bge-small-en: 175

### Default chunk size + top-k=5
- OpenAI: 184
- local: 137
- local:BAAI/bge-small-en: 158