# Evaluate

In [6]:
import json

In [4]:
TRAIN_CORPUS_FPATH = './train_corpus.json'
VAL_CORPUS_FPATH = './val_corpus.json'

TRAIN_QUERIES_FPATH = './train_queries.json'
TRAIN_RELEVANT_DOCS_FPATH = './train_relevant_docs.json'

VAL_QUERIES_FPATH = './val_queries.json'
VAL_RELEVANT_DOCS_FPATH = './val_relevant_docs.json'

In [8]:
with open(TRAIN_CORPUS_FPATH, 'r+') as f:
    train_corpus = json.load(f)

with open(TRAIN_QUERIES_FPATH, 'r+') as f:
    train_queries = json.load(f)

with open(TRAIN_RELEVANT_DOCS_FPATH, 'r+') as f:
    train_relevant_docs = json.load(f)

# with open(VAL_QUERIES_FPATH, 'w+') as f:
#     json.dump(val_queries, f)

# with open(VAL_RELEVANT_DOCS_FPATH, 'w+') as f:
#     json.dump(val_relevant_docs, f)

In [41]:
from tqdm.notebook import tqdm
import pandas as pd

from llama_index import ServiceContext, VectorStoreIndex
from llama_index.schema import TextNode
from llama_index.embeddings import OpenAIEmbedding

In [37]:
def evaluate(
    corpus, 
    queries, 
    relevant_docs, 
    embed_model,
    top_k=5,
    verbose=False,
):
    service_context = ServiceContext.from_defaults(embed_model=embed_model)
    nodes = [TextNode(id_=id_, text=text) for id_, text in corpus.items()] 
    index = VectorStoreIndex(
        nodes, 
        service_context=service_context, 
        show_progress=True
    )
    retriever = index.as_retriever(similarity_top_k=top_k)

    eval_results = []
    for query in tqdm(queries):
        retrieved_nodes = retriever.retrieve(query)
        retrieved_ids = [node.node.node_id for node in retrieved_nodes]
        expected_id = relevant_docs[query][0]
        is_hit = expected_id in retrieved_ids  # assume 1 relevant doc
        
        eval_result = {
            'is_hit': is_hit,
            'retrieved': retrieved_ids,
            'expected': expected_id,
        }
        eval_results.append(eval_result)
    return eval_results

### OpenAI

In [38]:
ada = OpenAIEmbedding()
train_results = evaluate(train_corpus, train_queries, train_relevant_docs, ada)

Generating embeddings:   0%|          | 0/334 [00:00<?, ?it/s]

  0%|          | 0/668 [00:00<?, ?it/s]

In [57]:
df = pd.DataFrame(train_results)

In [78]:
hit_rate = df['is_hit'].mean()
hit_rate

0.016467065868263474

### BAAI/bge-small-en

In [85]:
bge = "local:BAAI/bge-small-en"
train_results = evaluate(train_corpus, train_queries, train_relevant_docs, bge, top_k=10)

Generating embeddings:   0%|          | 0/334 [00:00<?, ?it/s]

  0%|          | 0/668 [00:00<?, ?it/s]

In [88]:
df = pd.DataFrame(train_results)

In [89]:
hit_rate = df['is_hit'].mean()
hit_rate

0.03592814371257485

## Results

### Default chunk size + top-k=10
OpenAI: 192  
local: 161  
local:BAAI/bge-small-en : 175  

### Default chunk size + top-k=5
OpenAI: 184  
local: 137 
local:BAAI/bge-small-en : 158 