# **Evaluate**

    Synthetic Dataset Form을 가지고 진행해야 함.
    - Query
    - Corpus
    - Relevant Docs

In [1]:
# !pip install llama_index
# !pip install sentence_transformers

Collecting llama_index
  Downloading llama_index-0.9.14.post1-py3-none-any.whl.metadata (8.2 kB)
Collecting aiohttp<4.0.0,>=3.8.6 (from llama_index)
  Using cached aiohttp-3.9.1-cp310-cp310-macosx_11_0_arm64.whl.metadata (7.4 kB)
Collecting deprecated>=1.2.9.3 (from llama_index)
  Downloading Deprecated-1.2.14-py2.py3-none-any.whl.metadata (5.4 kB)
Collecting nest-asyncio<2.0.0,>=1.5.8 (from llama_index)
  Downloading nest_asyncio-1.5.8-py3-none-any.whl.metadata (2.8 kB)
Collecting wrapt<2,>=1.10 (from deprecated>=1.2.9.3->llama_index)
  Downloading wrapt-1.16.0-cp310-cp310-macosx_11_0_arm64.whl.metadata (6.6 kB)
Downloading llama_index-0.9.14.post1-py3-none-any.whl (943 kB)
[2K   [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m943.4/943.4 kB[0m [31m3.6 MB/s[0m eta [36m0:00:00[0mm eta [36m0:00:01[0m[36m0:00:01[0m
[?25hUsing cached aiohttp-3.9.1-cp310-cp310-macosx_11_0_arm64.whl (386 kB)
Downloading Deprecated-1.2.14-py2.py3-none-any.whl (9.6 kB)
Downloadin

In [2]:
import json
from tqdm.notebook import tqdm
import pandas as pd

from llama_index import ServiceContext, VectorStoreIndex
from llama_index.schema import TextNode
from llama_index.embeddings import OpenAIEmbedding

In [3]:
def evaluate(dataset, embed_model, top_k=5, verbose=False):
    """Check, per query, whether a top-k retriever returns a relevant document.

    Every corpus entry is wrapped in a ``TextNode`` and indexed with a
    ``VectorStoreIndex`` whose service context is built from ``embed_model``.
    Each query is then sent to a retriever created with
    ``similarity_top_k=top_k`` and the retrieved node ids are compared with
    the ids listed for that query in ``dataset['relevant_docs']``.

    Args:
        dataset: Mapping with the keys ``'corpus'`` (doc id -> text),
            ``'queries'`` (query id -> query text) and ``'relevant_docs'``
            (query id -> collection of relevant doc ids).
        embed_model: Forwarded unchanged to ``ServiceContext.from_defaults``.
        top_k: Number of nodes requested from the retriever for each query.
        verbose: Accepted for interface compatibility; currently unused.

    Returns:
        A list with one dict per query, in ``queries`` iteration order, with
        the keys ``'is_hit'`` (True when any relevant doc id was retrieved),
        ``'retrieved'`` (list of retrieved node ids), ``'expected'`` (the
        first relevant doc id) and ``'query'`` (the query id).

    Raises:
        KeyError: If a required dataset key is missing or a query id has no
            entry in ``relevant_docs``.
        IndexError: If a query's collection of relevant doc ids is empty.
    """
    corpus = dataset['corpus']
    queries = dataset['queries']
    relevant_docs = dataset['relevant_docs']

    service_context = ServiceContext.from_defaults(embed_model=embed_model)
    nodes = [TextNode(id_=id_, text=text) for id_, text in corpus.items()]
    index = VectorStoreIndex(
        nodes,
        service_context=service_context,
        show_progress=True
    )
    retriever = index.as_retriever(similarity_top_k=top_k)

    eval_results = []
    for query_id, query in tqdm(queries.items()):
        retrieved_nodes = retriever.retrieve(query)
        retrieved_ids = [node.node.node_id for node in retrieved_nodes]

        # Materialise the relevant ids so list, tuple and set values all work.
        expected_ids = list(relevant_docs[query_id])
        # Kept as the first id so the result schema matches the single-doc case.
        expected_id = expected_ids[0]
        # A query is a hit when ANY of its relevant docs was retrieved, not
        # only the first one listed.
        is_hit = any(doc_id in retrieved_ids for doc_id in expected_ids)

        eval_result = {
            'is_hit': is_hit,
            'retrieved': retrieved_ids,
            'expected': expected_id,
            'query': query_id,
        }
        eval_results.append(eval_result)
    return eval_results

In [4]:
from sentence_transformers.evaluation import InformationRetrievalEvaluator
from sentence_transformers import SentenceTransformer

def evaluate_st(dataset, model_id, name, output_path='results/'):
    """Run sentence-transformers' information-retrieval evaluation on a dataset.

    Builds an ``InformationRetrievalEvaluator`` from the dataset's queries,
    corpus and relevant-doc mapping, loads ``model_id`` with
    ``SentenceTransformer`` and calls the evaluator on it.

    Args:
        dataset: Mapping with the keys ``'corpus'``, ``'queries'`` and
            ``'relevant_docs'``.
        model_id: Identifier passed to ``SentenceTransformer``.
        name: Passed to the evaluator as its ``name``.
        output_path: Directory handed to the evaluator as ``output_path``;
            created if it does not exist. Defaults to ``'results/'``.

    Returns:
        Whatever the evaluator call returns.
    """
    import os

    corpus = dataset['corpus']
    queries = dataset['queries']
    relevant_docs = dataset['relevant_docs']

    evaluator = InformationRetrievalEvaluator(queries, corpus, relevant_docs, name=name)
    model = SentenceTransformer(model_id)

    # The output directory is not created automatically when the evaluator is
    # called directly, so make sure it exists before handing it the path.
    os.makedirs(output_path, exist_ok=True)
    return evaluator(model, output_path=output_path)

  from .autonotebook import tqdm as notebook_tqdm


# **Run**

In [None]:
# Embedding model under evaluation, written as a llama_index "local:<model id>" spec.
bge = "local:BM-K/KoSimCSE-roberta-multitask"
### pretrained embedding model config
# Raw sentence-transformers handle for the same checkpoint. `evaluate` does not
# use it; it is kept so the name `model` stays defined in the notebook.
model = SentenceTransformer('BM-K/KoSimCSE-roberta-multitask')
# `evaluate` forwards its second argument to
# ServiceContext.from_defaults(embed_model=...), so pass the "local:" spec
# rather than the SentenceTransformer object. The dataset is the same
# `val_corpus` the evaluate_st cell uses.
# NOTE(review): `val_corpus` is not defined in the visible cells; it is assumed
# to be loaded elsewhere — confirm before running.
results = evaluate(val_corpus, bge)

In [None]:
# Turn the per-query result dicts returned by `evaluate` into a DataFrame.
df_res = pd.DataFrame(results)

In [None]:
# Mean of the 'is_hit' column, i.e. the fraction of queries counted as hits.
hit_rate_bge = df_res['is_hit'].mean()
# Bare expression so the notebook displays the value.
hit_rate_bge

In [None]:
# Run the sentence-transformers evaluation for the same model id; `name` is
# forwarded to InformationRetrievalEvaluator.
evaluate_st(val_corpus, "BM-K/KoSimCSE-roberta-multitask", name='bge')