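# QueryEngineEvaluator

This notebook builds a tree index and a vector index over the same documents, generates evaluation questions from those documents, and scores each index's query engine with `QueryEngineEvaluator`.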

In [15]:
# Load IPython's autoreload extension and set it to mode 2 so that imported
# modules are re-imported before each cell executes (useful while editing
# library code alongside this notebook).
%load_ext autoreload
%autoreload 2

import nest_asyncio

# Presumably required because the evaluation below runs asyncio code inside
# Jupyter's already-running event loop — TODO confirm.
nest_asyncio.apply()

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [2]:
import logging
import sys

# Route INFO-and-above log records to stdout so library progress messages
# appear in the cell output.
# basicConfig already attaches a stdout StreamHandler to the root logger;
# adding a second StreamHandler on the same stream made every record print
# twice (once as "INFO:name:message" and once bare), so only basicConfig is
# kept here.
logging.basicConfig(stream=sys.stdout, level=logging.INFO)

In [3]:
from llama_index import (
    TreeIndex,
    VectorStoreIndex,
    SimpleDirectoryReader,
    LLMPredictor,
    ServiceContext,
    Response,
)
from llama_index.llms import OpenAI
from llama_index.evaluation import DatasetGenerator
from llama_index.evaluation import QueryEngineEvaluator
import pandas as pd

# Set the pandas column-width display option to 0 so long text cells are not
# cut at the default width.
# NOTE(review): pandas documents None as the "unlimited" value for this
# option; confirm 0 behaves as intended on the installed version.
pd.options.display.max_colwidth = 0

INFO:numexpr.utils:Note: NumExpr detected 16 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8.
Note: NumExpr detected 16 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8.
INFO:numexpr.utils:NumExpr defaulting to 8 threads.
NumExpr defaulting to 8 threads.


In [4]:
# Two OpenAI LLM handles, both created with temperature=0:
# a davinci-class GPT-3 model and GPT-4.
# NOTE(review): "text-davinci-003" may no longer be served by OpenAI —
# confirm the model is still available before re-running.
gpt3 = OpenAI(model="text-davinci-003", temperature=0)
gpt4 = OpenAI(model="gpt-4", temperature=0)

# One ServiceContext per LLM, so each model can be passed to llama_index
# components independently.
service_context_gpt3 = ServiceContext.from_defaults(llm=gpt3)
service_context_gpt4 = ServiceContext.from_defaults(llm=gpt4)

In [5]:
# Read the source documents from the local ./test_wiki_data directory; the
# same `documents` list feeds both indexes and the question generator below.
documents = SimpleDirectoryReader("./test_wiki_data").load_data()

In [6]:
# Build a TreeIndex over the loaded documents. No service context is passed,
# so the library defaults apply.
tree_index = TreeIndex.from_documents(documents)

INFO:llama_index.indices.common_tree.base:> Building index from nodes: 3 chunks
> Building index from nodes: 3 chunks


In [7]:
# Build a VectorStoreIndex over the same documents, using a service context
# configured with chunk_size=512.
vector_index = VectorStoreIndex.from_documents(
    documents=documents,
    service_context=ServiceContext.from_defaults(chunk_size=512),
)

In [9]:
# Number of evaluation questions to request from the generator.
NUM_EVAL_QUESTIONS = 10

# Build a question generator over the loaded documents and ask it for
# NUM_EVAL_QUESTIONS questions; these form the evaluation set used below.
data_generator = DatasetGenerator.from_documents(documents)
eval_questions = data_generator.generate_questions_from_nodes(NUM_EVAL_QUESTIONS)

# Display how many questions were actually produced.
len(eval_questions)

chunk_size_limit is deprecated, please specify chunk_size instead


10

In [10]:
# Display the generated evaluation questions for a manual sanity check.
eval_questions

['What is the population of New York City as of 2020?',
 'Which borough of New York City has the highest population?',
 'What is the economic significance of New York City?',
 'How did New York City get its name?',
 'What is the significance of the Statue of Liberty in New York City?',
 'How did New York City become a global center for art and culture?',
 'What is the historical importance of the Stonewall Inn in New York City?',
 "What is the role of the New York Stock Exchange in the city's economy?",
 'How did New York City become a major immigration gateway?',
 'What are some of the major tourist attractions in New York City?']

In [46]:
# Query engine backed by the vector index (default settings).
qe = vector_index.as_query_engine()

# The evaluator is constructed with the GPT-3 service context; score the
# vector query engine on the first three generated questions only.
evaluator = QueryEngineEvaluator(service_context_gpt3)
result_vector = evaluator.evaluate(qe, eval_questions[0:3])

INFO:openai:message='OpenAI API response' path=https://api.openai.com/v1/completions processing_ms=1224 request_id=5ba67723f349ff05dbeefec720d4c095 response_code=200
message='OpenAI API response' path=https://api.openai.com/v1/completions processing_ms=1224 request_id=5ba67723f349ff05dbeefec720d4c095 response_code=200
INFO:openai:message='OpenAI API response' path=https://api.openai.com/v1/completions processing_ms=1322 request_id=4f0ff31f6a999ac5ed584ba0df241d8a response_code=200
message='OpenAI API response' path=https://api.openai.com/v1/completions processing_ms=1322 request_id=4f0ff31f6a999ac5ed584ba0df241d8a response_code=200
INFO:openai:message='OpenAI API response' path=https://api.openai.com/v1/completions processing_ms=7850 request_id=212a3f50f7ca22ef6531d1c68b16ab16 response_code=200
message='OpenAI API response' path=https://api.openai.com/v1/completions processing_ms=7850 request_id=212a3f50f7ca22ef6531d1c68b16ab16 response_code=200


Map:   0%|          | 0/3 [00:00<?, ? examples/s]

100%|████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.05it/s]


Map:   0%|          | 0/3 [00:00<?, ? examples/s]

100%|████████████████████████████████████████████████████████████| 1/1 [00:03<00:00,  3.25s/it]
100%|████████████████████████████████████████████████████████████| 1/1 [00:27<00:00, 27.49s/it]


In [47]:
# Display the evaluation scores obtained for the vector index query engine.
result_vector

{'ragas_score': 0.9153, 'answer_relevancy': 0.8913, 'context_relavency': 0.8653, 'faithfulness': 1.0000}

In [48]:
# Rebind `qe` to a query engine backed by the tree index, then score it with
# the same evaluator and the same first three questions used for the vector
# index, so the two results are comparable.
qe = tree_index.as_query_engine()
result_tree = evaluator.evaluate(qe, eval_questions[0:3])

INFO:llama_index.indices.tree.select_leaf_retriever:>[Level 0] Selected node: [1]/[1]
>[Level 0] Selected node: [1]/[1]
INFO:llama_index.indices.tree.select_leaf_retriever:>[Level 1] Selected node: [1]/[1]
>[Level 1] Selected node: [1]/[1]
INFO:llama_index.indices.tree.select_leaf_retriever:>[Level 0] Selected node: [1]/[1]
>[Level 0] Selected node: [1]/[1]
INFO:llama_index.indices.tree.select_leaf_retriever:>[Level 1] Selected node: [2]/[2]
>[Level 1] Selected node: [2]/[2]
INFO:llama_index.indices.tree.select_leaf_retriever:>[Level 0] Selected node: [3]/[3]
>[Level 0] Selected node: [3]/[3]
INFO:llama_index.indices.tree.select_leaf_retriever:>[Level 1] Selected node: [5]/[5]
>[Level 1] Selected node: [5]/[5]
INFO:openai:message='OpenAI API response' path=https://api.openai.com/v1/completions processing_ms=1107 request_id=30cf49be4d9ed960a1569669a6850e19 response_code=200
message='OpenAI API response' path=https://api.openai.com/v1/completions processing_ms=1107 request_id=30cf49be4d9

Map:   0%|          | 0/3 [00:00<?, ? examples/s]

100%|████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.15it/s]


Map:   0%|          | 0/3 [00:00<?, ? examples/s]

100%|████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.61s/it]
100%|████████████████████████████████████████████████████████████| 1/1 [00:22<00:00, 22.37s/it]


In [49]:
# Display the evaluation scores obtained for the tree index query engine.
result_tree

{'ragas_score': 0.7301, 'answer_relevancy': 0.8850, 'context_relavency': 0.8673, 'faithfulness': 0.5476}