In [2]:
from llama_index.evaluation.benchmarks import HotpotQAEvaluator
from llama_index import ServiceContext, VectorStoreIndex
from llama_index.schema import Document
from llama_index.llms import OpenAI
from llama_index import LLMPredictor

llm_predictor = LLMPredictor(OpenAI())

service_context = ServiceContext.from_defaults(
    embed_model="local:sentence-transformers/all-MiniLM-L6-v2",
    llm_predictor=llm_predictor,
)
index = VectorStoreIndex.from_documents(
    [Document.example()], service_context=service_context, show_progress=True
)

  from .autonotebook import tqdm as notebook_tqdm
Parsing documents into nodes: 100%|████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 477.17it/s]
Generating embeddings: 100%|████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 12.02it/s]


In [3]:
from llama_index.query_engine.multistep_query_engine import MultiStepQueryEngine
from llama_index.indices.postprocessor import SentenceTransformerRerank
from llama_index.indices.query.query_transform.base import StepDecomposeQueryTransform

rerank = SentenceTransformerRerank(top_n=3)

engine = index.as_query_engine(
    service_context=service_context,
    node_postprocessors=[rerank],
)

transform = StepDecomposeQueryTransform(llm_predictor, verbose=True)

multistep_engine = MultiStepQueryEngine(
    query_engine=engine, query_transform=transform, index_summary="Wikipedia"
)

HotpotQAEvaluator().run(
    multistep_engine, queries=5, show_result=True, datasets=["dev_fullwiki"]
)

Dataset: dev_fullwiki downloaded at: /home/jonch/.cache/llama_index/datasets/HotpotQA/dev_fullwiki.json
Evaluating on dataset: dev_fullwiki
-------------------------------------
Loading 5 queries out of 7405 (fraction: 0.00068)
[33;1m[1;3m> Current query: Were Scott Derrickson and Ed Wood of the same nationality?
[0m[38;5;200m[1;3m> New query:  What is the nationality of Scott Derrickson and Ed Wood?
[0m[33;1m[1;3m> Current query: Were Scott Derrickson and Ed Wood of the same nationality?
[0m[38;5;200m[1;3m> New query:  Are Scott Derrickson and Ed Wood both American?
[0m[33;1m[1;3m> Current query: Were Scott Derrickson and Ed Wood of the same nationality?
[0m[38;5;200m[1;3m> New query:  What is the nationality of Ed Wood?
[0mQuestion:  Were Scott Derrickson and Ed Wood of the same nationality?
Response: 
No, Scott Derrickson is American but Ed Wood is not.
Correct answer:  yes
EM: 0 F1: 0
-------------------------------------
[33;1m[1;3m> Current query: What governm

Although the scores themselves does not quite reflect it, the system is quite successful at answering the questions (and is in fact more accurate on some of them) 