In [1]:
from llama_index.core.llama_dataset import download_llama_dataset

In [2]:
# download and install dependencies
rag_dataset, documents = download_llama_dataset(
  "BlockchainSolanaDataset", "./solana"
)

In [3]:
# let's see some rows
rag_dataset.to_pandas()[:5]

Unnamed: 0,query,reference_contexts,reference_answer,reference_answer_by,query_by
0,What are the key issues preventing the wide ad...,[From Bitcoin to Solana – Innovating Blockchai...,The key issues preventing the wide adoption of...,ai (gpt-3.5-turbo),ai (gpt-3.5-turbo)
1,How does blockchain technology provide data se...,[From Bitcoin to Solana – Innovating Blockchai...,Blockchain technology provides data security a...,ai (gpt-3.5-turbo),ai (gpt-3.5-turbo)
2,What is the projected growth rate of the block...,[2 \n \nchain market size is expected to grow ...,The projected growth rate of the blockchain ma...,ai (gpt-3.5-turbo),ai (gpt-3.5-turbo)
3,What are some of the challenges facing enterpr...,[2 \n \nchain market size is expected to grow ...,Some of the challenges facing enterprise adopt...,ai (gpt-3.5-turbo),ai (gpt-3.5-turbo)
4,What are the key issues that need to be addres...,[3 \n \nScalability. Scalability is the seco...,The key issues that need to be addressed in or...,ai (gpt-3.5-turbo),ai (gpt-3.5-turbo)


- With documents, you can build your own RAG pipeline, to then predict and perform evaluations to compare against the benchmarks listed in the DatasetCard associated with the datasets

In [4]:
import os
from getpass import getpass
from huggingface_hub import login

In [5]:
HF_TOKEN = getpass()

 ········


In [6]:
# create llm model
from llama_index.llms.huggingface import HuggingFaceInferenceAPI
llm = HuggingFaceInferenceAPI(model_name="mistralai/Mixtral-8x7B-Instruct-v0.1", token=HF_TOKEN)
llm

HuggingFaceInferenceAPI(callback_manager=<llama_index.core.callbacks.base.CallbackManager object at 0x0000029508FCE590>, system_prompt=None, messages_to_prompt=<function messages_to_prompt at 0x000002957BF98E00>, completion_to_prompt=<function default_completion_to_prompt at 0x000002957BFDF4C0>, output_parser=None, pydantic_program_mode=<PydanticProgramMode.DEFAULT: 'default'>, query_wrapper_prompt=None, model_name='mistralai/Mixtral-8x7B-Instruct-v0.1', token='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', timeout=None, headers=None, cookies=None, task=None, context_window=3900, num_output=256, is_chat_model=False, is_function_calling_model=False)

In [7]:
# build basic RAG system
from llama_index.core import VectorStoreIndex
index = VectorStoreIndex.from_documents(documents=documents, embed_model='local')
query_engine = index.as_query_engine(llm=llm)

In [8]:
import tqdm
from llama_index.core.llama_pack import download_llama_pack

In [9]:
# evaluate using the RagEvaluatorPack
RagEvaluatorPack = download_llama_pack(
    "RagEvaluatorPack", "./rag_evaluator_pack"
)

In [10]:
rag_evaluator_pack = RagEvaluatorPack(
    rag_dataset=rag_dataset, query_engine=query_engine, judge_llm=llm
)

In [11]:
rag_evaluator_pack

<llama_index.packs.rag_evaluator.base.RagEvaluatorPack at 0x295102b7110>

In [12]:
# benchmark_df = await rag_evaluator_pack.arun(
#     batch_size=20,  # batches the number of openai api calls to make
#     sleep_time_in_seconds=1,  # seconds to sleep before making an api call
# )

In [13]:
from llama_index.core.evaluation import FaithfulnessEvaluator
evaluator_model = FaithfulnessEvaluator(llm=llm)

In [14]:
import nest_asyncio
nest_asyncio.apply()

In [15]:
response_vector = query_engine.query("What is solana blockchain?")
eval_result = evaluator_model.evaluate_response(response=response_vector)

In [16]:
eval_result.response

'\nSolana is a blockchain system that brings significant improvement to the performance of traditional blockchain and enables the building of scalable and user-friendly applications for the world. It retains all the attributes of traditional blockchain systems but has a much better performance. Solana introduces the Proof of History mechanism to improve the performance of traditional blockchain systems. In the Solana system, there are two types of nodes: Leader and Verifier. The Leader is an elected Proof of History generator, and Solana rotates leaders at fixed intervals. The Leader receives transactions from users and orders them into a Proof of History sequence. Proof of History is a mechanism used in Solana, where the Proof of History sequence is a list of transactions prearranged by a "Leader". The timestamp is embedded in this data structure, and every event has a unique hash and account along this data structure. This data structure tells us what event had come before another, a

In [17]:
eval_result.score

1.0

In [None]:
# Faithfulness is a measure of whether the generated answer is faithful to the retrieved contexts. In other words, it measures whether there is any hallucination in the generated answer.
# This is useful for measuring if the response was hallucinated. The evaluator returns a score between 0 and 1, where 1 means the response is faithful to the retrieved contexts.