In [1]:
from llama_index.core.llama_dataset import download_llama_dataset
from llama_index.core.llama_pack import download_llama_pack
from llama_index.core import VectorStoreIndex, Settings
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.llms.ollama import Ollama
import yaml
from pydantic import BaseModel

In [2]:
# Load the notebook configuration from config.yml, resolved against the
# kernel's current working directory. The encoding is pinned to UTF-8 so
# that parsing does not depend on the platform's locale default.
with open('config.yml', encoding='utf-8') as config_file:
    # safe_load restricts parsing to plain YAML tags (no arbitrary objects).
    config_data = yaml.safe_load(config_file)

class DatasetConfig(BaseModel):
    """Typed view of the ``dataset`` section of the loaded configuration."""

    # Passed to download_llama_dataset as ``llama_dataset_class``.
    dataset_class: str
    # Passed to download_llama_dataset as ``download_dir``.
    download_dir: str


# Unpack the section so that its keys become the model's keyword arguments.
dataset_config = DatasetConfig(
    **config_data['dataset']
)

class LocalModelConfig(BaseModel):
    """Typed view of the ``local_model`` section of the loaded configuration."""

    # Model name handed to HuggingFaceEmbedding.
    embedding_model_name: str
    # Model name handed to the Ollama LLM client.
    llm_model_name: str


# Unpack the section so that its keys become the model's keyword arguments.
local_model_config = LocalModelConfig(
    **config_data['local_model']
)

In [3]:
# Register the embedding model and the LLM on the shared llama_index Settings
# object; both model names come from the validated local-model configuration.
Settings.embed_model = HuggingFaceEmbedding(
    model_name=local_model_config.embedding_model_name,
)
Settings.llm = Ollama(
    model=local_model_config.llm_model_name,
    request_timeout=360.0,  # presumably seconds — confirm against the Ollama client docs
)

In [4]:
# Download the dataset named in the configuration into the configured
# directory; the result is unpacked into the dataset and its documents.
rag_dataset, documents = download_llama_dataset(
    show_progress=True,
    llama_dataset_class=dataset_config.dataset_class,
    download_dir=dataset_config.download_dir,
)

100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 20/20 [00:14<00:00,  1.36it/s]
Loading files: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 20/20 [00:50<00:00,  2.53s/file]


In [5]:
# Build a vector store index from the downloaded documents, with progress bars.
index = VectorStoreIndex.from_documents(documents=documents, show_progress=True)

Parsing nodes:   0%|          | 0/1037 [00:00<?, ?it/s]

Generating embeddings:   0%|          | 0/1159 [00:00<?, ?it/s]

In [6]:
# Query engine over the index, created with as_query_engine's defaults
# (no arguments are passed here).
query_engine = index.as_query_engine()

In [7]:
# Fetch the evaluator pack class by name. As the captured output of this cell
# shows, the call also builds and installs the pack from the target directory.
RagEvaluatorPack = download_llama_pack(
    "RagEvaluatorPack",      # pack to fetch
    "./rag_evaluator_pack",  # local directory the pack is downloaded into
)

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Processing /Users/gpabon/src/repos/dstoolkit-genai-shap/notebooks/rag_evaluator_pack
  Installing build dependencies: started
  Installing build dependencies: finished with status 'done'
  Getting requirements to build wheel: started
  Getting requirements to build wheel: finished with status 'done'
  Preparing metadata (pyproject.toml): started
  Preparing metadata (pyproject.toml): finished with status 'done'
Building wheels for collected packages: llama-index-packs-rag-evaluator
  Building wheel for llama-index-packs-rag-evaluator (pyproject.toml): started
  Building wheel for llama-index-packs-rag-evaluator (pyproject.toml): finished with status 'done'
  Created wheel for llama-index-packs-rag-evaluator: filename=llama_index_packs_rag_evaluator-0.3.0-py3-none-any.whl size=4991 sha256=dd93b69aff969b2600249cffc6c86586c1b96b862917602d435950cefb381569
  Stored in directory: /private/var/folders/wp/r73wwy856dxb48v4t1cgtn640000gn/T/pip-ephem-wheel-cache-fri3p88j/wheels/43/7b/f1/d26032193

In [13]:
# Wire the query engine under test to the benchmark dataset.
# NOTE(review): no judge LLM is passed, so the pack falls back to whatever
# judge it selects by default — confirm that this is intended given that the
# notebook configures a local Ollama model in Settings.
rag_evaluator_pack = RagEvaluatorPack(
    query_engine=query_engine,
    rag_dataset=rag_dataset,
    show_progress=True,
)

In [None]:
benchmark_df = await rag_evaluator_pack.arun(
    batch_size=100,  # batches the number of openai api calls to make
    sleep_time_in_seconds=0,  # seconds to sleep before making an api call
)

Batch processing of predictions:  40%|████████████████████████████████████████████████████████████████████▊                                                                                                       | 40/100 [05:41<07:00,  7.01s/it]

In [11]:
# Render the dataset's examples as a DataFrame; as the last expression in the
# cell, the frame is shown as the cell output.
rag_dataset.to_pandas()

Unnamed: 0,query,reference_contexts,reference_answer,reference_answer_by,query_by
0,How has Apple's total net sales changed over t...,,"Based on the provided documents, Apple's total...",ai (gpt-4-turbo (with human review)),human
1,What are the major factors contributing to the...,,In the most recent 10-Q for the quarter ended ...,ai (gpt-4-turbo (with human review)),human
2,Has there been any significant change in Apple...,,"Yes, there has been a change in Apple's operat...",ai (gpt-4-turbo (with human review)),human
3,How has Apple's revenue from iPhone sales fluc...,,The revenue from iPhone sales for Apple has fl...,ai (gpt-4-turbo (with human review)),human
4,Can any trends be identified in Apple's Servic...,,"Based on the provided documents, there is a tr...",ai (gpt-4-turbo (with human review)),human
...,...,...,...,...,...
190,"For Amazon's Q1 2023 10-Q, align the details o...",,"In Amazon's Q1 2023 10-Q, the details of debt ...",ai (gpt-4-turbo (with human review)),human
191,Analyze how Amazon's effective tax rate report...,,The effective tax rate for Amazon as reported ...,ai (gpt-4-turbo (with human review)),human
192,"From Amazon's Q3 2023 10-Q, how does the opera...",,The operational expenses section in Amazon's Q...,ai (gpt-4-turbo (with human review)),human
193,"In the latest 10-Q, how does the revenue from ...",,The latest 10-Q does not provide specific info...,ai (gpt-4-turbo (with human review)),human
