In [1]:
import os
import sys

current_dir = os.getcwd()
kit_dir = os.path.abspath(os.path.join(current_dir, ".."))
repo_dir = os.path.abspath(os.path.join(kit_dir, ".."))

sys.path.append(kit_dir)
sys.path.append(repo_dir)

print(f"This is the repo dir {repo_dir}")

This is the repo dir /Users/kwasia/Documents/Projects/ai-starter-kit


In [2]:
import pandas as pd
from utils.eval.rag_eval import (
    RAGEvaluator,
    RAGEvalConfig,
    load_pipeline,
    load_eval_dataframe,
)
from langchain_community.llms.sambanova import SambaStudio, Sambaverse
from langchain_community.embeddings import HuggingFaceInstructEmbeddings

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Load config
config_path = "../../utils/eval/config.yaml"
config = RAGEvalConfig(config_yaml_path=config_path)

In [4]:
# Create evaluator
eval_llms = []
for conf in config.eval_llm_configs:
    print("conf:", conf)
    llm_name, llm_config = config.get_llm_config(conf)
    print("llm_name:", llm_name)
    print("llm_config:", llm_config)
    eval_llm = SambaStudio(**llm_config)
    eval_llms.append((llm_name, eval_llm))

print("eval_llms:", eval_llms)

eval_embeddings = HuggingFaceInstructEmbeddings(model_name=config.embedding_model_name)
evaluator = RAGEvaluator(eval_llms, eval_embeddings, config_path)

conf: ('llama38b', {'name': 'llama38b', 'model_kwargs': {'select_expert': 'Meta-Llama-3-8B-Instruct', 'process_prompt': False, 'max_tokens_to_generate': 512}})
llm_name: llama38b
llm_config: {'sambastudio_base_url': None, 'sambastudio_project_id': None, 'sambastudio_endpoint_id': None, 'sambastudio_api_key': None, 'model_kwargs': {'select_expert': 'Meta-Llama-3-8B-Instruct', 'process_prompt': False, 'max_tokens_to_generate': 512}}


ValidationError: 5 validation errors for SambaStudio
sambastudio_base_url
  none is not an allowed value (type=type_error.none.not_allowed)
sambastudio_project_id
  none is not an allowed value (type=type_error.none.not_allowed)
sambastudio_endpoint_id
  none is not an allowed value (type=type_error.none.not_allowed)
sambastudio_api_key
  none is not an allowed value (type=type_error.none.not_allowed)
__root__
  Did not find sambastudio_base_url, please add an environment variable `SAMBASTUDIO_BASE_URL` which contains it, or pass `sambastudio_base_url` as a named parameter. (type=value_error)

In [5]:
# Use Case 1: CSV file with pre-generated answers, no context
eval_df = pd.read_csv("../data/res.csv")
results1 = evaluator.evaluate(eval_df)
print("Results 1:", results1)

Evaluating: 100%|██████████| 3/3 [00:11<00:00,  3.79s/it]
Evaluating: 100%|██████████| 3/3 [00:50<00:00, 16.91s/it]
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
[34m[1mwandb[0m: Currently logged in as: [33mkwasi-ankomah[0m ([33mai-solutions-team[0m). Use [1m`wandb login --relogin`[0m to force relogin
huggingface/tokenizers: The

Results 1: {'eval_llm1': {'answer_relevancy': 0.9160, 'answer_correctness': 0.9409, 'answer_similarity': 0.7636}, 'eval_llm2': {'answer_relevancy': 0.9961, 'answer_correctness': 0.2561, 'answer_similarity': 0.7636}}


In [None]:
# Use Case 2: CSV file with pre-generated answers and context
eval_df = pd.read_csv("eval_set_with_answers_and_context.csv")
results2 = evaluator.evaluate(eval_df)
print("Results 2:", results2)

In [None]:
# Use Case 3: CSV file without answers, generate with pipelines, no context
eval_df = pd.read_csv("eval_set.csv")
pipelines = [
    load_pipeline(
        SambaStudio(
            sambastudio_base_url=conf["base_url"],
            sambastudio_project_id=conf["project_id"],
            sambastudio_endpoint_id=conf["endpoint_id"],
            sambastudio_api_key=conf["api_key"],
            **conf["model_kwargs"]
        ),
        config,
    )
    for conf in config.llm_configs
]
results3 = evaluator.evaluate(eval_df, pipelines)
print("Results 3:", results3)

In [None]:
# Use Case 4: CSV file without answers, generate with pipelines, with context from vector DB
eval_df = pd.read_csv("eval_set.csv")
pipelines = [
    load_pipeline(
        SambaStudio(
            sambastudio_base_url=conf["base_url"],
            sambastudio_project_id=conf["project_id"],
            sambastudio_endpoint_id=conf["endpoint_id"],
            sambastudio_api_key=conf["api_key"],
            **conf["model_kwargs"]
        ),
        config,
    )
    for conf in config.llm_configs
]
results4 = evaluator.evaluate(eval_df, pipelines)
print("Results 4:", results4)

In [None]:
# Use Case 5: Evaluate on HF dataset
config.config["eval_dataset"]["hf_dataset_name"] = "squad_v2"
eval_df = load_eval_dataframe(config)
pipelines = [
    load_pipeline(
        SambaStudio(
            sambastudio_base_url=conf["base_url"],
            sambastudio_project_id=conf["project_id"],
            sambastudio_endpoint_id=conf["endpoint_id"],
            sambastudio_api_key=conf["api_key"],
            **conf["model_kwargs"]
        ),
        config,
    )
    for conf in config.llm_configs
]
results5 = evaluator.evaluate(eval_df, pipelines)
print("Results 5:", results5)