In [None]:
# Install okareo (imported below) together with attrs and dateutils.
%pip install okareo attrs dateutils

In [2]:

# perform a test run using a scenario set uploaded from a local JSONL file
import os
import random
import string
from okareo import Okareo
from okareo.model_under_test import ChromaDb, CohereModel, OpenAIModel
from okareo_api_client.models import TestRunType

# Credentials are taken from the environment; a missing variable raises
# KeyError right here.
OKAREO_API_KEY = os.environ["OKAREO_API_KEY"]
COHERE_API_KEY = os.environ["COHERE_API_KEY"]

# Five random lowercase letters, appended to the test run names below.
rnd = "".join(random.choices(string.ascii_lowercase, k=5))

# Create the Okareo client and upload the scenario file as a named scenario set.
okareo = Okareo(OKAREO_API_KEY)
scenario = okareo.upload_scenario_set(
    scenario_name="RAG with Webbizz articles",
    file_path="./webbizz_10_questions.jsonl",
)

# Register two models under test. Each one is a list pairing a ChromaDb
# collection with an embedding model.
cohere_stack = [
    ChromaDb(collection_name="test-collection-cohere"),
    CohereModel(
        model_id="embed-english-v2.0",
        model_type="embed",
        input_type="search_query",
    ),
]
model_under_test_cohere = okareo.register_model(
    name="rag_cohere_chromadb",
    model=cohere_stack,
)

# NOTE(review): system_prompt_template and temperature are passed to a model
# registered with model_type="embed" -- confirm they have any effect there.
openai_stack = [
    ChromaDb(collection_name="test-collection-openai"),
    OpenAIModel(
        model_id="text-embedding-ada-002",
        model_type="embed",
        system_prompt_template="This is a question answering engine",
        temperature=0.0,
    ),
]
model_under_test_openai = okareo.register_model(
    name="rag_openai_chromadb",
    model=openai_stack,
)

# Resolve the OpenAI key before the first run starts: os.environ[...] raises
# KeyError on a missing variable, whereas os.getenv() would silently pass None
# into the test run. COHERE_API_KEY was already read from the environment
# earlier in this cell, so it is reused instead of being looked up again.
OPENAI_API_KEY = os.environ["OPENAI_API_KEY"]

# fetch metrics for both the models
# Both runs use the same scenario and test run type with metrics enabled;
# only the model under test, the run name and the provider key differ.
result_cohere = model_under_test_cohere.run_test_v2(
    name=f"rag-cohere-chromadb-{rnd}",
    scenario=scenario,
    calculate_metrics=True,
    test_run_type=TestRunType.INFORMATION_RETRIEVAL,
    api_keys={
        "chromadb": "",
        "cohere": COHERE_API_KEY,
    },
)

result_openai = model_under_test_openai.run_test_v2(
    name=f"rag-openai-chromadb-{rnd}",
    scenario=scenario,
    calculate_metrics=True,
    test_run_type=TestRunType.INFORMATION_RETRIEVAL,
    api_keys={
        "chromadb": "",
        "openai": OPENAI_API_KEY,
    },
)

# Print the model-level metrics of each test run, OpenAI first, then Cohere.
for provider, result in (("OpenAI", result_openai), ("Cohere", result_cohere)):
    print(f"Metrics with {provider} model: {result.model_metrics.to_dict()}")

Metrics with OpenAI model: {'Accuracy@k': {'1': 0.6, '2': 0.9, '3': 0.9, '4': 1.0, '5': 1.0, '6': 1.0, '7': 1.0, '8': 1.0, '9': 1.0, '10': 1.0}, 'Precision@k': {'1': 0.6, '2': 0.65, '3': 0.5333333333333333, '4': 0.575, '5': 0.64, '6': 0.5333333333333333, '7': 0.45714285714285713, '8': 0.4, '9': 0.3555555555555555, '10': 0.32}, 'Recall@k': {'1': 0.12, '2': 0.26000000000000006, '3': 0.32, '4': 0.4600000000000001, '5': 0.64, '6': 0.64, '7': 0.64, '8': 0.64, '9': 0.64, '10': 0.64}, 'NDCG@k': {'1': 0.6, '2': 0.6386852807234542, '3': 0.559216382193173, '4': 0.5828859850023694, '5': 0.6244929539203834, '6': 0.6244929539203834, '7': 0.6244929539203834, '8': 0.6244929539203834, '9': 0.6244929539203834, '10': 0.6244929539203834}, 'MRR@k': {'1': 0.6, '2': 0.75, '3': 0.75, '4': 0.775, '5': 0.775, '6': 0.775, '7': 0.775, '8': 0.775, '9': 0.775, '10': 0.775}, 'MAP@k': {'1': 0.6, '2': 0.575, '3': 0.4611111111111111, '4': 0.4520833333333333, '5': 0.48166666666666674, '6': 0.48166666666666674, '7': 0.4