## From Zero Instructions:

1. Install Okareo's Python SDK: &nbsp;&nbsp;  `pip install okareo`  &nbsp;&nbsp;  (just run the cell below)

2. Get your API token from [https://app.okareo.com/](https://app.okareo.com/).  
   (Note: You will need to register first.)

3. Go directly to the **"2. Create your API Token"** link on the landing page. You can skip all other steps.

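4. Set the environment variable `OKAREO_API_KEY` to your generated API token. The code cells below read it with `os.environ["OKAREO_API_KEY"]`, so it must be set before they run.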

In [None]:
# Step 1 of the instructions above: install the Okareo Python SDK.
%pip install okareo 

In [2]:
import os
from okareo import Okareo

# Build the Okareo client from the API token stored in the environment.
OKAREO_API_KEY = os.environ["OKAREO_API_KEY"]
okareo = Okareo(OKAREO_API_KEY)

# Upload the article seed data as a named scenario set.
response = okareo.upload_scenario_set(
    scenario_name="Retrieval Articles Scenario",
    file_path='./webbizz_10_articles.jsonl',
)
# The printed scenario_id is the input for the generation step that follows.
print(response)

ScenarioSetResponse(scenario_id='333155d5-0658-4080-b006-b83ad6c10797', project_id='394c2c12-be7a-47a6-911b-d6c673bc543b', time_created=datetime.datetime(2023, 11, 11, 3, 18, 14, 76735), type='SEED', tags=None, name='Retrieval Articles Scenario', seed_data=[], scenario_input=[], additional_properties={})


In [4]:
import os
from okareo import Okareo
from okareo_api_client.models import ScenarioSetGenerate, ScenarioSetResponse, ScenarioType

# Build the Okareo client from the API token stored in the environment.
OKAREO_API_KEY = os.environ["OKAREO_API_KEY"]
okareo = Okareo(OKAREO_API_KEY)

# Generate a new scenario set from the seed scenario set created earlier;
# source_scenario_id is the scenario_id printed by the upload cell.
response = okareo.generate_scenarios(
    name="questions from retrieval articles",
    source_scenario_id="333155d5-0658-4080-b006-b83ad6c10797",
    generation_type=ScenarioType.TEXT_REVERSE_QUESTION,
    number_examples=2,
)

print(response)

ScenarioSetResponse(scenario_id='0e03604a-c2e6-478c-a555-5aa5ef11bbbb', project_id='394c2c12-be7a-47a6-911b-d6c673bc543b', time_created=datetime.datetime(2023, 11, 11, 3, 20, 47, 101371), type='TEXT_REVERSE_QUESTION', tags=['seed:333155d5-0658-4080-b006-b83ad6c10797'], name='questions from retrieval articles', seed_data=[], scenario_input=[], additional_properties={})


In [4]:
# perform a test run using a scenario set loaded in the previous cell 
import os
import random
from datetime import datetime
from okareo import Okareo
from okareo_api_client.models import TestRunType

# Read the API token from the environment; a missing variable raises KeyError here.
OKAREO_API_KEY = os.environ["OKAREO_API_KEY"]
# Client used below to register the model under test.
okareo = Okareo(OKAREO_API_KEY)

# Callable to be applied to each scenario in the scenario set
def call_model(input: str):
    """Return a mock retrieval result for a single scenario input.

    This is a stand-in for the embedding model and vector DB retrieval being
    tested: ``input`` is not used, and five of ten fixed article ids are
    picked at random for demonstration purposes.

    Args:
        input: The input taken from the scenario set. The name shadows the
            builtin but is kept because it is part of the signature.

    Returns:
        A tuple ``(parsed_ids_with_scores, model_response)``.
        ``parsed_ids_with_scores`` is a list of ``(article_id, score)`` tuples
        with scores 5, 4, 3, 2, 1 (a higher score means more relevant);
        ``model_response`` is a dict carrying the overall model response
        context.
    """
    # call your embedding model and vector db retrieval being tested here using <input> from the scenario set
    # we are using a random response here for demonstration purposes
    article_ids = [
        "75eaa363-dfcc-499f-b2af-1407b43cb133",
        "ac0d464c-f673-44b8-8195-60c965e47525",
        "35a4fd5b-453e-4ca6-9536-f20db7303344",
        "a8a97b0e-8d9a-4a1c-b93e-83d2bc9e5266",
        "0b85c12f-6ea6-4d4a-85de-6c6e9a9f8c78",
        "cda67f1d-19f2-4b45-9f3e-3b8d67f8c6c5",
        "6e4f1c97-3f7a-4fcd-a4a3-69c9817c8fd1",
        "f658c264-4a8a-4c93-a6d7-9a3d75f5a6f3",
        "aacf7a34-9d3a-4e2a-9a5c-91f2a0e8a12d",
        "f1a37b5e-58c4-4f5a-bc42-1b70253b8bf3",
    ]
    # higher score value means more relevant
    scores = [5, 4, 3, 2, 1]
    # Draw exactly one distinct id per score so the two lists always pair up.
    selected_ids = random.sample(article_ids, len(scores))
    # zip already yields (id, score) tuples; no comprehension (and no loop
    # variable shadowing the builtin `id`) is needed.
    parsed_ids_with_scores = list(zip(selected_ids, scores))

    model_response = {'matches': 'additional context from the model'}

    # return a tuple of (parsed_ids_with_scores, overall model response context)
    return parsed_ids_with_scores, model_response

# Look up the model by name, creating it first if it does not exist yet.
model_under_test = okareo.register_model(name="vectordb_retrieval")

# Scenario set to evaluate against.
# NOTE(review): this id does not match any scenario_id printed in the other
# cell outputs of this notebook; replace it with the id of your own scenario set.
scenario_id = 'd8b4fd20-f958-4386-a000-8bd1c6c1633d'

# Timestamp the run name so that repeated runs can be told apart.
today_with_time = datetime.now().strftime('%m-%d %H:%M:%S')
test_run_name = f"Retrieval Test Run {today_with_time}"

# Apply call_model to the scenario set and score it as information retrieval.
test_run_item = model_under_test.run_test(
    scenario_id=scenario_id,
    model_invoker=call_model,
    test_run_name=test_run_name,
    test_run_type=TestRunType.INFORMATION_RETRIEVAL,
)

# Show the run id followed by the model-level metrics for the test run.
print(test_run_item.id)
print(test_run_item.model_metrics.to_dict())

2e12f69a-02da-4b32-b284-72108277143f
{'accuracy@k': {'1': 0.0, '3': 0.2, '5': 0.4, '10': 0.4}, 'precision@k': {'1': 0.0, '3': 0.06666666666666667, '5': 0.08, '10': 0.04}, 'recall@k': {'1': 0.0, '3': 0.2, '5': 0.4, '10': 0.4}, 'ndcg@k': {'1': 0.0, '3': 0.11309297535714576, '5': 0.19484591188793923, '10': 0.19484591188793923}, 'mrr@k': {'1': 0.0, '3': 0.08333333333333333, '5': 0.12833333333333333, '10': 0.12833333333333333}, 'map@k': {'1': 0.0, '3': 0.08333333333333333, '5': 0.12833333333333333, '10': 0.12833333333333333}}


In [5]:
# You can also upload the scenario questions directly from a JSONL file instead of generating them from paragraphs of text
import os
from okareo import Okareo

# Build the Okareo client from the API token stored in the environment.
OKAREO_API_KEY = os.environ["OKAREO_API_KEY"]
okareo = Okareo(OKAREO_API_KEY)

# Upload the prepared questions file as its own named scenario set.
response = okareo.upload_scenario_set(
    scenario_name="Retrieval Test Scenario",
    file_path='./webbizz_10_questions.jsonl',
)
print(response)

ScenarioSetResponse(scenario_id='4985a909-0c64-4c4b-8a21-4f9336088408', project_id='5314b96c-4b20-4088-94af-5b0b6fcd7f88', time_created=datetime.datetime(2023, 10, 31, 7, 0, 10, 542662), type='SEED', tags=None, name='Retrieval Test Scenario', seed_data=[], scenario_input=[], additional_properties={})
