## From Zero Instructions:

1. Install Okareo's Python SDK: &nbsp;&nbsp;  `pip install okareo`  &nbsp;&nbsp;  (just run the cell below)

2. Get your API token from [https://app.okareo.com/](https://app.okareo.com/).  
   (Note: You will need to register first.)

3. Go directly to the **"2. Create your API Token"** link on the landing page. You can skip all other steps.

4. Set the environment variable `OKAREO_API_KEY` to your generated API token before running the cells below; they read it with `os.environ["OKAREO_API_KEY"]`.

In [None]:
%pip install okareo 

In [None]:
%pip install chromadb

In [None]:
%pip install pandas

In [None]:
# Webbizz is an example web business.
# We load 10 short summaries, each covering a different aspect of the business, into the vector database.
import chromadb
import pandas as pd    

chroma_client = chromadb.Client()

# The "hnsw:space" metadata entry asks for cosine distance in this collection.
collection = chroma_client.create_collection(
    name="retrieval_test",
    metadata={"hnsw:space": "cosine"},
)

# The article file is JSON Lines, so it is read with lines=True.
jsonObj = pd.read_json('./webbizz_10_articles.jsonl', lines=True)

# The "input" column supplies the document text and the "result" column
# supplies the id stored alongside each document.
collection.add(
    ids=jsonObj["result"].tolist(),
    documents=jsonObj["input"].tolist(),
)

In [None]:
# We have a set of 20 questions, each asking about different business aspects. 
# There is a specific summary that is the target answer for the question, which is linked by UUID.
# We upload these questions into a scenario set, so that they can be used in a test run in the next cell
import os
from okareo import Okareo

# Read the API token from the environment; an unset variable raises KeyError.
OKAREO_API_KEY = os.environ["OKAREO_API_KEY"]
okareo = Okareo(OKAREO_API_KEY)

# Upload the question file as a named scenario set. `response` is reused as
# the scenario for the test run in the next cell.
response = okareo.upload_scenario_set(
    scenario_name="Retrieval Articles Scenario",
    file_path='./webbizz_retrieval_questions.jsonl',
)
# use scenario set id in output for the next step
print(response)

In [None]:
# Perform a test run using a scenario set loaded in the previous cell 
import os
import random
from datetime import datetime
from okareo import Okareo
from okareo_api_client.models import TestRunType
from okareo.model_under_test import CustomModel

# Same client setup as in the scenario-upload cell: read the API token from
# the OKAREO_API_KEY environment variable (an unset variable raises KeyError)
# and build the Okareo client from it.
OKAREO_API_KEY = os.environ["OKAREO_API_KEY"]
okareo = Okareo(OKAREO_API_KEY)

# Callable to be applied to each scenario in the scenario set
def call_model(input: str):
    """Query the vector collection for one scenario input and score the hits.

    Args:
        input: The query text taken from a scenario row. (The parameter name
            shadows the builtin but is kept so existing callers keep working.)

    Returns:
        A tuple ``(parsed_ids_with_scores, model_response)`` where
        ``parsed_ids_with_scores`` is a list of ``(document_id, score)`` pairs
        for up to five results, and ``model_response`` is a dict carrying
        additional context from the model.
    """
    # Query once: issuing the same query a second time and discarding the
    # first result only doubles the work per scenario row.
    results = collection.query(
        query_texts=[input],
        n_results=5
    )

    # Only one query text is sent, so the hits are in the first inner list.
    ids = results['ids'][0]
    distances = results['distances'][0]

    # higher score value means more relevant
    # (2 - distance) / 2 rescales a cosine distance into a 0 to 1 similarity
    # score: distance 0 maps to 1.0 and distance 2 maps to 0.0.
    parsed_ids_with_scores = [
        (doc_id, (2 - distance) / 2)
        for doc_id, distance in zip(ids, distances)
    ]

    model_response = {'matches': 'additional context from the model'}

    # return a tuple of (parsed_ids_with_scores, overall model response context)
    return parsed_ids_with_scores, model_response

# Wrap the retrieval callable so it can be registered as the model under test.
retrieval_model = CustomModel(name="custom retrieval", model_invoker=call_model)

# this will return a model if it already exists or create a new one if it doesn't
model_under_test = okareo.register_model(
    name="vectordb_retrieval_test",
    model=retrieval_model,
)

# Put the current month-day and time in the run name so runs can be told apart.
run_started_at = datetime.now()
today_with_time = run_started_at.strftime('%m-%d %H:%M:%S')
test_run_name = f"Retrieval Test Run {today_with_time}"

# `response` is the scenario set uploaded in the previous cell.
test_run_item = model_under_test.run_test(
    scenario=response,
    name=test_run_name,
    test_run_type=TestRunType.INFORMATION_RETRIEVAL,
    calculate_metrics=True,
)

# display model level metrics for the test run
model_level_metrics = test_run_item.model_metrics.to_dict()
print(model_level_metrics)

# Print a link to this evaluation in the Okareo web app.
PROJECT_ID = test_run_item.project_id
print(f"https://app.okareo.com/project/{PROJECT_ID}/eval/{test_run_item.id}")