<a target="_blank" href="https://colab.research.google.com/github/okareo-ai/okareo-python-sdk/blob/main/examples/generation_eval.ipynb">
  <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/>
</a>

## Welcome to Okareo!

Get your Okareo API token from [https://app.okareo.com/](https://app.okareo.com/) and set it in the cell below. 👇
   (Note: You will also need an OpenAI API key.)



In [None]:
# Prefer keys supplied through environment variables so real secrets never have to be
# saved in the notebook. If a variable is not set, the placeholder below is used instead;
# replace it with your own token in that case.
import os

OKAREO_API_KEY = os.environ.get("OKAREO_API_KEY", "<YOUR-OKAREO-API-TOKEN>")
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "<YOUR-OPENAI-API-TOKEN>")

In [None]:
# Install the Okareo SDK and the OpenAI client; %pip targets the running kernel's environment
%pip install okareo
%pip install openai

In [None]:
# Import libraries
import os
import tempfile
from io import StringIO
from urllib.request import urlopen

import pandas as pd

# Import Okareo libraries
from okareo import Okareo
from okareo_api_client.models import ScenarioSetCreate, SeedData
from okareo_api_client.models.test_run_type import TestRunType

# Create an instance of the Okareo client
okareo = Okareo(OKAREO_API_KEY)

# Example data files hosted in Okareo's GitHub repository
WEBBIZZ_EXAMPLES_URL = "https://raw.githubusercontent.com/okareo-ai/okareo-python-sdk/main/examples"
WEBBIZZ_ARTICLES_URL = f"{WEBBIZZ_EXAMPLES_URL}/webbizz_10_articles.jsonl"
WEBBIZZ_QUESTIONS_URL = f"{WEBBIZZ_EXAMPLES_URL}/webbizz_retrieval_questions.jsonl"


def _download_text(url, timeout=30):
    """Return the body served at `url`, decoded as UTF-8 text.

    Uses urllib instead of shelling out to `curl`, so the notebook does not depend on an
    external binary, and connection failures or HTTP error statuses raise here rather
    than surfacing later as a JSON parsing problem.

    Args:
        url: Address of the text resource to download.
        timeout: Seconds to wait on the connection before giving up.

    Returns:
        The response body as a `str`.

    Raises:
        urllib.error.URLError: If the request fails (HTTPError for HTTP error statuses).
    """
    with urlopen(url, timeout=timeout) as response:
        return response.read().decode("utf-8")


# Load the articles and convert the JSONL string to a pandas DataFrame
webbizz_articles = _download_text(WEBBIZZ_ARTICLES_URL)
articlesJson = pd.read_json(path_or_buf=StringIO(webbizz_articles), lines=True)

# Load the questions and convert the JSONL string to a pandas DataFrame
webbizz_questions = _download_text(WEBBIZZ_QUESTIONS_URL)
questionsJson = pd.read_json(path_or_buf=StringIO(webbizz_questions), lines=True)

# Materialize both columns once instead of rebuilding the list on every loop iteration
seed_inputs = questionsJson['input'].tolist()
question_article_ids = questionsJson['result'].tolist()

# For each question, build its context by concatenating the 'input' text of every article
# whose 'result' matches one of the ids listed in the question's 'result', one per line
seed_contexts = []
for article_ids in question_article_ids:
    context = "".join(
        articlesJson[articlesJson['result'] == article_id]['input'].values[0] + "\n"
        for article_id in article_ids
    )
    seed_contexts.append(context)

# Pair every question with its context; no expected answer is supplied, hence result='N/A'
seed_data = [
    SeedData(input_={'question': question, 'context': context}, result='N/A')
    for question, context in zip(seed_inputs, seed_contexts)
]

# Upload the seed data to Okareo as a scenario set
scenario_set_create = ScenarioSetCreate(
    name="QA Scenario w/ Context- Webbizz",
    seed_data=seed_data
)
scenario = okareo.create_scenario_set(scenario_set_create)



## Question-Answering Model

In [None]:
# Import libraries
from openai import OpenAI
from datetime import datetime

# Import Okareo's handler for OpenAI models
from okareo.model_under_test import OpenAIModel

# OpenAI client authenticated with the key configured at the top of the notebook
client = OpenAI(api_key=OPENAI_API_KEY)


def get_turbo_summary(messages, model="gpt-3.5-turbo", temperature=0, max_tokens=500):
    """Query an OpenAI chat model and return the text of its first choice.

    Args:
        messages: Chat messages passed through to the chat completions API.
        model: Name of the OpenAI model to query.
        temperature: Sampling temperature forwarded to the API.
        max_tokens: Token limit forwarded to the API.

    Returns:
        The `message.content` of the first choice in the response.
    """
    request_options = {
        "model": model,
        "messages": messages,
        "temperature": temperature,
        "max_tokens": max_tokens,
    }
    completion = client.chat.completions.create(**request_options)
    first_choice = completion.choices[0]
    return first_choice.message.content

# User prompt template: the scenario's question followed by its context
USER_PROMPT_TEMPLATE = "Question: {input.question} Context: {input.context}"

# System prompt template: instructs the model to answer the question from the given context
ANSWER_CONTEXT_TEMPLATE = (
    "\n"
    "You will be provided with context and a question.\n"
    "Answer the question based on the context.\n"
)

# Describe the OpenAI model to evaluate together with its system and user prompt templates
openai_model = OpenAIModel(
    model_id="gpt-4o",
    temperature=0,
    system_prompt_template=ANSWER_CONTEXT_TEMPLATE,
    user_prompt_template=USER_PROMPT_TEMPLATE,
)

# Include the current timestamp in the registered model's name
registration_timestamp = datetime.now().strftime('%m-%d %H:%M:%S')
mut_name = f"OpenAI Answering Model - {registration_timestamp}"

# Register the model with Okareo so it can be used in a test run
model_under_test = okareo.register_model(name=mut_name, model=openai_model, update=True)

In [None]:
from okareo.checks import ModelBasedCheck, CheckType
# Prompt for the model-based check: asks for a single 1-5 relevance rating of the answer,
# given the scenario's context and question and the model's output
relevance_prompt = """Rate the relevance of the answer on a scale of 1 to 5. The output should be only 1 number.
        Context: {input.context}
        Question: {input.question}
        Answer: {model_output}
        Evaluation Form (scores only):
            -Relevance:
        """

# Wrap the prompt as a SCORE-type model-based check
relevance_check = ModelBasedCheck(
    prompt_template=relevance_prompt,
    check_type=CheckType.SCORE,
)

# Create the check in Okareo, or update it if one with this name already exists
check = okareo.create_or_update_check(
    name="Relevance Check",
    description="Relevance check for QA with context",
    check=relevance_check,
)

## Evaluation

In [None]:
# Create a name for the evaluation with the current timestamp.
# The label says "QA Generation" because this notebook evaluates question answering
# with context, not summarization.
eval_name = f"QA Generation Run - {datetime.now().strftime('%m-%d %H:%M:%S')}"

# Perform a test run using the scenario set
evaluation = model_under_test.run_test(
    name=eval_name,
    scenario=scenario,
    api_key=OPENAI_API_KEY,  # key passed to the run for the OpenAI model under test
    test_run_type=TestRunType.NL_GENERATION, # specify that we are testing a natural language generation model
    calculate_metrics=True,
    # score each answer with the check created above, referenced by name
    checks=[check.name]
)

# Generate a link back to Okareo for evaluation visualization
print(f"See results in Okareo: {evaluation.app_link}")