<a target="_blank" href="https://colab.research.google.com/github/okareo-ai/okareo-python-sdk/blob/main/examples/generation_eval.ipynb">
  <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/>
</a>

## Welcome to Okareo!

Get your API token from [https://app.okareo.com/](https://app.okareo.com/) and set it in the cell below. 👇
   (Note: You will also need an OpenAI API key and an Anthropic API key.)



In [None]:
# Credentials used throughout this notebook. Replace each placeholder with
# your own key before running the remaining cells.
# OKAREO_API_KEY is passed to the Okareo client constructor.
OKAREO_API_KEY = "YOUR_OKAREO_API_KEY"
# Later cells look up the OpenAI and Anthropic keys in os.environ under these
# same names.
OPENAI_API_KEY = "YOUR_OPENAI_API_KEY"
ANTHROPIC_API_KEY = "YOUR_ANTHROPIC_API_KEY"

In [None]:
%pip install okareo
%pip install openai
%pip install anthropic

In [None]:
import os
from anthropic import Anthropic

from okareo import Okareo
from okareo.model_under_test import CustomModel, OpenAIModel

# Okareo client used by the registration and evaluation calls below.
okareo = Okareo(OKAREO_API_KEY)

# Prompt template shared by both models: ClaudeModel formats it directly and
# it is passed to OpenAIModel as the user prompt template. Its only
# placeholder is {scenario_input}, which stands for the customer inquiry.
RESPONSE_PROMPT = """
You are a customer support agent. Generate a helpful and professional response to the following customer inquiry:

Customer Inquiry: {scenario_input}

Context:
Our platform offers a comprehensive suite of features designed to streamline your business operations 
and boost your marketing efforts. We provide seamless integration with various popular CRM systems, 
allowing you to synchronize customer data, track interactions, and trigger automated workflows between 
our platform and your existing CRM. This integration process is user-friendly and secure, typically 
involving API keys and OAuth authentication. Our email marketing capabilities are equally robust, 
featuring an intuitive drag-and-drop email builder, advanced audience segmentation, AI-powered optimization 
tools, and detailed analytics to help you create, send, and track highly effective marketing campaigns. For 
businesses preferring alternative payment methods, we offer invoice billing as an option, particularly 
suitable for our business and enterprise plans. This invoicing system is overseen by dedicated account managers, 
can accommodate custom billing cycles and various invoicing requirements, and integrates with major accounting 
software for easy reconciliation. Whether you're looking to enhance your customer relationship management, 
supercharge your email marketing, or simplify your billing process, our platform is equipped to meet your 
needs and drive your business forward.

Your response:
"""

# Custom model for Claude
class ClaudeModel(CustomModel):
    """Okareo custom model that answers a customer inquiry with Claude."""

    def invoke(self, input: str) -> tuple:
        """Generate a customer support response for a single inquiry.

        Args:
            input: The customer inquiry text. It is substituted for the
                ``{scenario_input}`` placeholder of ``RESPONSE_PROMPT``.

        Returns:
            A ``(response_text, metadata)`` tuple: the text of the first
            content block of Claude's reply and a fixed status dictionary.
        """
        # Prefer an exported environment variable, but fall back to the key
        # assigned at the top of the notebook; that key is never exported, so
        # a plain os.environ[...] lookup would raise KeyError.
        api_key = os.environ.get("ANTHROPIC_API_KEY", ANTHROPIC_API_KEY)
        client = Anthropic(api_key=api_key)

        # The template's only placeholder is {scenario_input}; formatting it
        # with any other keyword raises KeyError.
        prompt = RESPONSE_PROMPT.format(scenario_input=input)

        response = client.messages.create(
            model="claude-3-5-sonnet-20240620",
            max_tokens=1024,
            messages=[{"role": "user", "content": prompt}],
        )
        return response.content[0].text, {"model_response": "Generation successful"}

# Build the Claude-backed custom model first, then register it with Okareo.
# NOTE(review): update=True presumably lets a re-run refresh an existing
# registration of the same name; confirm against the Okareo SDK docs.
claude_custom_model = ClaudeModel(name="Claude Sonnet 3.5 Response Generation")
claude_model = okareo.register_model(
    name="Claude Sonnet 3.5",
    model=claude_custom_model,
    update=True,
)

# OpenAI model
# Describe the OpenAI configuration separately, then register it with Okareo.
# The user prompt is the same RESPONSE_PROMPT template the Claude model uses.
gpt_model_config = OpenAIModel(
    model_id="gpt-3.5-turbo-0125",
    temperature=0.7,
    system_prompt_template="You are a helpful customer support agent.",
    user_prompt_template=RESPONSE_PROMPT,
)
gpt_model = okareo.register_model(
    name="GPT-3.5",
    model=gpt_model_config,
    update=True,
)

In [None]:
from okareo import Okareo
from okareo_api_client.models import ScenarioSetCreate, SeedData, ScenarioType

# Pass the key held in the OKAREO_API_KEY variable defined at the top of the
# notebook, not a string literal that merely spells the variable's name.
okareo = Okareo(OKAREO_API_KEY)

# (customer inquiry, reference response) pairs that seed the scenario set.
support_examples = [
    (
        "Can I connect my CRM to your platform?",
        "Certainly! I'd be happy to help you connect your CRM to our platform. We support integrations with several popular CRM systems. Could you please tell me which CRM you're using? I can then provide you with specific instructions or direct you to our integration guide for that particular CRM.",
    ),
    (
        "Do you have a way to send marketing emails?",
        "Yes, we do offer email marketing capabilities! Our platform includes a robust email marketing tool that allows you to create, send, and track marketing emails. You can design custom templates, segment your audience, schedule campaigns, and view detailed analytics on your email performance. Would you like me to provide more information about our email marketing features?",
    ),
    (
        "Can I get invoiced instead of using a credit card?",
        "Absolutely! We do offer invoicing as a payment option for our customers. This is typically available for our business and enterprise plans. To set up invoicing, we'll need to review your account and set up the appropriate billing arrangements. Could you please provide your account details or company name? I'll be happy to initiate the process for switching you to invoice billing.",
    ),
]

# Wrap each pair in a SeedData record, preserving the order above.
scenario_set_create = ScenarioSetCreate(
    name="Customer Support Inquiries",
    seed_data=[
        SeedData(input_=inquiry, result=reference_response)
        for inquiry, reference_response in support_examples
    ],
)

# Create the scenario set through the Okareo client; the returned object is
# what the evaluation runs receive as their scenario.
seed_scenario = okareo.create_scenario_set(scenario_set_create)

In [None]:
import random

from okareo.checks import CheckOutputType, ModelBasedCheck


# Judge prompt for the model-based check. It references the inquiry, the
# generated response and the expected response through its three placeholders.
# The indentation inside the literal is part of the prompt text and is kept
# exactly as written.
response_quality_prompt = """
        Evaluate the quality of the generated customer support response based on the following criteria:
        1. Relevance to the inquiry
        2. Clarity and coherence
        3. Professionalism and tone
        4. Helpfulness and informativeness

        Customer Inquiry: {scenario_input}
        Generated Response: {model_output}
        Expected Response: {scenario_result}

        Score the response on a scale of 0 to 10, where:
        0 = Poor quality, doesn't meet any criteria
        5 = Average quality, meets some criteria
        10 = Excellent quality, meets all criteria perfectly

        Only output the numeric score.
        """

# The check is declared with a SCORE output type, matching the prompt's
# instruction to output only a numeric score.
response_quality_check = ModelBasedCheck(
    prompt_template=response_quality_prompt,
    check_type=CheckOutputType.SCORE,
)

# Create the check under the name the evaluation runs refer to, or update it
# if it already exists.
check_response_quality = okareo.create_or_update_check(
    name="check_response_quality",
    description="Score the quality of the generated customer support response",
    check=response_quality_check,
)

In [None]:
from okareo_api_client.models.test_run_type import TestRunType

# Options for the Claude evaluation run: a natural-language generation test
# over the seed scenario, with metrics and the response-quality check enabled.
claude_run_options = {
    "name": "Claude Customer Support",
    "scenario": seed_scenario,
    "test_run_type": TestRunType.NL_GENERATION,
    "calculate_metrics": True,
    "checks": ["check_response_quality"],
}
claude_evaluation = claude_model.run_test(**claude_run_options)

# Run the same evaluation against the registered GPT-3.5 model.
# Prefer an exported environment variable for the OpenAI key, but fall back to
# the key assigned at the top of the notebook; that key is never exported, so
# a plain os.environ[...] lookup would raise KeyError.
gpt_evaluation = gpt_model.run_test(
    name="GPT-3.5 Customer Support",
    scenario=seed_scenario,
    api_key=os.environ.get("OPENAI_API_KEY", OPENAI_API_KEY),
    test_run_type=TestRunType.NL_GENERATION,
    calculate_metrics=True,
    checks=['check_response_quality']
)

# Print a link to each evaluation's results, one per line.
print(
    f"Claude results: {claude_evaluation.app_link}",
    f"GPT-3.5 results: {gpt_evaluation.app_link}",
    sep="\n",
)