<a href="https://colab.research.google.com/github/rohithmsr/AI-practice/blob/main/Opik/4_0_evaluation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

<img src="https://raw.githubusercontent.com/comet-ml/opik/main/apps/opik-documentation/documentation/static/img/opik-logo.svg" width="250"/>

# Evaluation with Opik

In this exercise, you'll implement a basic evaluation pipeline with Opik. You can use OpenAI models directly, or open-source models via LiteLLM.

# Imports & Configuration

In [None]:
! pip install opik openai comet_ml litellm --quiet

In [None]:
import opik
from opik import Opik, track
from opik.evaluation import evaluate
from opik.evaluation.metrics import (IsJson)
from opik.integrations.openai import track_openai
import openai
import os
from datetime import datetime
from getpass import getpass
import litellm

# Define project name to enable tracing
os.environ["OPIK_PROJECT_NAME"] = "food_chatbot_eval"

In [None]:
# Opik credentials: prompt for the API key only when the environment does not
# already provide one, then run Opik's own configuration step.
if os.environ.get("OPIK_API_KEY") is None:
    os.environ.update(OPIK_API_KEY=getpass("Enter your Opik API key: "))

opik.configure()

In [None]:
# OpenAI credentials (skip this cell if you're using LiteLLM): prompt for the
# key only when the environment does not already define it.
if os.environ.get("OPENAI_API_KEY") is None:
    os.environ.update(OPENAI_API_KEY=getpass("Enter your OpenAI API key: "))

In [None]:
# Model identifier handed to LLMClient and recorded in the experiment name
# and experiment config of the evaluation run.
MODEL = "gpt-4o-mini"

In [None]:
# Opik SDK client; used below to create or fetch the evaluation dataset.
client = opik.Opik()

# Dataset

In [None]:
# Create or get the `foodchatbot_eval` dataset. The same object is optionally
# populated from the CSV file below and later passed to evaluate().
dataset = client.get_or_create_dataset(name="foodchatbot_eval")

## Optional: Download Dataset From Comet

If you have not previously created the `foodchatbot_eval` dataset in your Opik workspace, run the following code to download the dataset as a Comet Artifact and populate your Opik dataset.

If you have already created the `foodchatbot_eval` dataset, you can skip to the next section.

In [None]:
import comet_ml

In [None]:
# Start a Comet experiment to reach the Artifact API. The try/finally makes
# sure the experiment is ended even if the lookup or the download fails.
experiment = comet_ml.start(project_name="foodchatbot_eval")
try:
    logged_artifact = experiment.get_artifact(
        artifact_name="foodchatbot_eval",
        workspace="examples",
    )
    # Download the artifact's files into the current working directory.
    local_artifact = logged_artifact.download("./")
finally:
    experiment.end()

In [None]:
import csv
import json
# Read the CSV file and insert its rows into the Opik dataset.
# NOTE(review): every non-empty row is inserted, including a header row if the
# file has one — confirm the CSV is headerless.
with open('./foodchatbot_clean_eval_dataset.csv', newline='', encoding='utf-8') as csvfile:
    # Each row must hold exactly three columns (index, question, response);
    # a different column count raises ValueError while unpacking. Completely
    # empty rows (e.g. a trailing blank line) are skipped.
    items = [
        {"index": index, "question": question, "response": response}
        for index, question, response in (row for row in csv.reader(csvfile) if row)
    ]

# One batched insert instead of one call per row; nothing is sent when the
# file contained no rows.
if items:
    dataset.insert(items)

# Templates & Prompts

In [None]:
# Restaurant menu: inserted into the prompt and reported as the `context` of each evaluation result
menu_items = """
Menu: Kids Menu
Food Item: Mini Cheeseburger
Price: $6.99
Vegan: N
Popularity: 4/5
Included: Mini beef patty, cheese, lettuce, tomato, and fries.

Menu: Appetizers
Food Item: Loaded Potato Skins
Price: $8.99
Vegan: N
Popularity: 3/5
Included: Crispy potato skins filled with cheese, bacon bits, and served with sour cream.

Menu: Appetizers
Food Item: Bruschetta
Price: $7.99
Vegan: Y
Popularity: 4/5
Included: Toasted baguette slices topped with fresh tomatoes, basil, garlic, and balsamic glaze.

Menu: Main Menu
Food Item: Grilled Chicken Caesar Salad
Price: $12.99
Vegan: N
Popularity: 4/5
Included: Grilled chicken breast, romaine lettuce, Parmesan cheese, croutons, and Caesar dressing.

Menu: Main Menu
Food Item: Classic Cheese Pizza
Price: $10.99
Vegan: N
Popularity: 5/5
Included: Thin-crust pizza topped with tomato sauce, mozzarella cheese, and fresh basil.

Menu: Main Menu
Food Item: Spaghetti Bolognese
Price: $14.99
Vegan: N
Popularity: 4/5
Included: Pasta tossed in a savory meat sauce made with ground beef, tomatoes, onions, and herbs.

Menu: Vegan Options
Food Item: Veggie Wrap
Price: $9.99
Vegan: Y
Popularity: 3/5
Included: Grilled vegetables, hummus, mixed greens, and a wrap served with a side of sweet potato fries.

Menu: Vegan Options
Food Item: Vegan Beyond Burger
Price: $11.99
Vegan: Y
Popularity: 4/5
Included: Plant-based patty, vegan cheese, lettuce, tomato, onion, and a choice of regular or sweet potato fries.

Menu: Desserts
Food Item: Chocolate Lava Cake
Price: $6.99
Vegan: N
Popularity: 5/5
Included: Warm chocolate cake with a gooey molten center, served with vanilla ice cream.

Menu: Desserts
Food Item: Fresh Berry Parfait
Price: $5.99
Vegan: Y
Popularity: 4/5
Included: Layers of mixed berries, granola, and vegan coconut yogurt.
"""


In [None]:
# Prompt sent to the model. The `{menu}` and `{question}` placeholders are
# filled in with str.format() inside chatbot_application.
prompt_template = """Answer a question about the following menu:

# MENU
{menu}

# QUESTION
{question}
"""

# LLM Application


In [None]:
class LLMClient:
    """Small chat client that sends a prompt to OpenAI or to LiteLLM.

    With ``client_type="openai"`` requests go through an OpenAI client wrapped
    by Opik's ``track_openai``; any other value routes requests through
    ``litellm.completion``. Extra keyword arguments given to ``query`` (for
    example ``temperature``) are forwarded to whichever backend is used.
    """

    def __init__(self, client_type: str = "openai", model: str = "gpt-4"):
        """Remember the backend choice and build the OpenAI client if needed.

        Args:
            client_type: ``"openai"`` selects the OpenAI SDK; any other value
                selects LiteLLM.
            model: Model name passed to the backend on every request.
        """
        self.client_type = client_type
        self.model = model
        # LiteLLM is called through its module-level function, so it needs no
        # client object.
        self.client = track_openai(openai.OpenAI()) if client_type == "openai" else None

    @staticmethod
    def _build_messages(query: str, system: str) -> list:
        """Return the system + user message list shared by both backends."""
        return [
            {"role": "system", "content": system},
            {"role": "user", "content": query},
        ]

    def _get_litellm_response(self, query: str, system: str = "You are a helpful assistant.", **kwargs):
        """Send the prompt through LiteLLM and return the first choice's text.

        ``**kwargs`` are forwarded to ``litellm.completion`` so options such as
        ``temperature`` behave the same as on the OpenAI path.
        """
        response = litellm.completion(
            model=self.model,
            messages=self._build_messages(query, system),
            **kwargs,
        )
        return response.choices[0].message.content

    def _get_openai_response(self, query: str, system: str = "You are a helpful assistant.", **kwargs):
        """Send the prompt through the OpenAI client and return the first choice's text.

        ``**kwargs`` are forwarded to ``chat.completions.create``.
        """
        response = self.client.chat.completions.create(
            model=self.model,
            messages=self._build_messages(query, system),
            **kwargs,
        )
        return response.choices[0].message.content

    def query(self, query: str, system: str = "You are a helpful assistant.", **kwargs):
        """Ask the configured backend and return the text of its reply.

        Args:
            query: User message.
            system: System message placed before the user message.
            **kwargs: Extra request options forwarded to the backend call.
        """
        if self.client_type == "openai":
            return self._get_openai_response(query, system, **kwargs)
        return self._get_litellm_response(query, system, **kwargs)





In [None]:
# Shared client used by chatbot_application. client_type is left at its
# default ("openai"); only the model is overridden.
llm_client = LLMClient(model=MODEL)

In [None]:
@track
def chatbot_application(input: str) -> str:
    """Answer a question about the menu using the shared LLM client.

    ``input`` is the question text; it is substituted into ``prompt_template``
    together with ``menu_items`` and the resulting prompt is sent to
    ``llm_client``.
    """
    prompt = prompt_template.format(menu=menu_items, question=input)
    return llm_client.query(prompt)



# Evaluation

In [None]:
# Evaluation task: maps one dataset item to the fields handed to the metrics
def evaluation_task(x):
    """Run the chatbot on one dataset item and package the result.

    ``x`` is read by its ``'question'`` and ``'response'`` keys. The returned
    dict carries the question as ``input``, the chatbot's answer as
    ``output``, the menu text as ``context`` and the stored response as
    ``reference``.
    """
    question = x['question']
    answer = chatbot_application(question)
    result = {"input": question, "output": answer}
    result["context"] = menu_items
    result["reference"] = x['response']
    return result


In [None]:
#dataset = client.get_or_create_dataset(name="foodchatbot_eval")

In [None]:
# Metrics scored against every task output
metrics = [IsJson()]

# Experiment name: <model>_<dataset name>_<timestamp>
experiment_name = "_".join(
    [MODEL, dataset.name, datetime.now().strftime("%Y-%m-%d_%H-%M-%S")]
)

# Run the evaluation over the whole dataset
evaluation = evaluate(
    dataset=dataset,
    task=evaluation_task,
    scoring_metrics=metrics,
    experiment_name=experiment_name,
    experiment_config={"model": MODEL},
)