In [2]:
# CREATE INSTANCE OF CHROMADB AND LOAD CORPUS
import chromadb
import pandas as pd
from chromadb.utils import embedding_functions

# Path to the SQuAD passages (JSON-lines: one record per line)
corpus_file = "./data/squad_corpus.jsonl"

# Parse the JSON-lines corpus into a DataFrame
jsonObj = pd.read_json(path_or_buf=corpus_file, lines=True)

# Build the Chroma client with reset enabled, then wipe it immediately:
# Chroma can hold on to state from an earlier run of this cell.
chroma_settings = chromadb.config.Settings(allow_reset=True)
chroma_client = chromadb.Client(chroma_settings)
chroma_client.reset()

# SentenceTransformer model used to embed the passages.
# To try a fine-tuned retriever, set this to the model's local directory instead.
embedding_model_name = "all-MiniLM-L6-v2"

collection_name = "squad-corpus"
embedding_function = embedding_functions.SentenceTransformerEmbeddingFunction(
    model_name=embedding_model_name
)

# Cosine-distance collection that embeds documents with the function above
collection = chroma_client.get_or_create_collection(
    collection_name,
    metadata={"hnsw:space": "cosine"},
    embedding_function=embedding_function,
)

# Index every passage (`input` column), keyed by the stringified `result` column
corpus_documents = list(jsonObj["input"])
corpus_ids = [str(doc_id) for doc_id in jsonObj["result"]]
collection.add(documents=corpus_documents, ids=corpus_ids)


In [3]:
import os
from okareo.autogen_logger import AutogenLogger

# API keys are taken from the environment; when a variable is not set, the
# placeholder below is stored instead, so replace it with your own key.
os.environ.setdefault("OKAREO_API_KEY", "<YOUR-OKAREO-API-KEY>")  # Okareo API key
os.environ.setdefault("OPENAI_API_KEY", "<YOUR-OPENAI-API-KEY>")  # OpenAI API key

# One model entry, wrapped in the list form expected under "config_list"
gpt4_model_entry = {"model": "gpt-4o", "api_key": os.environ["OPENAI_API_KEY"]}
gpt4_config_list = [gpt4_model_entry]

gpt4_config = dict(
    cache_seed=42,  # change the cache_seed for different trials
    temperature=0,
    config_list=gpt4_config_list,
    timeout=120,
)

# Settings handed to the Okareo AutoGen logger
logger_config = dict(
    api_key=os.environ["OKAREO_API_KEY"],
    tags=["autogen-rag-example"],
    # group_name="my-autogen-groupchat",  # add this to log to a specific group
)

autogen_logger = AutogenLogger(logger_config)

from autogen import AssistantAgent
from autogen.agentchat.contrib.retrieve_user_proxy_agent import RetrieveUserProxyAgent


# LLM settings for the answering agent (reuses the model list defined above)
assistant_llm_config = dict(
    timeout=600,
    cache_seed=42,
    config_list=gpt4_config_list,
)

# The agent that receives the retrieval-augmented prompt and produces the answer
assistant = AssistantAgent(
    name="assistant",
    system_message="You are a helpful assistant.",
    llm_config=assistant_llm_config,
)


# Prompt template given to the retrieval agent as its customized prompt.
# It exposes two placeholders: {input_question} and {input_context}.
NO_UPDATE_PROMPT = """You're a retrieve augmented chatbot. You answer user's questions based on your own knowledge and the
context provided by the user.

You must give as short an answer as possible.

User's question is: {input_question}

Context is:
{input_context}
"""

# `task` indicates the kind of task we're working on. In this example, it's a `qa` task.
ragproxyagent = RetrieveUserProxyAgent(
    name="ragproxyagent",
    human_input_mode="NEVER",
    max_consecutive_auto_reply=10,
    retrieve_config={
        "task": "qa",
        # Reuse the already-populated Chroma client/collection instead of
        # building a new index from files (hence docs_path is None).
        "client": chroma_client,
        "docs_path": None,
        "model": gpt4_config_list[0]["model"],
        "vector_db": None,
        "collection_name": collection_name,
        # Embed queries with the same function the collection was built with.
        # Without this, changing `embedding_model_name` would only affect the
        # indexed documents and not the queries issued by this agent.
        "embedding_function": embedding_function,
        "customized_prompt": NO_UPDATE_PROMPT,
    },
    code_execution_config=False,
)



flaml.automl is not available. Please install flaml[automl] to enable AutoML functionalities.


KeyboardInterrupt: 

In [6]:
from okareo.checks import ModelBasedCheck, CheckOutputType
from okareo import Okareo

# API key must already be present in the environment
OKAREO_API_KEY = os.environ['OKAREO_API_KEY']

# CONNECT TO OKAREO
okareo = Okareo(OKAREO_API_KEY)

# Judge prompt for the model-based check. Placeholders:
# {scenario_result} (context), {scenario_input} (question), {model_output} (answer).
prompt = """You will be given a question, context, and answer.

Your task is to evaluate the answer on one metric: Consistency.

Evaluation Criteria:

Consistency (True/False) - Does the answer align with the facts and details provided in the context? Answers that contradict or misrepresent the context should be marked as False.

Evaluation Steps:

Read the question, context, and answer carefully.
Compare the answer to the context and identify any contradictions, inaccuracies, or inconsistencies.
Assess whether the answer accurately reflects the details provided in the context.
Assign True if the answer is consistent; otherwise, assign False.

Context:

{scenario_result}

Question:

{scenario_input}

Answer:

{model_output}

Evaluation Form:
Output True or False ONLY."""

# Pass/fail check driven by the prompt above
check_description = "Determine if the answer aligns with the facts and details provided in the context. Answers that contradict or misrepresent the context should be marked as False."
consistency_check = ModelBasedCheck(
    prompt_template=prompt,
    check_type=CheckOutputType.PASS_FAIL,
)

# Register (or overwrite) the check under a fixed name so the evaluation can reference it
okareo.create_or_update_check(
    name='consistency_QA_pass_fail',
    description=check_description,
    check=consistency_check,
)



In [7]:
import os
import random
import string
from okareo import Okareo
from okareo.model_under_test import CustomModel
from okareo_api_client.models.test_run_type import TestRunType
from okareo.model_under_test import ModelInvocation

# API key must already be present in the environment
OKAREO_API_KEY = os.environ['OKAREO_API_KEY']

# CONNECT TO OKAREO
okareo = Okareo(OKAREO_API_KEY)

# Five random ASCII letters, appended to the scenario set name further down
suffix_letters = random.choices(string.ascii_letters, k=5)
random_string = ''.join(suffix_letters)


# CREATE CUSTOM MODEL
class CustomGenerationModel(CustomModel):
    """Okareo custom model that answers each scenario input with the AutoGen RAG agents.

    Relies on the notebook-level `assistant`, `ragproxyagent` and `autogen_logger`
    objects being defined before `invoke` is called.
    """

    # Constructor
    def __init__(self, name):
        """Forward the model name to `CustomModel`."""
        super().__init__(name)

    # Define the invoke method to be called on each input of a scenario
    def invoke(self, input: str) -> ModelInvocation:
        """Run one retrieval-augmented chat for a single scenario input.

        Args:
            input: The question taken from the scenario set. (The name shadows the
                builtin `input`, which is not used in this method.)

        Returns:
            A `ModelInvocation` carrying the chat summary as the prediction, the
            question as the model input, and the chat history as output metadata.
        """
        with autogen_logger:  # log the chat to Okareo
            # Reset the assistant before each question
            assistant.reset()
            chat_result = ragproxyagent.initiate_chat(
                assistant, message=ragproxyagent.message_generator, problem=input, n_results=3
            )

            print(f"\n\nInput: {input}")
            print(f"\n\nChat result summary: {chat_result.summary}")

            # Package the answer, the original question and the full chat history
            return ModelInvocation(
                model_prediction=chat_result.summary,
                model_input=input,
                model_output_metadata=chat_result.chat_history,
            )


# REGISTER MODEL
# The same name is used for the Okareo registration and the wrapped model instance
model_name = "Autogen Q/A Agents"
model_under_test = okareo.register_model(
    name=model_name,
    model=CustomGenerationModel(name=model_name),
)

# CREATE SCENARIO SET
# Upload the questions file to Okareo; the random suffix keeps scenario names distinct
scenario_name = f"Autogen RAG Questions - {random_string}"
scenario = okareo.upload_scenario_set(
    file_path="./data/squad_qa_test_90.jsonl",
    scenario_name=scenario_name,
)

# EVALUATION
# Checks applied to every generated answer
evaluation_checks = ['reference_similarity', 'consistency_QA_pass_fail']
evaluation = model_under_test.run_test(
    name="Autogen RAG Eval - SQuAD - k=3 - V11 Tuned - No Update",
    scenario=scenario,
    test_run_type=TestRunType.NL_GENERATION,
    calculate_metrics=True,
    checks=evaluation_checks,
)

# VIEW RESULTS
print(f"See results in Okareo: {evaluation.app_link}")



[Okareo] - chat_completion
[Okareo] - function_use
[Okareo] - new_agent
[Okareo] - event
doc_ids:  [['6f7d5018-ea4a-49c2-b7f8-cc5a10873e63', '2d9c0725-688d-4ce7-9525-75de1fa3af70', '89c349c3-fb69-4944-8089-7743fef6f105']]
[32mAdding content of doc 6f7d5018-ea4a-49c2-b7f8-cc5a10873e63 to context.[0m
[32mAdding content of doc 2d9c0725-688d-4ce7-9525-75de1fa3af70 to context.[0m
[32mAdding content of doc 89c349c3-fb69-4944-8089-7743fef6f105 to context.[0m
[33mragproxyagent[0m (to assistant):

You're a retrieve augmented chatbot. You answer user's questions based on your own knowledge and the
context provided by the user.

You must give as short an answer as possible.

User's question is: What was the name of the Norman castle?

Context is:
The Normans had a profound effect on Irish culture and history after their invasion at Bannow Bay in 1169. Initially the Normans maintained a distinct culture and ethnicity. Yet, with time, they came to be subsumed into Irish culture to the point