# Command-R Function Calling Evaluation Demo

**Goal**: Use Okareo to evaluate a function calling agent.

**Steps**:
- Upload a function calling scenario
- Upload custom checks to assess the agent's function calling outputs
- Define a custom model to invoke Command-R for function calling
- Run the evaluation using the scenario, checks, and model

In [None]:
# get Okareo client
import os
from okareo import Okareo

# Read the API key from the environment; a missing OKAREO_API_KEY raises KeyError here.
OKAREO_API_KEY = os.environ["OKAREO_API_KEY"]
# Client used by the later cells to upload the scenario and checks and to register the model.
okareo = Okareo(OKAREO_API_KEY)

Upload a scenario based on data used in the Berkeley Function Calling Leaderboard.

See here for more details: https://gorilla.cs.berkeley.edu/leaderboard.html#leaderboard

In [None]:
import json

from okareo_api_client.models.scenario_set_create import ScenarioSetCreate
from okareo_api_client.models.seed_data import SeedData

# JSONL scenario file: every non-blank line is one JSON object that provides
# an "input" field and a "result" field.
file_path = "scenarios/google_api_scenario.jsonl"
seed_data = []

# JSON text is UTF-8, so decode explicitly instead of relying on the platform default.
with open(file_path, "r", encoding="utf-8") as f:
    # Stream the file line by line rather than materializing it with readlines().
    for line in f:
        # A blank line (for example a trailing newline at the end of the file)
        # is not valid JSON and would make json.loads raise, so skip it.
        if not line.strip():
            continue
        row = json.loads(line)
        seed_data.append(SeedData(
            input_=row["input"],
            # Swap single quotes for double quotes in the stored result;
            # presumably this turns it into valid JSON for the checks -- confirm
            # against the scenario file.
            result=row["result"].replace("'", "\""),
        ))

# Upload the seed data as a named scenario set and print the link to it.
tool_scenario = okareo.create_scenario_set(
    ScenarioSetCreate(
        name="Google API Tool Scenario",
        seed_data=seed_data,
    )
)
print(tool_scenario.app_link)

Upload [code-based checks](https://docs.okareo.ai/docs/sdk/okareo_python#codebasedcheck) based on the [Evaluation Metrics](https://gorilla.cs.berkeley.edu/blogs/8_berkeley_function_calling_leaderboard.html#metrics) used in the Berkeley Function Calling Leaderboard. These checks include:

- "Is Function Correct"
- "Are Required Parameters Present"
- "Are All Parameters Expected"
- "Do Parameter Values Match"

In [None]:
from checks.is_function_correct import Check

# Local smoke tests for the check before uploading it. Each case is a pair of
# JSON strings passed positionally to Check.evaluate: the first mimics a model
# output containing "tool_calls", the second mimics the expected scenario result.
# test_1: generated and expected function names are identical.
test_1 = ['{"tool_calls": [{"name": "projects_getSupportedLanguages"}]}', '{"name": "projects_getSupportedLanguages"}']
# test_2: generated and expected function names differ.
test_2 = ['{"tool_calls": [{"name": "projects_getSupportedLanguages"}]}', '{"name": "projects_detectLanugage"}']
# test_3: a full chatbot message with role, text and parameters around the tool call.
test_3 = [
    '{"role": "CHATBOT", "message": "I will use the SearchAnalytics API to find the romanised text in the project.", "tool_calls": [{"name": "projects_romanizeText", "parameters": {"parent": "cd9dc059-838a-485d-beae-48a9365932b6"}}]}',
    "{\"name\": \"projects_romanizeText\", \"parameter_definitions\": {\"parent\": {\"value\": \"cd9dc059-838a-485d-beae-48a9365932b6\", \"required\": true}}}"
]
# Print the result of each case so it can be inspected by eye.
for test in [test_1, test_2, test_3]:
    print(Check.evaluate(*test))

# upload the custom checks for function calling
correct_function_check = okareo.create_or_update_check(
    name="Is Function Correct",
    description="Checks if the generated function call in the model_output matches the expected function call in the scenario_result.",
    check=Check(),
)

In [None]:
from checks.are_required_params_present import Check

# Local smoke tests: each case is [model output JSON, expected scenario result JSON].
# test_1: the generated call supplies the required "siteUrl" parameter.
test_1 = ['{"tool_calls": [{"parameters": {"siteUrl": "http://example.com"}}]}', '{"parameter_definitions": {"siteUrl": {"required": true}}}']
# test_2: the generated call supplies "parent" but omits the required "siteUrl",
# so this case is expected to fail the check.
test_2 = ['{"tool_calls": [{"parameters": {"parent": "http://example.com"}}]}', '{"parameter_definitions": {"siteUrl": {"required": true}}}']
for test in [test_1, test_2]:
    # Expected to print True for test_1 only.
    print(Check.evaluate(*test))

# Upload the check so it can be referenced by id in the evaluation run.
required_params_check = okareo.create_or_update_check(
    name="Are Required Parameters Present",
    description="Checks if the generated parameters in the model_output contain the required parameters stipulated in the scenario_result.",
    check=Check(),
)

In [None]:
from checks.are_all_params_expected import Check

# Local smoke tests: each case is [model output JSON, expected scenario result JSON].
# test_1: the only generated parameter ("siteUrl") is declared in the schema.
test_1 = ['{"tool_calls": [{"parameters": {"siteUrl": "http://example.com"}}]}', '{"parameter_definitions": {"siteUrl": {"required": true}}}']
# test_2: the generated call adds "parent", which the schema does not declare,
# so this case is expected to fail the check.
test_2 = ['{"tool_calls": [{"parameters": {"siteUrl": "http://example.com", "parent": "http://example.com"}}]}', '{"parameter_definitions": {"siteUrl": {"required": true}}}']
# test_3: same generated call, but the schema now declares "parent" as optional.
test_3 = ['{"tool_calls": [{"parameters": {"siteUrl": "http://example.com", "parent": "http://example.com"}}]}', '{"parameter_definitions": {"siteUrl": {"required": true}, "parent": {"required": false}}}']
for test in [test_1, test_2, test_3]:
    # Expected to print True for test_1 and test_3 only.
    print(Check.evaluate(*test))

# Upload the check so it can be referenced by id in the evaluation run.
all_params_expected_check = okareo.create_or_update_check(
    name="Are All Parameters Expected",
    description="Checks if the generated parameter names in the model_output are expected based on the schema in the scenario_result.",
    check=Check(),
)

In [None]:
from checks.do_param_values_match import Check

# Local smoke tests: each case is [model output JSON, expected scenario result JSON].
# test_1: the generated "siteUrl" value equals the expected value.
test_1 = ['{"tool_calls": [{"parameters": {"siteUrl": "http://example.com"}}]}', '{"parameter_definitions": {"siteUrl": {"value": "http://example.com", "required": true}}}']
# test_2: the expected "siteUrl" value differs from the generated one,
# so this case is expected to fail the check.
test_2 = ['{"tool_calls": [{"parameters": {"siteUrl": "http://example.com", "parent": "http://example.com"}}]}', '{"parameter_definitions": {"siteUrl": {"value": "whoops, wrong value","required": true}}}']
# test_3: both generated values equal their expected values.
test_3 = ['{"tool_calls": [{"parameters": {"siteUrl": "http://example.com", "parent": "http://example.com"}}]}', '{"parameter_definitions": {"siteUrl": {"value": "http://example.com", "required": true}, "parent": {"value": "http://example.com", "required": false}}}']
for test in [test_1, test_2, test_3]:
    # Expected to print True for test_1 and test_3 only.
    print(Check.evaluate(*test))

# Upload the check so it can be referenced by id in the evaluation run.
# The name matches the "Do Parameter Values Match" entry in the check list above.
param_values_match_check = okareo.create_or_update_check(
    name="Do Parameter Values Match",
    description="Checks if each specified parameter value in the scenario_result is captured in the generated parameter values in the model_output.",
    check=Check(),
)

Load the tool definitions for use with Command-R.

In [None]:
# load the tools to use with command-r
import json

def int_to_bool(obj):
    """Return a copy of ``obj`` in which every int value is replaced by its truthiness.

    Intended as a ``json`` ``object_hook``: it receives one decoded JSON object
    (a dict) at a time. Every value that is an ``int`` (``bool`` included) is
    passed through ``bool()``, whatever its key; all other values, including
    floats, strings and lists, are kept unchanged.

    Args:
        obj: Mapping of keys to decoded JSON values.

    Returns:
        A new dict with the same keys and the converted values.
    """
    converted = {}
    for key, value in obj.items():
        if isinstance(value, int):
            converted[key] = bool(value)
        else:
            converted[key] = value
    return converted

# Tool definitions for the Google Search API. Integer values inside JSON
# objects are converted to booleans by the int_to_bool object hook while decoding.
search_filename = "apis/cohere_format/google_search.json"
# JSON text is UTF-8, so decode explicitly instead of relying on the platform default.
with open(search_filename, "r", encoding="utf-8") as f:
    search_contents = json.load(f, object_hook=int_to_bool)

# Tool definitions for the Google Translate API, decoded the same way.
translate_filename = "apis/cohere_format/google_translate.json"
with open(translate_filename, "r", encoding="utf-8") as f:
    translate_contents = json.load(f, object_hook=int_to_bool)

Define the [CustomModel](https://docs.okareo.ai/docs/sdk/okareo_python#custommodel--modelinvocation) to call Command-R and parse its outputs.

In [None]:
# custom model that calls command-r with tools

from okareo.model_under_test import CustomModel, ModelInvocation
import cohere

# Cohere API key read from the environment; a missing COHERE_API_KEY raises KeyError here.
COHERE_API_KEY = os.environ["COHERE_API_KEY"]

class CommandRToolModel(CustomModel):
    """Okareo custom model that sends each input to Command-R with the loaded tools."""

    def __init__(self, name):
        """Create the Cohere client and store the tools and preamble.

        Args:
            name: Model name forwarded to the ``CustomModel`` constructor.
        """
        super().__init__(name)
        self.client = cohere.Client(api_key=COHERE_API_KEY)
        # Offer the search and translate tool definitions together;
        # assumes both loaded files decode to lists -- TODO confirm.
        self.tools = search_contents + translate_contents
        # Instructions sent with every chat request.
        self.preamble = "".join((
            "You are a Google API assistant helping a user translate their requests into code. ",
            "The user will provide a description of the task they want to accomplish, ",
            "and you will generate the corresponding Python code.\n\n",
            "Only output the code snippet that corresponds to the API call that answers the user's question. ",
            "For example, 'my_api_call(\"my argument #1\", \"my argument #2\")'",
        ))

    def invoke(self, input_value):
        """Send ``input_value`` to Command-R and wrap the last chat message.

        Args:
            input_value: User message passed to the chat endpoint.

        Returns:
            A ``ModelInvocation`` whose prediction is the JSON-encoded last
            entry of the response's chat history, with the original input and
            the response metadata attached.
        """
        chat_response = self.client.chat(
            model="command-r",
            message=input_value,
            preamble=self.preamble,
            tools=self.tools,
        )
        # The final chat-history entry carries the tool completion.
        last_message = chat_response.chat_history[-1]
        print(last_message)
        # NOTE(review): json.dumps assumes the history entry is JSON-serializable
        # (e.g. a plain dict) -- confirm against the installed cohere SDK version.
        prediction = json.dumps(last_message)
        return ModelInvocation(
            model_input=input_value,
            model_prediction=prediction,
            model_output_metadata=chat_response.meta,
        )

# Register the model to use in the test run
# The custom model instance is passed inside a list, and update=True is set on the registration.
mut_name="Command-R Tool Model"
model_under_test = okareo.register_model(
    name=mut_name,
    model=[CommandRToolModel(name=CommandRToolModel.__name__)],
    update=True
)

In [None]:
# test the model out
# Builds a separate instance and sends it one sample request; invoke() prints the
# last chat message, so the tool call can be inspected before running the evaluation.
command_r_model = CommandRToolModel(name=CommandRToolModel.__name__) 
invocation = command_r_model.invoke("Can you help me get the sitemap for the following URL? https://arstechnica.com/")

Run a [Generation evaluation](https://docs.okareo.ai/docs/guides/generation_overview) in Okareo on the custom Command-R model. 

In [None]:
# evaluation that uses the scenario, check, and model
from okareo_api_client.models.test_run_type import TestRunType

# Name under which this evaluation run is created.
eval_name = "Command-R Tool Call evaluation"
# Run the registered model against the uploaded scenario as a generation test,
# scoring each output with the four uploaded function-calling checks.
evaluation = model_under_test.run_test(
    name=eval_name,
    scenario=tool_scenario.scenario_id,
    test_run_type=TestRunType.NL_GENERATION,
    checks=[
        correct_function_check.id,
        required_params_check.id,
        all_params_expected_check.id,
        param_values_match_check.id,
    ],
)
print(f"See results in Okareo: {evaluation.app_link}")