# Function Calling Evaluation on Gorilla BFCL Dataset

**🎯 Goal**:
- Run a function calling evaluation in Okareo using the BFCL v3 data.
- Provide a simple introduction to Okareo evaluations.

**📋 Steps**:
1. Upload a function calling scenario using the BFCL data.
2. Define a custom model to generate the function calls.
3. Run the evaluation using the scenario (from #1) + model (from #2) along with Okareo predefined checks to measure function call accuracy, including:
    - function_call_validator
    - is_function_correct
    - are_all_params_expected
    - are_required_params_present
    - do_param_values_match

**Notes**
- We use the ['live simple' data](https://huggingface.co/datasets/gorilla-llm/Berkeley-Function-Calling-Leaderboard/blob/main/BFCL_v3_live_simple.json), in which each example consists of a single turn. Other variants, including ones with multi-turn conversation histories, are also available in [this repo](https://huggingface.co/datasets/gorilla-llm/Berkeley-Function-Calling-Leaderboard/tree/main).
- The 'ground truth'/expected results are available in the ['possible answers' directory](https://huggingface.co/datasets/gorilla-llm/Berkeley-Function-Calling-Leaderboard/tree/main/possible_answer) of the repo.


In [1]:
import os

# get Okareo client
from okareo import Okareo

# Fail fast when the API key is missing or empty, before building the client.
OKAREO_API_KEY = os.getenv("OKAREO_API_KEY")
if OKAREO_API_KEY is None or OKAREO_API_KEY == "":
    raise ValueError("OKAREO_API_KEY environment variable is not set")

okareo = Okareo(OKAREO_API_KEY)

In [2]:
import re

# Anything outside ASCII letters, digits, underscore and hyphen is replaced.
_UNSAFE_FUNCTION_NAME_CHARS = re.compile(r"[^a-zA-Z0-9_-]")


def make_function_name_safe(text: str) -> str:
    """Return *text* with every character OpenAI rejects replaced by ``_``.

    OpenAI function names may only contain alphanumeric characters,
    underscores, or hyphens. Dotted names such as ``math.factorial`` become
    ``math_factorial``; names that are already valid are returned unchanged.

    Args:
        text: The original function name.

    Returns:
        The sanitized function name, the same length as the input.
    """
    return _UNSAFE_FUNCTION_NAME_CHARS.sub("_", text)

Normally we would use the Hugging Face `datasets` library to download the data, but loading this dataset failed because of an issue with its column names.

Instead, use the manually downloaded files.

In [3]:
# import os
# from huggingface_hub import login
# from datasets import load_dataset

# # Authenticate with your Hugging Face API token
# HUGGINGFACE_API_TOKEN = os.environ["HUGGINGFACE_API_KEY"]
# login(token=HUGGINGFACE_API_TOKEN)

# dataset = load_dataset("gorilla-llm/Berkeley-Function-Calling-Leaderboard", split="train")

import json

input_filename = "./data/BFCL_v3_live_simple.json"
result_filename = "./data/possible_answers/BFCL_v3_live_simple.json"

# Both files are read as JSON Lines: one JSON object per line.

# inputs: sample id -> {"question": ..., "function": [function definitions]}
inputs = {}
with open(input_filename, "r", encoding="utf-8") as input_file:
    for line in input_file:
        if not line.strip():
            # tolerate blank lines (e.g. a trailing newline at end of file)
            continue
        input_ = json.loads(line)
        # make function names safe to use with OpenAI, which expects only
        # alphanumeric characters, underscores, or hyphens
        function_list = []
        for function_spec in input_["function"]:
            function_spec["name"] = make_function_name_safe(function_spec["name"])
            function_list.append(function_spec)
        inputs[input_["id"]] = {"question": input_["question"], "function": function_list}

# results: sample id -> {"ground_truth": [{function name: arguments}, ...]}
results = {}
with open(result_filename, "r", encoding="utf-8") as result_file:
    for line in result_file:
        if not line.strip():
            continue
        result = json.loads(line)
        # The ground truth is keyed by function name, so the keys get the same
        # sanitization as the input function names and still match them.
        function_list = [
            {make_function_name_safe(name): arguments for name, arguments in call.items()}
            for call in result["ground_truth"]
        ]
        results[result["id"]] = {"ground_truth": function_list}

In [4]:
# Inspect the (sanitized) function definitions loaded for one sample id.
inputs['live_simple_0-0-0']['function']

[{'name': 'get_user_info',
  'description': 'Retrieve details for a specific user by their unique identifier.',
  'parameters': {'type': 'dict',
   'required': ['user_id'],
   'properties': {'user_id': {'type': 'integer',
     'description': 'The unique identifier of the user. It is used to fetch the specific user details from the database.'},
    'special': {'type': 'string',
     'description': 'Any special information or parameters that need to be considered while fetching user details.',
     'default': 'none'}}}}]

In [5]:
# Inspect the ground-truth function call(s) loaded for the same sample id.
results['live_simple_0-0-0']['ground_truth']

[{'get_user_info': {'user_id': [7890], 'special': ['black']}}]

In [6]:
# transform the input/result data into the required scenario format

from okareo_api_client.models.scenario_set_create import ScenarioSetCreate
from okareo_api_client.models.seed_data import SeedData

# Build one SeedData row per sample: the input carries the question and the
# candidate function definitions, the expected result is a single function call.
seed_data = []

for key, input_ in inputs.items():
    # Only the first ground-truth call of each sample is used as the expectation.
    expected_call = results[key]['ground_truth'][0]
    func_name = next(iter(expected_call))
    # Every ground-truth argument maps to a list; keep its first entry and drop
    # arguments whose list is empty.
    # NOTE(review): presumably each list enumerates the acceptable values for
    # that argument — confirm against the BFCL 'possible_answer' format.
    func_args = {k: v[0] for k, v in expected_call[func_name].items() if v}
    parsed_result = {
        "function": {
            "name": func_name,
            "arguments": func_args,
        }
    }
    seed_data.append(
        SeedData(
            input_={
                "question": input_["question"],
                "function": input_["function"],
            },
            result=parsed_result,
        )
    )

tool_scenario = okareo.create_scenario_set(
    ScenarioSetCreate(
        name="BFCL Gorilla - Simple Function Call",
        seed_data=seed_data,
    )
)

print(tool_scenario.app_link)

http://localhost:3000/project/2765bfd0-f296-4ae6-8ef9-acbfd56dc786/scenario/2edac4a3-4ba2-43ee-a7a6-e6916f238165


In [7]:
# A second scenario holding only the first 10 seed rows, for quick test runs.
tool_scenario_small = okareo.create_scenario_set(
    ScenarioSetCreate(
        name="BFCL Gorilla - Simple Function Call (10 rows)",
        seed_data=seed_data[:10],
    )
)



In [8]:
# Python-style type names used by the dataset -> JSON Schema type names.
py_to_oai_types = {
    "str": "string",
    "int": "integer",
    "float": "number",
    "bool": "boolean",
    "list": "array",
    "tuple": "array",  # JSON Schema has no tuple type
    "dict": "object",
}


def _to_openai_schema(schema):
    """Return a copy of *schema* with Python-style type names translated.

    Walks nested ``properties``, ``items`` and ``additionalProperties`` so that
    types at every level are converted. Names that are not in
    ``py_to_oai_types`` (for example ones that are already JSON Schema types)
    are left as they are. The input is not modified.
    """
    if not isinstance(schema, dict):
        return schema
    converted = dict(schema)
    type_name = converted.get("type")
    if isinstance(type_name, str):
        converted["type"] = py_to_oai_types.get(type_name, type_name)
    properties = converted.get("properties")
    if isinstance(properties, dict):
        converted["properties"] = {
            prop_name: _to_openai_schema(prop_schema)
            for prop_name, prop_schema in properties.items()
        }
    for nested_key in ("items", "additionalProperties"):
        if isinstance(converted.get(nested_key), dict):
            converted[nested_key] = _to_openai_schema(converted[nested_key])
    return converted


def parse_input_to_openai_tool(input_function):
    """Format function definitions for the OpenAI completion 'tools' argument.

    See https://platform.openai.com/docs/guides/function-calling

    Args:
        input_function: Iterable of dicts with ``name``, ``description`` and
            ``parameters`` keys, where ``parameters`` holds ``type``,
            ``properties`` and optionally ``required``.

    Returns:
        A list with one ``{"type": "function", "function": {...}}`` entry per
        input definition. A missing ``type`` defaults to ``"object"``, and a
        missing ``properties`` / ``required`` defaults to empty.
    """
    tools = []
    for tool in input_function:
        params = _to_openai_schema(tool["parameters"])
        tools.append({
            "type": "function",
            "function": {
                "name": tool["name"],
                "description": tool["description"],
                "parameters": {
                    "type": params.get("type", "object"),
                    "properties": params.get("properties", {}),
                    "required": params.get("required", []),
                }
            }
        })
    return tools

In [9]:
# Sanity check: convert the first seed row's function list to the OpenAI 'tools' format.
# seed_data[0].input_['function']
parse_input_to_openai_tool(seed_data[0].input_['function'])

[{'type': 'function',
  'function': {'name': 'get_user_info',
   'description': 'Retrieve details for a specific user by their unique identifier.',
   'parameters': {'type': 'object',
    'properties': {'user_id': {'type': 'integer',
      'description': 'The unique identifier of the user. It is used to fetch the specific user details from the database.'},
     'special': {'type': 'string',
      'description': 'Any special information or parameters that need to be considered while fetching user details.',
      'default': 'none'}},
    'required': ['user_id']}}}]

In [18]:
from openai import OpenAI

# OpenAI client used for the chat completion requests in this notebook.
# NOTE(review): no API key is passed explicitly — presumably it is read from
# the environment (OPENAI_API_KEY); confirm before running.
client = OpenAI()

import re
from okareo.model_under_test import CustomModel, ModelInvocation

class FunctionCallModel(CustomModel):
    """Custom Okareo model that asks GPT-4o for tool calls on a scenario row."""

    def __init__(self, name):
        super().__init__(name)

    def invoke(self, input_value):
        """Request a completion for one scenario input and return its tool calls.

        Args:
            input_value: Scenario input with a ``question`` entry, whose first
                element is used as the chat messages, and a ``function`` entry
                holding the function definitions offered to the model.

        Returns:
            A ``ModelInvocation`` carrying the messages as ``model_input`` and
            the parsed tool calls (an empty list when the model made none).
        """
        messages = input_value['question'][0]
        tools = parse_input_to_openai_tool(input_value["function"])
        completion = client.chat.completions.create(
            model="gpt-4o",
            messages=messages,
            tools=tools,
        )

        # `tool_calls` is None when the model replies without calling a tool;
        # treat that as "no tool calls" instead of failing on iteration.
        raw_tool_calls = completion.choices[0].message.tool_calls or []

        # extract the function calls from the completion and return in invocation
        completion_tool_calls = []
        for tc in raw_tool_calls:
            parsed_tool = tc.to_dict()
            # arguments arrive as a JSON string; decode them into a dict
            parsed_tool['function']['arguments'] = json.loads(parsed_tool['function']['arguments'])
            completion_tool_calls.append(parsed_tool)
        return ModelInvocation(
            model_input=messages,
            tool_calls=completion_tool_calls,
        )

# Register the custom model with Okareo so it can be used in the test run.
# update=True is passed so an existing registration with this name is updated.
mut_name = "GPT-4o Function Call Model"
function_call_model = FunctionCallModel(name=FunctionCallModel.__name__)
model_under_test = okareo.register_model(
    name=mut_name,
    model=[function_call_model],
    update=True,
)

In [None]:
# evaluation that uses the scenario, check, and model
from okareo_api_client.models.test_run_type import TestRunType

eval_name = "BFCL Gorilla - Simple Function Call Evaluation"

# Okareo predefined checks that score the generated function call.
function_call_checks = [
    "function_call_validator",
    "is_function_correct",
    "are_all_params_expected",
    "are_required_params_present",
    "do_param_values_match",
]

# Runs against the 10-row scenario; swap in `tool_scenario.scenario_id` to
# evaluate the full scenario instead.
evaluation = model_under_test.run_test(
    name=eval_name,
    scenario=tool_scenario_small.scenario_id,
    test_run_type=TestRunType.NL_GENERATION,
    checks=function_call_checks,
)
print(f"See results in Okareo: {evaluation.app_link}")