# Setup

In [1]:
import io
import json
import os
from pprint import pp

import pandas as pd
from termcolor import colored

# Parse Result

In [2]:
def get_directories(path):
    """Return the names of the immediate sub-directories of ``path``.

    Entries come back in the order ``os.listdir`` yields them; anything that
    is not a directory is left out.
    """
    subdirs = []
    for entry in os.listdir(path):
        if os.path.isdir(os.path.join(path, entry)):
            subdirs.append(entry)
    return subdirs

In [3]:
def load_jsonl(path):
    """Load a JSON Lines file into a list of decoded objects.

    Args:
        path: Path of a file holding one JSON document per line.

    Returns:
        A list with one decoded object per non-blank line, in file order.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    records = []
    # JSON is UTF-8 by specification; do not depend on the platform locale.
    with open(path, "r", encoding="utf-8") as file:
        for line in file:
            line = line.strip()
            # Blank lines (typically a trailing newline at end of file) carry
            # no record and would otherwise make json.loads raise.
            if not line:
                continue
            records.append(json.loads(line))
    return records

In [4]:
def get_model_dir(model_dirs, gen_mode, model_name_escaped, dedup_str, n_tool_calls):
    """Return the first directory name containing every requested fragment.

    A name matches when ``gen_mode``, ``model_name_escaped``, ``dedup_str``
    and ``n_tool_calls`` all occur in it as substrings. An empty
    ``dedup_str`` is contained in every name and therefore never filters.
    Returns ``None`` when nothing matches.
    """
    required_parts = (gen_mode, model_name_escaped, dedup_str, n_tool_calls)
    for candidate in model_dirs:
        # "structured" is itself a substring of "unstructured", so those
        # names have to be excluded explicitly for the structured mode.
        if gen_mode == "structured" and "unstructured" in candidate:
            continue
        if all(part in candidate for part in required_parts):
            return candidate
    return None

In [5]:
def index_it(idx, questions, solutions, generations, scores, idx_type="error"):
    """Select the question, solution, generation and score for one example.

    ``scores[0]` is never treated as an error entry (it is skipped in both
    modes); every following entry carries a 1-based ``"id"`` that points at
    position ``id - 1`` of ``questions`` / ``solutions`` / ``generations``.

    Args:
        idx: 0-based index. Its meaning depends on ``idx_type``.
        questions, solutions, generations: Parallel lists, one item per example.
        scores: List whose first element is skipped and whose remaining
            elements are per-error dicts with an ``"id"`` key.
        idx_type: ``"absolute"`` to index the dataset directly, or ``"error"``
            to pick the ``idx``-th recorded error.

    Returns:
        ``(question, solution, generation, score)``. In ``"error"`` mode
        ``score`` is the error dict; in ``"absolute"`` mode it is the
        (possibly empty) list of error dicts recorded for that example.

    Raises:
        ValueError: If ``idx_type`` is unknown, or if ``idx`` is outside the
            range of recorded errors in ``"error"`` mode.
    """
    if idx_type == "absolute":
        question = questions[idx]
        solution = solutions[idx]
        generation = generations[idx]
        # Score ids are 1-based (the "error" branch maps id -> id - 1), so the
        # example at position idx is recorded under id idx + 1.
        score = [entry for entry in scores[1:] if entry["id"] == idx + 1]
    elif idx_type == "error":
        n_errors = max(len(scores) - 1, 0)
        # scores[idx + 1] must exist; a negative idx would silently select the
        # skipped first row or wrap around to the end of the list.
        if idx < 0 or idx >= n_errors:
            raise ValueError(f"There are {n_errors} errors in this file. But you requested error {idx} (0-index).")
        score = scores[idx + 1]
        abs_idx = score["id"] - 1
        question = questions[abs_idx]
        solution = solutions[abs_idx]
        generation = generations[abs_idx]
    else:
        raise ValueError("Wrong idx_type.")

    return question, solution, generation, score

In [6]:
def extract_values(question, solution, generation, score):
    """Flatten one example's records into a single dict for display.

    Reads ``"question"`` and ``"function"`` from ``question``; ``"result"``,
    ``"messages"``, ``"tool_calls"`` and ``"n_tool_calls"`` from
    ``generation``; ``"valid"``, ``"error"`` and ``"error_type"`` from
    ``score``. ``"model_result_decoded"`` is optional in ``score`` and
    becomes ``None`` when missing. ``solution`` is passed through untouched.
    """
    user_query, tools = question["question"], question["function"]
    # Always hand back a list of tools, even if only one was declared.
    tools = tools if isinstance(tools, list) else [tools]

    raw_result, messages, tool_calls, n_tool_calls = (
        generation[key] for key in ("result", "messages", "tool_calls", "n_tool_calls")
    )
    valid, error, error_type = (score[key] for key in ("valid", "error", "error_type"))
    decoded_result = score.get("model_result_decoded")

    return {
        "user_query": user_query,
        "tools": tools,
        "valid": valid,
        "tool_calls": tool_calls,
        "solution": solution,
        "error": error,
        "error_type": error_type,
        "raw_result": raw_result,
        "decoded_result": decoded_result,
        "messages": messages,
        "n_tool_calls": n_tool_calls,
    }

In [7]:
def spec_to_result(spec, idx, idx_type):
    """Load the files of one evaluation run and assemble a single example.

    Args:
        spec: Dict with the keys ``"bfcl_category"``, ``"model_name"``,
            ``"dedup"``, ``"out_dir"``, ``"gen_mode"`` and ``"n_tool_calls"``.
        idx: 0-based example index, interpreted according to ``idx_type``.
        idx_type: Forwarded to ``index_it`` (``"error"`` or ``"absolute"``).

    Returns:
        ``spec`` merged with the values from ``extract_values`` plus
        ``"idx"`` and ``"idx_type"``.

    Raises:
        FileNotFoundError: If no sub-directory of ``spec["out_dir"]`` matches
            the requested run.
    """
    # Unpack spec
    bfcl_category = spec["bfcl_category"]
    model_name = spec["model_name"]
    dedup = spec["dedup"]
    out_dir = spec["out_dir"]
    gen_mode = spec["gen_mode"]
    n_tool_calls = spec["n_tool_calls"]

    # Get paths
    test_file = f"gorilla_openfunctions_v1_test_{bfcl_category}.json"
    questions_path = os.path.join("./data", test_file)
    solutions_path = os.path.join("./data/possible_answer", test_file)

    # Directory names use "_" where the model name has "/".
    model_name_escaped = model_name.replace("/", "_")
    # NOTE(review): with dedup=False the fragment is "", which matches every
    # directory name, including dedup runs - confirm this is intended.
    dedup_str = "dedup" if dedup else ""
    model_dirs = get_directories(out_dir)
    model_dir = get_model_dir(model_dirs, gen_mode, model_name_escaped, dedup_str, n_tool_calls)
    if model_dir is None:
        # Without this guard os.path.join(out_dir, None, ...) fails with an
        # unhelpful TypeError.
        raise FileNotFoundError(
            f"No directory in {out_dir!r} matches gen_mode={gen_mode!r}, "
            f"model={model_name_escaped!r}, dedup={dedup!r}, n_tool_calls={n_tool_calls!r}. "
            f"Available: {model_dirs}"
        )
    generations_path = os.path.join(out_dir, model_dir, "generations", test_file)
    scores_path = os.path.join(out_dir, model_dir, "scores", bfcl_category + "_score.json")

    # Load all files
    questions = load_jsonl(questions_path)
    solutions = load_jsonl(solutions_path)
    generations = load_jsonl(generations_path)
    scores = load_jsonl(scores_path)

    # Index into the exact question, solution, generation, score
    question, solution, generation, score = index_it(idx, questions, solutions, generations, scores, idx_type=idx_type)
    values = extract_values(question, solution, generation, score)

    result = spec | values | {"idx": idx, "idx_type": idx_type}
    return result

# Pretty Print

In [8]:
def pretty_print_conversation(messages):
    """Print each chat message on its own line, coloured by role.

    Handles the roles ``system``, ``user``, ``assistant`` and ``function``;
    messages with any other role are skipped. For an assistant message the
    ``function_call`` is shown when present and truthy, otherwise the
    ``content``.
    """
    role_to_color = {
        "system": "red",
        "user": "green",
        "assistant": "blue",
        "function": "magenta",
    }

    for message in messages:
        role = message["role"]
        if role == "system":
            text = f"system: {message['content']}\n"
        elif role == "user":
            text = f"user: {message['content']}\n"
        elif role == "assistant":
            if message.get("function_call"):
                text = f"assistant: {message['function_call']}\n"
            else:
                text = f"assistant: {message['content']}\n"
        elif role == "function":
            text = f"function ({message['name']}): {message['content']}\n"
        else:
            # Unknown roles produce no output.
            continue
        print(colored(text, role_to_color[role]))

In [9]:
def pp_result(result, width=200, verbose=0):
    """Print a colourised report for one evaluated example.

    Args:
        result: Dict holding the run description (``idx``, ``bfcl_category``,
            ``model_name``, ``gen_mode``, ``n_tool_calls``, optionally
            ``dedup``) and the example values (``user_query``, ``valid``,
            ``error``, ``error_type``, ``tool_calls``, ``solution``,
            ``raw_result``, ``decoded_result``, ``tools``, ``messages``).
        width: Line width handed to ``pprint.pp`` for the tool definitions.
        verbose: ``0`` prints the summary; ``>= 1`` adds the decoded result
            and the tool definitions; ``>= 2`` adds the full conversation.
    """
    print("-"*120)
    # Take the flag from the result being printed, not from a notebook global:
    # the global may be undefined or belong to a different run.
    dedup_str = "-dedup" if result.get("dedup", False) else ""
    print(f"Example {result['idx']} of {result['bfcl_category']} BFCL\t\t\t\t({result['model_name']}\t{result['gen_mode']}\t{result['n_tool_calls']}{dedup_str})")
    print("-"*120, end="\n\n")

    print(colored(f"User Query:\n{result['user_query']}", "yellow"), end="\n\n")

    print(colored(f"Valid: {result['valid']}", "blue"), end="\n\n")

    print(colored(f"Error:\n{result['error']}\n{result['error_type']}", "blue"), end="\n\n")

    print(colored("My Tool Calls:", "red"))
    for tool_call in result['tool_calls']:
        print(colored(tool_call, "red"))
    print()

    print(colored("Correct Tool Calls:", "green"))
    # The solution maps tool name -> arguments; render it in the same shape
    # as the generated tool calls so the two can be compared by eye.
    for tool_name, tool_args in result['solution'].items():
        print(colored(f"{{'tool_name': '{tool_name}', 'tool_arguments': {tool_args}}}", "green"))
    print()

    print(colored(f"Raw Result:\n{result['raw_result']}", "yellow"), end="\n\n")

    if verbose >= 1:

        print(colored(f"Decoded Result:\n{result['decoded_result']}", "blue"), end="\n\n")

        print("Given Tool Calls:")
        for tool in result["tools"]:
            pp(tool, width=width)
        print()

    if verbose >= 2:
        print("Messages:")
        pretty_print_conversation(result['messages'])

# Run it

In [10]:
# Inputs: which evaluation run to inspect.
out_dir = "./outputs"
model_name = "databricks/dbrx-instruct"
bfcl_category = "multiple_function"
gen_mode = "meta_tool"
n_tool_calls = "auto"
dedup = False

# Bundle the settings into the dict that spec_to_result expects.
spec = dict(
    bfcl_category=bfcl_category,
    gen_mode=gen_mode,
    n_tool_calls=n_tool_calls,
    model_name=model_name,
    dedup=dedup,
    out_dir=out_dir,
)

In [12]:
# Load the first recorded error (0-based) of the configured run and print it.
idx = 0
result = spec_to_result(spec, idx, idx_type="error")
# Keep the tool definitions and the user query as globals; the prompt cells
# further down read them.
tools, user_query = result["tools"], result["user_query"]
pp_result(result)

------------------------------------------------------------------------------------------------------------------------
Example 0 of multiple_function BFCL				(databricks/dbrx-instruct	meta_tool	auto)
------------------------------------------------------------------------------------------------------------------------

[33mUser Query:
Can you calculate the displacement of a car moving at an initial speed of 20 m/s and then accelerates at 10 m/s^2 for 5 seconds? (assuming a straight line motion)[0m

[34mValid: False[0m

[34mError:
['Wrong number of functions.']
parallel_function_checker_no_order:wrong_count[0m

[31mMy Tool Calls:[0m
[31m{'tool_name': 'kinematics.calculate_displacement', 'tool_arguments': {'initial_speed': 20, 'acceleration': 10, 'time': 5}}[0m
[31m{'tool_name': 'kinematics.calculate_displacement', 'tool_arguments': {'initial_speed': 20, 'acceleration': 10, 'time': 5}}[0m

[32mCorrect Tool Calls:[0m
[32m{'tool_name': 'kinematics.calculate_displacement',

# Fix Prompt Here

In [None]:
from tool_use.tool import Tool
# from tool_use.prompt import get_meta_tool_system_prompt
# from tool_use.utils.meta_tool import get_meta_tool
from tool_use.utils.regex_util import tools_to_schema

In [None]:
# Initialize the tool-calling client used by the generation cell below.
# NOTE(review): the URL targets localhost, so this presumably expects a model
# server already running on this machine - confirm before a fresh run.
base_url = "http://localhost:8000/v1"
# NOTE(review): "-" looks like a placeholder for a server that does not check
# keys - confirm; a real key should come from the environment, not the notebook.
api_key = "-"
# Rebinds the global `model_name` from the inputs cell (same value there).
model_name = "databricks/dbrx-instruct"
my_tool = Tool(base_url, api_key, model_name)

In [None]:
# get_meta_tool
# Build the "meta tool": a single function whose only argument is the name of
# the real tool to call, restricted to the tools of the loaded example plus
# the sentinel "none".
description = 'Pick\'s the relevant tool that corresponds to user\'s query.'
# The leading space keeps the two sentences from running together
# ("...query.Choose...") in the prompt.
description += ' Choose the "none" keyword if none of the tools are relevant to the user\'s query. If you have already called a tool, choose the `none` argument. Do not repeat a tool call!'

meta_tool = {
    'name': 'pick_relevant_tool',
    'description': description,
    'parameters': {
        "type": "object",
        "properties": {
            "name": {
                "type": "string",
                "description": "The names of the relevant tools to use",
                "enum": []
            },
        },
        "required": ["name"]}
    }
# `tools` comes from the example loaded earlier in the notebook.
tool_names = [tool['name'] for tool in tools]
tool_names.append("none")
meta_tool["parameters"]["properties"]["name"]["enum"] = tool_names
meta_tool

{'name': 'pick_relevant_tool',
 'description': 'Pick\'s the relevant tool that corresponds to user\'s query.Choose the "none" keyword if none of the tools are relevant to the user\'s query. If you have already called a tool, choose the `none` argument. Do not repeat a tool call!',
 'parameters': {'type': 'object',
  'properties': {'name': {'type': 'string',
    'description': 'The names of the relevant tools to use',
    'enum': ['kinematics.calculate_displacement',
     'kinematics.calculate_final_speed',
     'none']}},
  'required': ['name']}}

In [None]:
# get_meta_tool_system_prompt
# Render the tool definitions and assemble the system prompt for the
# meta-tool step.
tools_schema = tools_to_schema(tools)
# NOTE(review): `meta_tool` is a single dict while `tools` is a list; this
# assumes tools_to_schema accepts both - confirm.
meta_tool_schema = tools_to_schema(meta_tool)

# Tag delimiters. These must be plain strings: a trailing comma after the
# value would make each one a 1-tuple and the prompt would then contain
# "('<tool_call>',)" instead of "<tool_call>". The opening tag is "<tools>"
# so that it pairs with the closing "</tools>".
tool_list_start = "<tools>"
tool_list_end = "</tools>"
tool_call_start = "<tool_call>"
tool_call_end = "</tool_call>"
tool_response_start = "<tool_response>"
tool_response_end = "</tool_response>"


_system_prompt = """You are a function calling AI model. Your job is to answer the user's questions and you may call one or more functions to do this.


Please use your own judgment as to whether or not you should call a function. In particular, you must follow these guiding principles:
1. You may call one or more functions to assist with the user query. You should call multiple functions when the user asks you to.
2. You do not need to call a function. If none of the functions can be used to answer the user's question, please do not make the function call.
3. Don't make assumptions about what values to plug into functions. If you are missing the parameters to make a function call, please ask the user for the parameters.
4. You may assume the user has implemented the function themselves.
5. You may assume the user will call the function on their own. You should NOT ask the user to call the function and let you know the result; they will do this on their own.
6. Never call a tool twice with the same exact arguments. Do not repeat your tool calls!

You can only call functions according the following formatting rules:
Rule 1: All the functions you have access to are contained within {tool_list_start}{tool_list_end} XML tags. You cannot use any functions that are not listed between these tags.

Rule 2: For each function call return a json object (using quotes) with function name and arguments within {tool_call_start}\n{{ }}\n{tool_call_end} XML tags as follows:
* With arguments:
{tool_call_start}\n{{"tool_name": "function_name", "tool_arguments": {{"argument_1_name": "value", "argument_2_name": "value"}} }}\n{tool_call_end}
* Without arguments:
{tool_call_start}\n{{ "tool_name": "function_name", "tool_arguments": {{}} }}\n{tool_call_end}
In between {tool_call_start} and {tool_call_end} tags, you MUST respond in a valid JSON schema.
In between the {tool_call_start} and {tool_call_end} tags you MUST only write in json; no other text is allowed.

Rule 3: If user decides to run the function, they will output the result of the function call between the {tool_response_start} and {tool_response_end} tags. If it answers the user's question, you should incorporate the output of the function in your answer.


Here are the tools available to you:
{tool_list_start}\n{tools_schema}\n{tool_list_end}

Output the name of the "tool_name" that you would want to call adherent to the following JSON format {meta_tool_schema}.
Remember, if you do not want to call any tools, please use the "none" keyword. Do not try and call a tool that is not listed in the tool list.
"""

# Doubled braces in the template are literal braces; single-brace fields are
# filled in here.
system_prompt = _system_prompt.format(
    tool_list_start=tool_list_start,
    tool_list_end=tool_list_end,
    tool_call_start=tool_call_start,
    tool_call_end=tool_call_end,
    tool_response_start=tool_response_start,
    tool_response_end=tool_response_end,
    tools_schema=tools_schema,
    meta_tool_schema=meta_tool_schema
    )


In [None]:
# Conversation sent to the model: the assembled system prompt followed by the
# user query of the loaded example.
messages = [
    {"role": "system", "content": system_prompt},
    {"role": "user", "content": user_query},
]

In [None]:
# Meta tool generation: run the model once in meta-tool mode and list the
# tool calls it produced, if any.
output_messages, tool_calls = my_tool(
    messages,
    tools,
    gen_mode="meta_tool",
    n_tool_calls="auto",
)
print("Meta Tool Generation:")
if tool_calls:
    for tool_call in tool_calls:
        print(tool_call)

Meta Tool Generation:
{'tool_name': 'kinematics.calculate_displacement', 'tool_arguments': {'initial_speed': 20, 'acceleration': 10, 'time': 5}}
