In [21]:
import json
import os
from pprint import pp

import pandas as pd

In [2]:
def get_directories(path):
    """Return the names of the immediate subdirectories of ``path``.

    Args:
        path: Directory to scan.

    Returns:
        A list of entry names (relative to ``path``, not full paths) for
        which ``os.path.isdir`` is true, in the order ``os.listdir``
        yields them.
    """
    subdirs = []
    for entry in os.listdir(path):
        if os.path.isdir(os.path.join(path, entry)):
            subdirs.append(entry)
    return subdirs

In [3]:
def load_jsonl(path):
    """Load a JSON Lines file into a list of decoded objects.

    Args:
        path: Path to a text file holding one JSON document per line.

    Returns:
        A list with one decoded object per non-blank line, in file order.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    records = []
    # Explicit UTF-8 so decoding does not depend on the platform's locale default.
    with open(path, "r", encoding="utf-8") as file:
        for line in file:
            # Blank lines (e.g. a trailing newline at EOF) are not valid JSON
            # documents; skip them instead of letting json.loads raise.
            if not line.strip():
                continue
            records.append(json.loads(line))
    return records

In [4]:
def get_model_dir(model_dirs, gen_mode, model_name_escaped, dedup_str, tool_call):
    """Pick the first directory name that contains every requested fragment.

    Matching is plain substring containment. Because "structured" is itself
    a substring of "unstructured", names containing "unstructured" are
    skipped when ``gen_mode`` is "structured".

    Args:
        model_dirs: Candidate directory names, checked in order.
        gen_mode: Generation-mode fragment that must appear in the name.
        model_name_escaped: Model-name fragment that must appear in the name.
        dedup_str: Dedup fragment that must appear in the name; an empty
            string matches every name.
        tool_call: Tool-call fragment that must appear in the name.

    Returns:
        The first matching name, or ``None`` when nothing matches.
    """
    required = (gen_mode, model_name_escaped, dedup_str, tool_call)
    skip_unstructured = gen_mode == "structured"

    for candidate in model_dirs:
        if skip_unstructured and "unstructured" in candidate:
            continue
        if all(fragment in candidate for fragment in required):
            return candidate

    return None

In [5]:
def index_it(idx, questions, solutions, generations, scores, idx_type="error"):
    """Select the question/solution/generation/score belonging to one record.

    ``scores[0]` is treated as a header row and never returned; the
    remaining entries carry a 1-based ``"id"`` (the "error" branch maps
    ``id - 1`` to a position in ``questions``).

    Args:
        idx: Position to look up; its meaning depends on ``idx_type``.
        questions: Question records, indexed by position.
        solutions: Solution records, parallel to ``questions``.
        generations: Generation records, parallel to ``questions``.
        scores: Score records; the first entry is a header.
        idx_type: "absolute" treats ``idx`` as a position in ``questions``;
            "error" treats ``idx`` as a position among the score entries
            that follow the header.

    Returns:
        A ``(question, solution, generation, score)`` tuple. For "error",
        ``score`` is a single score record. For "absolute", ``score`` is a
        list of the score records whose id refers to that question (empty
        when there is none).

    Raises:
        ValueError: If ``idx_type`` is neither "absolute" nor "error".
    """
    if idx_type == "absolute":
        question = questions[idx]
        solution = solutions[idx]
        generation = generations[idx]
        # Score ids are 1-based (see the "error" branch), so the entry for the
        # question at position idx has id == idx + 1. Skip the header row.
        score = [entry for entry in scores[1:] if entry["id"] == idx + 1]
    elif idx_type == "error":
        # +1 skips the header row at scores[0].
        score = scores[idx + 1]
        abs_idx = score["id"] - 1
        question = questions[abs_idx]
        solution = solutions[abs_idx]
        generation = generations[abs_idx]
    else:
        raise ValueError("Wrong idx_type.")

    return question, solution, generation, score

In [19]:
def extract_result(question, solution, generation, score):
    """Flatten one question/generation/score triple into a single dict.

    Args:
        question: Mapping with "question" and "function" entries. A
            non-list "function" value is wrapped in a one-element list.
        solution: Accepted for signature symmetry; not read here.
        generation: Mapping with "result", "messages", "tool_calls" and
            "n_tool_calls" entries.
        score: Mapping with "valid", "error", "error_type" and
            "model_result_decoded" entries.

    Returns:
        A dict with the keys user_query, tools, valid, tool_calls, error,
        error_type, raw_result, decoded_result, messages, n_tool_calls
        (in that order).
    """
    user_query = question["question"]
    tools = question["function"]
    tools = tools if isinstance(tools, list) else [tools]

    raw_result, messages, tool_calls, n_tool_calls = (
        generation[key] for key in ("result", "messages", "tool_calls", "n_tool_calls")
    )
    valid, error, error_type, decoded_result = (
        score[key] for key in ("valid", "error", "error_type", "model_result_decoded")
    )

    return {
        "user_query": user_query,
        "tools": tools,
        "valid": valid,
        "tool_calls": tool_calls,
        "error": error,
        "error_type": error_type,
        "raw_result": raw_result,
        "decoded_result": decoded_result,
        "messages": messages,
        "n_tool_calls": n_tool_calls
    }

In [6]:
# Inputs
# Test category; selects the test file name and the "<category>_score.json" file.
bfcl_category = "simple"

# The settings below are matched as substrings against the run directory
# names found under out_dir (see get_model_dir).
# dedup=True requires "dedup" in the directory name; False adds no constraint.
dedup = True
gen_mode = "structured"
tool_call = "auto"
# "/" is replaced by "_" before matching against directory names.
model_name = "databricks/dbrx-instruct"
# Directory whose subdirectories are searched for the matching run.
out_dir = "./outputs"

In [7]:
# Get paths
test_file = f"gorilla_openfunctions_v1_test_{bfcl_category}.json"
questions_path = os.path.join("./data", test_file)
solutions_path = os.path.join("./data/possible_answer", test_file)

model_name_escaped = model_name.replace("/", "_")
# NOTE: with dedup=False this is "", which is a substring of every name, so
# it places no constraint on which run directory is selected.
dedup_str = "dedup" if dedup else ""
model_dirs = get_directories(out_dir)
model_dir = get_model_dir(model_dirs, gen_mode, model_name_escaped, dedup_str, tool_call)
# get_model_dir returns None when nothing matches; fail here with a readable
# message instead of an opaque TypeError from os.path.join below.
if model_dir is None:
    raise FileNotFoundError(
        f"No directory under {out_dir!r} matches gen_mode={gen_mode!r}, "
        f"model={model_name_escaped!r}, dedup={dedup_str!r}, tool_call={tool_call!r}. "
        f"Available: {model_dirs}"
    )
generations_path = os.path.join(out_dir, model_dir, "generations", test_file)
scores_path = os.path.join(out_dir, model_dir, "scores", bfcl_category + "_score.json")

In [8]:
# Load all files
# Each file is read as JSON Lines (one JSON document per line) into a list.
questions = load_jsonl(questions_path)
solutions = load_jsonl(solutions_path)
generations = load_jsonl(generations_path)
# NOTE(review): index_it skips scores[0], so the first line of the score file
# is presumably a summary/header row rather than a per-question entry — confirm.
scores = load_jsonl(scores_path)

In [9]:
# Record to inspect. With idx_type="error", idx is a position among the score
# entries that follow the header row; with idx_type="absolute", idx is a
# position in the question list.
idx = 0
idx_type = "error"

# Pass the variable through so that changing idx_type above actually takes effect.
values = index_it(idx, questions, solutions, generations, scores, idx_type=idx_type)
question, solution, generation, score = values

In [26]:
# Flatten the selected record into one dict and pretty-print it; the wide
# line width keeps each tool/parameter description on a single line.
result = extract_result(question, solution, generation, score)
pp(result, width=180)

{'user_query': 'Find the area of a triangle with a base of 10 units and height of 5 units.',
 'tools': [{'name': 'calculate_triangle_area',
            'description': 'Calculate the area of a triangle given its base and height.',
            'parameters': {'type': 'dict',
                           'properties': {'base': {'type': 'integer', 'description': 'The base of the triangle.'},
                                          'height': {'type': 'integer', 'description': 'The height of the triangle.'},
                                          'unit': {'type': 'string', 'description': "The unit of measure (defaults to 'units' if not specified)"}},
                           'required': ['base', 'height']}}],
 'valid': False,
 'tool_calls': [{'tool_name': 'calculate_triangle_area', 'tool_arguments': {'base': 10, 'height': 5}},
                {'tool_name': 'calculate_triangle_area', 'tool_arguments': {'base': 10, 'height': 5, 'unit': 'units'}}],
 'error': ['Wrong number of functions.'],
