In [1]:
import os
import json
from pprint import pprint

from datasets import load_dataset
from huggingface_hub import hf_hub_download
import pandas as pd
from dotenv import load_dotenv
from langchain_openai import ChatOpenAI

import promptquality as pq
from tqdm import tqdm
# Enable tqdm's pandas integration for this session.
tqdm.pandas()

# Load environment variables from ../.env (path is relative to the working directory).
load_dotenv("../.env")
# pq.login("console.demo.rungalileo.io")

True

In [46]:

def get_bfcl_dataset(filename: str):
    """Download one BFCL dataset file from the Hugging Face Hub and load it.

    Args:
        filename: Name of a JSON-lines file inside the
            ``gorilla-llm/Berkeley-Function-Calling-Leaderboard`` dataset repo.

    Returns:
        pd.DataFrame: One row per JSON record found in the file.
    """
    print(filename)
    data_path = hf_hub_download(repo_id="gorilla-llm/Berkeley-Function-Calling-Leaderboard", filename=filename, repo_type="dataset")

    def load_file(file_path: str):
        """Parse a JSON-lines file into a list of records."""
        # Read as UTF-8 explicitly so parsing does not depend on the platform's
        # default text encoding.
        with open(file_path, encoding="utf-8") as f:
            # Skip blank lines (e.g. a trailing empty line); json.loads rejects them.
            return [json.loads(line) for line in f if line.strip()]

    data = load_file(data_path)
    df = pd.DataFrame.from_dict(data)
    return df

def get_bfcl_possible_answer(filename: str):
    """Load a BFCL "possible answer" JSON-lines file from the local data folder.

    Args:
        filename: File name inside
            ``../data/Berkeley-Function-Calling-Leaderboard/possible_answer/``
            (resolved relative to the current working directory).

    Returns:
        pd.DataFrame: One row per JSON record found in the file.
    """
    print(filename)
    data_path = f"../data/Berkeley-Function-Calling-Leaderboard/possible_answer/{filename}"

    def load_file(file_path: str):
        """Parse a JSON-lines file into a list of records."""
        # Read as UTF-8 explicitly so parsing does not depend on the platform's
        # default text encoding.
        with open(file_path, encoding="utf-8") as f:
            # Skip blank lines (e.g. a trailing empty line); json.loads rejects them.
            return [json.loads(line) for line in f if line.strip()]

    data = load_file(data_path)
    return pd.DataFrame.from_dict(data)

def update_conversations(conversation, ground_truth):
    """Interleave user turns with their ground-truth answers as assistant messages.

    Walks ``conversation`` and ``ground_truth`` in lockstep. For every turn the
    first message of the turn is kept. A truthy ground truth is then appended as
    an assistant message (its ``str()`` form) unless the turn is the last one of
    ``conversation``. The walk stops right after the first turn whose ground
    truth is empty.

    Args:
        conversation: Sequence of turns; each turn is a sequence of messages.
        ground_truth: Sequence of per-turn ground-truth values.

    Returns:
        list: The flattened message history.
    """
    final_index = len(conversation) - 1
    history = []
    for turn_index, (turn, expected) in enumerate(zip(conversation, ground_truth)):
        history.append(turn[0])
        if not expected:
            # Nothing to answer with for this turn: stop building the history here.
            break
        if turn_index < final_index:
            # The final turn is left unanswered so it stays the open query.
            history.append({"role": "assistant", "content": str(expected)})
    return history

def convert_tools_to_langchain_format(tools):
    """
    Convert from BFCL tool format to LangChain format.

    Missing optional fields are tolerated: a tool or property without a
    ``description`` gets an empty one, a property without a ``type`` (or with a
    non-string ``type``) is treated as ``"string"``, and a tool without
    ``parameters``/``properties`` yields an empty ``properties`` mapping.

    Args:
        tools: Either a single tool dict or a list of tool dicts in BFCL format

    Returns:
        Union[dict, list[dict]]: Tool(s) in LangChain format

    Raises:
        ValueError: If ``tools`` is neither a dict nor a list.
        KeyError: If a tool has no ``name``.
    """
    type_mapping = {
        # Python/BFCL spellings
        "float": "number",
        "int": "integer",
        "str": "string",
        "bool": "boolean",
        "dict": "object",
        "list": "array",
        "array": "array",
        "decimal": "number",
        "tuple": "array",
        # JSON Schema spellings pass through unchanged
        "number": "number",
        "integer": "integer",
        "string": "string",
        "boolean": "boolean",
        "object": "object",
    }

    def convert_type(t):
        """Convert Python/BFCL types to JSON Schema types (unknown -> "string")."""
        # Non-string values can never match a key, and unhashable ones (e.g. a
        # list of types) would make the dict lookup raise, so answer directly.
        if not isinstance(t, str):
            return "string"
        return type_mapping.get(t.lower(), "string")

    def convert_property(property_dict):
        """Convert a single property with proper handling of array items"""
        prop = {
            "description": property_dict.get("description", ""),
            "type": convert_type(property_dict.get("type")),
        }

        # Handle array types
        if prop["type"] == "array":
            # Use the declared item type when present; arrays without an
            # explicit item type default to "number".
            items = property_dict.get("items") or {}
            prop["items"] = {"type": convert_type(items.get("type", "number"))}

        return prop

    def convert_single_tool(tool):
        """Convert a single tool from BFCL to LangChain format"""
        parameters = tool.get("parameters") or {}
        properties = {}

        # Convert each property
        for k, v in (parameters.get("properties") or {}).items():
            prop = convert_property(v)
            prop["title"] = k.title()
            properties[k] = prop

        return {
            "description": tool.get("description", ""),
            "properties": properties,
            "required": parameters.get("required", []),
            "title": tool["name"],
            "type": "object",
        }

    # Handle both single tool and list of tools
    if isinstance(tools, list):
        return [convert_single_tool(tool) for tool in tools]
    elif isinstance(tools, dict):
        return convert_single_tool(tools)
    else:
        raise ValueError("Input must be either a single tool dict or a list of tool dicts")

# Smoke test for the conversion: one array property without an item type and
# one with an explicit BFCL item type.
test_tools = [
    {
        "description": "Calculate distance between two points",
        "name": "get_distance",
        "parameters": {
            "properties": {
                "pointA": {
                    "description": "First point coordinates",
                    "type": "array"  # No items specified
                },
                "pointB": {
                    "description": "Second point coordinates",
                    "type": "array",
                    "items": {"type": "float"}  # Items specified
                }
            },
            "required": ["pointA", "pointB"],
            "type": "dict"
        }
    }
]

# Run test (pprint is already imported in the notebook's import cell)
print("Test with array properties:")
converted_test_tools = convert_tools_to_langchain_format(test_tools)
pprint(converted_test_tools)

# Fail loudly instead of relying on eyeballing the printed output: the missing
# item type defaults to "number" and "float" is mapped to "number".
converted_test_props = converted_test_tools[0]["properties"]
assert converted_test_props["pointA"]["items"] == {"type": "number"}
assert converted_test_props["pointB"]["items"] == {"type": "number"}

Test with array properties:
[{'description': 'Calculate distance between two points',
  'properties': {'pointA': {'description': 'First point coordinates',
                            'items': {'type': 'number'},
                            'title': 'Pointa',
                            'type': 'array'},
                 'pointB': {'description': 'Second point coordinates',
                            'items': {'type': 'number'},
                            'title': 'Pointb',
                            'type': 'array'}},
  'required': ['pointA', 'pointB'],
  'title': 'get_distance',
  'type': 'object'}]


In [47]:
# Single-turn BFCL files; only the uncommented entries are active.
single_turn_filenames = [
    # "BFCL_v3_exec_simple.json",
    # "BFCL_v3_exec_multiple.json",
    # "BFCL_v3_exec_parallel.json",
    # "BFCL_v3_exec_parallel_multiple.json",
    "BFCL_v3_irrelevance.json",
    # "BFCL_v3_chatable.json",
    # "BFCL_v3_multiple.json",
    # "BFCL_v3_parallel.json",
]

# Multi-turn BFCL files processed by a later cell.
multi_turn_filenames = [
    "BFCL_v3_multi_turn_base.json",
    "BFCL_v3_multi_turn_long_context.json",
    "BFCL_v3_multi_turn_miss_func.json",
    "BFCL_v3_multi_turn_miss_param.json",
    "BFCL_v3_multi_turn_composite.json",  # Missing Parameters, Missing Functions, and Long-Context
]

filename = "BFCL_v3_irrelevance.json"

# Skip the first 50 records, renumber the rest, drop a fixed set of rows and
# draw a reproducible sample of 100.
# NOTE(review): the reason for excluding these particular row labels is not
# recorded in this notebook - confirm and document it.
df = (
    get_bfcl_dataset(filename)
    .iloc[50:]
    .reset_index()
    .drop([7, 78, 79, 99, 169, 188])
    .sample(100, random_state=42)
    .rename(columns={"function": "tools"})
)


def remove_dot_from_name(x): # langchain requirement
    """Replace dots with underscores in the title of every converted tool.

    The tool dicts are modified in place and the same list object is returned.
    An empty list is returned unchanged.

    Args:
        x: List of tool dicts in LangChain format, each carrying a "title" key.

    Returns:
        list: ``x`` itself, with every title sanitised.
    """
    # Sanitise every tool, not only the first one, so that lists with several
    # tools do not keep dotted names.
    for tool in x:
        tool["title"] = tool["title"].replace(".", "_")
    return x

# Convert the BFCL tool specs to LangChain format, then sanitise dotted names.
df["tools_langchain"] = (
    df["tools"]
    .apply(convert_tools_to_langchain_format)
    .apply(remove_dot_from_name)
)

# Keep only the first message of every non-empty turn.
df = df.rename(columns={"question": "conversation"})
df["conversation"] = df["conversation"].apply(
    lambda turns: [turn[0] for turn in turns if turn]
)

# Simple size features: message count, length of the last message, tool count.
df["n_turns"] = df["conversation"].map(len)
df["len_query"] = df["conversation"].apply(lambda messages: len(messages[-1]["content"]))
df["n_tools"] = df["tools"].map(len)
# df["n_function_calls"] = df["involved_classes"].apply(lambda x: len(x))

# Persist under the file name without its extension.
name = os.path.splitext(filename)[0]
print(name, df.shape)
df.to_parquet(f"../data/datasets/{name}.parquet", engine="fastparquet")
df

BFCL_v3_irrelevance.json
BFCL_v3_irrelevance (100, 8)


Unnamed: 0,index,id,conversation,tools,tools_langchain,n_turns,len_query,n_tools
20,70,irrelevance_70,"[{'role': 'user', 'content': 'Calculate the co...","[{'name': 'calculate_mortgage_payment', 'descr...",[{'description': 'Calculate the monthly mortga...,1,126,1
43,93,irrelevance_93,"[{'role': 'user', 'content': 'What's the judge...","[{'name': 'law_firm.get_impactful_cases', 'des...",[{'description': 'Retrieve impactful cases han...,1,33,1
160,210,irrelevance_210,"[{'role': 'user', 'content': 'Which place in P...","[{'name': 'recipe_based_restaurants', 'descrip...",[{'description': 'Search for the restaurants b...,1,41,1
115,165,irrelevance_165,"[{'role': 'user', 'content': 'What type of ins...","[{'name': 'get_instrument_info', 'description'...",[{'description': 'Retrieves the details of a s...,1,35,1
152,202,irrelevance_202,"[{'role': 'user', 'content': 'Who won the worl...","[{'name': 'game_score.calculate', 'description...",[{'description': 'Calculate the final game sco...,1,30,1
...,...,...,...,...,...,...,...,...
146,196,irrelevance_196,"[{'role': 'user', 'content': 'What's the total...","[{'name': 'boardgame.calculate_score', 'descri...",[{'description': 'Calculate final scores for a...,1,65,1
26,76,irrelevance_76,"[{'role': 'user', 'content': 'How do I get the...","[{'name': 'investment_trend_analysis', 'descri...",[{'description': 'Analyze the trend of a user'...,1,40,1
24,74,irrelevance_74,"[{'role': 'user', 'content': 'What is the rate...",[{'name': 'investment_analysis.calculate_profi...,[{'description': 'Calculates the net profit gi...,1,90,1
141,191,irrelevance_191,"[{'role': 'user', 'content': 'Who won the last...","[{'name': 'get_match_stats', 'description': 'R...",[{'description': 'Retrieve the match statistic...,1,39,1


In [50]:
# API class names as referenced by the datasets' "involved_classes" column ...
classes = [
 'TradingBot',
 'TicketAPI',
 'VehicleControlAPI',
 'MathAPI',
 'MessageAPI',
 'TravelAPI',
 'TwitterAPI']

# ... and, position by position, the function-doc file that describes each one.
files = ['trading_bot.json',
 'ticket_api.json',
 'vehicle_control.json',
 'math_api.json',
 'message_api.json',
 'travel_booking.json',
 'posting_api.json']

# zip() silently truncates, so a mismatch would drop classes without any error.
assert len(classes) == len(files), "classes and files must line up one-to-one"

basepath = "../data/Berkeley-Function-Calling-Leaderboard/multi_turn_func_doc"
class_dict = {}

for class_name, file in zip(classes, files):
    filepath = os.path.join(basepath, file)
    # Read the JSON-lines file as a list of dictionaries. UTF-8 is requested
    # explicitly so parsing does not depend on the platform default encoding,
    # and blank lines are skipped because json.loads rejects them.
    with open(filepath, encoding="utf-8") as f:
        data = [json.loads(line) for line in f if line.strip()]
    class_dict[class_name] = data

class_dict.keys()

dict_keys(['TradingBot', 'TicketAPI', 'VehicleControlAPI', 'MathAPI', 'MessageAPI', 'TravelAPI', 'TwitterAPI'])

In [51]:
for filename in multi_turn_filenames:
    # The base file is sampled more heavily than the others (presumably because
    # it is split into two subsets afterwards - confirm).
    samples = 150 if filename == "BFCL_v3_multi_turn_base.json" else 100

    # first fifty contain the gorilla file system
    df = get_bfcl_dataset(filename).iloc[50:].sample(samples, random_state=42)
    df_answer = get_bfcl_possible_answer(filename)
    df = df.merge(df_answer, on="id")

    # Collect, in one flat list per row, the function docs of every involved class.
    df["tools"] = df["involved_classes"].apply(
        lambda involved: [
            tool for class_name in involved for tool in class_dict[class_name]
        ]
    )

    # Remove missed functions from tools.
    # NOTE(review): only the first entry of the missed_function mapping is
    # consulted - confirm that rows never carry more than one entry.
    if "missed_function" in df.columns:
        df["tools"] = df.apply(
            lambda row: [
                tool
                for tool in row.tools
                if tool["name"] not in list(row.missed_function.values())[0]
            ],
            axis=1,
        )

    # Convert tools to LangChain format.
    df["tools_langchain"] = df["tools"].apply(convert_tools_to_langchain_format)

    # Rebuild each conversation with the ground truth interleaved as assistant turns.
    df = df.rename(columns={"question": "conversation"})
    # df["conversation"] = df.conversation.apply(lambda x: [x[0] for x in x if x])
    df["conversation"] = df.apply(
        lambda row: update_conversations(row["conversation"], row["ground_truth"]),
        axis=1,
    )

    # Size features; n_function_calls is the number of involved classes.
    df["n_turns"] = df["conversation"].map(len)
    df["len_query"] = df["conversation"].apply(lambda messages: len(messages[-1]["content"]))
    df["n_tools"] = df["tools"].map(len)
    df["n_function_calls"] = df["involved_classes"].map(len)

    # Persist under the file name without its extension.
    name = os.path.splitext(filename)[0]
    print(name, df.shape)
    df.to_parquet(f"../data/datasets/{name}.parquet", engine="fastparquet")

# Split the multi_turn_base dataset by n_function_calls: rows with exactly one
# versus rows with more than one. At most the first 50 rows of each are kept.
df = pd.read_parquet("../data/datasets/BFCL_v3_multi_turn_base.parquet", engine="fastparquet")

name = "BFCL_v3_multi_turn_base_single_func_call"
single_call_rows = df.loc[df["n_function_calls"] == 1].head(50)
single_call_rows.to_parquet(f"../data/datasets/{name}.parquet", engine="fastparquet")

name = "BFCL_v3_multi_turn_base_multi_func_call"
multi_call_rows = df.loc[df["n_function_calls"] > 1].head(50)
multi_call_rows.to_parquet(f"../data/datasets/{name}.parquet", engine="fastparquet")

BFCL_v3_multi_turn_base.json
BFCL_v3_multi_turn_base.json
BFCL_v3_multi_turn_base (150, 12)
BFCL_v3_multi_turn_long_context.json
BFCL_v3_multi_turn_long_context.json
BFCL_v3_multi_turn_long_context (100, 12)
BFCL_v3_multi_turn_miss_func.json
BFCL_v3_multi_turn_miss_func.json
BFCL_v3_multi_turn_miss_func (100, 13)
BFCL_v3_multi_turn_miss_param.json
BFCL_v3_multi_turn_miss_param.json
BFCL_v3_multi_turn_miss_param (100, 12)
BFCL_v3_multi_turn_composite.json
BFCL_v3_multi_turn_composite.json
BFCL_v3_multi_turn_composite (100, 13)
