In [1]:
# Set the working directory to /workspace
import os

# Get current working directory before changing
current_dir = os.getcwd()
print(f"Current working directory: {current_dir}")

# Change to /workspace
os.chdir("/workspace")
print(f"Working directory changed to: {os.getcwd()}")

Current working directory: /workspace/notebooks
Working directory changed to: /workspace


In [22]:
from datasets import load_dataset, load_from_disk
import verifiers as vf
from verifiers.envs.bfcl_env import BfclEnv
import json

# Loading Env

In [3]:
env = BfclEnv(
    dataset="bfcl",
    tools=[],
    max_num_turns=1,
    max_steps_per_turn=10,
    curriculum_learning=True,
    use_latest_trl=False,
)

[32m2025-04-19 13:22:33.665[0m | [1mINFO    [0m | [36mverifiers.envs.bfcl_env[0m:[36m__init__[0m:[36m47[0m - [1mInitializing MultiStepEnv[0m
[32m2025-04-19 13:22:33.665[0m | [1mINFO    [0m | [36mverifiers.envs.bfcl_env[0m:[36m__init__[0m:[36m94[0m - [1mInitializing Scoring Rubric[0m
[32m2025-04-19 13:22:33.666[0m | [1mINFO    [0m | [36mverifiers.envs.bfcl_env[0m:[36m__init__[0m:[36m96[0m - [1mInitializing LLM + Env Parsers[0m


In [4]:
ds_train = env.get_dataset(max_num_turns=1)
ds_eval = env.get_eval_dataset(max_num_turns=1, max_turn_only=True)
ds_train

Map:   0%|          | 0/745 [00:00<?, ? examples/s]

Filter:   0%|          | 0/745 [00:00<?, ? examples/s]

Filter:   0%|          | 0/745 [00:00<?, ? examples/s]

Filter:   0%|          | 0/379 [00:00<?, ? examples/s]

Map:   0%|          | 0/745 [00:00<?, ? examples/s]

Filter:   0%|          | 0/745 [00:00<?, ? examples/s]

Filter:   0%|          | 0/745 [00:00<?, ? examples/s]

Filter:   0%|          | 0/366 [00:00<?, ? examples/s]

Dataset({
    features: ['id', 'question', 'initial_config', 'path', 'involved_classes', 'num_turns', 'answer', 'prompt', 'user_question_bank', 'ground_truth_bank', '__index_level_0__'],
    num_rows: 100
})

In [5]:
ds_train[0]

{'id': 'multi_turn_base_51',
 'question': '[[{"role": "user", "content": "I\'ve noticed that some of my car doors are slightly ajar while others seem to be securely locked. Would you be able to verify and make sure all doors are properly locked for safety?"}]]',
 'initial_config': '{"VehicleControlAPI": {"fuelLevel": 15.5, "batteryVoltage": 12.8, "engineState": "stopped", "doorStatus": {"driver": "unlocked", "passenger": "unlocked", "rear_left": "unlocked", "rear_right": "unlocked"}, "acTemperature": 22.0, "fanSpeed": 60, "acMode": "auto", "humidityLevel": 45.0, "headLightStatus": "off", "parkingBrakeStatus": "released", "parkingBrakeForce": 0.0, "slopeAngle": 0.0, "distanceToNextVehicle": 100.0, "cruiseStatus": "inactive", "destination": "None", "frontLeftTirePressure": 35.0, "frontRightTirePressure": 35.0, "rearLeftTirePressure": 33.0, "rearRightTirePressure": 33.0}}',
 'path': ['VehicleControlAPI.displayCarStatus',
  'VehicleControlAPI.lockDoors',
  'VehicleControlAPI.startEngine',


In [6]:
rubric = env.get_rubric()
training_args = vf.get_default_grpo_config(run_name="debug", num_gpus=2)
training_args.num_train_epochs = 1
training_args.num_generations = 2
training_args.max_completion_length = 2048
training_args.report_to = "none"
training_args.per_device_train_batch_size = 2
training_args.gradient_accumulation_steps = 4
training_args.num_iterations = 2
training_args.beta = 0.001
training_args.max_grad_norm = 0.2
training_args.eval_strategy = "steps"
training_args.eval_on_start = False
training_args.eval_steps = 50
training_args.save_strategy = "steps"
training_args.save_steps = 100
training_args.per_device_eval_batch_size = 2
training_args.eval_accumulation_steps = 1
training_args.data_seed = 42
training_args.sync_ref_model = True
training_args.ref_model_mixup_alpha = 1.0
training_args.ref_model_sync_steps = 100

In [7]:
model_name = "Qwen/Qwen2.5-1.5B-Instruct"
model, tokenizer = vf.get_model_and_tokenizer(model_name)

trainer = vf.GRPOEnvTrainer(
    model=model,
    processing_class=tokenizer,
    reward_funcs=rubric,
    env=env,
    args=training_args,
    train_dataset=ds_train,
    eval_dataset=ds_eval,
    debug_generate=True,
    debug_rewards=True,
    model_name=model_name,
    run_name="debug_baseline",
    use_dr_grpo=False,
    test_hypothesis_clip_advantage=False,
    apply_overlong_filtering=True,
    print_sample_completions=True,
)

Using Liger kernel
Applied Liger kernels to Qwen2


You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`.


INFO 04-19 13:22:41 __init__.py:207] Automatically detected platform cuda.
INFO 04-19 13:22:48 config.py:549] This model supports multiple tasks: {'reward', 'embed', 'generate', 'classify', 'score'}. Defaulting to 'generate'.
INFO 04-19 13:22:48 llm_engine.py:234] Initializing a V0 LLM engine (v0.7.3) with config: model='Qwen/Qwen2.5-1.5B-Instruct', speculative_config=None, tokenizer='Qwen/Qwen2.5-1.5B-Instruct', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, override_neuron_config=None, tokenizer_revision=None, trust_remote_code=False, dtype=torch.bfloat16, max_seq_len=32768, download_dir=None, load_format=LoadFormat.AUTO, tensor_parallel_size=1, pipeline_parallel_size=1, disable_custom_all_reduce=False, quantization=None, enforce_eager=False, kv_cache_dtype=auto,  device_config=cuda:1, decoding_config=DecodingConfig(guided_decoding_backend='xgrammar'), observability_config=ObservabilityConfig(otlp_traces_endpoint=None, collect_model_forward_time=False, collect_model_e



INFO 04-19 13:22:49 weight_utils.py:254] Using model weights format ['*.safetensors']
INFO 04-19 13:22:49 weight_utils.py:304] No model.safetensors.index.json found in remote.


Loading safetensors checkpoint shards:   0% Completed | 0/1 [00:00<?, ?it/s]


INFO 04-19 13:22:50 model_runner.py:1115] Loading model weights took 2.8833 GB
INFO 04-19 13:22:51 worker.py:267] Memory profiling takes 1.11 seconds
INFO 04-19 13:22:51 worker.py:267] the current vLLM instance can use total_gpu_memory (23.55GiB) x gpu_memory_utilization (0.70) = 16.48GiB
INFO 04-19 13:22:51 worker.py:267] model weights take 2.88GiB; non_torch_memory takes 0.08GiB; PyTorch activation peak memory takes 2.02GiB; the rest of the memory reserved for KV Cache is 11.50GiB.
INFO 04-19 13:22:52 executor_base.py:111] # cuda blocks: 26912, # CPU blocks: 9362
INFO 04-19 13:22:52 executor_base.py:116] Maximum concurrency for 32768 tokens per request: 13.14x
INFO 04-19 13:22:55 model_runner.py:1434] Capturing cudagraphs for decoding. This may lead to unexpected consequences if the model is not static. To run the model in eager mode, set 'enforce_eager=True' or use '--enforce-eager' in the CLI. If out-of-memory error occurs during cudagraph capture, consider decreasing `gpu_memory_u

Capturing CUDA graph shapes: 100%|██████████| 35/35 [00:16<00:00,  2.17it/s]

INFO 04-19 13:23:11 model_runner.py:1562] Graph capturing finished in 16 secs, took 0.22 GiB
INFO 04-19 13:23:11 llm_engine.py:436] init engine (profile, create kv cache, warmup model) took 21.11 seconds





In [8]:
trainer.train()

System Prompt: You are an expert in composing functions. You are given a question from a user and a set of possible functions. Based on the question, you will need to make one or more function/tool calls to complete the task.
You have access to the following tools to help solve the task:

{"name": "authenticate_travel", "description": "Authenticate the user with the travel API", "parameters": {"type": "dict", "properties": {"client_id": {"type": "string", "description": "The client applications client_id supplied by App Management"}, "client_secret": {"type": "string", "description": "The client applications client_secret supplied by App Management"}, "refresh_token": {"type": "string", "description": "The refresh token obtained from the initial authentication"}, "grant_type": {"type": "string", "description": "The grant type of the authentication request. Here are the options: read_write, read, write"}, "user_first_name": {"type": "string", "description": "The first name of the user"}

KeyboardInterrupt: 

# Evaluating Outputs

In [10]:
eval_ds = load_from_disk(
    "notebooks/tmp_multiturn_example_rl/eval_result_Qwen2.5_1.5B_Instruct_step_402.hf"
)
eval_ds

Dataset({
    features: ['step', 'prompt', 'completion', 'reward', 'correctness', 'contains_gibberish', 'dataset_rows', 'train'],
    num_rows: 200
})

In [11]:
example = eval_ds[0]
example

{'step': 402,
 'prompt': 'First User Request: "I\'m planning a business class trip from JFK in New York to LAX in Los Angeles on December 15, 2024. Alex Johnson will be my traveling companion. I intend to use my credit card with label \'id_1234\' to cover the $4500 trip cost. I\'ve got my access token here: ABCD1234. Once booked, I\'ll need to cancel the trip immediately due to unexpected changes in my schedule."\n\nInvolved Classes: [\'TicketAPI\', \'TravelAPI\']\n\nFunction Documentation:\n\nTools for TicketAPI: [\'close_ticket\', \'create_ticket\', \'edit_ticket\', \'get_ticket\', \'get_user_tickets\', \'logout\', \'resolve_ticket\', \'ticket_get_login_status\', \'ticket_login\']\n\nTools for TravelAPI: [\'authenticate_travel\', \'book_flight\', \'cancel_booking\', \'compute_exchange_rate\', \'contact_customer_support\', \'get_all_credit_cards\', \'get_budget_fiscal_year\', \'get_credit_card_balance\', \'get_flight_cost\', \'get_nearest_airport_by_city\', \'list_all_airports\', \'pu

In [12]:
print(f"SYSTEM: {example['dataset_rows']['prompt'][0]['content']}")
print("---")
print(f"USER: {example['dataset_rows']['prompt'][1]['content']}")

SYSTEM: You are an expert in composing functions. You are given a question from a user and a set of possible functions. Based on the question, you will need to make one or more function/tool calls to complete the task.
You have access to the following tools to help solve the task:

{"name": "close_ticket", "description": "Close a ticket.", "parameters": {"type": "dict", "properties": {"ticket_id": {"type": "integer", "description": "ID of the ticket to be closed. "}}, "required": ["ticket_id"]}, "response": {"type": "dict", "properties": {"status": {"type": "string", "description": "Status of the close operation."}}}}
{"name": "create_ticket", "description": "Create a ticket in the system and queue it.", "parameters": {"type": "dict", "properties": {"title": {"type": "string", "description": "Title of the ticket."}, "description": {"type": "string", "description": "Description of the ticket. Defaults to an empty string.", "default": ""}, "priority": {"type": "integer", "description": "

In [23]:
answer = json.loads(example["dataset_rows"]["answer"])
answer

[["book_flight(access_token='ABCD1234', card_id='id_1234', travel_date='2024-12-15', travel_from='JFK', travel_to='LAX', travel_class='business', travel_cost=4500.0)",
  "cancel_booking(access_token='ABCD1234', booking_id='3426812')"]]

In [14]:
completions = example["completion"]

In [27]:
states = [{"dataset_row": example["dataset_rows"]}]

In [15]:
len(completions)

19

In [19]:
env.rubric.tool_execution_reward_func([completions], [])

[0.044444444444444446]

In [28]:
completions[:1]

[{'content': 'To implement the steps outlined, I will break down the task into smaller sub-tasks and solve them one by one.\n\nFirst, I\'ll get the list of all available airports using the \'list_all_airports\' function:\n\n<tool>\n[{"name": "list_all_airports", "args": {}}]\n</tool>',
  'role': 'assistant'}]

In [32]:
states = [{"messages": completions[:1], "dataset_row": example["dataset_rows"]}]
states = trainer.env._initialize_environments(states)

In [33]:
states

[{'messages': [{'content': 'To implement the steps outlined, I will break down the task into smaller sub-tasks and solve them one by one.\n\nFirst, I\'ll get the list of all available airports using the \'list_all_airports\' function:\n\n<tool>\n[{"name": "list_all_airports", "args": {}}]\n</tool>',
    'role': 'assistant'}],
  'dataset_row': {'__index_level_0__': 330,
   'answer': '[["book_flight(access_token=\'ABCD1234\', card_id=\'id_1234\', travel_date=\'2024-12-15\', travel_from=\'JFK\', travel_to=\'LAX\', travel_class=\'business\', travel_cost=4500.0)", "cancel_booking(access_token=\'ABCD1234\', booking_id=\'3426812\')"]]',
   'ground_truth_bank': '[["book_flight(access_token=\'ABCD1234\', card_id=\'id_1234\', travel_date=\'2024-12-15\', travel_from=\'JFK\', travel_to=\'LAX\', travel_class=\'business\', travel_cost=4500.0)", "cancel_booking(access_token=\'ABCD1234\', booking_id=\'3426812\')"]]',
   'id': 'multi_turn_base_176',
   'initial_config': '{"TravelAPI": {"credit_card_lis

In [34]:
response = trainer.env.env_response(states[0], debug=True)

Parsed: namespace(reasoning=None, tool='[{"name": "list_all_airports", "args": {}}]')
Tool Name: list_all_airports
Tool Args: {}
Found method list_all_airports in class TravelAPI
[{"name": "list_all_airports", "args": {}}]
'successful_func_calls'


Exception: Error in env_response is not expected!! Error: Error here is not expected!! Error: 'successful_func_calls'

In [17]:
rubric

[<bound method BfclRubric.unified_reward_func of <verifiers.rubrics.bfcl_rubric.BfclRubric object at 0x7f8effdc4c90>>]

In [26]:
trainer.env.env_instances

{140250549197824: {'TravelAPI': {'main': <verifiers.envs.bfcl_envs.travel_booking.TravelAPI at 0x7f8e789c5ad0>,
   'ground_truth': <verifiers.envs.bfcl_envs.travel_booking.TravelAPI at 0x7f8ecc682f90>,
   'initial_instance': <verifiers.envs.bfcl_envs.travel_booking.TravelAPI at 0x7f8e789c5d90>}},
 140250550139776: {'TravelAPI': {'main': <verifiers.envs.bfcl_envs.travel_booking.TravelAPI at 0x7f8ec4168ed0>,
   'ground_truth': <verifiers.envs.bfcl_envs.travel_booking.TravelAPI at 0x7f8ec4168f10>,
   'initial_instance': <verifiers.envs.bfcl_envs.travel_booking.TravelAPI at 0x7f8ec4168f50>}},
 140249885628736: {'TwitterAPI': {'main': <verifiers.envs.bfcl_envs.posting_api.TwitterAPI at 0x7f8ea0388ed0>,
   'ground_truth': <verifiers.envs.bfcl_envs.posting_api.TwitterAPI at 0x7f8ec4169790>,
   'initial_instance': <verifiers.envs.bfcl_envs.posting_api.TwitterAPI at 0x7f8ec4169710>},
  'GorillaFileSystem': {'main': <verifiers.envs.bfcl_envs.gorilla_file_system.GorillaFileSystem at 0x7f8ec416af1

In [35]:
output = trainer.env.generate(
    prompts=[example["dataset_rows"]["prompt"]],
    llm=trainer.llm,
    sampling_params=trainer.sampling_params,
    dataset_rows=[example["dataset_rows"]],
    debug=True,  # True for detailed output
)

System Prompt: You are an expert in composing functions. You are given a question from a user and a set of possible functions. Based on the question, you will need to make one or more function/tool calls to complete the task.
You have access to the following tools to help solve the task:

{"name": "close_ticket", "description": "Close a ticket.", "parameters": {"type": "dict", "properties": {"ticket_id": {"type": "integer", "description": "ID of the ticket to be closed. "}}, "required": ["ticket_id"]}, "response": {"type": "dict", "properties": {"status": {"type": "string", "description": "Status of the close operation."}}}}
{"name": "create_ticket", "description": "Create a ticket in the system and queue it.", "parameters": {"type": "dict", "properties": {"title": {"type": "string", "description": "Title of the ticket."}, "description": {"type": "string", "description": "Description of the ticket. Defaults to an empty string.", "default": ""}, "priority": {"type": "integer", "descript

KeyboardInterrupt: 

In [36]:
output = trainer.env.generate(
    prompts=[example["dataset_rows"]["prompt"]],
    llm=trainer.llm,
    sampling_params=trainer.sampling_params,
    dataset_rows=[example["dataset_rows"]],
    debug=False,  # True for detailed output
)

In [38]:
output["messages"]

[[{'role': 'assistant',
   'content': 'Let\'s break down this task step-by-step and use the available tools to make the booking.\n\n<reasoning>\n1. First, we need to contact customer support to get immediate support regarding the trip booking.\n2. Create a ticket to track the booking and its issues.\n3. Update the ticket with the required details for the booking.\n4. Add the flight cost to the ticket.\n5. Book the flight using the access token.\n6. Wait for the booking to be made.\n7. Once the booking is successful, notify customer support.\n8. Edit the ticket to notify about the cancellation.\n</reasoning>\n\n<tool>\n[{"name": "contact_customer_support", "args": {"booking_id": "ABC123", "message": "Need immediate support"}}, {"name": "book_flight", "args": {"access_token": "ABCD1234", "card_id": "id_1234", "travel_date": "2024-12-15", "travel_from": "JFK", "travel_to": "LAX", "travel_class": "economy", "travel_cost": 4500}}, {"name": "edit_ticket", "args": {"ticket_id": "ABC123", "upd

In [43]:
print(trainer.tokenizer.decode(output["ids"][0]))

Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.


Let's break down this task step-by-step and use the available tools to make the booking.

<reasoning>
1. First, we need to contact customer support to get immediate support regarding the trip booking.
2. Create a ticket to track the booking and its issues.
3. Update the ticket with the required details for the booking.
4. Add the flight cost to the ticket.
5. Book the flight using the access token.
6. Wait for the booking to be made.
7. Once the booking is successful, notify customer support.
8. Edit the ticket to notify about the cancellation.
</reasoning>

<tool>
[{"name": "contact_customer_support", "args": {"booking_id": "ABC123", "message": "Need immediate support"}}, {"name": "book_flight", "args": {"access_token": "ABCD1234", "card_id": "id_1234", "travel_date": "2024-12-15", "travel_from": "JFK", "travel_to": "LAX", "travel_class": "economy", "travel_cost": 4500}}, {"name": "edit_ticket", "args": {"ticket_id": "ABC123", "updates": {"status": "Pending Ticket", "description": "Fl

In [47]:
len(output["ids"][0])

1722

In [49]:
len(output["mask"][0])

1722

In [51]:
masked = []
for x, y in zip(output["ids"][0], output["mask"][0]):
    if y == 1:
        masked.append(x)
    else:
        masked.append(0)
print(trainer.tokenizer.decode(masked))

Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.


Let's break down this task step-by-step and use the available tools to make the booking.

<reasoning>
1. First, we need to contact customer support to get immediate support regarding the trip booking.
2. Create a ticket to track the booking and its issues.
3. Update the ticket with the required details for the booking.
4. Add the flight cost to the ticket.
5. Book the flight using the access token.
6. Wait for the booking to be made.
7. Once the booking is successful, notify customer support.
8. Edit the ticket to notify about the cancellation.
</reasoning>

<tool>
[{"name": "contact_customer_support", "args": {"booking_id": "ABC123", "message": "Need immediate support"}}, {"name": "book_flight", "args": {"access_token": "ABCD1234", "card_id": "id_1234", "travel_date": "2024-12-15", "travel_from": "JFK", "travel_to": "LAX", "travel_class": "economy", "travel_cost": 4500}}, {"name": "edit_ticket", "args": {"ticket_id": "ABC123", "updates": {"status": "Pending Ticket", "description": "Fl

In [45]:
trainer.tokenizer.decode([1, 1, 1, 1])

Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.


'""""'

In [40]:
output["ids"]

[[10061,
  594,
  1438,
  1495,
  419,
  3383,
  3019,
  14319,
  29208,
  323,
  990,
  279,
  2500,
  7375,
  311,
  1281,
  279,
  21857,
  382,
  27,
  19895,
  287,
  397,
  16,
  13,
  5512,
  11,
  582,
  1184,
  311,
  3645,
  6002,
  1824,
  311,
  633,
  13922,
  1824,
  8826,
  279,
  8411,
  21857,
  624,
  17,
  13,
  4230,
  264,
  11727,
  311,
  3754,
  279,
  21857,
  323,
  1181,
  4714,
  624,
  18,
  13,
  5549,
  279,
  11727,
  448,
  279,
  2567,
  3565,
  369,
  279,
  21857,
  624,
  19,
  13,
  2691,
  279,
  10971,
  2783,
  311,
  279,
  11727,
  624,
  20,
  13,
  5893,
  279,
  10971,
  1667,
  279,
  2615,
  3950,
  624,
  21,
  13,
  13824,
  369,
  279,
  21857,
  311,
  387,
  1865,
  624,
  22,
  13,
  9646,
  279,
  21857,
  374,
  6849,
  11,
  15456,
  6002,
  1824,
  624,
  23,
  13,
  8340,
  279,
  11727,
  311,
  15456,
  911,
  279,
  35835,
  624,
  522,
  19895,
  287,
  1339,
  27,
  14172,
  397,
  58,
  4913,
  606,
  788,
  330,
  6287,


In [53]:
state = output["states"][0]

In [57]:
num_func_matches = 0
num_func_total = 0
model_func_calls = state["successful_func_calls"]
ground_truth_func_calls = json.loads(state["dataset_row"]["answer"])
assert len(model_func_calls) == len(ground_truth_func_calls)

for model_calls, gt_calls_str in zip(model_func_calls, ground_truth_func_calls):
    gt_calls = [
        trainer.env.rubric._parse_function_call(call_str) for call_str in gt_calls_str
    ]

    def make_hashable(value):
        if isinstance(value, dict):
            return frozenset((k, make_hashable(v)) for k, v in value.items())
        elif isinstance(value, list):
            return tuple(make_hashable(item) for item in value)
        elif isinstance(value, set):
            return frozenset(make_hashable(item) for item in value)
        elif isinstance(value, tuple):
            return tuple(make_hashable(item) for item in value)
        try:
            hash(value)
            return value
        except TypeError:
            return str(value)

    comparable_model_calls = [
        (
            call["name"],
            frozenset((k, make_hashable(v)) for k, v in call["args"].items()),
        )
        for call in model_calls
    ]

    for call in gt_calls:
        if "args" in call:
            for key, value in call["args"].items():
                if isinstance(value, list):
                    call["args"][key] = tuple(value)
        else:
            raise Exception(
                "Error in Parsing Ground Truth Function Call is Not Expected!!"
            )

    comparable_gt_calls = [
        (
            call["name"],
            frozenset((k, make_hashable(v)) for k, v in call["args"].items()),
        )
        for call in gt_calls
    ]
    print(f"Comparable Model Calls: {comparable_model_calls}")
    print(f"Comparable Ground Truth Calls: {comparable_gt_calls}")

    is_match, _ = trainer.env.rubric._is_subsequence_unordered(
        comparable_gt_calls, comparable_model_calls
    )
    print(f"Is Subsequence: {is_match}")
    num_func_matches += int(is_match)
    num_func_total += 1
func_score = num_func_matches / num_func_total
print(
    f"Function Call Match Score: {func_score}, num_func_matches: {num_func_matches}, num_func_total: {num_func_total}"
)

Comparable Model Calls: [('book_flight', frozenset({('travel_cost', 4500), ('card_id', 'id_1234'), ('travel_to', 'LAX'), ('travel_class', 'economy'), ('travel_date', '2024-12-15'), ('access_token', 'ABCD1234'), ('travel_from', 'JFK')}))]
Comparable Ground Truth Calls: [('book_flight', frozenset({('travel_class', 'business'), ('travel_cost', 4500.0), ('card_id', 'id_1234'), ('travel_to', 'LAX'), ('travel_date', '2024-12-15'), ('access_token', 'ABCD1234'), ('travel_from', 'JFK')})), ('cancel_booking', frozenset({('booking_id', '3426812'), ('access_token', 'ABCD1234')}))]
Is Subsequence: False
Function Call Match Score: 0.0, num_func_matches: 0, num_func_total: 1
