In [1]:
import os
import re
os.environ["UNSLOTH_VLLM_STANDBY"] = "1"# [NEW] Extra 30% context lengths!

In [2]:
from unsloth import FastLanguageModel
import torch
max_seq_length = 8000 # Can increase for longer reasoning traces
lora_rank = 32 # Larger rank = smarter, but slower

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "LiquidAI/LFM2-350M",
    max_seq_length = max_seq_length,
    load_in_4bit = False, # False for LoRA 16bit
    fast_inference = False, # Enable vLLM fast inference
    max_lora_rank = lora_rank,
    gpu_memory_utilization = 0.9, # Reduce if out of memory

    use_exact_model_name = True #for hugginface cache or repo mdoel name
)

model = FastLanguageModel.get_peft_model(
    model,
    r = lora_rank, # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128
    target_modules = [
        "q_proj", "k_proj", "v_proj", "o_proj",
        "gate_proj", "up_proj", "down_proj",
    ],
    lora_alpha = lora_rank*2, # *2 speeds up training
    use_gradient_checkpointing = "unsloth", # Reduces memory usage
    random_state = 3407,
)

  from .autonotebook import tqdm as notebook_tqdm


ðŸ¦¥ Unsloth: Will patch your computer to enable 2x faster free finetuning.
INFO 10-14 03:11:01 [__init__.py:216] Automatically detected platform cuda.


W1014 03:11:01.445000 1181414 torch/utils/cpp_extension.py:2425] TORCH_CUDA_ARCH_LIST is not set, all archs for visible cards are included for compilation. 
W1014 03:11:01.445000 1181414 torch/utils/cpp_extension.py:2425] If this is not desired, please set os.environ['TORCH_CUDA_ARCH_LIST'] to specific architectures.


Switching to PyTorch attention since your Xformers is broken.

Requires Flash-Attention version >=2.7.1,<=2.8.2 but got 2.8.3.
ðŸ¦¥ Unsloth Zoo will now patch everything to make training faster!
==((====))==  Unsloth 2025.10.1: Fast Lfm2 patching. Transformers: 4.56.2. vLLM: 0.10.2.
   \\   /|    NVIDIA RTX 6000 Ada Generation. Num GPUs = 1. Max memory: 47.507 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.8.0+cu128. CUDA: 8.9. CUDA Toolkit: 12.8. Triton: 3.4.0
\        /    Bfloat16 = TRUE. FA [Xformers = None. FA2 = True]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
Unsloth: QLoRA and full finetuning all not selected. Switching to 16bit LoRA.
Unsloth: Making `model.base_model.model.model` require gradients


In [3]:
tokenizer.chat_template

'{{- bos_token -}}\n{%- set system_prompt = "" -%}\n{%- set ns = namespace(system_prompt="") -%}\n{%- if messages[0]["role"] == "system" -%}\n\t{%- set ns.system_prompt = messages[0]["content"] -%}\n\t{%- set messages = messages[1:] -%}\n{%- endif -%}\n{%- if tools -%}\n\t{%- set ns.system_prompt = ns.system_prompt + ("\\n" if ns.system_prompt else "") + "List of tools: <|tool_list_start|>[" -%}\n\t{%- for tool in tools -%}\n\t\t{%- if tool is not string -%}\n            {%- set tool = tool | tojson -%}\n\t\t{%- endif -%}\n\t\t{%- set ns.system_prompt = ns.system_prompt + tool -%}\n        {%- if not loop.last -%}\n            {%- set ns.system_prompt = ns.system_prompt + ", " -%}\n        {%- endif -%}\n\t{%- endfor -%}\n\t{%- set ns.system_prompt = ns.system_prompt + "]<|tool_list_end|>" -%}\n{%- endif -%}\n{%- if ns.system_prompt -%}\n\t{{- "<|im_start|>system\\n" + ns.system_prompt + "<|im_end|>\\n" -}}\n{%- endif -%}\n{%- for message in messages -%}\n\t{{- "<|im_start|>" + message

In [4]:
# # Clean GLM-style template with line-by-line tool format
# glm_chat_template = '''{{- bos_token -}}
# {%- set system_prompt = "" -%}
# {%- set ns = namespace(system_prompt="") -%}
# {%- if messages[0]["role"] == "system" -%}
# 	{%- set ns.system_prompt = messages[0]["content"] -%}
# 	{%- set messages = messages[1:] -%}
# {%- endif -%}
# {%- if tools -%}
# 	{%- set ns.system_prompt = ns.system_prompt + ("\\n" if ns.system_prompt else "") + "# Tools\\nYou may call one or more functions to assist with the user query.\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\\n" -%}
# 	{%- for tool in tools -%}
# 		{%- if tool is not string -%}
# 			{%- set tool = tool.function | tojson -%}
# 		{%- endif -%}
# 		{%- set ns.system_prompt = ns.system_prompt + tool -%}
# 		{%- if not loop.last -%}
# 			{%- set ns.system_prompt = ns.system_prompt + "\\n" -%}
# 		{%- endif -%}
# 	{%- endfor -%}
# 	{%- set ns.system_prompt = ns.system_prompt + "\\n</tools>\\nFor each function call, output the function name and arguments within the following XML format:\\n<tool_call>{function-name}\\n<arg_key>{arg_key}</arg_key>\\n<arg_value>{arg_value}</arg_value>\\n...\\n</tool_call>" -%}
# {%- endif -%}
# {%- if ns.system_prompt -%}
# 	{{- "<|im_start|>system\\n" + ns.system_prompt + "<|im_end|>\\n" -}}
# {%- endif -%}
# {%- for message in messages -%}
# 	{{- "<|im_start|>" + message["role"] + "\\n" -}}
# 	{%- set content = message["content"] -%}
# 	{%- if content is not string -%}
# 		{%- set content = content | tojson -%}
# 	{%- endif -%}
# 	{%- if message["role"] == "tool" -%}
# 	{%- if message.get('name') -%}
#  		{%- set content = "<|observation|>\n<tool_name>" + message.get('name', 'unknown') + "</tool_name>\n<tool_response>" + content + "</tool_response>" -%}
# 	{%- else -%}
# 		{%- set content = "\n<|observation|>\n<|tool_response_start|>" + content + "<|tool_response_end|>" -%}
# 	{%- endif -%}
# 	{%- endif -%}
# 	{{- content + "<|im_end|>\\n" -}}
# {%- endfor -%}
# {%- if add_generation_prompt -%}
# 	{{- "<|im_start|>assistant\\n" -}}
# {%- endif -%}'''

# tokenizer.chat_template = glm_chat_template

In [5]:
# Clean GLM-style template with line-by-line tool format
glm_chat_template = '''{{- bos_token -}}
{%- set system_prompt = "" -%}
{%- set ns = namespace(system_prompt="") -%}
{%- if messages[0]["role"] == "system" -%}
	{%- set ns.system_prompt = messages[0]["content"] -%}
	{%- set messages = messages[1:] -%}
{%- endif -%}
{%- if tools -%}
	{%- set ns.system_prompt = ns.system_prompt + ("\\n" if ns.system_prompt else "") + "# Tools\\nYou may call one or more functions to assist with the user query.\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\\n" -%}
	{%- for tool in tools -%}
		{%- if tool is not string -%}
			{%- set tool = tool.function | tojson -%}
		{%- endif -%}
		{%- set ns.system_prompt = ns.system_prompt + tool -%}
		{%- if not loop.last -%}
			{%- set ns.system_prompt = ns.system_prompt + "\\n" -%}
		{%- endif -%}
	{%- endfor -%}
	{%- set ns.system_prompt = ns.system_prompt + "\\n</tools>\\nFor each function call, output the function name and arguments within the following XML format:\\n<tool_call>{function-name}\\n<arg_key>{arg_key}</arg_key>\\n<arg_value>{arg_value}</arg_value>\\n...\\n</tool_call>" -%}
{%- endif -%}
{%- if ns.system_prompt -%}
	{{- "<|im_start|>system\\n" + ns.system_prompt + "<|im_end|>\\n" -}}
{%- endif -%}
{%- for message in messages -%}
	{{- "<|im_start|>" + message["role"] + "\\n" -}}
	{%- set content = message["content"] -%}
	{%- if content is not string -%}
		{%- set content = content | tojson -%}
	{%- endif -%}
	{%- if message["role"] == "tool" -%}
	{%- if message.get('name') -%}
 		{%- set content = "<|observation|>\n<tool_name>" + message.get('name', 'unknown') + "</tool_name>\n<tool_response>" + content + "</tool_response>" -%}
	{%- else -%}
		{%- set content = "\n<|observation|>\n<|tool_response_start|>" + content + "<|tool_response_end|>" -%}
	{%- endif -%}
	{%- endif -%}
	{{- content + "<|im_end|>\\n" -}}
{%- endfor -%}
{%- if add_generation_prompt -%}
	{{- "<|im_start|>assistant\\n" -}}
{%- endif -%}'''

tokenizer.chat_template = glm_chat_template

In [6]:
base_tools = {
  "web_search": {
    "type": "function",
    "function": {
      "name": "web_search",
      "description": "Search the web",
      "parameters": {
        "type": "object",
        "properties": {
          "query": {
            "type": "string",
            "description": "Search query"
          }
        },
        "required": [
          "query"
        ]
      },
      "return": {
        "type": "array",
        "items": {
          "type": "object"
        },
        "description": "The list of items having url,title,description."
      }
    }
  },
  "think": {
    "type": "function",
    "function": {
      "name": "think",
      "description": "Use the tool to think about something. It will not obtain new information or change the database, but just append the thought to the log. Use it when complex reasoning or some cache memory is needed.",
      "parameters": {
        "type": "object",
        "properties": {
          "thought": {
            "type": "string",
            "description": "A thought to think about."
          }
        },
        "required": [
          "thought"
        ]
      }
    }
  }
}

"""
  "response": {
    "type": "function",
    "function": {
      "name": "response",
      "description": "Send a message back to the user.",
      "parameters": {
        "type": "object",
        "properties": {
          "message": {
            "type": "string",
            "description": "The message to send to the user."
          }
        },
        "required": [
          "message"
        ]
      }
    }
  }
"""

'\n  "response": {\n    "type": "function",\n    "function": {\n      "name": "response",\n      "description": "Send a message back to the user.",\n      "parameters": {\n        "type": "object",\n        "properties": {\n          "message": {\n            "type": "string",\n            "description": "The message to send to the user."\n          }\n        },\n        "required": [\n          "message"\n        ]\n      }\n    }\n  }\n'

In [7]:
# Complete conversation with user, assistant, and tool messages (no think tags)
messages = [
    {"role": "user", "content": "What is the current status of candidate ID 12345?"},
    {"role": "assistant", "content": '<tool_call>get_candidate_status\n<arg_key>candidate_id</arg_key>\n<arg_value>12345</arg_value>\n</tool_call>'},
    {"role": "tool", "content": '{"candidate_id": "12345", "status": "Interview Scheduled", "position": "Clinical Research Associate", "date": "2023-11-20"}'},
    {"role": "assistant", "content": "The candidate with ID 12345 is currently in the \"Interview Scheduled\" stage for the position of Clinical Research Associate, with an interview date set for 2023-11-20."},
    {"role": "user", "content": "Can you also search for all candidates for the position of Data Scientist?"},
    {"role": "assistant", "content": '<tool_call>search_database\n<arg_key>query</arg_key>\n<arg_value>Data Scientist</arg_value>\n<arg_key>limit</arg_key>\n<arg_value>10</arg_value>\n</tool_call>'},
    {"role": "tool", "name":"search_database","content": '[{"candidate_id": "67890", "name": "John Doe", "status": "Applied"}, {"candidate_id": "67891", "name": "Jane Smith", "status": "Interview Completed"}]'},
    {"role": "assistant", "content": "I found 2 candidates for the Data Scientist position:\n1. John Doe (ID: 67890) - Status: Applied\n2. Jane Smith (ID: 67891) - Status: Interview Completed"}
]

formatted = tokenizer.apply_chat_template(
    messages,
    tools=list(base_tools.values()),
    tokenize=False, 
    add_generation_prompt=False
)

sp = re.split(r"<\|im_start\|>system\s*|\s*<\|im_end\|>", formatted, maxsplit=2)[1].strip()
print(sp)

# Tools
You may call one or more functions to assist with the user query.
You are provided with function signatures within <tools></tools> XML tags:
<tools>
{"name": "web_search", "description": "Search the web", "parameters": {"type": "object", "properties": {"query": {"type": "string", "description": "Search query"}}, "required": ["query"]}, "return": {"type": "array", "items": {"type": "object"}, "description": "The list of items having url,title,description."}}
{"name": "think", "description": "Use the tool to think about something. It will not obtain new information or change the database, but just append the thought to the log. Use it when complex reasoning or some cache memory is needed.", "parameters": {"type": "object", "properties": {"thought": {"type": "string", "description": "A thought to think about."}}, "required": ["thought"]}}
</tools>
For each function call, output the function name and arguments within the following XML format:
<tool_call>{function-name}
<arg_key>{ar

In [8]:
# Complete conversation with user, assistant, and tool messages (no think tags)
messages = [
    {"role": "user", "content": "hiiii"}
]

formatted = tokenizer.apply_chat_template(
    messages, 
    tools=list(base_tools.values()), 
    tokenize=False, 
    add_generation_prompt=True
)

print(formatted)

<|startoftext|><|im_start|>system
# Tools
You may call one or more functions to assist with the user query.
You are provided with function signatures within <tools></tools> XML tags:
<tools>
{"name": "web_search", "description": "Search the web", "parameters": {"type": "object", "properties": {"query": {"type": "string", "description": "Search query"}}, "required": ["query"]}, "return": {"type": "array", "items": {"type": "object"}, "description": "The list of items having url,title,description."}}
{"name": "think", "description": "Use the tool to think about something. It will not obtain new information or change the database, but just append the thought to the log. Use it when complex reasoning or some cache memory is needed.", "parameters": {"type": "object", "properties": {"thought": {"type": "string", "description": "A thought to think about."}}, "required": ["thought"]}}
</tools>
For each function call, output the function name and arguments within the following XML format:
<tool

In [9]:
# Complete conversation with user, assistant, and tool messages (no think tags)
messages = [
    {"role": "user", "content": "What is the current status of candidate ID 12345?"},
    {"role": "assistant", "content": '<tool_call>get_candidate_status\n<arg_key>candidate_id</arg_key>\n<arg_value>12345</arg_value>\n</tool_call>'},
    {"role": "tool", "content": '{"candidate_id": "12345", "status": "Interview Scheduled", "position": "Clinical Research Associate", "date": "2023-11-20"}'},
    {"role": "assistant", "content": "The candidate with ID 12345 is currently in the \"Interview Scheduled\" stage for the position of Clinical Research Associate, with an interview date set for 2023-11-20."},
    {"role": "user", "content": "Can you also search for all candidates for the position of Data Scientist?"},
    {"role": "assistant", "content": '<tool_call>search_database\n<arg_key>query</arg_key>\n<arg_value>Data Scientist</arg_value>\n<arg_key>limit</arg_key>\n<arg_value>10</arg_value>\n</tool_call>'},
    {"role": "tool", "name":"search_database","content": '[{"candidate_id": "67890", "name": "John Doe", "status": "Applied"}, {"candidate_id": "67891", "name": "Jane Smith", "status": "Interview Completed"}]'},
    {"role": "assistant", "content": "I found 2 candidates for the Data Scientist position:\n1. John Doe (ID: 67890) - Status: Applied\n2. Jane Smith (ID: 67891) - Status: Interview Completed"}
]

formatted = tokenizer.apply_chat_template(
    messages,
    tools=list(base_tools.values()),
    tokenize=False, 
    add_generation_prompt=False
)

print(formatted)

<|startoftext|><|im_start|>system
# Tools
You may call one or more functions to assist with the user query.
You are provided with function signatures within <tools></tools> XML tags:
<tools>
{"name": "web_search", "description": "Search the web", "parameters": {"type": "object", "properties": {"query": {"type": "string", "description": "Search query"}}, "required": ["query"]}, "return": {"type": "array", "items": {"type": "object"}, "description": "The list of items having url,title,description."}}
{"name": "think", "description": "Use the tool to think about something. It will not obtain new information or change the database, but just append the thought to the log. Use it when complex reasoning or some cache memory is needed.", "parameters": {"type": "object", "properties": {"thought": {"type": "string", "description": "A thought to think about."}}, "required": ["thought"]}}
</tools>
For each function call, output the function name and arguments within the following XML format:
<tool

## Pre fine-tuning for formatting

In [10]:
from datasets import load_dataset
dataset = load_dataset("Team-ACE/ToolACE")
dataset

DatasetDict({
    train: Dataset({
        features: ['system', 'conversations'],
        num_rows: 11300
    })
})

In [11]:
import re

def convert_assistant_response(text: str) -> str:
    text = text.strip()
    # Match pattern: [FunctionName(arg1="val1", arg2='val2', ...)]
    match = re.match(r'\[(\w+(?:\s+\w+)*)\((.*)\)\]', text)
    if not match:
        return text  # Not a tool call â†’ return as-is

    func_name = match.group(1)
    args_str = match.group(2)

    # Parse key=value pairs (support both "..." and '...')
    args = []
    for pair in args_str.split(','):
        pair = pair.strip()
        if '=' in pair:
            key, value = pair.split('=', 1)
            key = key.strip()
            value = value.strip().strip('"').strip("'")
            args.append((key, value))

    # Build XML-style output
    xml_lines = [f"<tool_call>{func_name}"]
    for k, v in args:
        xml_lines.append(f"<arg_key>{k}</arg_key>")
        xml_lines.append(f"<arg_value>{v}</arg_value>")
    xml_lines.append("</tool_call>")
    
    return "\n".join(xml_lines)

inp = '[Get Zip Code Information(country="us", postal_code="10001"),Get Zip Code sdf(country="us", postal_code="10001")]'
print(convert_assistant_response(inp))

<tool_call>Get Zip Code Information
<arg_key>country</arg_key>
<arg_value>us</arg_value>
<arg_key>postal_code</arg_key>
<arg_value>10001")</arg_value>
<arg_key>Get Zip Code sdf(country</arg_key>
<arg_value>us</arg_value>
<arg_key>postal_code</arg_key>
<arg_value>10001</arg_value>
</tool_call>


In [12]:
import re

def convert_assistant_response_fixed(text: str) -> str:
    """
    Fixed version that handles multiple function calls properly.
    Converts format: [FunctionName(arg1="val1", arg2="val2")]
    To: <tool_call>FunctionName\n<arg_key>arg1</arg_key>\n<arg_value>val1</arg_value>...
    """
    text = text.strip()
    
    # Check if it looks like a tool call format
    if not text.startswith('[') or not text.endswith(']'):
        return text  # Not a tool call â†’ return as-is
    
    # Remove outer brackets
    text = text[1:-1].strip()
    
    # Split multiple function calls by '),FunctionName(' or '),' patterns
    # This regex finds function boundaries
    function_pattern = r'(\w+(?:\s+\w+)*)\s*\((.*?)\)(?:,|$)'
    matches = re.finditer(function_pattern, text)
    
    results = []
    for match in matches:
        func_name = match.group(1).strip()
        args_str = match.group(2).strip()
        
        # Parse key=value pairs more carefully
        args = []
        # Use a more robust regex to handle quoted values with commas inside
        arg_pattern = r'(\w+)\s*=\s*(["\'])([^"\']*)\2'
        for arg_match in re.finditer(arg_pattern, args_str):
            key = arg_match.group(1)
            value = arg_match.group(3)  # The content between quotes
            args.append((key, value))
        
        # Build XML-style output for this function call
        xml_lines = [f"<tool_call>{func_name}"]
        for k, v in args:
            xml_lines.append(f"<arg_key>{k}</arg_key>")
            xml_lines.append(f"<arg_value>{v}</arg_value>")
        xml_lines.append("</tool_call>")
        
        results.append("\n".join(xml_lines))
    
    return "\n\n".join(results) if results else text

# Test cases
test_cases = [
    '[Get Zip Code Information(country="us", postal_code="10001")]',
    '[Get Zip Code Information(country="us", postal_code="10001"),Get Zip Code sdf(country="us", postal_code="10001")]',
    '[SEC Filings(identifier="AAPL")]',
]

print("Testing fixed function:\n")
for test in test_cases:
    print(f"Input: {test}")
    print(f"Output:\n{convert_assistant_response_fixed(test)}")
    print("-" * 80)

Testing fixed function:

Input: [Get Zip Code Information(country="us", postal_code="10001")]
Output:
<tool_call>Get Zip Code Information
<arg_key>country</arg_key>
<arg_value>us</arg_value>
<arg_key>postal_code</arg_key>
<arg_value>10001</arg_value>
</tool_call>
--------------------------------------------------------------------------------
Input: [Get Zip Code Information(country="us", postal_code="10001"),Get Zip Code sdf(country="us", postal_code="10001")]
Output:
<tool_call>Get Zip Code Information
<arg_key>country</arg_key>
<arg_value>us</arg_value>
<arg_key>postal_code</arg_key>
<arg_value>10001</arg_value>
</tool_call>

<tool_call>Get Zip Code sdf
<arg_key>country</arg_key>
<arg_value>us</arg_value>
<arg_key>postal_code</arg_key>
<arg_value>10001</arg_value>
</tool_call>
--------------------------------------------------------------------------------
Input: [SEC Filings(identifier="AAPL")]
Output:
<tool_call>SEC Filings
<arg_key>identifier</arg_key>
<arg_value>AAPL</arg_value>

In [13]:
import json

def prepare_tool(text):
    import json
    data = json.loads(text)
    tool_results = []
    for x in data:
        tool_results.append({"role":"tool","name":x['name'],"content":json.dumps((x['results']))})
    return tool_results

def convert_to_messages(example):
    _messages = []
    tools = example['system'].split("Here is a list of functions in JSON format that you can invoke:")[-1].strip().split("Should you decide to return the function call(s)")[0].replace(". \n","")
    for x in example['conversations']:
        if x['from'] =="assistant":
            _messages.append({"role":x['from'],"name":"","content":convert_assistant_response_fixed(x['value'])})
        elif x['from']=='tool':
            _messages.extend(prepare_tool(x['value']))
        else:
            _messages.append({"role":x['from'],"name":"","content":x['value']})

    return {"messages":_messages,"tools":tools}

dataset = dataset.map(convert_to_messages,num_proc=32)
dataset['train'][0]['messages']
# convert_to_messages(dataset['train'][0])

[{'content': "I'm considering investing and I'd like to know what's happening in the market right now. Could you get me the top market trends in the US?",
  'name': '',
  'role': 'user'},
 {'content': '<tool_call>Market Trends API\n<arg_key>trend_type</arg_key>\n<arg_value>MARKET_INDEXES</arg_value>\n<arg_key>country</arg_key>\n<arg_value>us</arg_value>\n</tool_call>',
  'name': '',
  'role': 'assistant'},
 {'content': '{"trends": [{"name": "S&P 500", "description": "Standard & Poor\'s 500 Index is a market-capitalization-weighted index of the 500 largest U.S. publicly traded companies.", "data": {"current_value": "4172.80", "percentage_change": "+0.68%"}}, {"name": "DOW J", "description": "Dow Jones Industrial Average is a price-weighted average of 30 blue-chip stocks that are generally the leaders in their industry.", "data": {"current_value": "34479.60", "percentage_change": "+0.47%"}}, {"name": "NASDAQ", "description": "The NASDAQ Composite is a broad-based capitalization-weighted 

In [14]:
def filter_tool(example):
    try:
            json.loads(example['tools'])
    except Exception as e:
            return False
    return True

dataset = dataset.filter(filter_tool,num_proc=32)
dataset

DatasetDict({
    train: Dataset({
        features: ['system', 'conversations', 'messages', 'tools'],
        num_rows: 10782
    })
})

In [15]:
import json

def normalize_tool(tool):
    # If already in correct format, return as-is
    if "type" in tool and tool.get("type") == "function" and "function" in tool:
        return tool

    # Otherwise, assume it's the old flat format
    return {
        "type": "function",
        "function": {
            "name": tool["name"],
            "description": tool.get("description", ""),
            "parameters": {
                "type": "object",
                "properties": tool.get("parameters", {}).get("properties", {}),
                "required": tool.get("parameters", {}).get("required", [])
                # Note: ignore top-level "required": null
            }
        }
    }

def apply_chat_template(example):
    dynamic_tools = json.loads(example['tools'])
    normalized_dynamic_tools = [normalize_tool(t) for t in dynamic_tools]
    all_tools = list(base_tools.values()) + normalized_dynamic_tools

    prompt = tokenizer.apply_chat_template(
        example['messages'],
        tools=all_tools,
        tokenize=False,
        add_generation_prompt=False
    )
    return {"prompt": prompt}

dataset = dataset.map(apply_chat_template,num_proc=32)
dataset


DatasetDict({
    train: Dataset({
        features: ['system', 'conversations', 'messages', 'tools', 'prompt'],
        num_rows: 10782
    })
})

In [16]:
print(dataset['train'][1]['messages'])

[{'content': 'Could you please find me some quotes about "inspiration"?', 'name': '', 'role': 'user'}, {'content': '<tool_call>Quotes by Keywords\n<arg_key>word</arg_key>\n<arg_value>inspiration</arg_value>\n</tool_call>', 'name': '', 'role': 'assistant'}, {'content': '{"quotes": [{"text": "The only way to achieve the impossible is to believe it is possible.", "author": "Charles Kingsleigh"}, {"text": "Don\'t watch the clock; do what it does. Keep going.", "author": "Sam Levenson"}, {"text": "Success is not the key to happiness. Happiness is the key to success. If you love what you are doing, you will be successful.", "author": "Albert Schweitzer"}]}', 'name': 'Quotes by Keywords', 'role': 'tool'}, {'content': 'Here are some inspiration quotes for you:\n\n1. "The only way to achieve the impossible is to believe it is possible." - Charles Kingsleigh\n2. "Don\'t watch the clock; do what it does. Keep going." - Sam Levenson\n3. "Success is not the key to happiness. Happiness is the key to

In [17]:
print(dataset['train'][1]['prompt'])

<|startoftext|><|im_start|>system
# Tools
You may call one or more functions to assist with the user query.
You are provided with function signatures within <tools></tools> XML tags:
<tools>
{"name": "web_search", "description": "Search the web", "parameters": {"type": "object", "properties": {"query": {"type": "string", "description": "Search query"}}, "required": ["query"]}, "return": {"type": "array", "items": {"type": "object"}, "description": "The list of items having url,title,description."}}
{"name": "think", "description": "Use the tool to think about something. It will not obtain new information or change the database, but just append the thought to the log. Use it when complex reasoning or some cache memory is needed.", "parameters": {"type": "object", "properties": {"thought": {"type": "string", "description": "A thought to think about."}}, "required": ["thought"]}}
{"name": "Quotes by Keywords", "description": "Returns a list of quotes containing the specified keyword.", "p

In [18]:
dataset['train'][0]['tools']

'[{"name": "newAddress", "description": "Generates a new Ethereum address that can be used to send or receive funds. Do not lose the password! We can\'t restore access to an address if you lose it.", "parameters": {"type": "dict", "properties": {"password": {"description": "The password for the new Ethereum address", "type": "string"}}, "required": ["password"]}, "required": null}, {"name": "Market Trends API", "description": "Get the latest market trends and relevant news for a specified country and language.", "parameters": {"type": "dict", "properties": {"trend_type": {"description": "Trend type.", "type": "string", "enum": ["MARKET_INDEXES", "MOST_ACTIVE", "GAINERS", "LOSERS", "CRYPTO", "CURRENCIES", "CLIMATE_LEADERS"]}, "country": {"description": "The country for which to get trends, specified as a 2-letter country code - see ISO 3166.", "type": "string", "default": "us"}, "language": {"description": "The language to use for the results, specified as a 2-letter language code - see

## Let's now pre fine-tune the model so it follows our custom GRPO formatting!

In [19]:
from trl import SFTTrainer, SFTConfig

train_dataset_sft = dataset['train'].select(range(250))
eval_dataset_sft = dataset['train'].select(range(250,750))

trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = train_dataset_sft,
    eval_dataset=eval_dataset_sft,
    args = SFTConfig(
        dataset_text_field = "prompt",
        per_device_train_batch_size = 4,
        gradient_accumulation_steps = 4,# Increase to 4 for smoother training
        warmup_steps = 5,
        num_train_epochs = 2, # Set this for 1 full training run.
        learning_rate = 2e-5, # Reduce to 2e-5 for long training runs
        logging_steps = 5,
        # optim = "adamw_8bit",
        weight_decay = 0.01,
        lr_scheduler_type = "linear",
        seed = 3407,

        report_to = "tensorboard", # Use this for WandB etc
        max_steps=100,

        # eval_strategy="steps",  # evaluate during training
        # eval_steps=10,                # evaluate every 10 steps
        # eval_delay=10,                # wait until step 10 to start evaluating
        # save_strategy="steps",        # optional: save model at eval points
        # save_steps=10,                # optional: save every eval

    ),
)
trainer.train()

#type(model) peft.peft_model.PeftModelForCausalLM
model.save_pretrained("lfm2-sft-tool")


==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 250 | Num Epochs = 7 | Total steps = 100
O^O/ \_/ \    Batch size per device = 4 | Gradient accumulation steps = 4
\        /    Data Parallel GPUs = 1 | Total batch size (4 x 4 x 1) = 16
 "-____-"     Trainable parameters = 983,040 of 355,467,008 (0.28% trained)


Unsloth: Will smartly offload gradients to save VRAM!


Step,Training Loss
5,4.1791
10,3.8959
15,3.7005
20,3.5737
25,3.2429
30,2.9673
35,2.8875
40,2.694
45,2.4969
50,2.5734


In [20]:
text = tokenizer.apply_chat_template(
    [{"role":"user","content":"search for when is modi born? and who won ipl match"}],
    tools = list(base_tools.values()),
    tokenize = False,
    add_generation_prompt = True, # Must add for generation
)

from transformers import TextStreamer
_ = model.generate(
    **tokenizer(text, return_tensors = "pt").to("cuda"),
    temperature = 0.3,
    max_new_tokens = 2048,
    streamer = TextStreamer(tokenizer, skip_prompt = False),
)

<|startoftext|><|startoftext|><|im_start|>system
# Tools
You may call one or more functions to assist with the user query.
You are provided with function signatures within <tools></tools> XML tags:
<tools>
{"name": "web_search", "description": "Search the web", "parameters": {"type": "object", "properties": {"query": {"type": "string", "description": "Search query"}}, "required": ["query"]}, "return": {"type": "array", "items": {"type": "object"}, "description": "The list of items having url,title,description."}}
{"name": "think", "description": "Use the tool to think about something. It will not obtain new information or change the database, but just append the thought to the log. Use it when complex reasoning or some cache memory is needed.", "parameters": {"type": "object", "properties": {"thought": {"type": "string", "description": "A thought to think about."}}, "required": ["thought"]}}
</tools>
For each function call, output the function name and arguments within the following XM

<tool_call>{web_search}
<query>When is Modi born? Who won IPL match?</query>
</tool_call><|im_end|>


In [21]:
from peft import PeftModel
from transformers import AutoModelForCausalLM

base_model = AutoModelForCausalLM.from_pretrained("LiquidAI/LFM2-350M")
sft_peft_model = PeftModel.from_pretrained(base_model, "lfm2-sft-tool").to('cuda')
text = tokenizer.apply_chat_template(
    [{"role":"user","content":"search for when is modi born? and who won ipl match"}],
    tools = list(base_tools.values()),
    tokenize = False,
    add_generation_prompt = True, # Must add for generation
)

from transformers import TextStreamer
_ = sft_peft_model.generate(
    **tokenizer(text, return_tensors = "pt").to("cuda"),
    temperature = 0.3,
    max_new_tokens = 2048,
    streamer = TextStreamer(tokenizer, skip_prompt = False),
)

<|startoftext|><|startoftext|><|im_start|>system
# Tools
You may call one or more functions to assist with the user query.
You are provided with function signatures within <tools></tools> XML tags:
<tools>
{"name": "web_search", "description": "Search the web", "parameters": {"type": "object", "properties": {"query": {"type": "string", "description": "Search query"}}, "required": ["query"]}, "return": {"type": "array", "items": {"type": "object"}, "description": "The list of items having url,title,description."}}
{"name": "think", "description": "Use the tool to think about something. It will not obtain new information or change the database, but just append the thought to the log. Use it when complex reasoning or some cache memory is needed.", "parameters": {"type": "object", "properties": {"thought": {"type": "string", "description": "A thought to think about."}}, "required": ["thought"]}}
</tools>
For each function call, output the function name and arguments within the following XM

In [22]:
torch.cuda.empty_cache()
import gc
gc.collect()

1028

## GRPO Training

In [71]:
def get_system_prompt_based_messages(example):
    sp = re.split(r"<\|im_start\|>system\s*|\s*<\|im_end\|>", example['prompt'], maxsplit=2)[1].strip()
    return {"prompt":[{"role":"system",'content':sp},*example['messages']]}

grpo_dataset = dataset.map(get_system_prompt_based_messages,num_proc=32, remove_columns=["messages","system", 'conversations', 'tools'])
grpo_dataset


DatasetDict({
    train: Dataset({
        features: ['prompt'],
        num_rows: 10782
    })
})

In [72]:
# def get_answer(exp): return {"answer":"22","samples":[12]}
# grpo_dataset = grpo_dataset.map(get_answer,num_proc=32)
# grpo_dataset
grpo_dataset.save_to_disk('tool-ace-dataset-grpo',num_proc=32)


Saving the dataset (32/32 shards): 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 10782/10782 [00:01<00:00, 10417.74 examples/s]


In [51]:
grpo_dataset

DatasetDict({
    train: Dataset({
        features: ['prompt', 'answer', 'samples'],
        num_rows: 10782
    })
})

In [74]:
import unsloth
import re
from datasets import load_dataset
from unsloth import FastLanguageModel

# Load model using Unsloth's FastLanguageModel
max_seq_length = 8000
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "lfm2-sft-tool",  # Load the SFT fine-tuned model directly
    max_seq_length = max_seq_length,
    load_in_4bit = False,
    fast_inference = False,
    gpu_memory_utilization = 0.9,
)

dataset = load_dataset('tool-ace-dataset-grpo',num_proc=32)
print(dataset)

glm_chat_template = '''{{- bos_token -}}
{%- set system_prompt = "" -%}
{%- set ns = namespace(system_prompt="") -%}
{%- if messages[0]["role"] == "system" -%}
	{%- set ns.system_prompt = messages[0]["content"] -%}
	{%- set messages = messages[1:] -%}
{%- endif -%}
{%- if tools -%}
	{%- set ns.system_prompt = ns.system_prompt + ("\\n" if ns.system_prompt else "") + "# Tools\\nYou may call one or more functions to assist with the user query.\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\\n" -%}
	{%- for tool in tools -%}
		{%- if tool is not string -%}
			{%- set tool = tool.function | tojson -%}
		{%- endif -%}
		{%- set ns.system_prompt = ns.system_prompt + tool -%}
		{%- if not loop.last -%}
			{%- set ns.system_prompt = ns.system_prompt + "\\n" -%}
		{%- endif -%}
	{%- endfor -%}
	{%- set ns.system_prompt = ns.system_prompt + "\\n</tools>\\nFor each function call, output the function name and arguments within the following XML format:\\n<tool_call>{function-name}\\n<arg_key>{arg_key}</arg_key>\\n<arg_value>{arg_value}</arg_value>\\n...\\n</tool_call>" -%}
{%- endif -%}
{%- if ns.system_prompt -%}
	{{- "<|im_start|>system\\n" + ns.system_prompt + "<|im_end|>\\n" -}}
{%- endif -%}
{%- for message in messages -%}
	{{- "<|im_start|>" + message["role"] + "\\n" -}}
	{%- set content = message["content"] -%}
	{%- if content is not string -%}
		{%- set content = content | tojson -%}
	{%- endif -%}
	{%- if message["role"] == "tool" -%}
	{%- if message.get('name') -%}
 		{%- set content = "<|observation|>\n<tool_name>" + message.get('name', 'unknown') + "</tool_name>\n<tool_response>" + content + "</tool_response>" -%}
	{%- else -%}
		{%- set content = "\n<|observation|>\n<|tool_response_start|>" + content + "<|tool_response_end|>" -%}
	{%- endif -%}
	{%- endif -%}
	{{- content + "<|im_end|>\\n" -}}
{%- endfor -%}
{%- if add_generation_prompt -%}
	{{- "<|im_start|>assistant\\n" -}}
{%- endif -%}'''
base_tools = {
  "web_search": {
    "type": "function",
    "function": {
      "name": "web_search",
      "description": "Search the web",
      "parameters": {
        "type": "object",
        "properties": {
          "query": {
            "type": "string",
            "description": "Search query"
          }
        },
        "required": [
          "query"
        ]
      },
      "return": {
        "type": "array",
        "items": {
          "type": "object"
        },
        "description": "The list of items having url,title,description."
      }
    }
  },
  "think": {
    "type": "function",
    "function": {
      "name": "think",
      "description": "Use the tool to think about something. It will not obtain new information or change the database, but just append the thought to the log. Use it when complex reasoning or some cache memory is needed.",
      "parameters": {
        "type": "object",
        "properties": {
          "thought": {
            "type": "string",
            "description": "A thought to think about."
          }
        },
        "required": [
          "thought"
        ]
      }
    }
  }
}

tokenizer.chat_template = glm_chat_template

text = tokenizer.apply_chat_template(
    [{"role":"user","content":"search for when is modi born? and who won ipl match"}],
    tools = list(base_tools.values()),
    tokenize = False,
    add_generation_prompt = True, # Must add for generation
)

==((====))==  Unsloth 2025.10.1: Fast Lfm2 patching. Transformers: 4.56.2. vLLM: 0.10.2.
   \\   /|    NVIDIA RTX 6000 Ada Generation. Num GPUs = 1. Max memory: 47.507 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.8.0+cu128. CUDA: 8.9. CUDA Toolkit: 12.8. Triton: 3.4.0
\        /    Bfloat16 = TRUE. FA [Xformers = None. FA2 = True]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
Unsloth: QLoRA and full finetuning all not selected. Switching to 16bit LoRA.
DatasetDict({
    train: Dataset({
        features: ['prompt'],
        num_rows: 10782
    })
})


In [75]:
from transformers import TextStreamer
_ = model.generate(
    **tokenizer(text, return_tensors = "pt").to("cuda"),
    temperature = 0.3,
    max_new_tokens = 2048,
    streamer = TextStreamer(tokenizer, skip_prompt = False),
)

<|startoftext|><|startoftext|><|im_start|>system
# Tools
You may call one or more functions to assist with the user query.
You are provided with function signatures within <tools></tools> XML tags:
<tools>
{"name": "web_search", "description": "Search the web", "parameters": {"type": "object", "properties": {"query": {"type": "string", "description": "Search query"}}, "required": ["query"]}, "return": {"type": "array", "items": {"type": "object"}, "description": "The list of items having url,title,description."}}
{"name": "think", "description": "Use the tool to think about something. It will not obtain new information or change the database, but just append the thought to the log. Use it when complex reasoning or some cache memory is needed.", "parameters": {"type": "object", "properties": {"thought": {"type": "string", "description": "A thought to think about."}}, "required": ["thought"]}}
</tools>
For each function call, output the function name and arguments within the following XM

In [None]:


def check_answer(prompts, completions, **kwargs):
    scores = []
    for comp in completions:
        # Extract the content string (assuming structure like [{"content": "..."}])
        content = comp[0]['content']
        length = len(content)
        
        # Define max desirable length
        max_good_length = 500
        
        if length <= max_good_length:
            # Shorter = better â†’ higher score
            # Normalize: score = 1 - (length / max_good_length)
            # So 0 chars â†’ 1.0, 500 chars â†’ 0.0
            score = 1.0 - (length / max_good_length)
        else:
            # Penalize long responses: give low or zero reward
            score = 0.0
        
        # Optional: keep 2 decimal places
        score = round(score, 2)
        scores.append(score)
    
    print("Scores:", scores)
    return scores


maximum_length = 6000
max_prompt_length = maximum_length + 1 # + 1 just in case!
max_completion_length = max_seq_length - max_prompt_length

from unsloth import vLLMSamplingParams
vllm_sampling_params = vLLMSamplingParams(
    min_p = 0.1,
    top_p = 1.0,
    top_k = -1,
    seed = 3407,
    stop = [tokenizer.eos_token],
    include_stop_str_in_output = True,
)

from trl import GRPOConfig, GRPOTrainer

# First create the config without vllm_sampling_params
training_args = GRPOConfig(
    temperature = 1.0,
    learning_rate = 5e-6,
    weight_decay = 0.01,
    warmup_ratio = 0.1,
    lr_scheduler_type = "linear",
    # optim = "adamw_8bit",
    logging_steps = 1,
    per_device_train_batch_size = 1,
    gradient_accumulation_steps = 1, # Increase to 4 for smoother training
    num_generations = 4, # Decrease if out of memory
    max_prompt_length = max_prompt_length,
    max_completion_length = max_completion_length,
    # num_train_epochs = 1, # Set to 1 for a full training run
    max_steps = 100,
    save_steps = 100,
    report_to = "tensorboard", # Can use Weights & Biases
    output_dir = "outputs",
    torch_compile = False,  # Disable torch.compile to avoid dimension mismatch errors

    # For optional training + evaluation
    # fp16_full_eval = True,
    # per_device_eval_batch_size = 4,
    # eval_accumulation_steps = 1,
    # eval_strategy = "steps",
    # eval_steps = 1,
)

# Add vllm_sampling_params after creation to avoid JSON serialization issues
training_args.vllm_sampling_params = vllm_sampling_params

# Patch the to_dict method to exclude vllm_sampling_params from serialization
original_to_dict = training_args.to_dict
def patched_to_dict():
    d = original_to_dict()
    # Remove vllm_sampling_params from the dict to avoid JSON serialization error
    d.pop('vllm_sampling_params', None)
    return d
training_args.to_dict = patched_to_dict

# For optional training + evaluation
train_dataset_for_grpo = dataset['train'].select(range(250,300))
# new_dataset = dataset.train_test_split(test_size = 0.01)

tokenizer.chat_template = glm_chat_template
trainer = GRPOTrainer(
    model = model,
    processing_class = tokenizer,
    reward_funcs = [
        # match_format_exactly,
        # match_format_approximately,
        check_answer,
        # check_numbers,
    ],
    args = training_args,
    train_dataset = train_dataset_for_grpo,

    # For optional training + evaluation
    # train_dataset = new_dataset["train"],
    # eval_dataset = new_dataset["test"],
)
trainer.train()


# from transformers import TextStreamer
# _ = sft_peft_model.generate(
#     **tokenizer(text, return_tensors = "pt").to("cuda"),
#     temperature = 0.3,
#     max_new_tokens = 2048,
#     streamer = TextStreamer(tokenizer, skip_prompt = False),
# )
