In [1]:
import os
os.environ["UNSLOTH_VLLM_STANDBY"] = "1"# [NEW] Extra 30% context lengths!

In [2]:
from unsloth import FastLanguageModel
import torch
max_seq_length = 8000 # Can increase for longer reasoning traces
lora_rank = 32 # Larger rank = smarter, but slower

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "LiquidAI/LFM2-350M",
    max_seq_length = max_seq_length,
    load_in_4bit = False, # False for LoRA 16bit
    fast_inference = False, # Enable vLLM fast inference
    max_lora_rank = lora_rank,
    gpu_memory_utilization = 0.9, # Reduce if out of memory

    use_exact_model_name = True #for hugginface cache or repo mdoel name
)

model = FastLanguageModel.get_peft_model(
    model,
    r = lora_rank, # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128
    target_modules = [
        "q_proj", "k_proj", "v_proj", "o_proj",
        "gate_proj", "up_proj", "down_proj",
    ],
    lora_alpha = lora_rank*2, # *2 speeds up training
    use_gradient_checkpointing = "unsloth", # Reduces memory usage
    random_state = 3407,
)

  from .autonotebook import tqdm as notebook_tqdm


ðŸ¦¥ Unsloth: Will patch your computer to enable 2x faster free finetuning.
INFO 10-08 02:58:57 [__init__.py:216] Automatically detected platform cuda.


W1008 02:58:57.884000 592413 torch/utils/cpp_extension.py:2425] TORCH_CUDA_ARCH_LIST is not set, all archs for visible cards are included for compilation. 
W1008 02:58:57.884000 592413 torch/utils/cpp_extension.py:2425] If this is not desired, please set os.environ['TORCH_CUDA_ARCH_LIST'] to specific architectures.


Switching to PyTorch attention since your Xformers is broken.

Requires Flash-Attention version >=2.7.1,<=2.8.2 but got 2.8.3.
ðŸ¦¥ Unsloth Zoo will now patch everything to make training faster!
==((====))==  Unsloth 2025.10.1: Fast Lfm2 patching. Transformers: 4.56.2. vLLM: 0.10.2.
   \\   /|    NVIDIA RTX 6000 Ada Generation. Num GPUs = 1. Max memory: 47.507 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.8.0+cu128. CUDA: 8.9. CUDA Toolkit: 12.8. Triton: 3.4.0
\        /    Bfloat16 = TRUE. FA [Xformers = None. FA2 = True]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
Unsloth: QLoRA and full finetuning all not selected. Switching to 16bit LoRA.
Unsloth: Making `model.base_model.model.model` require gradients


In [3]:
tokenizer.chat_template

'{{- bos_token -}}\n{%- set system_prompt = "" -%}\n{%- set ns = namespace(system_prompt="") -%}\n{%- if messages[0]["role"] == "system" -%}\n\t{%- set ns.system_prompt = messages[0]["content"] -%}\n\t{%- set messages = messages[1:] -%}\n{%- endif -%}\n{%- if tools -%}\n\t{%- set ns.system_prompt = ns.system_prompt + ("\\n" if ns.system_prompt else "") + "List of tools: <|tool_list_start|>[" -%}\n\t{%- for tool in tools -%}\n\t\t{%- if tool is not string -%}\n            {%- set tool = tool | tojson -%}\n\t\t{%- endif -%}\n\t\t{%- set ns.system_prompt = ns.system_prompt + tool -%}\n        {%- if not loop.last -%}\n            {%- set ns.system_prompt = ns.system_prompt + ", " -%}\n        {%- endif -%}\n\t{%- endfor -%}\n\t{%- set ns.system_prompt = ns.system_prompt + "]<|tool_list_end|>" -%}\n{%- endif -%}\n{%- if ns.system_prompt -%}\n\t{{- "<|im_start|>system\\n" + ns.system_prompt + "<|im_end|>\\n" -}}\n{%- endif -%}\n{%- for message in messages -%}\n\t{{- "<|im_start|>" + message

In [3]:
# Clean GLM-style template with line-by-line tool format
glm_chat_template = '''{{- bos_token -}}
{%- set system_prompt = "" -%}
{%- set ns = namespace(system_prompt="") -%}
{%- if messages[0]["role"] == "system" -%}
	{%- set ns.system_prompt = messages[0]["content"] -%}
	{%- set messages = messages[1:] -%}
{%- endif -%}
{%- if tools -%}
	{%- set ns.system_prompt = ns.system_prompt + ("\\n" if ns.system_prompt else "") + "# Tools\\nYou may call one or more functions to assist with the user query.\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\\n" -%}
	{%- for tool in tools -%}
		{%- if tool is not string -%}
			{%- set tool = tool.function | tojson -%}
		{%- endif -%}
		{%- set ns.system_prompt = ns.system_prompt + tool -%}
		{%- if not loop.last -%}
			{%- set ns.system_prompt = ns.system_prompt + "\\n" -%}
		{%- endif -%}
	{%- endfor -%}
	{%- set ns.system_prompt = ns.system_prompt + "\\n</tools>\\nFor each function call, output the function name and arguments within the following XML format:\\n<tool_call>{function-name}\\n<arg_key>{arg_key}</arg_key>\\n<arg_value>{arg_value}</arg_value>\\n...\\n</tool_call>" -%}
{%- endif -%}
{%- if ns.system_prompt -%}
	{{- "<|im_start|>system\\n" + ns.system_prompt + "<|im_end|>\\n" -}}
{%- endif -%}
{%- for message in messages -%}
	{{- "<|im_start|>" + message["role"] + "\\n" -}}
	{%- set content = message["content"] -%}
	{%- if content is not string -%}
		{%- set content = content | tojson -%}
	{%- endif -%}
	{%- if message["role"] == "tool" -%}
		{%- set content = "\n<|observation|>\n<|tool_response_start|>" + content + "<|tool_response_end|>" -%}
	{%- endif -%}
	{{- content + "<|im_end|>\\n" -}}
{%- endfor -%}
{%- if add_generation_prompt -%}
	{{- "<|im_start|>assistant\\n" -}}
{%- endif -%}'''

tokenizer.chat_template = glm_chat_template

In [5]:
base_tools = {
  "web_search": {
    "type": "function",
    "function": {
      "name": "web_search",
      "description": "Search the web",
      "parameters": {
        "type": "object",
        "properties": {
          "query": {
            "type": "string",
            "description": "Search query"
          }
        },
        "required": [
          "query"
        ]
      },
      "return": {
        "type": "array",
        "items": {
          "type": "object"
        },
        "description": "The list of items having url,title,description."
      }
    }
  },
  "think": {
    "type": "function",
    "function": {
      "name": "think",
      "description": "Use the tool to think about something. It will not obtain new information or change the database, but just append the thought to the log. Use it when complex reasoning or some cache memory is needed.",
      "parameters": {
        "type": "object",
        "properties": {
          "thought": {
            "type": "string",
            "description": "A thought to think about."
          }
        },
        "required": [
          "thought"
        ]
      }
    }
  }
}

"""
  "response": {
    "type": "function",
    "function": {
      "name": "response",
      "description": "Send a message back to the user.",
      "parameters": {
        "type": "object",
        "properties": {
          "message": {
            "type": "string",
            "description": "The message to send to the user."
          }
        },
        "required": [
          "message"
        ]
      }
    }
  }
"""

'\n  "response": {\n    "type": "function",\n    "function": {\n      "name": "response",\n      "description": "Send a message back to the user.",\n      "parameters": {\n        "type": "object",\n        "properties": {\n          "message": {\n            "type": "string",\n            "description": "The message to send to the user."\n          }\n        },\n        "required": [\n          "message"\n        ]\n      }\n    }\n  }\n'

In [6]:
# Complete conversation with user, assistant, and tool messages (no think tags)
messages = [
    {"role": "user", "content": "hiiii"}
]

formatted = tokenizer.apply_chat_template(
    messages, 
    tools=list(base_tools.values()), 
    tokenize=False, 
    add_generation_prompt=True
)

print(formatted)

<|startoftext|><|im_start|>system
# Tools
You may call one or more functions to assist with the user query.
You are provided with function signatures within <tools></tools> XML tags:
<tools>
{"name": "web_search", "description": "Search the web", "parameters": {"type": "object", "properties": {"query": {"type": "string", "description": "Search query"}}, "required": ["query"]}, "return": {"type": "array", "items": {"type": "object"}, "description": "The list of items having url,title,description."}}
{"name": "think", "description": "Use the tool to think about something. It will not obtain new information or change the database, but just append the thought to the log. Use it when complex reasoning or some cache memory is needed.", "parameters": {"type": "object", "properties": {"thought": {"type": "string", "description": "A thought to think about."}}, "required": ["thought"]}}
</tools>
For each function call, output the function name and arguments within the following XML format:
<tool

In [7]:
# Complete conversation with user, assistant, and tool messages (no think tags)
messages = [
    {"role": "user", "content": "What is the current status of candidate ID 12345?"},
    {"role": "assistant", "content": '<tool_call>get_candidate_status\n<arg_key>candidate_id</arg_key>\n<arg_value>12345</arg_value>\n</tool_call>'},
    {"role": "tool", "content": '{"candidate_id": "12345", "status": "Interview Scheduled", "position": "Clinical Research Associate", "date": "2023-11-20"}'},
    {"role": "assistant", "content": "The candidate with ID 12345 is currently in the \"Interview Scheduled\" stage for the position of Clinical Research Associate, with an interview date set for 2023-11-20."},
    {"role": "user", "content": "Can you also search for all candidates for the position of Data Scientist?"},
    {"role": "assistant", "content": '<tool_call>search_database\n<arg_key>query</arg_key>\n<arg_value>Data Scientist</arg_value>\n<arg_key>limit</arg_key>\n<arg_value>10</arg_value>\n</tool_call>'},
    {"role": "tool", "content": '[{"candidate_id": "67890", "name": "John Doe", "status": "Applied"}, {"candidate_id": "67891", "name": "Jane Smith", "status": "Interview Completed"}]'},
    {"role": "assistant", "content": "I found 2 candidates for the Data Scientist position:\n1. John Doe (ID: 67890) - Status: Applied\n2. Jane Smith (ID: 67891) - Status: Interview Completed"}
]

formatted = tokenizer.apply_chat_template(
    messages,
    tools=list(base_tools.values()),
    tokenize=False, 
    add_generation_prompt=False
)

print(formatted)

<|startoftext|><|im_start|>system
# Tools
You may call one or more functions to assist with the user query.
You are provided with function signatures within <tools></tools> XML tags:
<tools>
{"name": "web_search", "description": "Search the web", "parameters": {"type": "object", "properties": {"query": {"type": "string", "description": "Search query"}}, "required": ["query"]}, "return": {"type": "array", "items": {"type": "object"}, "description": "The list of items having url,title,description."}}
{"name": "think", "description": "Use the tool to think about something. It will not obtain new information or change the database, but just append the thought to the log. Use it when complex reasoning or some cache memory is needed.", "parameters": {"type": "object", "properties": {"thought": {"type": "string", "description": "A thought to think about."}}, "required": ["thought"]}}
</tools>
For each function call, output the function name and arguments within the following XML format:
<tool

## Pre fine-tuning for formatting

In [8]:
from datasets import load_dataset
dataset = load_dataset("Team-ACE/ToolACE")
dataset

DatasetDict({
    train: Dataset({
        features: ['system', 'conversations'],
        num_rows: 11300
    })
})

In [9]:
import re

def convert_assistant_response(text: str) -> str:
    text = text.strip()
    # Match pattern: [FunctionName(arg1="val1", arg2='val2', ...)]
    match = re.match(r'\[(\w+(?:\s+\w+)*)\((.*)\)\]', text)
    if not match:
        return text  # Not a tool call â†’ return as-is

    func_name = match.group(1)
    args_str = match.group(2)

    # Parse key=value pairs (support both "..." and '...')
    args = []
    for pair in args_str.split(','):
        pair = pair.strip()
        if '=' in pair:
            key, value = pair.split('=', 1)
            key = key.strip()
            value = value.strip().strip('"').strip("'")
            args.append((key, value))

    # Build XML-style output
    xml_lines = [f"<tool_call>{func_name}"]
    for k, v in args:
        xml_lines.append(f"<arg_key>{k}</arg_key>")
        xml_lines.append(f"<arg_value>{v}</arg_value>")
    xml_lines.append("</tool_call>")
    
    return "\n".join(xml_lines)

inp = '[Get Zip Code Information(country="us", postal_code="10001"),Get Zip Code sdf(country="us", postal_code="10001")]'
print(convert_assistant_response(inp))

<tool_call>Get Zip Code Information
<arg_key>country</arg_key>
<arg_value>us</arg_value>
<arg_key>postal_code</arg_key>
<arg_value>10001")</arg_value>
<arg_key>Get Zip Code sdf(country</arg_key>
<arg_value>us</arg_value>
<arg_key>postal_code</arg_key>
<arg_value>10001</arg_value>
</tool_call>


In [10]:
import json

def convert_to_messages(example):
    _messages = []
    tools = example['system'].split("Here is a list of functions in JSON format that you can invoke:")[-1].strip().split("Should you decide to return the function call(s)")[0].replace(". \n","")
    for x in example['conversations']:
        if x['from'] =="assistant":
            _messages.append({"role":x['from'],"content":convert_assistant_response(x['value'])})
        else:
            _messages.append({"role":x['from'],"content":x['value']})

    return {"messages":_messages,"tools":tools}

dataset = dataset.map(convert_to_messages,num_proc=32)
dataset

DatasetDict({
    train: Dataset({
        features: ['system', 'conversations', 'messages', 'tools'],
        num_rows: 11300
    })
})

In [11]:
def filter_tool(example):
    try:
            json.loads(example['tools'])
    except Exception as e:
            return False
    return True

dataset = dataset.filter(filter_tool,num_proc=32)
dataset

DatasetDict({
    train: Dataset({
        features: ['system', 'conversations', 'messages', 'tools'],
        num_rows: 10782
    })
})

In [12]:
import json

def normalize_tool(tool):
    # If already in correct format, return as-is
    if "type" in tool and tool.get("type") == "function" and "function" in tool:
        return tool

    # Otherwise, assume it's the old flat format
    return {
        "type": "function",
        "function": {
            "name": tool["name"],
            "description": tool.get("description", ""),
            "parameters": {
                "type": "object",
                "properties": tool.get("parameters", {}).get("properties", {}),
                "required": tool.get("parameters", {}).get("required", [])
                # Note: ignore top-level "required": null
            }
        }
    }

def apply_chat_template(example):
    dynamic_tools = json.loads(example['tools'])
    normalized_dynamic_tools = [normalize_tool(t) for t in dynamic_tools]
    all_tools = list(base_tools.values()) + normalized_dynamic_tools

    prompt = tokenizer.apply_chat_template(
        example['messages'],
        tools=all_tools,
        tokenize=False,
        add_generation_prompt=False
    )
    return {"prompt": prompt}

dataset = dataset.map(apply_chat_template,num_proc=32)
dataset


DatasetDict({
    train: Dataset({
        features: ['system', 'conversations', 'messages', 'tools', 'prompt'],
        num_rows: 10782
    })
})

In [13]:
print(dataset['train'][1]['prompt'])

<|startoftext|><|im_start|>system
# Tools
You may call one or more functions to assist with the user query.
You are provided with function signatures within <tools></tools> XML tags:
<tools>
{"name": "web_search", "description": "Search the web", "parameters": {"type": "object", "properties": {"query": {"type": "string", "description": "Search query"}}, "required": ["query"]}, "return": {"type": "array", "items": {"type": "object"}, "description": "The list of items having url,title,description."}}
{"name": "think", "description": "Use the tool to think about something. It will not obtain new information or change the database, but just append the thought to the log. Use it when complex reasoning or some cache memory is needed.", "parameters": {"type": "object", "properties": {"thought": {"type": "string", "description": "A thought to think about."}}, "required": ["thought"]}}
{"name": "Quotes by Keywords", "description": "Returns a list of quotes containing the specified keyword.", "p

In [14]:
dataset['train'][0]['tools']

'[{"name": "newAddress", "description": "Generates a new Ethereum address that can be used to send or receive funds. Do not lose the password! We can\'t restore access to an address if you lose it.", "parameters": {"type": "dict", "properties": {"password": {"description": "The password for the new Ethereum address", "type": "string"}}, "required": ["password"]}, "required": null}, {"name": "Market Trends API", "description": "Get the latest market trends and relevant news for a specified country and language.", "parameters": {"type": "dict", "properties": {"trend_type": {"description": "Trend type.", "type": "string", "enum": ["MARKET_INDEXES", "MOST_ACTIVE", "GAINERS", "LOSERS", "CRYPTO", "CURRENCIES", "CLIMATE_LEADERS"]}, "country": {"description": "The country for which to get trends, specified as a 2-letter country code - see ISO 3166.", "type": "string", "default": "us"}, "language": {"description": "The language to use for the results, specified as a 2-letter language code - see

## Let's now pre fine-tune the model so it follows our custom GRPO formatting!

In [None]:
from trl import SFTTrainer, SFTConfig

train_dataset_sft = dataset['train'].select(range(250))
eval_dataset_sft = dataset['train'].select(range(250,750))

trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = train_dataset_sft,
    eval_dataset=eval_dataset_sft
    args = SFTConfig(
        dataset_text_field = "prompt",
        per_device_train_batch_size = 16,
        gradient_accumulation_steps = 4, # Use GA to mimic batch size!
        warmup_steps = 5,
        num_train_epochs = 2, # Set this for 1 full training run.
        learning_rate = 2e-5, # Reduce to 2e-5 for long training runs
        logging_steps = 5,
        # optim = "adamw_8bit",
        weight_decay = 0.01,
        lr_scheduler_type = "linear",
        seed = 3407,

        report_to = "tensorboard", # Use this for WandB etc
        max_steps=200

    ),
)
trainer.train()


Unsloth: Tokenizing ["prompt"] (num_proc=32):   0%|          | 0/1200 [00:00<?, ? examples/s]

Unsloth: Tokenizing ["prompt"] (num_proc=32): 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 1200/1200 [00:01<00:00, 751.69 examples/s] 
==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 1,200 | Num Epochs = 2 | Total steps = 150
O^O/ \_/ \    Batch size per device = 16 | Gradient accumulation steps = 1
\        /    Data Parallel GPUs = 1 | Total batch size (16 x 1 x 1) = 16
 "-____-"     Trainable parameters = 983,040 of 355,467,008 (0.28% trained)


Step,Training Loss
5,0.8052
10,0.8147
15,0.7814
20,0.7605
25,0.7519
30,0.7439
35,0.7259
40,0.7513
45,0.7038
50,0.7095


TrainOutput(global_step=150, training_loss=0.7256097173690796, metrics={'train_runtime': 207.6896, 'train_samples_per_second': 11.556, 'train_steps_per_second': 0.722, 'total_flos': 1.0807700146962432e+16, 'train_loss': 0.7256097173690796, 'epoch': 2.0})

In [38]:
text = tokenizer.apply_chat_template(
    [{"role":"user","content":"search for when is modi born? and who won ipl match"}],
    tools = list(base_tools.values()),
    tokenize = False,
    add_generation_prompt = True, # Must add for generation
)

from transformers import TextStreamer
_ = model.generate(
    **tokenizer(text, return_tensors = "pt").to("cuda"),
    temperature = 0.3,
    max_new_tokens = 2048,
    streamer = TextStreamer(tokenizer, skip_prompt = False),
)

<|startoftext|><|startoftext|><|im_start|>system
# Tools
You may call one or more functions to assist with the user query.
You are provided with function signatures within <tools></tools> XML tags:
<tools>
{"name": "web_search", "description": "Search the web", "parameters": {"type": "object", "properties": {"query": {"type": "string", "description": "Search query"}}, "required": ["query"]}, "return": {"type": "array", "items": {"type": "object"}, "description": "The list of items having url,title,description."}}
{"name": "think", "description": "Use the tool to think about something. It will not obtain new information or change the database, but just append the thought to the log. Use it when complex reasoning or some cache memory is needed.", "parameters": {"type": "object", "properties": {"thought": {"type": "string", "description": "A thought to think about."}}, "required": ["thought"]}}
</tools>
For each function call, output the function name and arguments within the following XM

In [40]:
#type(model) peft.peft_model.PeftModelForCausalLM
model.save_pretrained("lfm2-sft-tool")

In [7]:
from peft import PeftModel
from transformers import AutoModelForCausalLM

base_model = AutoModelForCausalLM.from_pretrained("LiquidAI/LFM2-350M")
sft_peft_model = PeftModel.from_pretrained(base_model, "lfm2-sft-tool").to('cuda')
text = tokenizer.apply_chat_template(
    [{"role":"user","content":"search for when is modi born? and who won ipl match"}],
    tools = list(base_tools.values()),
    tokenize = False,
    add_generation_prompt = True, # Must add for generation
)

from transformers import TextStreamer
_ = sft_peft_model.generate(
    **tokenizer(text, return_tensors = "pt").to("cuda"),
    temperature = 0.3,
    max_new_tokens = 2048,
    streamer = TextStreamer(tokenizer, skip_prompt = False),
)

<|startoftext|><|startoftext|><|im_start|>system
# Tools
You may call one or more functions to assist with the user query.
You are provided with function signatures within <tools></tools> XML tags:
<tools>
{"name": "web_search", "description": "Search the web", "parameters": {"type": "object", "properties": {"query": {"type": "string", "description": "Search query"}}, "required": ["query"]}, "return": {"type": "array", "items": {"type": "object"}, "description": "The list of items having url,title,description."}}
{"name": "think", "description": "Use the tool to think about something. It will not obtain new information or change the database, but just append the thought to the log. Use it when complex reasoning or some cache memory is needed.", "parameters": {"type": "object", "properties": {"thought": {"type": "string", "description": "A thought to think about."}}, "required": ["thought"]}}
</tools>
For each function call, output the function name and arguments within the following XM

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

<tool_call>web_search
<arg_key>searchQuery</arg_key>
<arg_value>When is Modi born?</arg_value>
<arg_key>arg_key</arg_key>
<arg_value>When is Modi born?</arg_value>
<arg_key>arg_key</arg_key>
<arg_value>Who won IPL match</arg_value>
</tool_call><|im_end|>
