In [1]:
from transformers import AutoProcessor, Gemma3ForConditionalGeneration
import torch

# Hugging Face Hub checkpoint shared by the model and its processor.
model_id = "google/gemma-3-12b-it"

# Load the checkpoint with device placement delegated to `device_map="auto"`,
# then switch the module to evaluation mode (this notebook only runs inference).
model = Gemma3ForConditionalGeneration.from_pretrained(model_id, device_map="auto")
model = model.eval()

# The matching processor is used later to apply the chat template, tokenize
# the conversation and decode generated tokens.
processor = AutoProcessor.from_pretrained(model_id)


  from .autonotebook import tqdm as notebook_tqdm
Loading checkpoint shards: 100%|██████████| 5/5 [00:17<00:00,  3.53s/it]
Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.48, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.


In [2]:
# Define the function schemas that are advertised to the model as callable tools.


def _function_tool(name, description, properties, required=None):
    """Build one tool entry of the form {"type": "function", "function": {...}}.

    Args:
        name: Function name the model should emit when calling the tool.
        description: Human-readable summary of what the function does.
        properties: Mapping of parameter name -> parameter schema dict.
        required: Optional list of mandatory parameter names. When omitted,
            the resulting schema carries no "required" key at all.

    Returns:
        The nested tool dictionary, with keys inserted in a fixed order so the
        printed representation is stable.
    """
    parameters = {"type": "object", "properties": properties}
    if required is not None:
        parameters["required"] = required
    return {
        "type": "function",
        "function": {
            "name": name,
            "description": description,
            "parameters": parameters,
        },
    }


# Weather lookup: `location` is mandatory, `unit` is optional.
tools_get_weather = _function_tool(
    "get_weather",
    "Get the current weather in a given location.",
    {
        "location": {
            "type": "string",
            "description": "Enter the 'city name' to get the weather. e.g. 'London'",
        },
        "unit": {
            "type": "string",
            "description": "Enter the unit of temperature. e.g. 'metric', 'imperial', 'standard'",
            "default": "metric",
        },
    },
    required=["location"],
)

# Location lookup: takes no parameters.
tools_get_location = _function_tool(
    "get_location",
    "Returns the current location based on the user's device information.",
    {},
)

tools = [tools_get_weather, tools_get_location]
print(tools)


[{'type': 'function', 'function': {'name': 'get_weather', 'description': 'Get the current weather in a given location.', 'parameters': {'type': 'object', 'properties': {'location': {'type': 'string', 'description': "Enter the 'city name' to get the weather. e.g. 'London'"}, 'unit': {'type': 'string', 'description': "Enter the unit of temperature. e.g. 'metric', 'imperial', 'standard'", 'default': 'metric'}}, 'required': ['location']}}}, {'type': 'function', 'function': {'name': 'get_location', 'description': "Returns the current location based on the user's device information.", 'parameters': {'type': 'object', 'properties': {}}}}]


In [3]:
import jinja2
from pydantic import BaseModel, RootModel
from typing import List

# One tool call: the function name plus a dict of arguments. This mirrors the
# {'name': ..., 'arguments': ...} entries requested in the system prompt.
# NOTE(review): comments are used instead of docstrings here because pydantic
# may copy a model docstring into the JSON schema printed below — confirm
# before converting these to docstrings.
class FunctionCall(BaseModel):
    name: str
    arguments: dict

# Root model whose payload is a list of FunctionCall entries; in this notebook
# it is only used to produce a JSON schema via `model_json_schema()`.
class FunctionCalls(RootModel[List[FunctionCall]]):
    pass

# Jinja2 template that flattens the `tools` list into a comma-separated
# sequence of {"name", "arguments", "description"} entries for the system
# prompt. For every tool it builds an `arguments` dict mapping each parameter
# name to its type, its description and a `required` flag; the flag is True
# when the parameter name appears in the schema's `required` list (a missing
# list is treated as empty). Tools without properties get an empty dict.
# NOTE(review): `arguments` is interpolated with `%s`, so it is rendered as a
# Python dict repr (single quotes, True/False) inside otherwise JSON-looking
# text, and a parameter's `default` value is not carried over — confirm this
# is the intended prompt format.
tools_template = jinja2.Template(
"""
{%- for tool in tools %}
    {%- set arguments = {} %}
    {%- if tool.function.parameters.properties %}
        {%- for key, value in tool.function.parameters.properties.items() %}
            {%- set required = tool.function.parameters.required | default([]) %}
            {%- set is_required = key in required %}
            {%- set _ = arguments.update({key: {"type": value.type, "description": value.description, "required": is_required}}) %}
        {%- endfor %}
    {%- endif %}

    {{- '{"name": "%s", "arguments": %s, "description": "%s"}' % (tool.function.name, arguments, tool.function.description) }}

    {%- if not loop.last %}
        {{- ', ' }}
    {%- endif %}
{%- endfor %}
"""
)

# Earlier system-prompt variant, disabled and kept for comparison:
# system_template = jinja2.Template(
# """
# Agentic model with function call capability.
# You can call one or more functions to support user queries.
# Do not make any assumptions about what values will be passed to the functions.
# Try to solve the problem through function calls rather than questions.
# The available tools are: {{ tools }}
# If you decide to perform a function call, respond in the format below:
# ```toolcall
# [
#     {'name': <function-name>, 'arguments': <args-dict>}
# ]
# ```
# """.strip()
# )

# Active system prompt. `{{ tools }}` is the only variable the template
# references; it is filled with the flattened tool descriptions. The prompt
# asks the model to answer from its own knowledge when it can, and otherwise
# to reply with a ```toolcall fenced list of {'name', 'arguments'} entries.
# `.strip()` drops the leading/trailing newline of the triple-quoted literal.
system_template = jinja2.Template(
"""
Agentic model with function call capability.
Do not explicitly state that you call tools or functions to a user.
If the response can be generated from your internal knowledge which is self-evident or does not change over time, do so.
The available tools are: {{ tools }}
If you decide to perform a function call, respond in the format below:
```toolcall
[
    {'name': <function-name>, 'arguments': <args-dict>}
]
```
""".strip()
)

# Flatten the tool schemas into the text that gets embedded in the system prompt.
tools_string = tools_template.render(tools=tools)
print(tools_string, end="\n\n")

# JSON schema describing a list of tool calls, printed for reference.
function_schema = FunctionCalls.model_json_schema()
print(function_schema, end="\n\n")

# Render the final system prompt. `pydantic_model` is still passed along, but
# the active system template only references `tools`, so it has no effect on
# the rendered text.
system_prompt = system_template.render(
    tools=tools_string,
    pydantic_model=function_schema,
)
print(system_prompt)

{"name": "get_weather", "arguments": {'location': {'type': 'string', 'description': "Enter the 'city name' to get the weather. e.g. 'London'", 'required': True}, 'unit': {'type': 'string', 'description': "Enter the unit of temperature. e.g. 'metric', 'imperial', 'standard'", 'required': False}}, "description": "Get the current weather in a given location."}, {"name": "get_location", "arguments": {}, "description": "Returns the current location based on the user's device information."}

{'$defs': {'FunctionCall': {'properties': {'name': {'title': 'Name', 'type': 'string'}, 'arguments': {'title': 'Arguments', 'type': 'object'}}, 'required': ['name', 'arguments'], 'title': 'FunctionCall', 'type': 'object'}}, 'items': {'$ref': '#/$defs/FunctionCall'}, 'title': 'FunctionCalls', 'type': 'array'}

Agentic model with function call capability.
Do not explicitly state that you call tools or functions to a user.
If the response can be generated from your internal knowledge which is self-evident o

In [5]:
# Conversation sent to the model: the rendered system prompt followed by one
# user turn. Exactly one "content" line of the user turn should be active; the
# commented-out alternatives are other test scenarios, labelled at the end of
# each line. For the multi-turn scenario, activate the "안녕?" line and also
# uncomment the block from `# },` down to the last user "content" line so the
# dict boundaries line up.
# The prompt printed by this cell shows the system text placed at the start
# of the first user turn.
messages = [
    {
        "role": "system",
        "content": [{"type": "text", "text": system_prompt}]
    },
    {
        "role": "user",
        # "content": [{"type": "text", "text": "Where am I?"}] # single (get_location)
        # "content": [{"type": "text", "text": "What's the weather like in Seoul right now?"}] # single (get_weather)
        # "content": [{"type": "text", "text": "What's the weather like in Seoul and London right now?"}] # parallel (get_weather)
        # "content": [{"type": "text", "text": "What's the weather like in my current location?"}] # nested (get_location -> get_weather)
        # "content": [{"type": "text", "text": "I want to know my current location and the current weather in Seattle, New York and London."}] # mixed parallel (x4 calls)
        "content": [{"type": "text", "text": "y = 3\n60 / (x + y) = 12\n이 식에서 x는 뭐야?"}] # irrelevance (expecting text response); Korean: "What is x in this equation?"
        # "content": [{"type": "text", "text": "안녕?"}] # multi-lingual multi-turn; Korean: "Hi?"
    # },
    # {
    #     "role": "assistant",
    #     "content": [{"type": "text", "text": "안녕하세요, 무엇을 도와드릴까요?"}]  # Korean: "Hello, how can I help you?"
    # },
    # {
    #     "role": "user",
    #     "content": [{"type": "text", "text": "날씨 어때?"}]  # Korean: "How's the weather?"
    }
]

# Seed used to make the sampled generation below reproducible across
# Restart & Run All.
GENERATION_SEED = 42

# Render the conversation as plain text to inspect the exact prompt the model
# will see. `return_dict` / `return_tensors` only describe tokenized output,
# so they are not passed together with `tokenize=False` (the tokenizer-level
# API rejects `return_dict=True` with `tokenize=False`).
prompt_text = processor.apply_chat_template(
    messages, add_generation_prompt=True, tokenize=False
)
print(prompt_text)

# Tokenize the same conversation into model inputs and move them to the
# model's device; the dtype cast is meant for floating-point tensors.
inputs = processor.apply_chat_template(
    messages, add_generation_prompt=True, tokenize=True,
    return_dict=True, return_tensors="pt"
).to(model.device, dtype=torch.bfloat16)

# Number of prompt tokens, used to slice the prompt off the generated sequence.
input_len = inputs["input_ids"].shape[-1]

# Sampling is enabled (`do_sample=True`), so fix the RNG state first;
# otherwise every run of this cell may produce a different answer.
torch.manual_seed(GENERATION_SEED)

with torch.inference_mode():
    generation = model.generate(**inputs, max_new_tokens=100, do_sample=True, temperature=1.0)
    # Keep only the newly generated tokens of the first (only) sequence.
    generation = generation[0][input_len:]

decoded = processor.decode(generation, skip_special_tokens=True)
print(decoded)

<bos><start_of_turn>user
Agentic model with function call capability.
Do not explicitly state that you call tools or functions to a user.
If the response can be generated from your internal knowledge which is self-evident or does not change over time, do so.
The available tools are: {"name": "get_weather", "arguments": {'location': {'type': 'string', 'description': "Enter the 'city name' to get the weather. e.g. 'London'", 'required': True}, 'unit': {'type': 'string', 'description': "Enter the unit of temperature. e.g. 'metric', 'imperial', 'standard'", 'required': False}}, "description": "Get the current weather in a given location."}, {"name": "get_location", "arguments": {}, "description": "Returns the current location based on the user's device information."}
If you decide to perform a function call, respond in the format below:
```toolcall
[
    {'name': <function-name>, 'arguments': <args-dict>}
]
```

y = 3
60 / (x + y) = 12
이 식에서 x는 뭐야?<end_of_turn>
<start_of_turn>model

x = 2
