# Custom Chat Templates with SambaNova API

This notebook walks through the complete workflow of how modern chat models format, send, and interpret conversations. From Jinja chat templates to Completions API invocation and tool-call parsing.

## Setup

In [None]:
import os
import sys
import json
import re
import uuid
import requests
from pydantic import BaseModel
from datetime import datetime
from pprint import pprint
from dotenv import load_dotenv
from sambanova import SambaNova
from transformers import AutoTokenizer
from jinja2 import Template, Environment, TemplateSyntaxError

current_dir = os.getcwd()
kit_dir =  os.path.abspath(os.path.join(current_dir, ".."))
repo_dir = os.path.abspath(os.path.join(kit_dir, ".."))

sys.path.append(repo_dir)

from dotenv import load_dotenv

load_dotenv(os.path.join(repo_dir, '.env'), override=True)

load_dotenv()

  from .autonotebook import tqdm as notebook_tqdm
None of PyTorch, TensorFlow >= 2.0, or Flax have been found. Models won't be available and only tokenizers, configuration and file/data utilities can be used.


True

## Get Chat Template

Load the tokenizer’s built-in chat template from a Hugging Face model or define a custom Jinja template manually.

### From HugginFace model

In [2]:
def load_tokenizer(model_name: str, hf_token: str | None = None, cache_dir: str = "../data"):
    os.makedirs(cache_dir, exist_ok=True)
    tok = AutoTokenizer.from_pretrained(model_name, use_auth_token=hf_token, cache_dir=cache_dir)
    print(f"Tokenizer loaded: {model_name}")
    return tok

def get_chat_template(tokenizer):
    tpl = getattr(tokenizer, "chat_template", None)
    if not tpl:
        raise ValueError("Model has no chat_template")
    print("chat template retrieved")
    return tpl

In [3]:
hf_model = "meta-llama/Llama-3.1-8B-Instruct"  # example model
hf_tokenizer = load_tokenizer(hf_model, os.getenv("HUGGINGFACE_TOKEN"))
hf_chat_template = get_chat_template(hf_tokenizer)

print("\n--- Chat template (truncated) ---")
print(hf_chat_template)



Tokenizer loaded: meta-llama/Llama-3.1-8B-Instruct
chat template retrieved

--- Chat template (truncated) ---
{{- bos_token }}
{%- if custom_tools is defined %}
    {%- set tools = custom_tools %}
{%- endif %}
{%- if not tools_in_user_message is defined %}
    {%- set tools_in_user_message = true %}
{%- endif %}
{%- if not date_string is defined %}
    {%- set date_string = "26 Jul 2024" %}
{%- endif %}
{%- if not tools is defined %}
    {%- set tools = none %}
{%- endif %}

{#- This block extracts the system message, so we can slot it into the right place. #}
{%- if messages[0]['role'] == 'system' %}
    {%- set system_message = messages[0]['content']|trim %}
    {%- set messages = messages[1:] %}
{%- else %}
    {%- set system_message = "" %}
{%- endif %}

{#- System message + builtin tools #}
{{- "<|start_header_id|>system<|end_header_id|>\n\n" }}
{%- if builtin_tools is defined or tools is not none %}
    {{- "Environment: ipython\n" }}
{%- endif %}
{%- if builtin_tools is defined 

### Create Custom Jinja template

In [4]:
def validate_jinja(template_str: str):
    try:
        Environment().parse(template_str)
        print("Jinja template valid")
        return True
    except TemplateSyntaxError as e:
        raise ValueError(f"Invalid Jinja syntax at line {e.lineno}: {e.message}")

In [5]:
# custom deepseek v3 chat template with tool input and expected XML output format
custom_chat_template = """
{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}
{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='', is_first_sp=true) %}

{# Collect system messages #}
{%- for message in messages %}
    {%- if message['role'] == 'system' %}
        {%- if ns.is_first_sp %}
            {% set ns.system_prompt = ns.system_prompt + message['content'] %}
            {% set ns.is_first_sp = false %}
        {%- else %}
            {% set ns.system_prompt = ns.system_prompt + '\\n\\n' + message['content'] %}
        {%- endif %}
    {%- endif %}
{%- endfor %}

{%- if not date_string is defined %}
    {%- set date_string = "26 Jul 2024" %}
{%- endif %}

{{ bos_token }}{{ ns.system_prompt }}{{- "\nToday Date: " + date_string + "\n\n" }}

{# Iterate through user/assistant/tool messages #}
{%- for message in messages %}
    {%- if message['role'] == 'user' %}
        {% set ns.is_tool = false -%}
        {{ '<｜User｜>' + message['content'] }}
    {%- endif %}

    {%- if message['role'] == 'assistant' and message['content'] is none %}
        {% set ns.is_tool = false -%}
        {%- for tool in message['tool_calls'] %}
            {%- if not ns.is_first %}
                {{ '<｜Assistant｜><｜tool▁calls▁begin｜><｜tool▁call▁begin｜>' + tool['type'] + '<｜tool▁sep｜>' + tool['function']['name'] + '\\n```json\\n' + tool['function']['arguments'] + '\\n```<｜tool▁call▁end｜>' }}
                {% set ns.is_first = true -%}
            {%- else %}
                {{ '\\n<｜tool▁call▁begin｜>' + tool['type'] + '<｜tool▁sep｜>' + tool['function']['name'] + '\\n```json\\n' + tool['function']['arguments'] + '\\n```<｜tool▁call▁end｜>' }}
                {{ '<｜tool▁calls▁end｜><｜end▁of▁sentence｜>' }}
            {%- endif %}
        {%- endfor %}
    {%- endif %}

    {%- if message['role'] == 'assistant' and message['content'] is not none %}
        {%- if ns.is_tool %}
            {{ '<｜tool▁outputs▁end｜>' + message['content'] + '<｜end▁of▁sentence｜>' }}
            {% set ns.is_tool = false -%}
        {%- else %}
            {{ '<｜Assistant｜>' + message['content'] + '<｜end▁of▁sentence｜>' }}
        {%- endif %}
    {%- endif %}

    {%- if message['role'] == 'tool' %}
        {% set ns.is_tool = true -%}
        {%- if ns.is_output_first %}
            {{ '<｜tool▁outputs▁begin｜><｜tool▁output▁begin｜>' + message['content'] + '<｜tool▁output▁end｜>' }}
            {% set ns.is_output_first = false %}
        {%- else %}
            {{ '\\n<｜tool▁output▁begin｜>' + message['content'] + '<｜tool▁output▁end｜>' }}
        {%- endif %}
    {%- endif %}
{%- endfor %}

{% if ns.is_tool %}{{ '<｜tool▁outputs▁end｜>' }}{% endif %}
{% if add_generation_prompt and not ns.is_tool %}{{ '<｜Assistant｜>' }}{% endif %}

{# ---- Append tool metadata if tools exist ---- #}
{% if tools and tools | length > 0 %}
<｜system▁tools｜>
Below are available tools for this task. Use them when relevant, wrapping each call in <｜tool▁call▁begin｜> ... <｜tool▁call▁end｜> XML markers, ensure using exact tool and parameter names.

{%- for t in tools %}
    {{- t | tojson(indent=4) }}
    {{- "\n\n" }}
{%- endfor %}

Example tool usage output:
<｜tool▁calls▁begin｜>
<｜tool▁call▁begin｜>tool_1_name<｜tool▁sep｜>{"tool_1_name_param_1_name": "tool_1_name_param_0_value"}<｜tool▁call▁end｜>
<｜tool▁call▁begin｜>tool_name_2<｜tool▁sep｜>{"tool_2_name_param_1_name": "tool_2_name_param_1_value", "tool_2_name_param_2_name"": "tool_2_name_param_2_value"}<｜tool▁call▁end｜>
<｜tool▁calls▁end｜>
{% endif %}
"""
validate_jinja(custom_chat_template)
print("\n--- Custom chat template ---")
print(custom_chat_template)

Jinja template valid

--- Custom chat template ---

{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}
{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='', is_first_sp=true) %}

{# Collect system messages #}
{%- for message in messages %}
    {%- if message['role'] == 'system' %}
        {%- if ns.is_first_sp %}
            {% set ns.system_prompt = ns.system_prompt + message['content'] %}
            {% set ns.is_first_sp = false %}
        {%- else %}
            {% set ns.system_prompt = ns.system_prompt + '\n\n' + message['content'] %}
        {%- endif %}
    {%- endif %}
{%- endfor %}

{%- if not date_string is defined %}
    {%- set date_string = "26 Jul 2024" %}
{%- endif %}

{{ bos_token }}{{ ns.system_prompt }}{{- "
Today Date: " + date_string + "

" }}

{# Iterate through user/assistant/tool messages #}
{%- for message in messages %}
    {%- if message['role'] == 'user' %}
        {% set ns.

## Apply chat template

When a Jinja chat template is rendered, variables like `messages`, `tools`, or `add_generation_prompt` are passed in a dictionary called the *context*.  
It can also include tokens or special variables from the tokenizer (like `bos_token`, `eos_token`, etc.).  
The template then uses this context to produce the final text string sent to the model.

In [6]:

def extract_tokenizer_context(tokenizer):
    """
    Collect all simple (JSON-serializable) attributes from the tokenizer
    that may be referenced by the Jinja chat template.
    """
    context = {}
    for key, value in vars(tokenizer).items():
        # Skip private/internal and complex objects
        if key.startswith("_"):
            continue
        if isinstance(value, (str, int, float, list, dict, tuple, type(None))):
            context[key] = value
    return context


def apply_chat_template(
    template_str: str, 
    messages: list, 
    tools: list | None = None,
    add_generation_prompt: bool = True, 
    tokenizer=None,
    extra_context: dict | None = None
    ) -> str:
    """Render a Jinja2 chat template with messages and optional tools."""
    context = {
        "messages": messages,
        "add_generation_prompt": add_generation_prompt,
    }
    if tools is not None:
        context["tools"] = tools
    if tokenizer is not None:
        context.update(extract_tokenizer_context(tokenizer))
    if extra_context:
        context.update(extra_context)

    try:
        template = Template(template_str)
        rendered = template.render(**context)
        return rendered.strip()
    except Exception as e:
        error_msg = f"Error rendering custom Jinja template: {e}"
        raise ValueError(error_msg)

In [7]:
# Example messages and tools
messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "What’s the weather in Paris?"}
]

tools = [
    {
        "type": "function",
        "function": {
            "name": "get_weather",
            "description": "Retrieve weather data for a given city",
            "parameters": {"city": "Paris"}
        }
    }
]

In [8]:
#apply hf_chat_template 
rendered_prompt_hf_template = apply_chat_template(
    hf_chat_template,
    messages,
    tools=tools,
    add_generation_prompt=True,
    tokenizer=hf_tokenizer
)

print("\n--- Rendered prompt ---\n")
print(rendered_prompt_hf_template)


--- Rendered prompt ---

<|start_header_id|>system<|end_header_id|>

Environment: ipython
Cutting Knowledge Date: December 2023
Today Date: 26 Jul 2024

You are a helpful assistant.<|eot_id|><|start_header_id|>user<|end_header_id|>

Given the following functions, please respond with a JSON for a function call with its proper arguments that best answers the given prompt.

Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.Do not use variables.

{
    "function": {
        "description": "Retrieve weather data for a given city",
        "name": "get_weather",
        "parameters": {
            "city": "Paris"
        }
    },
    "type": "function"
}

What’s the weather in Paris?<|eot_id|><|start_header_id|>assistant<|end_header_id|>


In [9]:
#apply custom_chat_template (custom DeepSeekV3))
rendered_prompt_custom_template = apply_chat_template(
    custom_chat_template,
    messages,
    tools=tools,
    add_generation_prompt=True,
    extra_context={
        "bos_token": "<｜begin▁of▁sentence｜>",
        "date_string": f'{datetime.now().strftime('%d %b %Y')}'
    }
)

print("\n--- Rendered prompt ---\n")
print(rendered_prompt_custom_template)


--- Rendered prompt ---

<｜begin▁of▁sentence｜>You are a helpful assistant.
Today Date: 10 Nov 2025




        <｜User｜>What’s the weather in Paris?


<｜Assistant｜>



<｜system▁tools｜>
Below are available tools for this task. Use them when relevant, wrapping each call in <｜tool▁call▁begin｜> ... <｜tool▁call▁end｜> XML markers, ensure using exact tool and parameter names.{
    "function": {
        "description": "Retrieve weather data for a given city",
        "name": "get_weather",
        "parameters": {
            "city": "Paris"
        }
    },
    "type": "function"
}



Example tool usage output:
<｜tool▁calls▁begin｜>
<｜tool▁call▁begin｜>tool_1_name<｜tool▁sep｜>{"tool_1_name_param_1_name": "tool_1_name_param_0_value"}<｜tool▁call▁end｜>
<｜tool▁call▁begin｜>tool_name_2<｜tool▁sep｜>{"tool_2_name_param_1_name": "tool_2_name_param_1_value", "tool_2_name_param_2_name"": "tool_2_name_param_2_value"}<｜tool▁call▁end｜>
<｜tool▁calls▁end｜>


## Invoke completions API

In [10]:
# Send the rendered prompt to SambaNova Completions API using the SambaNova SDK.
base_url = "https://api.sambanova.ai/v1" 
api_key = os.getenv("SAMBANOVA_API_KEY")
client = SambaNova(api_key=api_key, base_url=base_url)

In [11]:
# call llama model with hf_template generated prompt
response = client.completions.create(
    model="Meta-Llama-3.1-8B-Instruct",
    prompt=rendered_prompt_hf_template,
    max_tokens=2048,
    temperature=0.0,
    stream=False
)
raw_output_hf_template = response.choices[0].text
print(raw_output_hf_template)



{"name": "get_weather", "parameters": {"city": "Paris"}}


In [12]:
# call deepseek model with custom_template generated prompt
response = client.completions.create(
    model="DeepSeek-V3.1",
    prompt=rendered_prompt_custom_template,
    max_tokens=2048,
    temperature=0.0,
    stream=False
)
raw_output_custom_template = response.choices[0].text
print(raw_output_custom_template)

<｜tool▁calls▁begin｜><｜tool▁call▁begin｜>get_weather<｜tool▁sep｜>{"city": "Paris"}<｜tool▁call▁end｜><｜tool▁calls▁end｜>



## Parse Model Output


In [13]:

# Generate random ID for  invoked tool  
def generate_random_id(length=18):
    return "call_" + str(uuid.uuid4()).replace('-', '')[:length]

# Basic Pydantic model for verifying function calling format 
class ToolCallModel(BaseModel):
    """Schema validator for a tool-call output."""
    name: str
    arguments: dict
    
# Use a the Pydantic model to verify the format
def instantiate_function_calling_model(model_name: str, parameters: dict):
    """
    Validate and format a function-call into OpenAI tool-call schema.
    """
    ToolCallModel(name=model_name, arguments=parameters)
    return {
        "id": generate_random_id(),
        "type": "function",
        "function": {"name": model_name, "arguments": json.dumps(parameters)},
    }

# parse to messages chat structure
def parse_to_message(response: str, tool_calls: str):
    if tool_calls:
        return {"role": "assistant", "content": None, "tool_calls": tool_calls}
    return {"role": "assistant", "content": response.strip(), "tool_calls": []}


### Parse JSON tool calls (default llama-3.1-8B hf chat template )

In [14]:
# Llama3.1 8B's chat template instructs the model to use JSON tool call format.

def extract_llama_json_strings(response: str):
    """Extract every top-level {...} JSON object from text."""
    fc_strings, brace_count, start = [], 0, None
    for i, ch in enumerate(response):
        if ch == "{":
            if brace_count == 0:
                start = i
            brace_count += 1
        elif ch == "}" and start is not None:
            brace_count -= 1
            if brace_count == 0:
                fc_strings.append(response[start : i + 1])
                start = None
    return fc_strings

def llama3_parser(response: str):
    """Parse JSON-style tool calls (Llama3)."""
    calls = []
    for js in extract_llama_json_strings(response):
        try:
            obj = json.loads(js)
            name, params = obj["name"], obj["parameters"]
            calls.append(instantiate_function_calling_model(name, params))
        except Exception as e:
            error_msg = f"Invalid tool call in block {js[:120]}...: {e}"
            raise ValueError(error_msg)
    return calls

In [15]:
llama_response = raw_output_hf_template
llama_tools = llama3_parser(llama_response)

pprint(parse_to_message(llama_response, llama_tools))


{'content': None,
 'role': 'assistant',
 'tool_calls': [{'function': {'arguments': '{"city": "Paris"}',
                              'name': 'get_weather'},
                 'id': 'call_25aae1eb0404424699',
                 'type': 'function'}]}


### Parse XML tool calls (custom deepseek chat template )

In [16]:
# DeepSeek V3.1 uses a XML style format for tool calls with special tokens as markers
def extract_deepseek_v3_xml_strings(response: str):
    """Extract tool-call pairs from DeepSeek XML markers."""
    fc_strings = []
    pattern = r"<｜tool▁call▁begin｜>(.*?)<｜tool▁sep｜>(.*?)<｜tool▁call▁end｜>"
    for match in re.findall(pattern, response, re.DOTALL):
        name, args = match
        try:
            args = json.loads(args)
            fc_strings.append({"name": name.strip(), "parameters": args})
        except Exception as e:
            error_msg = (
                f"Invalid JSON in DeepSeek tool-call arguments for '{name.strip()}': {e.msg}"
            )
            raise ValueError(error_msg)
    return fc_strings

def deepseek_v3_parser(response: str):
    """Parse DeepSeek XML-style tool calls into OpenAI format."""
    calls = []
    for fc in extract_deepseek_v3_xml_strings(response):
        calls.append(instantiate_function_calling_model(fc["name"], fc["parameters"]))
    return calls

In [17]:
deepseek_response = raw_output_custom_template
deepseek_tools = deepseek_v3_parser(deepseek_response)

pprint(parse_to_message(deepseek_response, deepseek_tools))


{'content': None,
 'role': 'assistant',
 'tool_calls': [{'function': {'arguments': '{"city": "Paris"}',
                              'name': 'get_weather'},
                 'id': 'call_d99d4bae428b478dad',
                 'type': 'function'}]}


## End to End Workflow

In [19]:
# Example full roundtrip (Llama JSON-based chat template)

print("=== Llama 3.1 Full workflow ===")

# Step 1: Apply chat template
rendered_llama_prompt = apply_chat_template(
    hf_chat_template,
    messages,
    tools=tools,
    add_generation_prompt=True,
    tokenizer=hf_tokenizer
)
print("\n--- Rendered Prompt ---\n")
print(rendered_llama_prompt[:600] + "..." if len(rendered_llama_prompt) > 600 else rendered_llama_prompt)

# Step 2: Invoke model
response = client.completions.create(
    model="Meta-Llama-3.1-8B-Instruct",
    prompt=rendered_llama_prompt,
    max_tokens=2048,
    temperature=0.0,
)
raw_text = response.choices[0].text
print("\n--- Raw Model Output ---\n")
print(raw_text.strip())

# Step 3: Parse output
parsed_calls = llama3_parser(raw_text)
final_message = parse_to_message(raw_text, parsed_calls)

print("\n--- Parsed Assistant Message ---\n")
pprint(final_message)


=== Llama 3.1 Full workflow ===

--- Rendered Prompt ---

<|start_header_id|>system<|end_header_id|>

Environment: ipython
Cutting Knowledge Date: December 2023
Today Date: 26 Jul 2024

You are a helpful assistant.<|eot_id|><|start_header_id|>user<|end_header_id|>

Given the following functions, please respond with a JSON for a function call with its proper arguments that best answers the given prompt.

Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.Do not use variables.

{
    "function": {
        "description": "Retrieve weather data for a given city",
        "name": "get_weather",
        "...

--- Raw Model Output ---

{"name": "get_weather", "parameters": {"city": "Paris"}}

--- Parsed Assistant Message ---

{'content': None,
 'role': 'assistant',
 'tool_calls': [{'function': {'arguments': '{"city": "Paris"}',
                              'name': 'get_weather'},
                 'id': 'call_adadcf6579804b279a',
           

In [20]:
# Example full roundtrip (custom Deepseek XML-based chat template)

print("=== DeepSeek V3 Full workflow ===")

# Step 1: Apply custom chat template
rendered_deepseek_prompt = apply_chat_template(
    custom_chat_template,
    messages,
    tools=tools,
    add_generation_prompt=True,
    extra_context={
        "bos_token": "<｜begin▁of▁sentence｜>",
        "date_string": f'{datetime.now().strftime("%d %b %Y")}'
    }
)
print("\n--- Rendered Prompt ---\n")
print(rendered_deepseek_prompt[:600] + "..." if len(rendered_deepseek_prompt) > 600 else rendered_deepseek_prompt)

# Step 2: Invoke model
response = client.completions.create(
    model="DeepSeek-V3.1",
    prompt=rendered_deepseek_prompt,
    max_tokens=2048,
    temperature=0.0,
)
raw_text = response.choices[0].text
print("\n--- Raw Model Output ---\n")
print(raw_text.strip())

# Step 3: Parse output
parsed_calls = deepseek_v3_parser(raw_text)
final_message = parse_to_message(raw_text, parsed_calls)

print("\n--- Parsed Assistant Message ---\n")
pprint(final_message)


=== DeepSeek V3 Full workflow ===

--- Rendered Prompt ---

<｜begin▁of▁sentence｜>You are a helpful assistant.
Today Date: 10 Nov 2025




        <｜User｜>What’s the weather in Paris?


<｜Assistant｜>



<｜system▁tools｜>
Below are available tools for this task. Use them when relevant, wrapping each call in <｜tool▁call▁begin｜> ... <｜tool▁call▁end｜> XML markers, ensure using exact tool and parameter names.{
    "function": {
        "description": "Retrieve weather data for a given city",
        "name": "get_weather",
        "parameters": {
            "city": "Paris"
        }
    },
    "type": "function"
}



Example tool usage output:
<｜tool▁calls▁be...

--- Raw Model Output ---

<｜tool▁calls▁begin｜><｜tool▁call▁begin｜>get_weather<｜tool▁sep｜>{"city": "Paris"}<｜tool▁call▁end｜><｜tool▁calls▁end｜>

--- Parsed Assistant Message ---

{'content': None,
 'role': 'assistant',
 'tool_calls': [{'function': {'arguments': '{"city": "Paris"}',
                              'name': 'get_weather'},
 