In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

# Local checkpoint directory of the StarCoder2-7B base model. Defined once so
# the tokenizer and the model are always loaded from the same location.
MODEL_PATH = "/home/rickzhou/function_calling/models/starcoder2-7b"

tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_PATH,
    torch_dtype=torch.bfloat16,
    # `use_flash_attention_2=True` is deprecated in transformers; the attention
    # backend is selected through `attn_implementation` instead.
    attn_implementation="flash_attention_2",
    # Let accelerate decide on which device(s) the weights are placed.
    device_map="auto",
)

# Example conversation in the {"from", "value"} record format consumed by
# format_conversation: one system message that declares two tools inside
# <tools></tools> and describes the expected <tool_call> reply format,
# followed by a single human request.
prompt = [
    {
        "from": "system",
        "value": "You are a function calling AI model. You are provided with function signatures within <tools></tools> XML tags.You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions.Here are the available tools:<tools> [{'type': 'function', 'function': {'name': 'find_max_value', 'description': 'Find the maximum value in an array', 'parameters': {'type': 'object', 'properties': {'array': {'type': 'array', 'items': {'type': 'number'}, 'description': 'The array to search in'}}, 'required': ['array']}}}, {'type': 'function', 'function': {'name': 'search_restaurants', 'description': 'Search for restaurants based on location and cuisine', 'parameters': {'type': 'object', 'properties': {'location': {'type': 'string', 'description': 'The location of the restaurant'}, 'cuisine': {'type': 'string', 'description': 'The cuisine type of the restaurant'}, 'price_range': {'type': 'string', 'description': 'The price range of the restaurant'}}, 'required': ['location']}}}] </tools>Use the following pydantic model json schema for each tool call you will make: {'title': 'FunctionCall', 'type': 'object', 'properties': {'arguments': {'title': 'Arguments', 'type': 'object'}, 'name': {'title': 'Name', 'type': 'string'}}, 'required': ['arguments', 'name']}For each function call return a json object with function name and arguments within <tool_call></tool_call> XML tags as follows:\n<tool_call>\n{tool_call}\n</tool_call>",
    },
    {
        "from": "human",
        "value": "Hi, I have an array of numbers and I need to find the maximum value. The array is [5, 7, 2, 9, 1, 6].",
    },
]


# ChatML role tag emitted for each supported value of a message's "from" key.
_CHATML_ROLES = {"system": "system", "human": "user", "gpt": "assistant"}


def format_conversation(example, add_generation_prompt=False):
    """Format a conversation using the ChatML template.

    Args:
        example: Iterable of message dicts. Each message needs a "from" key
            ("system", "human" or "gpt") and a "value" key holding the text.
            Entries that are not dicts, lack either key, or carry any other
            role are skipped.
        add_generation_prompt: When True, append an open assistant header
            ("<|im_start|>assistant\\n") after the last turn so a model
            continues with the assistant reply. Defaults to False, which
            reproduces the plain concatenation of the turns.

    Returns:
        The ChatML-formatted string. Returns "" when `example` is not
        iterable (for instance None).
    """
    try:
        messages = iter(example)
    except TypeError:
        # Best-effort contract for unusable input: yield an empty string.
        # Any other error is allowed to propagate instead of being hidden
        # behind a blanket `except Exception`.
        return ""

    turns = []
    for message in messages:
        if not isinstance(message, dict):
            continue
        if "from" not in message or "value" not in message:
            continue

        role = message["from"]
        # Guard the lookup so an unhashable role is skipped rather than raising.
        tag = _CHATML_ROLES.get(role) if isinstance(role, str) else None
        if tag is None:
            continue
        turns.append(f"<|im_start|>{tag}\n{message['value']}<|im_end|>\n")

    if add_generation_prompt:
        turns.append("<|im_start|>assistant\n")
    # Join once at the end instead of growing a string inside the loop.
    return "".join(turns)


model.eval()

# Sampling is enabled below; seed the RNG so the completion is reproducible
# when the cell is re-run.
torch.manual_seed(0)

with torch.no_grad():
    # End the prompt with an open assistant header so the model continues with
    # the assistant turn instead of having to emit the header itself.
    prompt_formatted = format_conversation(prompt) + "<|im_start|>assistant\n"
    print(prompt_formatted)

    # The tokenizer returns CPU tensors; move them to the model's device
    # before calling generate.
    inputs = tokenizer(prompt_formatted, return_tensors="pt").to(model.device)
    prompt_len = inputs["input_ids"].shape[-1]

    outputs = model.generate(
        **inputs,
        # `max_length` counts the prompt tokens as well, so a long prompt
        # silently shrinks the reply budget; bound only the new tokens.
        max_new_tokens=512,
        temperature=0.1,
        do_sample=True,
        stop_strings=["<|im_end|>"],
        tokenizer=tokenizer,
        # Pass the pad token explicitly rather than relying on the implicit
        # fallback that triggers a warning on every call.
        pad_token_id=tokenizer.eos_token_id,
    )

    # Keep only the newly generated tokens (drop the echoed prompt).
    response = tokenizer.decode(outputs[0, prompt_len:], skip_special_tokens=True)
    # Generation halts after the stop string has been produced, so cut it
    # (and anything after it) from the displayed reply.
    response = response.split("<|im_end|>", 1)[0]
    print(response)

Loading checkpoint shards: 100%|██████████| 3/3 [00:07<00:00,  2.49s/it]
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


<|im_start|>system
You are a function calling AI model. You are provided with function signatures within <tools></tools> XML tags.You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions.Here are the available tools:<tools> [{'type': 'function', 'function': {'name': 'find_max_value', 'description': 'Find the maximum value in an array', 'parameters': {'type': 'object', 'properties': {'array': {'type': 'array', 'items': {'type': 'number'}, 'description': 'The array to search in'}}, 'required': ['array']}}}, {'type': 'function', 'function': {'name': 'search_restaurants', 'description': 'Search for restaurants based on location and cuisine', 'parameters': {'type': 'object', 'properties': {'location': {'type': 'string', 'description': 'The location of the restaurant'}, 'cuisine': {'type': 'string', 'description': 'The cuisine type of the restaurant'}, 'price_range': {'type': 'string', 'description': 'The price range of

In [15]:
from peft import PeftModel

# Directory holding the fine-tuned function-calling adapter weights.
ADAPTER_PATH = "/home/rickzhou/function_calling/sft/starcoder-function-calling"

# NOTE(review): PeftModel.from_pretrained wraps the already-loaded `model`;
# for LoRA-style adapters the adapter layers are injected into that same
# module, so re-running the base-model cell afterwards may no longer measure
# the un-tuned model. Confirm, and reload the base model if a clean baseline
# is needed.
peft_model = PeftModel.from_pretrained(model, ADAPTER_PATH)

peft_model.eval()

# Sampling is enabled below; seed the RNG so the completion is reproducible
# when the cell is re-run.
torch.manual_seed(0)

with torch.no_grad():
    # End the prompt with an open assistant header so the model continues with
    # the assistant turn instead of having to emit the header itself.
    prompt_formatted = format_conversation(prompt) + "<|im_start|>assistant\n"
    print(prompt_formatted)

    # The tokenizer returns CPU tensors; move them to the model's device
    # before calling generate.
    inputs = tokenizer(prompt_formatted, return_tensors="pt").to(peft_model.device)
    prompt_len = inputs["input_ids"].shape[-1]

    outputs = peft_model.generate(
        **inputs,
        # `max_length` counts the prompt tokens as well, so a long prompt
        # silently shrinks the reply budget; bound only the new tokens.
        max_new_tokens=512,
        temperature=0.1,
        do_sample=True,
        stop_strings=["<|im_end|>"],
        tokenizer=tokenizer,
        # Pass the pad token explicitly rather than relying on the implicit
        # fallback that triggers a warning on every call.
        pad_token_id=tokenizer.eos_token_id,
    )

    # Keep only the newly generated tokens (drop the echoed prompt).
    response = tokenizer.decode(outputs[0, prompt_len:], skip_special_tokens=True)
    # Generation halts after the stop string has been produced, so cut it
    # (and anything after it) from the displayed reply.
    response = response.split("<|im_end|>", 1)[0]
    print(response)

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


<|im_start|>system
You are a function calling AI model. You are provided with function signatures within <tools></tools> XML tags.You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions.Here are the available tools:<tools> [{'type': 'function', 'function': {'name': 'find_max_value', 'description': 'Find the maximum value in an array', 'parameters': {'type': 'object', 'properties': {'array': {'type': 'array', 'items': {'type': 'number'}, 'description': 'The array to search in'}}, 'required': ['array']}}}, {'type': 'function', 'function': {'name': 'search_restaurants', 'description': 'Search for restaurants based on location and cuisine', 'parameters': {'type': 'object', 'properties': {'location': {'type': 'string', 'description': 'The location of the restaurant'}, 'cuisine': {'type': 'string', 'description': 'The cuisine type of the restaurant'}, 'price_range': {'type': 'string', 'description': 'The price range of