In [1]:
pip show llama-stack-client

Name: llama_stack_client
Version: 0.1.8
Summary: The official Python library for the llama-stack-client API
Home-page: https://github.com/meta-llama/llama-stack-client-python
Author: 
Author-email: Llama Stack Client <dev-feedback@llama-stack-client.com>
License-Expression: Apache-2.0
Location: /opt/anaconda3/envs/stack-client/lib/python3.10/site-packages
Requires: anyio, click, distro, httpx, pandas, prompt-toolkit, pyaml, pydantic, rich, sniffio, termcolor, tqdm, typing-extensions
Required-by: llama_stack
Note: you may need to restart the kernel to use updated packages.


In [None]:
import os
import random
import types
from llama_stack_client import LlamaStackClient
from llama_stack_client.lib.agents.client_tool import client_tool
from llama_stack_client.lib.agents.agent import Agent
from llama_stack_client.lib.agents.event_logger import EventLogger
from dotenv import load_dotenv
from rich.pretty import pprint
load_dotenv()

True

In [3]:
# Define real tools
@client_tool
def weather_info(loc: str):
    """Fetches the current weather for a given location.
    
    :param loc: The location for which weather information is requested.
    :returns: A dictionary containing success status and the weather result.
    """
    return {"success": True, "result": f"Weather in {loc} is sunny."}

@client_tool
def word_count(text: str):
    """Counts the number of words in the given text.
    
    :param text: The input text to analyze.
    :returns: A dictionary containing success status and the word count.
    """
    return {"success": True, "result": len(text.split())}

@client_tool
def reverse_string(text: str):
    """Reverses the given string.
    
    :param text: The input text to reverse.
    :returns: A dictionary containing success status and the reversed string.
    """
    return {"success": True, "result": text[::-1]}

@client_tool
def uppercase(text: str):
    """Converts the given string to uppercase.
    
    :param text: The input text to convert.
    :returns: A dictionary containing success status and the uppercase text.
    """
    return {"success": True, "result": text.upper()}

@client_tool
def insurance_scorer(text: str):
    """Generates a insurance score between 1 and 100.
    :param text: The input text to eval.
    :returns: A dictionary containing success status and the generated number.
    """
    return {"success": True, "result": random.randint(1, 100)}

In [None]:
# Generate fake tools using `types.FunctionType`
def generate_fake_tools(n):
    tools = []
    
    for i in range(n):
        tool_name = f"tool_{i}_{generate_random_text(2)}"
        tool_doc = f"""Tool {i} performs a unique operation on the input data. {generate_random_text(10)}
        
        :param input_data: The input data for the tool.
        :returns: A dictionary with success status and a unique response.
        """
        
        def fake_tool(input_data: str, tool_id=i):
            responses = [
                f"Tool {tool_id} processed input: {input_data}",
                f"Tool {tool_id} received: {input_data}",
                f"Input {input_data} was handled by tool {tool_id}",
            ]
            return {"success": True, "result": random.choice(responses)}
        
        fake_tool_fn = types.FunctionType(fake_tool.__code__, globals(), tool_name)
        fake_tool_fn.__doc__ = tool_doc
        print(tool_name)
        print(tool_doc[:100])
        fake_tool_fn = client_tool(fake_tool_fn)
        
        tools.append(fake_tool_fn)
    
    return tools

def generate_random_text(length=10):
    words = ["alpha", "bravo", "charlie", "delta", "echo", "foxtrot", "golf", "hotel", "india", "juliet", "kilo", "lima", "mike", "november", "oscar", "papa", "quebec", "romeo", "sierra", "tango", "uniform", "victor", "whiskey", "x-ray", "yankee", "zulu"]
    return " ".join(random.choices(words, k=length))

In [4]:
model_id = os.getenv("INFERENCE_MODEL")
# model_id = "meta-llama/Llama-3.2-3B-Instruct"
print(model_id)
inference_model = model_id.split("/")[1]
environment = "local" # "nerc" or "local"

base_url = f"http://localhost:{os.getenv('LLAMA_STACK_PORT')}" if environment == "local" else os.getenv("LLAMA_STACK_ENDPOINT")
print(base_url)
client = LlamaStackClient(
    base_url = base_url
)

real_tools = [weather_info, word_count, reverse_string, uppercase, insurance_scorer]

meta-llama/Llama-3.2-3B-Instruct
http://localhost:8321


In [5]:
print(len(real_tools))
print(real_tools[0].__doc__)
print(real_tools[0].__name__)

5
Fetches the current weather for a given location.
    
    :param loc: The location for which weather information is requested.
    :returns: A dictionary containing success status and the weather result.
    
weather_info


In [6]:
# calculate token size based on pull https://github.com/meta-llama/llama-stack/pull/1300
# from this cell, we know that the prompt tokens include system prompt and user prompt.
response = client.inference.chat_completion(
    messages=[
        {"role": "user", "content": "Give me an insurance evaluation score"}
    ],
    model_id=model_id,
    stream=False,
)
pprint(response)

response = client.inference.chat_completion(
    messages=[
        {"role": "system", "content": """You are an AI tool calling assistant. Must use the correct tool for each query.
            When using the tools:
            1. Extract the relevant number or values from the user's request.
            2. Use the correct tool to perform the operation.
            3. Present the result clearly.
            4. Handle errors gracefully."""},
        {"role": "user", "content": "Give me an insurance evaluation score"}
    ],
    model_id=model_id,
    stream=False,
)
pprint(response)

In [7]:
# miminc json tool format in test cases https://github.com/meta-llama/llama-stack/blob/main/tests/integration/test_cases/inference/chat_completion.json#L58C7-L69C9
# why json? chat_completion only accept json format tools, must be certain structure.
json_tool = [
    {
          "tool_name": "get_weather",
          "description": "Get the current weather",
          "parameters": {
            "location": {
              "param_type": "string",
              "description": "The city and state (both required), e.g. San Francisco, CA."
            }
          }
        },
        {
          "tool_name": "word_count",
          "description": "Count the number of words in a text",
          "parameters": {
            "text": {
              "param_type": "string",
              "description": "The input text to analyze."
            }
          }
        },
        {
          "tool_name": "reverse_string",
          "description": "Reverse a string",
          "parameters": {
            "text": {
              "param_type": "string",
              "description": "The input text to reverse."
            }
          }
        },
        {
          "tool_name": "uppercase",
          "description": "Convert a string to uppercase",
          "parameters": {
            "text": {
              "param_type": "string",
              "description": "The input text to convert."
            }
          }
        },
        {
          "tool_name": "insurance_scorer",
          "description": "Generate an insurance score",
          "parameters": {
            "text": {
              "param_type": "string",
              "description": "The input text to eval."
            }
          }
        }    
]

response = client.inference.chat_completion(
    messages=[
        {"role": "system", "content": """You are an AI tool calling assistant. Must use the correct tool for each query.
            When using the tools:
            1. Extract the relevant number or values from the user's request.
            2. Use the correct tool to perform the operation.
            3. Present the result clearly.
            4. Handle errors gracefully."""},
        {"role": "user", "content": "Give me an insurance evaluation score"}
    ],
    model_id=model_id,
    stream=False,
    tools=json_tool
)
pprint(response)

In [8]:
# try to manually calculate token size for tool sets.
from llama_models.llama3.api.chat_format import ChatFormat
from llama_models.llama3.api.tokenizer import Tokenizer
import json
tokenizer = Tokenizer.get_instance() # this is how pull 1300 calculate token size. not sure how it works with other models. https://github.com/meta-llama/llama-stack/pull/1300/files#diff-bfab1a9cce8bb39b87f331653f4bec3fa2c83302337416acafb3be17ac34d73e
formatter = ChatFormat(tokenizer)
encoded = formatter.encode_content(json.dumps(json_tool))
print(len(encoded.tokens))

227


# Investigate how llama stack deal with tools, so that could count tokens properly. 

In [9]:
# found two ways to convert tool to json format, all use tool definition. try see how it actually works.
# one by from llama_stack.providers.utils.inference.openai_compat import convert_tooldef_to_openai_tool
from llama_stack.models.llama.datatypes import ToolDefinition, ToolParamDefinition

def convert_tool_to_tool_definition(tool_func) -> ToolDefinition:
    docstring = tool_func.__doc__
    lines = docstring.strip().split('\n')
    description = lines[0]
    param_lines = [line.strip() for line in lines if line.strip().startswith(':param')]

    parameters = {}
    for line in param_lines:
        parts = line.split(':')
        param_name = parts[1].strip()
        param_desc = parts[2].strip()
        parameters[param_name] = ToolParamDefinition(
            param_type="object",
            description=param_desc,
            required=True
        )

    return ToolDefinition(
        tool_name=tool_func.__name__,
        description=description,
        parameters=parameters
    )

# Convert your tools
tool_definitions = [convert_tool_to_tool_definition(tool) for tool in real_tools]

# Now convert ToolDefinition to JSON using convert_tooldef_to_openai_tool function
from llama_stack.providers.utils.inference.openai_compat import convert_tooldef_to_openai_tool

json_tools = [convert_tooldef_to_openai_tool(tool_def) for tool_def in tool_definitions]
pprint(json_tools)

In [10]:
# another way to convert tool to json format, by using JsonCustomToolGenerator

from llama_stack.models.llama.llama3.prompt_templates.system_prompts import JsonCustomToolGenerator

prompt_template = JsonCustomToolGenerator().gen(tool_definitions)
print(prompt_template.render())

Answer the user's question by making use of the following functions if needed.
If none of the function can be used, please say so.
Here is a list of functions in JSON format:
{
    "type": "function",
    "function": {
        "name": "weather_info",
        "description": "Fetches the current weather for a given location.",
        "parameters": {
            "type": "object",
            "properties": [
                {
                    "param loc": {
                        "type": "object",
                        "description": "The location for which weather information is requested."
                    }
                }
            ],
            "required": ["param loc"]
        }
    }
}
{
    "type": "function",
    "function": {
        "name": "word_count",
        "description": "Counts the number of words in the given text.",
        "parameters": {
            "type": "object",
            "properties": [
                {
                    "param text": {
     

In [11]:
# total_tools = 6
# tools = real_tools  + generate_fake_tools(total_tools - len(real_tools)-1)
# print(len(tools))

# agent = Agent(
#         client=client,
#         model=model_id,
#         instructions="""You are an AI tool calling assistant. Must use the correct tool for each query.
#         When using the tools:
#         1. Extract the relevant number or values from the user's request.
#         2. Use the correct tool to perform the operation.
#         3. Present the result clearly.
#         4. Handle errors gracefully.""",
#         tools=tools,
#     )
# query = "Give me an insurance evaluation score"
# i = 1
# print(f"\nUser: {query}")
# start_time = time.time()
# print(f"Agent id is {agent.agent_id}")
# session_id = agent.create_session(f"tool-experiment-session-{i+1}")
# print(f'session id is {session_id}')

# response = agent.create_turn(
#     messages=[
#         {"role": "user", "content": query}
#     ],
#     session_id=session_id,
#     stream=False,
# )
# session_response = client.agents.session.retrieve(
#                 session_id=session_id,
#                 agent_id=agent.agent_id,
#             )
# pprint(session_response)

In [None]:
# https://github.com/meta-llama/llama-stack/blob/441016bee8c6b3b7ce89e7809a903d3343b705e2/tests/integration/inference/test_text_inference.py#L316C1-L331C81
# def test_text_chat_completion_with_tool_calling_and_non_streaming(client_with_models, text_model_id, test_case):
#     tc = TestCase(test_case)

#     response = client_with_models.inference.chat_completion(
#         model_id=text_model_id,
#         messages=tc["messages"],
#         tools=tc["tools"],
#         tool_choice="auto",
#         stream=False,
#     )
#     # some models can return content for the response in addition to the tool call
#     assert response.completion_message.role == "assistant"

#     assert len(response.completion_message.tool_calls) == 1
#     assert response.completion_message.tool_calls[0].tool_name == tc["tools"][0]["tool_name"]
#     assert response.completion_message.tool_calls[0].arguments == tc["expected"]

# aiming to convert my tools to json format as llama stack do natually and then pass it to client.inference.chat_completion so that i can get some token size

In [12]:
response = client.inference.chat_completion(
    messages=[
        {"role": "system", "content": """You are an AI tool calling assistant. Must use the correct tool for each query.
            When using the tools:
            1. Extract the relevant number or values from the user's request.
            2. Use the correct tool to perform the operation.
            3. Present the result clearly.
            4. Handle errors gracefully."""},
        {"role": "user", "content": "Give me an insurance evaluation score"}
    ],
    model_id=model_id,
    stream=False,
    tools=json_tool
)
pprint(response)

assert response.completion_message.role == "assistant"
assert len(response.completion_message.tool_calls) == 1
assert response.completion_message.tool_calls[0].tool_name == "insurance_scorer"