In [150]:
import json
import os
import pathlib
import requests
from typing import Literal, NotRequired, List, Union
from pprint import pprint
from llama_cpp import ChatCompletionMessage, Llama, LlamaGrammar

In [151]:
# Where the GGUF weights live, relative to the notebook's working directory.
# Directory and file name are kept apart so either can be swapped on its own.
MODEL_DIR = "../../models/mistralai/Mistral-7B-Instruct-v0.1"
MODEL_FILE = "ggml-model-f16.gguf"
MODEL_PATH = f"{MODEL_DIR}/{MODEL_FILE}"

# Load the model once; every later cell reuses this `llm` handle.
llm = Llama(verbose=False, model_path=MODEL_PATH)

llama_model_loader: loaded meta data with 19 key-value pairs and 291 tensors from ../../models/mistralai/Mistral-7B-Instruct-v0.1/ggml-model-f16.gguf (version GGUF V2 (latest))
llama_model_loader: - tensor    0:                token_embd.weight f16      [  4096, 32000,     1,     1 ]
llama_model_loader: - tensor    1:              blk.0.attn_q.weight f16      [  4096,  4096,     1,     1 ]
llama_model_loader: - tensor    2:              blk.0.attn_k.weight f16      [  4096,  1024,     1,     1 ]
llama_model_loader: - tensor    3:              blk.0.attn_v.weight f16      [  4096,  1024,     1,     1 ]
llama_model_loader: - tensor    4:         blk.0.attn_output.weight f16      [  4096,  4096,     1,     1 ]
llama_model_loader: - tensor    5:            blk.0.ffn_gate.weight f16      [  4096, 14336,     1,     1 ]
llama_model_loader: - tensor    6:              blk.0.ffn_up.weight f16      [  4096, 14336,     1,     1 ]
llama_model_loader: - tensor    7:            blk.0.ffn_down.weight

In [152]:
# GBNF grammar file, resolved relative to the notebook's working directory.
GRAMMAR_PATH = "json.gbnf"

# Parse the grammar; it is handed to the first chat completion request further
# down so that the model's reply is constrained by it.
llama_grammar = LlamaGrammar.from_file(GRAMMAR_PATH)
print(llama_grammar)

root ::= object 
object ::= [{] ws object_11 [}] ws 
value ::= object | array | string | number | value_6 ws 
array ::= [[] ws array_15 []] ws 
string ::= ["] string_18 ["] ws 
number ::= number_19 number_25 number_29 ws 
value_6 ::= [t] [r] [u] [e] | [f] [a] [l] [s] [e] | [n] [u] [l] [l] 
ws ::= ws_31 
object_8 ::= string [:] ws value object_10 
object_9 ::= [,] ws string [:] ws value 
object_10 ::= object_9 object_10 | 
object_11 ::= object_8 | 
array_12 ::= value array_14 
array_13 ::= [,] ws value 
array_14 ::= array_13 array_14 | 
array_15 ::= array_12 | 
string_16 ::= [^"\] | [\] string_17 
string_17 ::= ["\/bfnrt] | [u] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] 
string_18 ::= string_16 string_18 | 
number_19 ::= number_20 number_21 
number_20 ::= [-] | 
number_21 ::= [0-9] | [1-9] number_22 
number_22 ::= [0-9] number_22 | 
number_23 ::= [.] number_24 
number_24 ::= [0-9] number_24 | [0-9] 
number_25 ::= number_23 | 
number_26 ::= [eE] number_27 number_28 
number_27 ::= [-

from_string grammar:



In [153]:
def _operand_schema(side: str) -> dict:
    """Return a fresh schema dict for the numeric operand on the given side."""
    return {
        "type": ["integer", "number"],
        "description": f"The {side} operand.",
    }


# Description of `get_current_weather`. Only "location" is required.
# Key order matters: these dicts are serialised with json.dumps for the model.
GET_CURRENT_WEATHER_SPEC = {
    "name": "get_current_weather",
    "description": "Get the current weather in a given location",
    "parameters": {
        "type": "object",
        "properties": {
            "location": {
                "type": "string",
                "description": "The city and state, e.g. San Francisco, CA",
            },
            "unit": {"type": "string", "enum": ["metric", "uscs"]},
        },
        "required": ["location"],
    },
}

# Description of `binary_arithmetic`. All three parameters are required.
BINARY_ARITHMETIC_SPEC = {
    "name": "binary_arithmetic",
    "description": "Perform binary arithmetic operations on two operands.",
    "parameters": {
        "type": "object",
        "properties": {
            "left_op": _operand_schema("left"),
            "right_op": _operand_schema("right"),
            "operator": {
                "type": "string",
                "description": "The arithmetic operator. Supported operators are '+', '-', '*', '/', '%'.",
                "enum": ["+", "-", "*", "/", "%"],
            },
        },
        "required": ["left_op", "right_op", "operator"],
    },
}

# Every function the model may be told about, in declaration order.
FUNCTIONS = [GET_CURRENT_WEATHER_SPEC, BINARY_ARITHMETIC_SPEC]


def get_current_weather(
    location: str, unit: str = "metric", timeout: float = 10.0
) -> str:
    """
    Get the current weather in a given location from the wttr.in service.

    Parameters:
    location (str): The city and state, e.g. San Francisco, CA
    unit (str): The unit system, can be either 'metric' or 'uscs'. Default is 'metric'.
        Any value other than 'metric' selects the 'u' query flag.
    timeout (float): Seconds to wait for wttr.in before giving up. Default is 10.

    Returns:
    str: The one-line weather report on success. On a non-200 status, a
    timeout, or any other requests-level failure, a
    "Could not get the weather for ..." message is returned instead of raising.
    """

    # Replace spaces with hyphens and commas with underscores for the wttr.in URL
    location = location.replace(" ", "-").replace(",", "_")

    # Determine the unit query parameter
    unit_query = "m" if unit == "metric" else "u"
    # One-line report layout; judging by the wttr.in format codes these are
    # location, local time, sunrise, sunset, condition, wind and temperature.
    res_format = "%l+%T+%S+%s+%C+%w+%t"
    failure_message = f"Could not get the weather for {location}."

    try:
        # Without a timeout a stalled connection would block the caller forever.
        response = requests.get(
            f"http://wttr.in/{location}?{unit_query}&format={res_format}",
            timeout=timeout,
        )
    except requests.RequestException:
        # Connection errors and timeouts take the same path as a bad status.
        return failure_message

    if response.status_code == 200:
        return response.text
    return failure_message


def binary_arithmetic(
    left_op: Union[int, float], right_op: Union[int, float], operator: str
) -> Union[int, float]:
    """
    Perform binary arithmetic operations on two operands.

    Parameters:
    - left_op (int/float): The left operand.
    - right_op (int/float): The right operand.
    - operator (str): The arithmetic operator. Supported operators are '+', '-', '*', '/', '%'.

    Returns:
    - int/float: The result of the arithmetic operation.

    Raises:
    - ValueError: If `operator` is not supported, or if `right_op` is zero
      for '/' or '%'.
    """
    if operator == "+":
        return left_op + right_op
    elif operator == "-":
        return left_op - right_op
    elif operator == "*":
        return left_op * right_op
    elif operator == "/":
        if right_op == 0:
            raise ValueError("Division by zero is not allowed.")
        return left_op / right_op
    elif operator == "%":
        # Same guard as division, so both zero-divisor cases raise ValueError
        # rather than '%' alone leaking a raw ZeroDivisionError.
        if right_op == 0:
            raise ValueError("Modulo by zero is not allowed.")
        return left_op % right_op
    else:
        raise ValueError(
            f"Unsupported operator '{operator}'. Supported operators are '+', '-', '*', '/', '%'."
        )

In [154]:
# Dispatch table: the function name used in FUNCTIONS and in the model's
# "function_call" JSON, mapped to the Python callable that implements it.
function_map = dict(
    get_current_weather=get_current_weather,
    binary_arithmetic=binary_arithmetic,
)

In [155]:
# The system message has two parts joined by a newline: a short persona, and
# one worked example of the JSON "function_call" reply format.
_persona_text = (
    "My name is Vincent and I am a helpful assistant. "
    "I can make function calls to retrieve information such as "
    "the current weather in a given location."
)
_example_function_call = '{ "function_call": { "name": "get_current_weather", "arguments": { "location": "New York City, NY" } } }'

system_prompt = ChatCompletionMessage(
    role="system",
    content=f"{_persona_text}\n{_example_function_call}",
)

In [156]:
def generate_chat_sequence(
    user_query: str,
    function_def: dict,
) -> List[ChatCompletionMessage]:
    """
    Build the conversation for a request that offers a single function.

    Parameters:
    user_query (str): The text of the user's question.
    function_def (dict): One function description; it is serialised with
        json.dumps and used as the content of a "function" role message.

    Returns:
    List[ChatCompletionMessage]: A new list holding, in order, the shared
    system prompt, the user message and the function message.
    """
    return [
        system_prompt,
        ChatCompletionMessage(role="user", content=user_query),
        ChatCompletionMessage(role="function", content=json.dumps(function_def)),
    ]


def generate_combined_chat_sequence(
    user_query: str,
    function_list: list,
) -> List[ChatCompletionMessage]:
    """
    Build the conversation for a request that offers several functions.

    Parameters:
    user_query (str): The text of the user's question.
    function_list (list): Function descriptions; each one is serialised with
        json.dumps into its own "function" role message.

    Returns:
    List[ChatCompletionMessage]: A new list holding the shared system prompt,
    then one function message per entry of `function_list` (in order), and
    finally the user message.
    """
    conversation = [system_prompt]
    for definition in function_list:
        conversation.append(
            ChatCompletionMessage(role="function", content=json.dumps(definition))
        )
    conversation.append(ChatCompletionMessage(role="user", content=user_query))
    return conversation

In [157]:
# Build the demo conversation: one weather question, offering only the first
# entry of FUNCTIONS (get_current_weather), then show each message.
weather_query = "What is the weather like in New York City, New York today?"
messages = generate_chat_sequence(weather_query, FUNCTIONS[0])
for message in messages:
    pprint(message)

{'content': 'My name is Vincent and I am a helpful assistant. I can make '
            'function calls to retrieve information such as the current '
            'weather in a given location.\n'
            '{ "function_call": { "name": "get_current_weather", "arguments": '
            '{ "location": "New York City, NY" } } }',
 'role': 'system'}
{'content': 'What is the weather like in New York City, New York today?',
 'role': 'user'}
{'content': '{"name": "get_current_weather", "description": "Get the current '
            'weather in a given location", "parameters": {"type": "object", '
            '"properties": {"location": {"type": "string", "description": "The '
            'city and state, e.g. San Francisco, CA"}, "unit": {"type": '
            '"string", "enum": ["metric", "uscs"]}}, "required": '
            '["location"]}}',
 'role': 'function'}


In [158]:
# First request: the grammar loaded earlier is passed along so generation is
# constrained to it, and temperature=0 is requested.
response = llm.create_chat_completion(messages=messages, grammar=llama_grammar, temperature=0)
# @abetlen: This is where we would expect a "function" role to be returned instead of an "assistant" role.
# It does not do this yet: the recorded output below still reports 'role': 'assistant'
# (presumably because function calling is only emulated through the prompt here - TODO confirm).
pprint(response)

{'choices': [{'finish_reason': 'stop',
              'index': 0,
              'message': {'content': '{ "function_call": { "name": '
                                     '"get_current_weather", "arguments": { '
                                     '"location": "New York City, NY" } } }',
                          'role': 'assistant'}}],
 'created': 1696386851,
 'id': 'chatcmpl-3f90e960-2781-45fd-80a5-7ff50f6ba2f7',
 'model': '../../models/mistralai/Mistral-7B-Instruct-v0.1/ggml-model-f16.gguf',
 'object': 'chat.completion',
 'usage': {'completion_tokens': 35, 'prompt_tokens': 96, 'total_tokens': 131}}


In [159]:
# NOTE: The result varies from response to response, even with a temperature of 0.
assistant_content = response["choices"][0]["message"]["content"]
pprint(assistant_content)

('{ "function_call": { "name": "get_current_weather", "arguments": { '
 '"location": "New York City, NY" } } }')


In [160]:
# Decode the model's reply text into Python objects. json.loads raises
# json.JSONDecodeError if the text is not valid JSON.
function_content = json.loads(assistant_content)
pprint(function_content)

{'function_call': {'arguments': {'location': 'New York City, NY'},
                   'name': 'get_current_weather'}}


In [161]:
# Unwrap the call description; in the recorded output it is a dict with
# "name" and "arguments". A reply lacking the "function_call" key raises
# KeyError here.
function_call = function_content["function_call"]
print(function_call)

{'name': 'get_current_weather', 'arguments': {'location': 'New York City, NY'}}


In [162]:
# Resolve the Python callable for the function the model asked for. The name
# must be both advertised in FUNCTIONS and implemented in function_map.
requested_name = function_call["name"]
advertised_names = {function_def["name"] for function_def in FUNCTIONS}
callback = (
    function_map.get(requested_name) if requested_name in advertised_names else None
)
if callback is None:
    # Fail here with a clear message rather than leaving callback as None,
    # which would only surface later as "'NoneType' object is not callable".
    raise ValueError(f"Model requested an unknown function: {requested_name!r}")
print(callback)


<function get_current_weather at 0x7f6fc3787ec0>


In [163]:
# Call the selected function with every argument the model supplied, passed by
# keyword. Forwarding only "location" would drop optional arguments such as
# "unit" and could never drive binary_arithmetic, whose parameters are named
# differently.
result = callback(**function_call["arguments"])
print(result)

New-York-City_-NY 22:28:17-0400 06:55:38 18:33:15 Clear ↓4km/h +25°C


In [164]:
# Feed the function's output back into the conversation and show the result.
# NOTE(review): the output is sent with role "user" although the variable is
# named function_message - presumably deliberate for this model's chat format;
# confirm before changing the role.
# NOTE: append mutates `messages` in place, so re-running this cell adds the
# same result to the conversation again.
function_message = ChatCompletionMessage(role="user", content=result)
messages.append(function_message)
for message in messages:
    print(message)

{'role': 'system', 'content': 'My name is Vincent and I am a helpful assistant. I can make function calls to retrieve information such as the current weather in a given location.\n{ "function_call": { "name": "get_current_weather", "arguments": { "location": "New York City, NY" } } }'}
{'role': 'user', 'content': 'What is the weather like in New York City, New York today?'}
{'role': 'function', 'content': '{"name": "get_current_weather", "description": "Get the current weather in a given location", "parameters": {"type": "object", "properties": {"location": {"type": "string", "description": "The city and state, e.g. San Francisco, CA"}, "unit": {"type": "string", "enum": ["metric", "uscs"]}}, "required": ["location"]}}'}
{'role': 'user', 'content': 'New-York-City_-NY 22:28:17-0400 06:55:38 18:33:15 Clear ↓4km/h +25°C'}


In [165]:
# Second request over the extended conversation. No grammar or temperature is
# passed this time; the recorded reply is plain prose. This rebinds `response`,
# so the first completion is no longer reachable under that name.
response = llm.create_chat_completion(messages)
pprint(response)

{'choices': [{'finish_reason': 'stop',
              'index': 0,
              'message': {'content': ' The current weather in New York City, '
                                     'NY is clear with a temperature of 25 '
                                     'degrees Celsius and a light breeze.',
                          'role': 'assistant'}}],
 'created': 1696386888,
 'id': 'chatcmpl-2a2ce9d7-7dbb-4a0c-a316-ce964c97f1d4',
 'model': '../../models/mistralai/Mistral-7B-Instruct-v0.1/ggml-model-f16.gguf',
 'object': 'chat.completion',
 'usage': {'completion_tokens': 27, 'prompt_tokens': 151, 'total_tokens': 178}}
