In [35]:
import json
import os
import pathlib
from typing import Literal, NotRequired

from llama_cpp import ChatCompletionMessage, Llama

# Relative path to the local model file that Llama should load.
model_path = "../../models/llama-2/llama-2-7b-chat/llama-2-7b-chat.GGUF.q8_0.bin"
llm = Llama(model_path=model_path, verbose=False)

# Opening turns of the conversation: a system persona followed by the first
# user question.
system_prompt = ChatCompletionMessage(
    role="system",
    content="My name is Llama. I am a helpful assistant.",
)
user_prompt = ChatCompletionMessage(
    role="user",
    content="Hello! What is your name?",
)

# `messages` is the running chat history that later cells keep appending to.
messages = []
messages.append(system_prompt)
messages.append(user_prompt)

response = llm.create_chat_completion(messages=messages, temperature=0)
print(response)

llama_model_loader: loaded meta data with 16 key-value pairs and 291 tensors from ../../models/llama-2/llama-2-7b-chat/llama-2-7b-chat.GGUF.q8_0.bin (version GGUF V2 (latest))
llama_model_loader: - tensor    0:                token_embd.weight q8_0     [  4096, 32000,     1,     1 ]
llama_model_loader: - tensor    1:               output_norm.weight f32      [  4096,     1,     1,     1 ]
llama_model_loader: - tensor    2:                    output.weight q8_0     [  4096, 32000,     1,     1 ]
llama_model_loader: - tensor    3:              blk.0.attn_q.weight q8_0     [  4096,  4096,     1,     1 ]
llama_model_loader: - tensor    4:              blk.0.attn_k.weight q8_0     [  4096,  4096,     1,     1 ]
llama_model_loader: - tensor    5:              blk.0.attn_v.weight q8_0     [  4096,  4096,     1,     1 ]
llama_model_loader: - tensor    6:         blk.0.attn_output.weight q8_0     [  4096,  4096,     1,     1 ]
llama_model_loader: - tensor    7:            blk.0.ffn_gate.weight 

{'id': 'chatcmpl-72f2127e-a066-4d0e-87eb-af75236e3344', 'object': 'chat.completion', 'created': 1694813242, 'model': '../../models/llama-2/llama-2-7b-chat/llama-2-7b-chat.GGUF.q8_0.bin', 'choices': [{'index': 0, 'message': {'role': 'assistant', 'content': 'My name is Llama. I am a helpful assistant. How may I assist you today?'}, 'finish_reason': 'stop'}], 'usage': {'prompt_tokens': 34, 'completion_tokens': 20, 'total_tokens': 54}}


In [36]:
# Take the first (only) choice from the completion and keep its message as
# the assistant's turn in the running history.
first_choice = response["choices"][0]
message = first_choice["message"]
messages.append(message)

print(message)

{'role': 'assistant', 'content': 'My name is Llama. I am a helpful assistant. How may I assist you today?'}


In [37]:
# Print the conversation so far: role on one line, content on the next,
# followed by a blank line.
for message in messages:
    transcript_entry = f"{message['role']}\n{message['content']}\n"
    print(transcript_entry)

system
My name is Llama. I am a helpful assistant.

user
Hello! What is your name?

assistant
My name is Llama. I am a helpful assistant. How may I assist you today?



In [38]:
class ChatCompletionFunction(ChatCompletionMessage):
    """
    Chat completion message type whose role may also be "function".

    Inherits:
        ChatCompletionMessage: Base chat completion message class.

    Attributes:
        role (Literal["assistant", "user", "system", "function"]): The role of the message.
        content (NotRequired[str]): The content of the message (optional).
        function_call (NotRequired[str]): The function call associated with the message (optional).
        function_args (NotRequired[str]): The function arguments associated with the message (optional).
        user (NotRequired[str]): The user associated with the message (optional).

    NOTE(review): later cells in this notebook index a response message's
    ``function_call`` as a mapping with "name" and "arguments" keys, while the
    field here is annotated as ``str`` - confirm the intended shape.
    """

    role: Literal["assistant", "user", "system", "function"]
    content: NotRequired[str]
    function_call: NotRequired[str]
    function_args: NotRequired[str]
    user: NotRequired[str]

# Schema describing the callable functions offered to the model. Each entry
# gives a function name, a description, and its parameters as a JSON-Schema
# style object.
location_property = {
    "type": "string",
    "description": "The city and state, e.g. San Francisco, CA",
}
unit_property = {"type": "string", "enum": ["celsius", "fahrenheit"]}

functions = [
    {
        "name": "get_current_weather",
        "description": "Get the current weather in a given location",
        "parameters": {
            "type": "object",
            "properties": {
                "location": location_property,
                "unit": unit_property,
            },
            # Only the location must be supplied; the unit is optional.
            "required": ["location"],
        },
    },
]

def get_current_weather(location: str, unit: str = "celsius"):
    """
    Produce a mock weather report for a location.

    No real lookup is performed: the temperature is always reported as 20
    degrees and ``unit`` is simply echoed into the sentence.

    Parameters:
    location (str): The city and state, e.g. San Francisco, CA
    unit (str): The temperature unit to mention, e.g. 'celsius' or 'fahrenheit'. Default is 'celsius'.

    Returns:
    str: A one-sentence description of the current weather.
    """
    # Mock implementation: format the fixed report directly.
    return f"The current weather in {location} is 20 degrees {unit}."

In [39]:
# Next user turn: a question that the get_current_weather schema could serve.
user_input = {
    "role": "user",
    "content": "What is the weather like today in New York City, New York?",
}
messages.append(user_input)

# Offer the function schemas alongside the history and explicitly request
# "auto" function selection.
response = llm.create_chat_completion(messages=messages, functions=functions, function_call="auto")

# Record the model's reply in the history before showing the raw response.
assistant_reply = response["choices"][0]["message"]
messages.append(assistant_reply)

print(response)

{'id': 'chatcmpl-eb33e324-146f-494e-b84a-42b6a86ba2e9', 'object': 'chat.completion', 'created': 1694813258, 'model': '../../models/llama-2/llama-2-7b-chat/llama-2-7b-chat.GGUF.q8_0.bin', 'choices': [{'index': 0, 'message': {'role': 'assistant', 'content': 'The current weather in New York City, New York is mostly sunny with a high temperature of 72 degrees Fahrenheit and a low temperature of 54 degrees Fahrenheit. Would you like to know the weather forecast for the next few days?'}, 'finish_reason': 'stop'}], 'usage': {'prompt_tokens': 77, 'completion_tokens': 54, 'total_tokens': 131}}


In [40]:
# Show the full conversation again, now including the weather exchange.
for message in messages:
    entry = f"{message['role']}\n{message['content']}\n"
    print(entry)

system
My name is Llama. I am a helpful assistant.

user
Hello! What is your name?

assistant
My name is Llama. I am a helpful assistant. How may I assist you today?

user
What is the weather like today in New York City, New York?

assistant
The current weather in New York City, New York is mostly sunny with a high temperature of 72 degrees Fahrenheit and a low temperature of 54 degrees Fahrenheit. Would you like to know the weather forecast for the next few days?



In [None]:
function_call = response["choices"][0]["message"]["function_call"]

print(function_call)

In [None]:
function_name = function_call["name"]
function_args = json.loads(function_call["arguments"])

print(function_name, function_args)

In [None]:
if function_name == 'get_current_weather':
    location = function_args['location']
    unit = function_args.get('unit', 'celsius')  # use default 'celsius' if unit is not provided
    weather_report = get_current_weather(location, unit)
    # then pass weather_report back to the model as a new message
    print(weather_report)

In [None]:
messages[-1]["content"] = weather_report

for message in messages:
    print(f"{message['role']}\n{message['content']}\n")

In [None]:
# Follow-up user turn that builds on the weather information.
follow_up = {"role": "user", "content": "Thank you! How should I dress for that kind of temperature?"}
messages.append(follow_up)

for message in messages:
    transcript_entry = f"{message['role']}\n{message['content']}\n"
    print(transcript_entry)


In [None]:
# Send the updated history back to the model, again offering the function
# schemas and explicitly requesting "auto" function selection.
completion_options = {
    "messages": messages,
    "functions": functions,
    "function_call": "auto",
}
response = llm.create_chat_completion(**completion_options)

print(response)

In [None]:
# Record the model's latest reply, then print the complete conversation.
latest_reply = response["choices"][0]["message"]
messages.append(latest_reply)

for message in messages:
    entry = f"{message['role']}\n{message['content']}\n"
    print(entry)