# How to call functions with chat models

This tutorial is built upon the [function call cookbook](https://github.com/openai/openai-cookbook/blob/main/examples/How_to_call_functions_with_chat_models.ipynb) from OpenAI.

## How to generate function arguments

In [1]:
# !pip install scipy --quiet
# !pip install tenacity --quiet
# !pip install tiktoken --quiet
# !pip install termcolor --quiet
# !pip install openai --quiet

In [2]:
# Create ~/.env file with this line: OPENAI_API_KEY=<your key here>
# You can get your key from https://platform.openai.com/api-keys
import openai
import dotenv
import os
# Load the .env file so that its entries are visible to os.getenv below.
# NOTE(review): which directories load_dotenv() searches is decided by
# python-dotenv's defaults -- confirm ~/.env is found from where this runs.
dotenv.load_dotenv()
# os.getenv returns None when OPENAI_API_KEY is not set in the environment.
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
# Default model for the request helpers defined later in this notebook.
GPT_MODEL = "gpt-3.5-turbo"

In [3]:
import openai

from lastmile_eval.rag.debugger.api.tracing import LastMileTracer

from tracing_auto_instrumentation.openai import wrap_openai
from lastmile_eval.rag.debugger.tracing.sdk import get_lastmile_tracer

# Tracer instance that is handed to wrap_openai below.
tracer: LastMileTracer = get_lastmile_tracer(
    tracer_name="OpenAI Text Calling w. Streaming",
)
# Synchronous OpenAI client, authenticated with the key loaded from the environment.
client = openai.OpenAI(api_key=OPENAI_API_KEY)
# Rebind `client` to the wrapped client so every later call made through it
# goes via the wrapper (presumably recording traces with `tracer` -- confirm
# against tracing_auto_instrumentation).
client = wrap_openai(client, tracer)

[DEBUG] 2024-07-03 12:19:50,307 _config.py:80: load_ssl_context verify=True cert=None trust_env=True http2=False
[DEBUG] 2024-07-03 12:19:50,308 _config.py:146: load_verify_locations cafile='/opt/homebrew/Caskroom/miniconda/base/envs/eval/lib/python3.12/site-packages/certifi/cacert.pem'


In [4]:
import json
from tenacity import retry, wait_random_exponential, stop_after_attempt
from termcolor import colored

### Utilities

First let's define a few utilities for making calls to the Chat Completions API and for maintaining and keeping track of the conversation state.

In [5]:
@retry(wait=wait_random_exponential(multiplier=1, max=40), stop=stop_after_attempt(3))
def chat_completion_request(messages, tools=None, tool_choice=None, model=GPT_MODEL, stream=True):
    """Send a Chat Completions request through the module-level ``client``.

    Args:
        messages: Conversation so far, as a list of ``{"role": ..., "content": ...}`` dicts.
        tools: Optional list of tool (function) specifications offered to the model.
        tool_choice: Optional tool-choice setting forwarded unchanged to the API.
        model: Model name; defaults to ``GPT_MODEL``.
        stream: Forwarded to the API as the ``stream`` flag.

    Returns:
        Whatever ``client.chat.completions.create`` returns for the given
        ``stream`` setting.

    Raises:
        Exception: Any error raised by the API call is logged and re-raised so
            the ``@retry`` decorator can retry it. Once the third attempt has
            failed, tenacity raises its own ``RetryError``.
    """
    try:
        return client.chat.completions.create(
            model=model,
            messages=messages,
            tools=tools,
            tool_choice=tool_choice,
            stream=stream,
        )
    except Exception as e:
        print("Unable to generate ChatCompletion response")
        print(f"Exception: {e}")
        # Re-raise rather than return the exception object: returning it made
        # the call look successful to @retry (so nothing was ever retried) and
        # handed callers an Exception where they expected a response.
        raise


In [6]:
def pretty_print_conversation(messages):
    """Print each message of a conversation, colour-coded by role.

    Args:
        messages: Iterable of dict-like messages. Each must have a ``"role"``
            key. ``"content"`` is read for system, user, function and plain
            assistant messages; ``"name"`` is read for function messages; a
            truthy ``"function_call"`` on an assistant message is printed
            instead of its content.

    Messages whose role is not system/user/assistant/function are skipped
    without printing anything.
    """
    role_to_color = {
        "system": "red",
        "user": "green",
        "assistant": "blue",
        "function": "magenta",
    }

    for message in messages:
        role = message["role"]

        # Work out the line to show first, then colour and print it once.
        if role == "system":
            line = f"system: {message['content']}\n"
        elif role == "user":
            line = f"user: {message['content']}\n"
        elif role == "assistant":
            if message.get("function_call"):
                line = f"assistant: {message['function_call']}\n"
            else:
                line = f"assistant: {message['content']}\n"
        elif role == "function":
            line = f"function ({message['name']}): {message['content']}\n"
        else:
            # Unknown role: nothing to display.
            continue

        print(colored(line, role_to_color[role]))


### Basic concepts

Let's create some function specifications to interface with a hypothetical weather API. We'll pass these function specifications to the Chat Completions API in order to generate function arguments that adhere to the specifications.

In [7]:
def _location_property():
    """Return a fresh JSON-schema entry for the ``location`` argument."""
    return {
        "type": "string",
        "description": "The city and state, e.g. San Francisco, CA",
    }


def _format_property():
    """Return a fresh JSON-schema entry for the ``format`` (temperature unit) argument."""
    return {
        "type": "string",
        "enum": ["celsius", "fahrenheit"],
        "description": "The temperature unit to use. Infer this from the users location.",
    }


def _function_tool(name, description, properties):
    """Wrap a function spec in the tool envelope, marking every property as required."""
    return {
        "type": "function",
        "function": {
            "name": name,
            "description": description,
            "parameters": {
                "type": "object",
                "properties": properties,
                # Every declared property is required, in declaration order.
                "required": list(properties),
            },
        },
    }


# Tool specifications for a hypothetical weather API: one function for the
# current weather and one for an N-day forecast.
tools = [
    _function_tool(
        "get_current_weather",
        "Get the current weather",
        {
            "location": _location_property(),
            "format": _format_property(),
        },
    ),
    _function_tool(
        "get_n_day_weather_forecast",
        "Get an N-day weather forecast",
        {
            "location": _location_property(),
            "format": _format_property(),
            "num_days": {
                "type": "integer",
                "description": "The number of days to forecast",
            },
        },
    ),
]

## Non-Async Functions

In [8]:
def execute_tool_call_and_display_response(stream: bool):
    """Ask the model about Glasgow's weather, offering ``tools``, and print the reply.

    Args:
        stream: Forwarded to ``chat_completion_request``. When true, the
            result is iterated and every chunk is printed; otherwise the
            result is printed as a single object.
    """
    conversation = [
        {
            "role": "system",
            "content": "Don't make assumptions about what values to plug into functions. Ask for clarification if a user request is ambiguous.",
        },
        {
            "role": "user",
            "content": "What's the weather like today in Glasgow, Scotland?",
        },
    ]
    chat_response = chat_completion_request(conversation, tools=tools, stream=stream)

    if not stream:
        print(f"{chat_response=}")
        return

    for chunk in chat_response:
        print(f"{chunk=}")


In [9]:
# Without streaming: the response is printed as a single object.
execute_tool_call_and_display_response(stream=False)

[DEBUG] 2024-07-03 12:19:50,342 _base_client.py:446: Request options: {'method': 'post', 'url': '/chat/completions', 'files': None, 'json_data': {'messages': [{'role': 'system', 'content': "Don't make assumptions about what values to plug into functions. Ask for clarification if a user request is ambiguous."}, {'role': 'user', 'content': "What's the weather like today in Glasgow, Scotland?"}], 'model': 'gpt-3.5-turbo', 'stream': False, 'tool_choice': None, 'tools': [{'type': 'function', 'function': {'name': 'get_current_weather', 'description': 'Get the current weather', 'parameters': {'type': 'object', 'properties': {'location': {'type': 'string', 'description': 'The city and state, e.g. San Francisco, CA'}, 'format': {'type': 'string', 'enum': ['celsius', 'fahrenheit'], 'description': 'The temperature unit to use. Infer this from the users location.'}}, 'required': ['location', 'format']}}}, {'type': 'function', 'function': {'name': 'get_n_day_weather_forecast', 'description': 'Get a

chat_response=ChatCompletion(id='chatcmpl-9gzo6zigtb417lthNFUclxw8UOnyy', choices=[Choice(finish_reason='tool_calls', index=0, logprobs=None, message=ChatCompletionMessage(content=None, role='assistant', function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_MPpMg2sDWn0YhIU1ivH6q7do', function=Function(arguments='{"location":"Glasgow, Scotland","format":"celsius"}', name='get_current_weather'), type='function')]))], created=1720034390, model='gpt-3.5-turbo-0125', object='chat.completion', system_fingerprint=None, usage=CompletionUsage(completion_tokens=24, prompt_tokens=196, total_tokens=220))


In [10]:
# With streaming: the response is iterated and each chunk is printed in turn.
execute_tool_call_and_display_response(stream=True)

[DEBUG] 2024-07-03 12:19:52,491 _base_client.py:446: Request options: {'method': 'post', 'url': '/chat/completions', 'files': None, 'json_data': {'messages': [{'role': 'system', 'content': "Don't make assumptions about what values to plug into functions. Ask for clarification if a user request is ambiguous."}, {'role': 'user', 'content': "What's the weather like today in Glasgow, Scotland?"}], 'model': 'gpt-3.5-turbo', 'stream': True, 'tool_choice': None, 'tools': [{'type': 'function', 'function': {'name': 'get_current_weather', 'description': 'Get the current weather', 'parameters': {'type': 'object', 'properties': {'location': {'type': 'string', 'description': 'The city and state, e.g. San Francisco, CA'}, 'format': {'type': 'string', 'enum': ['celsius', 'fahrenheit'], 'description': 'The temperature unit to use. Infer this from the users location.'}}, 'required': ['location', 'format']}}}, {'type': 'function', 'function': {'name': 'get_n_day_weather_forecast', 'description': 'Get an

chunk=ChatCompletionChunk(id='chatcmpl-9gzo899c0NsVrtVO0vPOKRMzW7Crm', choices=[Choice(delta=ChoiceDelta(content=None, function_call=None, role='assistant', tool_calls=[ChoiceDeltaToolCall(index=0, id='call_ocxcueR45covydAzV4LhpkGo', function=ChoiceDeltaToolCallFunction(arguments='', name='get_current_weather'), type='function')]), finish_reason=None, index=0, logprobs=None)], created=1720034392, model='gpt-3.5-turbo-0125', object='chat.completion.chunk', system_fingerprint=None, usage=None)
chunk=ChatCompletionChunk(id='chatcmpl-9gzo899c0NsVrtVO0vPOKRMzW7Crm', choices=[Choice(delta=ChoiceDelta(content=None, function_call=None, role=None, tool_calls=[ChoiceDeltaToolCall(index=0, id=None, function=ChoiceDeltaToolCallFunction(arguments='{"', name=None), type=None)]), finish_reason=None, index=0, logprobs=None)], created=1720034392, model='gpt-3.5-turbo-0125', object='chat.completion.chunk', system_fingerprint=None, usage=None)
chunk=ChatCompletionChunk(id='chatcmpl-9gzo899c0NsVrtVO0vPOKR

[DEBUG] 2024-07-03 12:19:53,385 connectionpool.py:546: https://lastmileai.dev:443 "POST /api/trace/create HTTP/11" 200 10
[DEBUG] 2024-07-03 12:19:53,387 connectionpool.py:1051: Starting new HTTPS connection (1): lastmileai.dev:443
[DEBUG] 2024-07-03 12:19:53,785 connectionpool.py:546: https://lastmileai.dev:443 "POST /api/rag_query_traces/create HTTP/11" 200 None
[DEBUG] 2024-07-03 12:19:53,787 connectionpool.py:1051: Starting new HTTPS connection (1): lastmileai.dev:443
[DEBUG] 2024-07-03 12:19:54,188 connectionpool.py:546: https://lastmileai.dev:443 "POST /api/rag_events/create HTTP/11" 200 None


## Async Functions

In [21]:
# NOTE: this rebinds the module-level `client` that chat_completion_request
# (the synchronous helper) also reads. After this cell has run, that helper
# will use the async client as well, so re-create the sync client before
# re-running the non-async cells.
client = openai.AsyncOpenAI(api_key=OPENAI_API_KEY)
# Wrap the async client with the same tracer as the synchronous one.
client = wrap_openai(client, tracer)

@retry(wait=wait_random_exponential(multiplier=1, max=40), stop=stop_after_attempt(3))
async def async_chat_completion_request(messages, tools=None, tool_choice=None, model=GPT_MODEL, stream=True):
    """Send a Chat Completions request through the module-level async ``client``.

    Args:
        messages: Conversation so far, as a list of ``{"role": ..., "content": ...}`` dicts.
        tools: Optional list of tool (function) specifications offered to the model.
        tool_choice: Optional tool-choice setting forwarded unchanged to the API.
        model: Model name; defaults to ``GPT_MODEL``.
        stream: Forwarded to the API as the ``stream`` flag.

    Returns:
        The awaited result of ``client.chat.completions.create``. It is also
        printed before being returned.

    Raises:
        Exception: Any error raised by the API call is logged and re-raised so
            the ``@retry`` decorator can retry it. Once the third attempt has
            failed, tenacity raises its own ``RetryError``.
    """
    try:
        print("about to print response")
        response = await client.chat.completions.create(
            model=model,
            messages=messages,
            tools=tools,
            tool_choice=tool_choice,
            stream=stream,
        )
        print(f"{response=}")
        return response
    except Exception as e:
        print("Unable to generate ChatCompletion response")
        print(f"Exception: {e}")
        # Re-raise rather than return the exception object: returning it made
        # the call look successful to @retry (so nothing was ever retried) and
        # handed callers an Exception where they expected a response.
        raise

[DEBUG] 2024-07-03 12:23:18,435 _config.py:80: load_ssl_context verify=True cert=None trust_env=True http2=False
[DEBUG] 2024-07-03 12:23:18,437 _config.py:146: load_verify_locations cafile='/opt/homebrew/Caskroom/miniconda/base/envs/eval/lib/python3.12/site-packages/certifi/cacert.pem'


In [22]:
async def execute_async_tool_call_and_display_response(stream: bool):
    """Async variant: ask about Glasgow's weather, offering ``tools``, and print the reply.

    Args:
        stream: Forwarded to ``async_chat_completion_request``. When true,
            the result is consumed with ``async for`` and every chunk is
            printed; otherwise the result is printed as a single object.
    """
    conversation = [
        {
            "role": "system",
            "content": "Don't make assumptions about what values to plug into functions. Ask for clarification if a user request is ambiguous.",
        },
        {
            "role": "user",
            "content": "What's the weather like today in Glasgow, Scotland?",
        },
    ]
    chat_response = await async_chat_completion_request(conversation, tools=tools, stream=stream)

    if not stream:
        print(f"{chat_response=}")
        return

    async for chunk in chat_response:
        print(f"{chunk=}")

In [None]:
# Without streaming: the awaited response is printed as a single object.
# (Top-level `await` works here because this is a notebook cell.)
await execute_async_tool_call_and_display_response(stream=False)

[DEBUG] 2024-07-03 12:23:22,703 _base_client.py:446: Request options: {'method': 'post', 'url': '/chat/completions', 'files': None, 'json_data': {'messages': [{'role': 'system', 'content': "Don't make assumptions about what values to plug into functions. Ask for clarification if a user request is ambiguous."}, {'role': 'user', 'content': "What's the weather like today in Glasgow, Scotland?"}], 'model': 'gpt-3.5-turbo', 'stream': False, 'tool_choice': None, 'tools': [{'type': 'function', 'function': {'name': 'get_current_weather', 'description': 'Get the current weather', 'parameters': {'type': 'object', 'properties': {'location': {'type': 'string', 'description': 'The city and state, e.g. San Francisco, CA'}, 'format': {'type': 'string', 'enum': ['celsius', 'fahrenheit'], 'description': 'The temperature unit to use. Infer this from the users location.'}}, 'required': ['location', 'format']}}}, {'type': 'function', 'function': {'name': 'get_n_day_weather_forecast', 'description': 'Get a

about to print response


[DEBUG] 2024-07-03 12:23:23,630 _trace.py:85: receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Wed, 03 Jul 2024 19:23:23 GMT'), (b'Content-Type', b'application/json'), (b'Transfer-Encoding', b'chunked'), (b'Connection', b'keep-alive'), (b'openai-organization', b'lastmile-ai'), (b'openai-processing-ms', b'673'), (b'openai-version', b'2020-10-01'), (b'strict-transport-security', b'max-age=31536000; includeSubDomains'), (b'x-ratelimit-limit-requests', b'10000'), (b'x-ratelimit-limit-tokens', b'5000000'), (b'x-ratelimit-remaining-requests', b'9999'), (b'x-ratelimit-remaining-tokens', b'4999939'), (b'x-ratelimit-reset-requests', b'6ms'), (b'x-ratelimit-reset-tokens', b'0s'), (b'x-request-id', b'req_a001c2759259c61e9977e5f50f2c3273'), (b'CF-Cache-Status', b'DYNAMIC'), (b'Set-Cookie', b'__cf_bm=4V12cAGwv66SHQAVefqF38wkkW6lnzRrdBo6LYvkF2c-1720034603-1.0.1.1-4TkYq9b2tREAIym9hb.v94bm0Wv_KhgWbm8yA5d7adUM.GOBd0Dti36nV7TuyDbI_TZvyCmb8s4EzUnapjBqdw; path=/; expir

response=ChatCompletion(id='chatcmpl-9gzrXaCATPGExrbeJXKhvSamlXj3O', choices=[Choice(finish_reason='tool_calls', index=0, logprobs=None, message=ChatCompletionMessage(content=None, role='assistant', function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_W3XsPDgvT8QMIlCe6QuhT3Cj', function=Function(arguments='{"location":"Glasgow, Scotland","format":"celsius"}', name='get_current_weather'), type='function')]))], created=1720034603, model='gpt-3.5-turbo-0125', object='chat.completion', system_fingerprint=None, usage=CompletionUsage(completion_tokens=24, prompt_tokens=196, total_tokens=220))
chat_response=ChatCompletion(id='chatcmpl-9gzrXaCATPGExrbeJXKhvSamlXj3O', choices=[Choice(finish_reason='tool_calls', index=0, logprobs=None, message=ChatCompletionMessage(content=None, role='assistant', function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_W3XsPDgvT8QMIlCe6QuhT3Cj', function=Function(arguments='{"location":"Glasgow, Scotland","format":"celsius"}', nam

[ERROR] 2024-07-03 12:23:23,649 __init__.py:156: Failed to detach context
Traceback (most recent call last):
  File "/opt/homebrew/Caskroom/miniconda/base/envs/eval/lib/python3.12/site-packages/opentelemetry/trace/__init__.py", line 570, in use_span
    yield span
  File "/opt/homebrew/Caskroom/miniconda/base/envs/eval/lib/python3.12/site-packages/opentelemetry/sdk/trace/__init__.py", line 1091, in start_as_current_span
    yield span
  File "/Users/rossdancraig/Projects/lmai/repos/eval/src/lastmile_eval/rag/debugger/tracing/lastmile_tracer.py", line 282, in start_as_current_span
    yield span
  File "/Users/rossdancraig/Projects/lmai/repos/tracing_auto_instrumentation/src/tracing_auto_instrumentation/openai/async_openai_wrapper.py", line 313, in _create_impl
    yield raw_response
GeneratorExit

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/opt/homebrew/Caskroom/miniconda/base/envs/eval/lib/python3.12/site-packages/op

In [24]:
# With streaming: the response is consumed with `async for` and each chunk is printed in turn.
# (Top-level `await` works here because this is a notebook cell.)
await execute_async_tool_call_and_display_response(stream=True)

[DEBUG] 2024-07-03 12:24:03,778 _base_client.py:446: Request options: {'method': 'post', 'url': '/chat/completions', 'files': None, 'json_data': {'messages': [{'role': 'system', 'content': "Don't make assumptions about what values to plug into functions. Ask for clarification if a user request is ambiguous."}, {'role': 'user', 'content': "What's the weather like today in Glasgow, Scotland?"}], 'model': 'gpt-3.5-turbo', 'stream': True, 'tool_choice': None, 'tools': [{'type': 'function', 'function': {'name': 'get_current_weather', 'description': 'Get the current weather', 'parameters': {'type': 'object', 'properties': {'location': {'type': 'string', 'description': 'The city and state, e.g. San Francisco, CA'}, 'format': {'type': 'string', 'enum': ['celsius', 'fahrenheit'], 'description': 'The temperature unit to use. Infer this from the users location.'}}, 'required': ['location', 'format']}}}, {'type': 'function', 'function': {'name': 'get_n_day_weather_forecast', 'description': 'Get an

about to print response
response=<async_generator object AsyncCompletionsWrapper._create_impl at 0x1385746c0>


[DEBUG] 2024-07-03 12:24:04,364 _trace.py:85: receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Wed, 03 Jul 2024 19:24:04 GMT'), (b'Content-Type', b'text/event-stream; charset=utf-8'), (b'Transfer-Encoding', b'chunked'), (b'Connection', b'keep-alive'), (b'openai-organization', b'lastmile-ai'), (b'openai-processing-ms', b'281'), (b'openai-version', b'2020-10-01'), (b'strict-transport-security', b'max-age=31536000; includeSubDomains'), (b'x-ratelimit-limit-requests', b'10000'), (b'x-ratelimit-limit-tokens', b'5000000'), (b'x-ratelimit-remaining-requests', b'9999'), (b'x-ratelimit-remaining-tokens', b'4999939'), (b'x-ratelimit-reset-requests', b'6ms'), (b'x-ratelimit-reset-tokens', b'0s'), (b'x-request-id', b'req_cfc598a4da51b75afdb15e5f11621419'), (b'CF-Cache-Status', b'DYNAMIC'), (b'Server', b'cloudflare'), (b'CF-RAY', b'89d940ec5cfc97fd-SJC'), (b'alt-svc', b'h3=":443"; ma=86400')])
[INFO] 2024-07-03 12:24:04,368 _client.py:1773: HTTP Request: POST ht

chunk=ChatCompletionChunk(id='chatcmpl-9gzsBoO26gbtNZJJMquAvhrfKJjyB', choices=[Choice(delta=ChoiceDelta(content=None, function_call=None, role='assistant', tool_calls=[ChoiceDeltaToolCall(index=0, id='call_YBT3Gkln0iO10ILkYEROawhu', function=ChoiceDeltaToolCallFunction(arguments='', name='get_current_weather'), type='function')]), finish_reason=None, index=0, logprobs=None)], created=1720034643, model='gpt-3.5-turbo-0125', object='chat.completion.chunk', system_fingerprint=None, usage=None)
chunk=ChatCompletionChunk(id='chatcmpl-9gzsBoO26gbtNZJJMquAvhrfKJjyB', choices=[Choice(delta=ChoiceDelta(content=None, function_call=None, role=None, tool_calls=[ChoiceDeltaToolCall(index=0, id=None, function=ChoiceDeltaToolCallFunction(arguments='{"', name=None), type=None)]), finish_reason=None, index=0, logprobs=None)], created=1720034643, model='gpt-3.5-turbo-0125', object='chat.completion.chunk', system_fingerprint=None, usage=None)
chunk=ChatCompletionChunk(id='chatcmpl-9gzsBoO26gbtNZJJMquAvh

[DEBUG] 2024-07-03 12:24:04,950 connectionpool.py:546: https://lastmileai.dev:443 "POST /api/rag_query_traces/create HTTP/11" 200 None
[DEBUG] 2024-07-03 12:24:04,955 connectionpool.py:1051: Starting new HTTPS connection (1): lastmileai.dev:443
[DEBUG] 2024-07-03 12:24:05,333 connectionpool.py:546: https://lastmileai.dev:443 "POST /api/rag_events/create HTTP/11" 200 None
