In [9]:
import tiktoken


ENCODING = tiktoken.get_encoding("cl100k_base")


def _format_function_definitions(functions: list[dict]) -> str:
    """
    Generates TypeScript function type definitions.

    Args:
    - functions (list[dict]): List of dictionaries representing function definitions.

    Returns:
    - str: TypeScript function type definitions.
    """
    lines = ["namespace functions {"]

    for func in functions:
        if func.get("description"):
            lines.append(f"// {func['description']}")

        if func["parameters"].get("properties"):
            lines.append(f"type {func['name']} = (_: {{")
            lines.append(_format_object_properties(func["parameters"], 0))
            lines.append("}) => any;")
        else:
            lines.append(f"type {func['name']} = () => any;")

        lines.append("")

    lines.append("} // namespace functions")
    return "\n".join(lines)


def _format_object_properties(parameters: dict, indent: int) -> str:
    """
    Formats object properties for TypeScript type definitions.

    Args:
    - parameters (dict): Dictionary representing object parameters.
    - indent (int): Number of spaces for indentation.

    Returns:
    - str: Formatted object properties.
    """
    lines = []
    for name, param in parameters["properties"].items():
        if param.get("description") and indent < 2:
            lines.append(f"// {param['description']}")

        is_required = parameters.get("required") and name in parameters["required"]
        lines.append(
            f"{name}{'?:' if not is_required else ':'} {_format_type(param, indent)},"
        )

    return "\n".join([" " * indent + line for line in lines])


def _format_type(param: dict, indent: int) -> str:
    """
    Formats a single property type for TypeScript type definitions.

    Args:
    - param (dict): Dictionary representing a parameter.
    - indent (int): Number of spaces for indentation.

    Returns:
    - str: Formatted type for the given parameter.
    """
    type_ = param["type"]
    if type_ == "string":
        return (
            " | ".join([f'"{v}"' for v in param["enum"]])
            if param.get("enum")
            else "string"
        )
    elif type_ == "number":
        return (
            " | ".join([str(v) for v in param["enum"]])
            if param.get("enum")
            else "number"
        )
    elif type_ == "integer":
        return (
            " | ".join([str(v) for v in param["enum"]])
            if param.get("enum")
            else "integer"
        )
    elif type_ == "array":
        return (
            f"{_format_type(param['items'], indent)}[]"
            if param.get("items")
            else "any[]"
        )
    elif type_ == "boolean":
        return "boolean"
    elif type_ == "null":
        return "null"
    elif type_ == "object":
        return "{\n" + _format_object_properties(param, indent + 2) + "\n}"
    else:
        raise ValueError(f"Unsupported type: {type_}")


def _estimate_function_tokens(functions: list[dict]) -> int:
    """
    Estimates the tokens contributed by a list of function definitions.

    The functions are rendered with _format_function_definitions, the rendered
    text is counted with _string_tokens, and a fixed overhead of nine is added.

    Args:
    - functions (list[dict]): List of dictionaries representing function definitions.

    Returns:
    - int: Estimated token count.
    """
    fixed_overhead = 9  # Added once per completion, on top of the rendered text
    rendered = _format_function_definitions(functions)
    return _string_tokens(rendered) + fixed_overhead


def _string_tokens(string: str) -> int:
    """
    Counts the tokens of a string with the module-level ENCODING.

    ENCODING is the tiktoken 'cl100k_base' encoding created at import time.

    Args:
    - string (str): Input string.

    Returns:
    - int: Number of tokens the encoding produces for the string.
    """
    # ENCODING is only read here, so no `global` declaration is required.
    encoded = ENCODING.encode(string)
    return len(encoded)


def _estimate_message_tokens(message: dict) -> int:
    """
    Estimates token count for a given message.

    Args:
    - message (dict): Dictionary representing a message.

    Returns:
    - int: Estimated token count.
    """
    components = [
        message.get("role"),
        message.get("content"),
        message.get("name"),
        message.get("function_call", {}).get("name"),
        message.get("function_call", {}).get("arguments"),
    ]
    components = [
        component for component in components if component
    ]  # Filter out None values
    tokens = sum([_string_tokens(component) for component in components])

    tokens += 3  # Add three per message
    if message.get("name"):
        tokens += 1
    if message.get("role") == "function":
        tokens -= 2
    if message.get("function_call"):
        tokens += 3

    return tokens


def estimate_tokens(
    messages: list[dict], functions: list[dict] = None, function_call=None
) -> int:
    """
    Estimates token count for a given prompt with messages and functions.

    Args:
    - messages (list[dict]): List of dictionaries representing messages.
    - functions (list[dict], optional): List of dictionaries representing function definitions. Default is None.
    - function_call (str or dict, optional): Function call specification: "auto",
      "none", or a dict with a "name" key. Default is None.

    Returns:
    - int: Estimated token count.
    """
    padded_system = False
    tokens = 0

    for msg in messages:
        if msg["role"] == "system" and functions and not padded_system:
            # When functions are supplied, the first system message is counted
            # with a trailing newline. Copy the whole message so that other
            # fields (e.g. "name") are still counted instead of being dropped.
            msg = {**msg, "content": msg["content"] + "\n"}
            padded_system = True  # Only the first system message is padded
        tokens += _estimate_message_tokens(msg)

    tokens += 3  # Each completion has a 3-token overhead
    if functions:
        tokens += _estimate_function_tokens(functions)

    # padded_system is True exactly when functions were given and at least one
    # system message exists, which is the condition for this adjustment.
    if padded_system:
        tokens -= 4  # Adjust for function definitions

    if function_call and function_call != "auto":
        if function_call == "none":
            tokens += 1
        else:
            # A specific function was requested by name.
            tokens += _string_tokens(function_call["name"]) + 4

    return tokens

In [10]:
import openai
import os
import json
from dotenv import load_dotenv


# Let python-dotenv populate the process environment before the key is read.
load_dotenv()

# The key is None when OPENAI_API_KEY is not set in the environment.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
openai.api_key = OPENAI_API_KEY

In [15]:
# Fixture requests for the token estimator. Every entry has "messages"; the
# "functions" and "function_call" keys are optional. "tokens" presumably holds
# the expected prompt-token count -- the values match the numbers printed by
# the comparison loop, although that loop does not read this key itself.
example_calls = [
    # --- Messages only (no functions) ---
    {
        "messages": [
            {
                "role": "system",
                "content": "You are a helpful, pattern-following assistant that translates corporate jargon into plain English.",
            }
        ],
        "tokens": 25,
    },
    {
        "messages": [
            {
                "role": "system",
                "name": "example_user",
                "content": "New synergies will help drive top-line growth.",
            }
        ],
        "tokens": 20,
    },
    {
        "messages": [
            {
                "role": "system",
                "name": "example_assistant",
                "content": "Things working well together will increase revenue.",
            }
        ],
        "tokens": 19,
    },
    {
        "messages": [
            {
                "role": "system",
                "name": "example_user",
                "content": "Let's circle back when we have more bandwidth to touch base on opportunities for increased leverage.",
            }
        ],
        "tokens": 28,
    },
    {
        "messages": [
            {
                "role": "system",
                "name": "example_assistant",
                "content": "Let's talk later when we're less busy about how to do better.",
            }
        ],
        "tokens": 26,
    },
    {
        "messages": [
            {
                "role": "user",
                "content": "This late pivot means we don't have time to boil the ocean for the client deliverable.",
            }
        ],
        "tokens": 26,
    },
    {"messages": [{"role": "user", "content": "hello world"}], "tokens": 9},
    {"messages": [{"role": "system", "content": "hello"}], "tokens": 8},
    {
        "messages": [
            {"role": "system", "content": "# Important: you're the best robot"},
            {"role": "user", "content": "hello robot"},
            {"role": "assistant", "content": "hello world"},
        ],
        "tokens": 27,
    },
    # --- Single user message with function definitions ---
    {
        "messages": [{"role": "user", "content": "hello"}],
        "functions": [
            {"name": "foo", "parameters": {"type": "object", "properties": {}}}
        ],
        "tokens": 31,
    },
    {
        "messages": [{"role": "user", "content": "hello"}],
        "functions": [
            {"name": "foo", "parameters": {"type": "object", "properties": {}}}
        ],
        "function_call": "none",
        "tokens": 32,
    },
    {
        "messages": [{"role": "user", "content": "hello"}],
        "functions": [
            {"name": "foo", "parameters": {"type": "object", "properties": {}}}
        ],
        "function_call": "auto",
        "tokens": 31,
    },
    {
        "messages": [{"role": "user", "content": "hello"}],
        "functions": [
            {"name": "foo", "parameters": {"type": "object", "properties": {}}}
        ],
        "function_call": {"name": "foo"},
        "tokens": 36,
    },
    {
        "messages": [{"role": "user", "content": "hello"}],
        "functions": [
            {
                "name": "foo",
                "description": "Do a foo",
                "parameters": {"type": "object", "properties": {}},
            }
        ],
        "tokens": 36,
    },
    {
        "messages": [{"role": "user", "content": "hello"}],
        "functions": [
            {
                "name": "bing_bong",
                "description": "Do a bing bong",
                "parameters": {
                    "type": "object",
                    "properties": {"foo": {"type": "string"}},
                },
            }
        ],
        "tokens": 49,
    },
    {
        "messages": [{"role": "user", "content": "hello"}],
        "functions": [
            {
                "name": "bing_bong",
                "description": "Do a bing bong",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "foo": {"type": "string"},
                        "bar": {"type": "number", "description": "A number"},
                    },
                },
            }
        ],
        "tokens": 57,
    },
    {
        "messages": [{"role": "user", "content": "hello"}],
        "functions": [
            {
                "name": "bing_bong",
                "description": "Do a bing bong",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "foo": {
                            "type": "object",
                            "properties": {
                                "bar": {"type": "string", "enum": ["a", "b", "c"]},
                                "baz": {"type": "boolean"},
                            },
                        }
                    },
                },
            }
        ],
        "tokens": 68,
    },
    # --- System message(s) combined with function definitions ---
    {
        "messages": [
            {"role": "system", "content": "Hello"},
            {"role": "user", "content": "Hi there"},
        ],
        "functions": [
            {"name": "do_stuff", "parameters": {"type": "object", "properties": {}}}
        ],
        "tokens": 35,
    },
    {
        "messages": [
            {"role": "system", "content": "Hello:"},
            {"role": "user", "content": "Hi there"},
        ],
        "functions": [
            {"name": "do_stuff", "parameters": {"type": "object", "properties": {}}}
        ],
        "tokens": 35,
    },
    {
        "messages": [
            {"role": "system", "content": "Hello:"},
            {"role": "system", "content": "Hello"},
            {"role": "user", "content": "Hi there"},
        ],
        "functions": [
            {"name": "do_stuff", "parameters": {"type": "object", "properties": {}}}
        ],
        "tokens": 40,
    },
    {
        "messages": [
            {"role": "system", "content": "Hello:"},
            {"role": "system", "content": "Hello"},
            {"role": "user", "content": "Hi there"},
        ],
        "functions": [
            {"name": "do_stuff", "parameters": {"type": "object", "properties": {}}},
            {
                "name": "do_other_stuff",
                "parameters": {"type": "object", "properties": {}},
            },
        ],
        "tokens": 49,
    },
    {
        "messages": [
            {"role": "system", "content": "Hello:"},
            {"role": "system", "content": "Hello"},
            {"role": "user", "content": "Hi there"},
        ],
        "functions": [
            {"name": "do_stuff", "parameters": {"type": "object", "properties": {}}},
            {
                "name": "do_other_stuff",
                "parameters": {"type": "object", "properties": {}},
            },
        ],
        "function_call": {"name": "do_stuff"},
        "tokens": 55,
    },
    # --- Nested schemas: arrays of objects, enums, nested descriptions ---
    {
        "messages": [{"role": "user", "content": "hello"}],
        "functions": [
            {
                "name": "get_recipe",
                "parameters": {
                    "type": "object",
                    "required": ["ingredients", "instructions", "time_to_cook"],
                    "properties": {
                        "ingredients": {
                            "type": "array",
                            "items": {
                                "type": "object",
                                "required": ["name", "unit", "amount"],
                                "properties": {
                                    "name": {"type": "string"},
                                    "unit": {
                                        "enum": [
                                            "grams",
                                            "ml",
                                            "cups",
                                            "pieces",
                                            "teaspoons",
                                        ],
                                        "type": "string",
                                    },
                                    "amount": {"type": "number"},
                                },
                            },
                        },
                        "instructions": {
                            "type": "array",
                            "items": {"type": "string"},
                            "description": "Steps to prepare the recipe (no numbering)",
                        },
                        "time_to_cook": {
                            "type": "number",
                            "description": "Total time to prepare the recipe in minutes",
                        },
                    },
                },
            }
        ],
        "tokens": 106,
    },
    {
        "messages": [{"role": "user", "content": "hello"}],
        "functions": [
            {
                "name": "function",
                "description": "description",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "quality": {
                            "type": "object",
                            "properties": {
                                "pros": {
                                    "type": "array",
                                    "items": {"type": "string"},
                                    "description": "Write 3 points why this text is well written",
                                }
                            },
                        }
                    },
                },
            }
        ],
        "tokens": 46,
    },
    {
        "messages": [{"role": "user", "content": "hello"}],
        "functions": [
            {
                "name": "function",
                "description": "desctiption1",
                "parameters": {
                    "type": "object",
                    "description": "desctiption2",
                    "properties": {
                        "mainField": {"type": "string", "description": "description3"},
                        "field number one": {
                            "type": "object",
                            "description": "description4",
                            "properties": {
                                "yesNoField": {
                                    "type": "string",
                                    "description": "description5",
                                    "enum": ["Yes", "No"],
                                },
                                "howIsInteresting": {
                                    "type": "string",
                                    "description": "description6",
                                },
                                "scoreInteresting": {
                                    "type": "number",
                                    "description": "description7",
                                },
                                "isInteresting": {
                                    "type": "string",
                                    "description": "description8",
                                    "enum": ["Yes", "No"],
                                },
                            },
                        },
                    },
                },
            }
        ],
        "tokens": 96,
    },
]

In [8]:
# Sample function definition whose parameters are all arrays of string enums;
# three of the five properties are listed as required.
function1 = {
    "name": "get_data_1",
    "description": "Get data from the current table by specifying the datapoints you want to retrieve.",
    "parameters": {
        "type": "object",
        "properties": {
            "region": {
                "type": "array",
                "description": "Variables to include from: region ",
                "items": {
                    "type": "string",
                    "enum": [
                        "Sweden",
                        "Stockholm county",
                        "Uppsala county",
                        "Södermanland county",
                        "Östergötland county",
                    ],
                },
            },
            # Property names may contain spaces and slashes; they are passed
            # through as-is.
            "born in Sweden/foreign born": {
                "type": "array",
                "description": "Variables to include from: born in Sweden/foreign born ",
                "items": {
                    "type": "string",
                    "enum": ["born in Sweden", "foreign born", "total"],
                },
            },
            "sex": {
                "type": "array",
                "description": "Variables to include from: sex ",
                "items": {"type": "string", "enum": ["woman", "men", "men and women "]},
            },
            "observations": {
                "type": "array",
                "description": "Variables to include from: observations ",
                "items": {
                    "type": "string",
                    "enum": ["Gainful employment rate, percent"],
                },
            },
            "year": {
                "type": "array",
                "description": "Variables to include from: year ",
                "items": {
                    "type": "string",
                    "enum": [
                        "1993",
                        "1994",
                        "1995",
                        "1996",
                        "1997",
                        "1998",
                        "1999",
                        "2000",
                        "2001",
                        "2002",
                        "2003",
                    ],
                },
            },
        },
        "required": ["region", "observations", "year"],
    },
}

# Sample function definition with scalar parameters (string, string enum,
# integer), all of which are required.
function2 = {
    "name": "get_n_day_weather_forecast",
    "description": "Get an N-day weather forecast",
    "parameters": {
        "type": "object",
        "properties": {
            "location": {
                "type": "string",
                "description": "The city and state, e.g. San Francisco, CA",
            },
            "format": {
                "type": "string",
                "enum": ["celsius", "fahrenheit"],
                "description": "The temperature unit to use. Infer this from the users location.",
            },
            "num_days": {
                "type": "integer",
                "description": "The number of days to forecast",
            },
        },
        "required": ["location", "format", "num_days"],
    },
}

In [14]:
def get_chat_completion(messages, model, functions=None, function_call=None):
    """
    Sends a chat completion request and returns the raw API response.

    The request is limited to a single output token at temperature 0.

    Args:
    - messages (list[dict]): Chat messages to send.
    - model (str): Name of the model to query.
    - functions (list[dict], optional): Function definitions. When None, neither
      "functions" nor "function_call" is sent.
    - function_call (str or dict, optional): Function call specification; defaults
      to "auto" when functions are given and this is None.

    Returns:
    - The response object returned by openai.ChatCompletion.create.
    """
    request = {
        "model": model,
        "messages": messages,
        "max_tokens": 1,
        "temperature": 0,
    }
    if functions is not None:
        request["functions"] = functions
        # Identity check against None (PEP 8) rather than `== None`.
        request["function_call"] = "auto" if function_call is None else function_call

    return openai.ChatCompletion.create(**request)

In [16]:
# Candidate models; the comparison below runs against the second entry.
models = ["gpt-4-0613", "gpt-3.5-turbo-0613", "gpt-3.5-turbo-16k-0613"]
model = models[1]

# NOTE(review): these two values are overwritten on the first loop iteration
# below, so they only matter if example_calls is empty.
messages = [{"role": "user", "content": "Package testing"}]
functions = [function1, function2]

# For every example, print the prompt token count reported by the API next to
# the local estimate.
for example in example_calls:
    messages = example.get("messages")
    functions = example.get("functions")
    function_call = example.get("function_call")

    try:
        token_estimation = estimate_tokens(messages, functions, function_call)
        response = get_chat_completion(messages, model, functions, function_call)
        real_token_count = response["usage"]["prompt_tokens"]
        print(f"real: {real_token_count}, estimated: {token_estimation}")
    except Exception as e:
        # Best-effort: report the failure and continue with the next example.
        print(e)

real: 25, estimated: 25
real: 20, estimated: 20
real: 19, estimated: 19
real: 28, estimated: 28
real: 26, estimated: 26
real: 26, estimated: 26
real: 9, estimated: 9
real: 8, estimated: 8
real: 27, estimated: 27
real: 31, estimated: 31
real: 32, estimated: 32
real: 31, estimated: 31
real: 36, estimated: 36
real: 36, estimated: 36
real: 49, estimated: 49
real: 57, estimated: 57
real: 68, estimated: 68
real: 35, estimated: 35
real: 35, estimated: 35
real: 40, estimated: 40
real: 49, estimated: 49
real: 55, estimated: 55
real: 106, estimated: 106
real: 46, estimated: 46
real: 96, estimated: 96
