##### What is this notebook about?
- This notebook shows how to use ChatGPT API for inference, 
- It also shows how to print real number of tokens used, how to estimate number of tokens & calculcate pricing


In [1]:
import numpy as np
import pandas as pd
import openai
from openai import OpenAI
import os
from dotenv import load_dotenv, find_dotenv
_ = load_dotenv(find_dotenv()) # read local .env file
api_key = os.getenv('sk')
client = OpenAI(
  api_key=api_key,
)


In [2]:
response = client.chat.completions.create(
  model="gpt-4o-mini",
  messages=[
    {
      "role": "system",
      "content": "You are an AI assistant."
    },
    {
      "role": "user",
      "content": "Respond with 'Hello' if you got this message"
    }
  ],
  #temperature=1,
  #max_tokens=256,
  #top_p=1
)

In [3]:
response_text = response.choices[0].message.content.strip()
print("Raw response:")
print(response_text)

response_text = response_text.lower().split('\n')
print("Processed response")
print(response_text)

Raw response:
Hello!
Processed response
['hello!']


In [6]:
# Real number of tokens

print("Usage:")
print(response.usage)
print(f"total tokens = {response.usage.total_tokens} (prompt_tokens={response.usage.prompt_tokens}, completion_tokens={response.usage.completion_tokens})")

Usage:
CompletionUsage(completion_tokens=3, prompt_tokens=27, total_tokens=30, completion_tokens_details=CompletionTokensDetails(accepted_prediction_tokens=0, audio_tokens=0, reasoning_tokens=0, rejected_prediction_tokens=0), prompt_tokens_details=PromptTokensDetails(audio_tokens=0, cached_tokens=0))
total tokens = 30 (prompt_tokens=27, completion_tokens=3)


In [8]:
# Estimate number of tokens

import tiktoken

def num_tokens_from_string(string: str, encoding_name: str) -> int:
    """Returns the number of tokens in a text string."""
    encoding = tiktoken.get_encoding(encoding_name)
    num_tokens = len(encoding.encode(string))
    return num_tokens

def num_tokens_from_messages(messages, model="gpt-3.5-turbo-0613"):
  """Returns the number of tokens used by a list of messages."""
  try:
      encoding = tiktoken.encoding_for_model(model)
  except KeyError:
      encoding = tiktoken.get_encoding("o200k_base")
  if 1: #model == "gpt-3.5-turbo-0613":  # note: future models may deviate from this
      num_tokens = 0
      for message in messages:
          num_tokens += 4  # every message follows <im_start>{role/name}\n{content}<im_end>\n
          for key, value in message.items():
              num_tokens += len(encoding.encode(value))
              if key == "name":  # if there's a name, the role is omitted
                  num_tokens += -1  # role is always required and always 1 token
      num_tokens += 2  # every reply is primed with <im_start>assistant
      return num_tokens
  else:
      raise NotImplementedError(f"""num_tokens_from_messages() is not presently implemented for model {model}.""")

# Example 1
prompt = "tiktoken is great!"
num_tokens = num_tokens_from_string(prompt, "o200k_base")
print(num_tokens)

# Example 2
prompt = "tiktoken is great!"
messages=[{"role": "user", "content": prompt}]
num_tokens = num_tokens_from_messages(messages, "gpt-4o")
print(num_tokens)

# Example 3
prompt = "Respond with 'Hello' if you got this message"
messages=[
{
    "role": "system",
    "content": "You are an AI assistant."
},
{
    "role": "user",
    "content": prompt
}
]
num_tokens = num_tokens_from_messages(messages, "gpt-4o-mini")
print(num_tokens)

6
13
28


In [None]:
# Example Pricing on 09/05/2025:

# gpt-4o: 
# Input (prompt):
# $2.50/1M tokens => 1000 tokens: 0.25 cents
# Output (completion):
# $10.00/1M tokens => 1000 tokens: 1 cent

### References:

> https://platform.openai.com/docs/overview  
> https://platform.openai.com/docs/api-reference/chat/create  
> https://platform.openai.com/docs/advanced-usage   
> https://platform.openai.com/docs/pricing  
> https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken  
