#  OpenAI API Chat Completions

In [24]:
# @title Dependencies
!pip install --quiet tiktoken
!pip install --quiet openai
!pip install --quiet pandas
!pip install --quiet ipywidgets
!pip install --quiet python-dotenv

In [None]:
# @title OPENAI_API_KEY Environment Variable

import dotenv
dotenv.load_dotenv('./.env')

## Links

#### 🧠 Model & Pricing Information
- [📄 OpenAI Model Overview](https://platform.openai.com/docs/models) — Learn about GPT models and their capabilities.
- [💰 OpenAI Pricing Page](https://openai.com/pricing) — Official token pricing for all models.
- [📊 OpenAI Tokenizer Tool](https://platform.openai.com/tokenizer) — Check how your text maps to tokens.

#### 🛠️ API & Dashboard
- [🧪 OpenAI API Chat Completions Reference](https://platform.openai.com/docs/api-reference/chat) — Official docs for completions, chat, and more.
- [📂 OpenAI API Dashboard](https://platform.openai.com/account/usage) — Monitor your token usage and cost.
- [🔐 API Key Management](https://platform.openai.com/account/api-keys) — Create and manage your API tokens.

#### 📚 Tools & Guides
- [🧠 OpenAI Cookbook (GitHub)](https://github.com/openai/openai-cookbook) — Code examples and best practices.
- [🧰 OpenAI `tiktoken` Library](https://github.com/openai/tiktoken) — Tokenizer library used by the models.
- [✍️ Prompt Engineering Guide](https://github.com/dair-ai/Prompt-Engineering-Guide) — Comprehensive tips and examples.



## Models

> 🛠️ Initialization of openai, models with their costs

In [32]:
# @title Initalization
models_list = {
    "o4-mini":            {"input": 1.10,	"cached_input": 0.275,	"output": 0.20},
    "o3":                 {"input": 10.00, 	"cached_input": 2.50, 	"output": 40.00},
    "o3-mini":            {"input": 1.10, 	"cached_input": 0.55, 	"output": 4.40},
    "o1":                 {"input": 15.00, 	"cached_input": 7.50, 	"output": 60.00},
    "o1-mini":            {"input": 1.10, 	"cached_input": 0.55, 	"output": 4.40},
    "gpt-4o":             {"input": 2.50, 	"cached_input": 1.25, 	"output": 10.00},
    "gpt-4o-mini":        {"input": 0.15, 	"cached_input": 0.075, 	"output": 0.60},
    "gpt-4.1":            {"input": 2.00, 	"cached_input": 0.50, 	"output": 8.00},
    "gpt-4.1-mini":       {"input": 0.40, 	"cached_input": 0.10, 	"output": 1.60},
    "gpt-4.1-nano":       {"input": 0.10, 	"cached_input": 0.025, 	"output": 0.40},
    "gpt-4":              {"input": 30.00, 	"cached_input": None, 	"output": 60.00},
    "gpt-4-turbo":        {"input": 10.00, 	"cached_input": None, 	"output": 30.00},
    "gpt-3.5-turbo":      {"input": 0.50, 	"cached_input": None, 	"output": 1.50},
}

available_models = list(models_list.keys())

### 🧠 Models: Use Cases

| Model             | Good Use Cases                                              | Not Ideal For                                              | Price Sensitive |
|------------------|-------------------------------------------------------------|-------------------------------------------------------------|-----------------|
| **o4-mini**       | • Balanced reasoning + cost <br>• Coding and math helpers   | • Ultra-fast tasks <br>• High-fidelity generation           | ✅✅             |
| **o3**            | • Advanced math and code <br>• Expert tutoring agents       | • Cheap deployment <br>• Simple text processing             | ❌❌❌           |
| **o3-mini**       | • Customer support bots <br>• Basic analysis                | • Complex logic <br>• Audio/image understanding             | ✅              |
| **o1**            | • Formal logic chains <br>• Strategic AI planning           | • Real-time UIs <br>• Cost-sensitive scaling                | ❌❌             |
| **o1-mini**       | • Email summarization <br>• Medium-depth assistants         | • Deep reasoning <br>• Creative writing                     | ✅              |
| **gpt-4o**        | • Multimodal chat (vision/audio) <br>• Assistants at scale  | • On-device use <br>• Bulk simple queries                   | ✅              |
| **gpt-4o-mini**   | • Chatbots at scale <br>• Fast classification tasks         | • Deep summarization <br>• Complex context                  | ✅✅✅           |
| **gpt-4.1**       | • Complex reasoning <br>• Document understanding            | • Real-time apps <br>• Budget tasks                         | ❌              |
| **gpt-4.1-mini**  | • General assistants <br>• Customer chat                    | • Expert logic <br>• Fast games                             | ✅✅             |
| **gpt-4.1-nano**  | • Input validation <br>• Edge/fast inference                | • Rich generation <br>• Nuanced conversation                | ✅✅✅           |
| **gpt-4**         | • Legal, medical, research <br>• Best-in-class generation   | • Anything with cost limits <br>• High-volume chat          | ❌❌❌           |
| **gpt-4-turbo**   | • Premium AI copilots <br>• Multiturn apps                  | • Mobile agents <br>• Cheap API endpoints                   | ❌              |
| **gpt-3.5-turbo** | • FAQ bots <br>• Affordable AI integration                  | • Creative content <br>• Technical reasoning                | ✅✅             |


In [33]:
# @title Price List
# Correcting the price table rendering by avoiding any pipe characters within the cell values
from IPython.display import Markdown, Code, display

# Calculate per-token pricing
def compute_price_table(prices):
    rows = []
    for model, price in prices.items():
        input_per_token = price["input"] / 1_000_000
        output_per_token = price["output"] / 1_000_000
        cached_input_token = (
            f"\${price['cached_input'] / 1_000_000:.6f}" if price["cached_input"] is not None else "N/A"
        )
        rows.append({
            "model": model,
            "input_1M": f"\${price['input']:.2f}",
            "cached_input_1M": f"\${price['cached_input']:.2f}" if price["cached_input"] is not None else "N/A",
            "output_1M": f"\${price['output']:.2f}",
            "input_token": f"\${input_per_token:.6f}",
            "cached_input_token": cached_input_token,
            "output_token": f"\${output_per_token:.6f}",
        })
    return rows

# Convert to Markdown
def to_markdown_table(rows):
    headers = ["Model", "Input (1M)", "Cached Input (1M)", "Output (1M)",
               "Input/token", "Cached Input/token", "Output/token"]
    header_line = "| " + " | ".join(headers) + " |"
    separator = "| " + " | ".join(["---"] * len(headers)) + " |"
    lines = [header_line, separator]
    for row in rows:
        line = f"| {row['model']} | {row['input_1M']} | {row['cached_input_1M']} | {row['output_1M']} | " \
               f"{row['input_token']} | {row['cached_input_token']} | {row['output_token']} |"
        lines.append(line)
    return "\r\n".join(lines)

# Generate and display table
table_data = compute_price_table(models_list)
markdown_table = to_markdown_table(table_data)
display(Markdown(markdown_table))


| Model | Input (1M) | Cached Input (1M) | Output (1M) | Input/token | Cached Input/token | Output/token |
| --- | --- | --- | --- | --- | --- | --- |
| o4-mini | \$1.10 | \$0.28 | \$0.20 | \$0.000001 | \$0.000000 | \$0.000000 |
| o3 | \$10.00 | \$2.50 | \$40.00 | \$0.000010 | \$0.000003 | \$0.000040 |
| o3-mini | \$1.10 | \$0.55 | \$4.40 | \$0.000001 | \$0.000001 | \$0.000004 |
| o1 | \$15.00 | \$7.50 | \$60.00 | \$0.000015 | \$0.000008 | \$0.000060 |
| o1-mini | \$1.10 | \$0.55 | \$4.40 | \$0.000001 | \$0.000001 | \$0.000004 |
| gpt-4o | \$2.50 | \$1.25 | \$10.00 | \$0.000003 | \$0.000001 | \$0.000010 |
| gpt-4o-mini | \$0.15 | \$0.07 | \$0.60 | \$0.000000 | \$0.000000 | \$0.000001 |
| gpt-4.1 | \$2.00 | \$0.50 | \$8.00 | \$0.000002 | \$0.000000 | \$0.000008 |
| gpt-4.1-mini | \$0.40 | \$0.10 | \$1.60 | \$0.000000 | \$0.000000 | \$0.000002 |
| gpt-4.1-nano | \$0.10 | \$0.03 | \$0.40 | \$0.000000 | \$0.000000 | \$0.000000 |
| gpt-4 | \$30.00 | N/A | \$60.00 | \$0.000030 | N/A | \$0.000060 |
| gpt-4-turbo | \$10.00 | N/A | \$30.00 | \$0.000010 | N/A | \$0.000030 |
| gpt-3.5-turbo | \$0.50 | N/A | \$1.50 | \$0.000000 | N/A | \$0.000002 |

## 💬 Input

In [34]:
# @title 🔘 Model Selection
# @markdown Use this dropdown to select which model you want to simulate or test within the notebook. This model will be used throughout the context and prompt sections below for cost estimation and token usage tracking.
import ipywidgets as widgets
from IPython.display import display

modelSelection = widgets.Dropdown(
    options=available_models,
    value=available_models[0],
    description='Model:',
    disabled=False,
)

display(modelSelection)

Dropdown(description='Model:', options=('o4-mini', 'o3', 'o3-mini', 'o1', 'o1-mini', 'gpt-4o', 'gpt-4o-mini', …

In [30]:
# @title 🧾 Model Context
# @markdown The context section allows you to simulate or input a system prompt, previous conversation history, or predefined settings that might be passed with each request. Context tokens contribute to input cost and are useful for estimating token budgets ahead of time.
import ipywidgets as widgets
from IPython.display import display

# Create two textarea widgets
contextInput = widgets.Textarea(
    value='',
    placeholder='...',
    description='Context:',
    layout=widgets.Layout(width='100%', height='100px')
)

display(contextInput)

Textarea(value='', description='Context:', layout=Layout(height='100px', width='100%'), placeholder='...')

In [None]:
# @title -
# @markdown 📏 Precalculate Context Token Count
import tiktoken

enc = tiktoken.get_encoding("cl100k_base")
context_token_count = len(enc.encode(contextInput.value))

display(Markdown(f"Context Token Count: {context_token_count}"))

In [29]:
# @title 💬 Prompt
# @markdown Here you can input the actual user message or prompt that the model should respond to. This is what would typically be typed in a chat interface or passed via API.
import ipywidgets as widgets
from IPython.display import display

# Create two textarea widgets
promptInput = widgets.Textarea(
    value='',
    placeholder='...',
    description='Prompt:',
    layout=widgets.Layout(width='100%', height='100px')
)

display(promptInput)

Textarea(value='', description='Prompt:', layout=Layout(height='100px', width='100%'), placeholder='...')

In [None]:
# @title -
# @markdown 📏 Precalculate Prompt Token Count
import tiktoken

enc = tiktoken.get_encoding("cl100k_base")
context_token_count = len(enc.encode(promptInput.value))

display(Markdown(f"Context Token Count: {context_token_count}"))

In [None]:
# @title 💸 Input Costs
# @markdown This section calculates the estimated cost of uncached input tokens — including both the system context and user prompt that are sent to the model. It uses the selected model’s pricing to determine the cost of tokens that require fresh processing (not served from cache). Cached inputs are not included here. This helps you understand the baseline cost of new inputs in each interaction.

import tiktoken

# Get token counts
enc = tiktoken.get_encoding("cl100k_base")
context_tokens = len(enc.encode(contextInput.value))
prompt_tokens = len(enc.encode(promptInput.value))
total_input_tokens = context_tokens + prompt_tokens

# Calculate cost based on selected model
input_cost_per_token = models_list[modelSelection.value]["input"] / 1_000_000  # Convert from per million to per token
total_input_cost = total_input_tokens * input_cost_per_token

# Display results
display(Markdown(f"""
### Input Token Counts & Costs
- Context tokens: {context_tokens:,}
- Prompt tokens: {prompt_tokens:,}
- Total input tokens: {total_input_tokens:,}
- Cost per token: \${input_cost_per_token:.6f}
- **Total input cost: \${total_input_cost:.4f}**
"""))

---

## ⚠️ Warning: Cost-Generating Section

⚠️ Note: Executing the API call in the following section may result in usage charges based on the selected model and token count. Make sure to review the context and prompt content beforehand and be aware of the token pricing. Use the token counters above to estimate costs before triggering execution, especially when working with expensive models like GPT-4.1 or OpenAI o3.

---

In [None]:
# @title 📥 API Response
# @markdown This section displays the actual response returned by the selected OpenAI model when given the context and prompt. It helps you verify whether the model is producing the expected output, both in content and format. In real-world scenarios, this is the main data you'd capture and display to users or pipe into downstream logic (e.g., chat apps, data extractors, summarizers). Reviewing the output here is crucial for validating prompt quality, assessing model suitability, and tuning for better performance or cost.

from openai import OpenAI
import pandas as pd
from IPython.display import display, Code, Markdown

model = modelSelection.value
context = contextInput.value
prompt = promptInput.value

client = OpenAI()

completion = client.chat.completions.create(
    model=model,
    messages=[
        {
            "role": "developer",
            "content": context
        },
        {
            "role": "user",
            "content": prompt
        }

    ]
)

result_content = completion.choices[0].message.content
usage = completion.usage

display(Markdown(result_content))

In [None]:
# @title 📊 Used Tokens
# @markdown This section visualizes token usage, split by type: prompt, completion, cached, etc. It provides a real-time look into how many tokens each part of the interaction consumes, which is key to understanding model behavior and estimating costs.

import pandas as pd

prompt_tokens = usage.prompt_tokens
completion_tokens = usage.completion_tokens
total_tokens = usage.total_tokens

# Extract detailed token usage
completion_details = usage.completion_tokens_details
prompt_details = usage.prompt_tokens_details

accepted_prediction_tokens = completion_details.accepted_prediction_tokens
rejected_prediction_tokens = completion_details.rejected_prediction_tokens
reasoning_tokens = completion_details.reasoning_tokens
completion_audio_tokens = completion_details.audio_tokens

cached_prompt_tokens = prompt_details.cached_tokens
prompt_audio_tokens = prompt_details.audio_tokens

token_data = {
    "Prompt Tokens": prompt_tokens,
    "Completion Tokens": completion_tokens,
    "Total Tokens": total_tokens,
    "Accepted Prediction Tokens": accepted_prediction_tokens,
    "Rejected Prediction Tokens": rejected_prediction_tokens,
    "Reasoning Tokens": reasoning_tokens,
    "Completion Audio Tokens": completion_audio_tokens,
    "Prompt Audio Tokens": prompt_audio_tokens,
    "Cached Prompt Tokens": cached_prompt_tokens
}

df = pd.DataFrame(token_data.items(), columns=["Token Type", "Count"])
display(df)

In [None]:
# @title 💵 Costs
# @markdown Based on token usage and the selected model's pricing, this section shows the total cost of the interaction. It breaks down the cost of uncached input, cached input, and output tokens so you can pinpoint where costs are coming from.

def get_model_token_prices(model_name: str, models_list: dict):
    """
    Returns input, cached input, and output prices per token for a given model using models_list pricing.

    Usage:
        get_model_token_prices("o4-mini", models_list)

    :param model_name: Name of the model (case-insensitive, flexible format)
    :param models_list: Dictionary containing model pricing information
    :return: dict with per-token prices or None if not found
    """
    if model_name not in models_list:
        print(f"Model '{model_name}' not found.")
        return None

    million = 1_000_000
    model_prices = models_list[model_name]

    # Handle case where cached_input might be None
    cached_input_price = model_prices["cached_input"] if model_prices["cached_input"] is not None else 0.0

    per_token_prices = {
        "input_per_token": model_prices["input"] / million,
        "cached_input_per_token": cached_input_price / million,
        "output_per_token": model_prices["output"] / million
    }
    return per_token_prices

# Get prices for selected model
prices = get_model_token_prices(model, models_list)

if prices is None:
    print("Could not calculate costs - model pricing not found")
else:
    model_input_cost = prices['input_per_token']
    model_cached_input_cost = prices['cached_input_per_token']
    model_output_cost = prices['output_per_token']

    # Calculate costs
    # For uncached tokens
    uncached_tokens = prompt_tokens - cached_prompt_tokens
    uncached_input_cost = uncached_tokens * model_input_cost

    # For cached tokens
    cached_input_cost = cached_prompt_tokens * model_cached_input_cost

    # Output tokens cost
    output_cost = completion_tokens * model_output_cost

    # Total cost
    total_cost = uncached_input_cost + cached_input_cost + output_cost

    # Create a DataFrame with the detailed cost breakdown
    cost_df = pd.DataFrame({
        'Cost Type': [
            'Uncached Input Tokens Cost',
            'Cached Input Tokens Cost',
            'Output Tokens Cost',
            'Total Cost'
        ],
        'Tokens': [
            uncached_tokens,
            cached_prompt_tokens,
            completion_tokens,
            total_tokens
        ],
        'Cost (USD)': [
            uncached_input_cost,
            cached_input_cost,
            output_cost,
            total_cost
        ]
    })

    # Format the Cost column to show 4 decimal places
    cost_df['Cost (USD)'] = cost_df['Cost (USD)'].map('${:,.4f}'.format)

    display(cost_df)