In [1]:
import tiktoken

def calculate_openai_cost(
    prompt_tokens=0,
    completion_tokens=0,
    image_count=0,
    resolution="low",  # "low", "high", "auto"
    model="gpt-4-vision-preview",
):
    """Estimate the USD cost of one GPT-4 vision request.

    The estimate is the sum of three parts: prompt tokens and completion
    tokens, each billed per 1,000 tokens, plus a flat fee per image that is
    selected by ``resolution``.

    Args:
        prompt_tokens: Number of prompt tokens. Must not be negative.
        completion_tokens: Number of completion tokens. Must not be negative.
        image_count: Number of images processed. Must not be negative.
        resolution: Resolution of the images ("low", "high", "auto").
        model: The model being used (defaults to gpt-4-vision-preview).

    Returns:
        The estimated cost in USD as a float.
        Returns None (after printing a notice) if the model is not
        "gpt-4-vision-preview".

    Raises:
        ValueError: If ``resolution`` is not "low", "high" or "auto", or if
            any of the three counts is negative.
    """

    # Only one model is priced here; anything else is reported and skipped
    # rather than raising, so callers must check for None.
    if model != "gpt-4-vision-preview":
        print(f"Model {model} is not currently supported for cost calculation.")
        return None

    # NOTE(review): these rates are hard-coded and the per-image fee is a flat
    # amount per resolution -- confirm both against OpenAI's published pricing
    # before relying on the result.
    prompt_price_per_1k = 0.03
    completion_price_per_1k = 0.06

    image_prices = {
        "low": 0.00085,  # Per image
        "high": 0.0085,  # Per image
        "auto": 0.00255,  # Per image
    }

    if resolution not in image_prices:
        raise ValueError("Invalid resolution. Choose 'low', 'high', or 'auto'.")

    # A negative count would silently yield a negative (or partially cancelled)
    # cost, so reject it up front instead of returning a misleading number.
    for label, amount in (
        ("prompt_tokens", prompt_tokens),
        ("completion_tokens", completion_tokens),
        ("image_count", image_count),
    ):
        if amount < 0:
            raise ValueError(f"{label} must be non-negative, got {amount!r}.")

    prompt_cost = (prompt_tokens / 1000) * prompt_price_per_1k
    completion_cost = (completion_tokens / 1000) * completion_price_per_1k
    image_cost = image_count * image_prices[resolution]

    total_cost = prompt_cost + completion_cost + image_cost
    return total_cost


def count_tokens(text, model="gpt-4"):
    """Return the number of tokens ``text`` encodes to under ``model``.

    The tokenizer is looked up with ``tiktoken.encoding_for_model``. If that
    lookup raises ``KeyError``, a warning is printed and the "cl100k_base"
    encoding is used instead.

    Args:
        text: The string to tokenize.
        model: Model name used to select the encoding (defaults to "gpt-4").

    Returns:
        The length of the encoded token sequence.
    """
    try:
        tokenizer = tiktoken.encoding_for_model(model)
    except KeyError:
        # Unknown model name: fall back to a fixed encoding rather than fail.
        print("Warning: model not found. Using cl100k_base encoding.")
        tokenizer = tiktoken.get_encoding("cl100k_base")

    return len(tokenizer.encode(text))


In [2]:

# Worked example: estimate the cost of a single high-resolution image request.
prompt_text = "Describe this image in detail: "
image_url = "https://example.com/image.jpg"  # placeholder; not used by the estimate below

# Each token count is echoed right after it is computed.
instruction_tokens = count_tokens(prompt_text)
print(f"Instruction Tokens: {instruction_tokens}")

completion_text = "The image shows a cat sitting on a mat."
completion_tokens = count_tokens(completion_text)
print(f"Completion Tokens: {completion_tokens}")

cost = calculate_openai_cost(
    instruction_tokens,
    completion_tokens,
    image_count=1,
    resolution="high",
)

# The estimator returns None for a model it cannot price, so only print a real number.
if cost is not None:
    print(f"Estimated cost: ${cost:.6f}")

Instruction Tokens: 7
Completion Tokens: 10


In [3]:


# Re-price the same single-image request at the "auto" resolution setting.
cost_auto = calculate_openai_cost(
    instruction_tokens,
    completion_tokens,
    image_count=1,
    resolution="auto",
)
if cost_auto is not None:
    print(f"Estimated cost (auto resolution): ${cost_auto:.6f}")

# ...and again at "low" resolution.
cost_low = calculate_openai_cost(
    instruction_tokens,
    completion_tokens,
    image_count=1,
    resolution="low",
)
if cost_low is not None:
    print(f"Estimated cost (low resolution): ${cost_low:.6f}")

# Larger scenario: three images, a longer instruction, and an assumed
# 500-token completion (an illustrative figure, not a measured one).
long_prompt = "Describe these 3 images in detail, focusing on the differences between them:\n"
long_prompt_tokens = count_tokens(long_prompt)

cost_multiple = calculate_openai_cost(
    long_prompt_tokens,
    500,
    image_count=3,
    resolution="auto",
)
if cost_multiple is not None:
    print(f"Estimated cost (multiple images): ${cost_multiple:.6f}")

Estimated cost: $0.009310
Estimated cost (auto resolution): $0.003360
Estimated cost (low resolution): $0.001660
Estimated cost (multiple images): $0.038100
