# OpenAI API Call Estimation

Import packages.

In [10]:
import json
import pandas as pd
import tiktoken

from pathlib import Path
from tqdm.auto import tqdm

Read GPT-4's log file to count the number of successful API calls.

In [11]:
data_dir = "data_multimedia"
model = "gpt-4"
log_path = Path(f"../{data_dir}/predictions/{model}.log")

kwargs = {
    "desc": "Counting API calls",
    "unit": "line",
}

num_api_calls = 0
with open(log_path, "r") as f:
    for line in tqdm(f, **kwargs):
        if "Completed:" in line:
            num_api_calls = int(line.strip().split("Completed:")[1].split()[0])

print(f"Number of successfull API calls: {num_api_calls}")

Counting API calls: 0line [00:00, ?line/s]

Number of successfull API calls: 55


Load all of GPT-4's predictions.

In [12]:
predictions_path = Path(f"../{data_dir}/predictions/{model}.json")

kwargs = {
    "desc": "Loading predictions",
    "unit": "line",
    "total": num_api_calls,
}

predictions = []
with open(predictions_path, "r") as f:
    for line in f:
        predictions.append(json.loads(line.strip()))

df = pd.DataFrame(predictions)
print(f"Loaded {len(df)} predictions")
df.head()

Loaded 55 predictions


Unnamed: 0,id,user_request,result
0,25850214,"I have some text containing multiple URLs, and...",{'task_steps': ['Step 1: Extract all URLs from...
1,58722823,I need to speed up the video 'example.mp4' to ...,{'task_steps': ['Step 1: Adjust the playback s...
2,60128636,I have a video 'example.mp4' and a Spanish scr...,{'task_steps': ['Step 1: Translate the Spanish...
3,70835252,Please download the audio file from the follow...,{'task_steps': ['Step 1: Download the audio fi...
4,32414505,I have a Spanish text 'Hola mundo' and I want ...,{'task_steps': ['Step 1: Translate the Spanish...


View a single prediction from the predictions file.

In [13]:
predictions[0]

{'id': '25850214',
 'user_request': "I have some text containing multiple URLs, and I want to extract all of them in a list. Here is the text: 'Check out these cool websites: https://www.example1.com, http://www.example2.net and https://www.example3.org.'",
 'result': {'task_steps': ['Step 1: Extract all URLs from the provided text.'],
  'task_nodes': [{'task': 'URL Extractor',
    'arguments': ['Check out these cool websites: https://www.example1.com, http://www.example2.net and https://www.example3.org.']}]}}

Count the number of input tokens (`user_request`) and output tokens (`result`) across all predictions.

In [14]:
enc = tiktoken.encoding_for_model(model)

kwargs = {
    "desc": "Counting input and output tokens",
    "unit": "prediction",
    "total": len(df),
}

num_input_tokens, num_output_tokens = 0, 0
for _, row in tqdm(df.iterrows(), **kwargs):
    num_input_tokens += len(enc.encode(row["user_request"]))

    # Serialize the dict into a string for counting
    output_string = json.dumps(row["result"], ensure_ascii=False)
    num_output_tokens += len(enc.encode(output_string))

print(f"Number of input tokens across {len(df)} predictions: {num_input_tokens}")
print(f"Number of output tokens across {len(df)} predictions: {num_output_tokens}")
print(
    f"Total number of tokens across {len(df)} predictions: {num_input_tokens + num_output_tokens}"
)

Counting input and output tokens:   0%|          | 0/55 [00:00<?, ?prediction/s]

Number of input tokens across 55 predictions: 2269
Number of output tokens across 55 predictions: 6186
Total number of tokens across 55 predictions: 8455


Calculate the average number of input and output tokens per prediction.

In [15]:
# Average number of input tokens per prediction
average_input_tokens = num_input_tokens / len(df)

# Average number of output tokens per prediction
average_output_tokens = num_output_tokens / len(df)

print(f"Average input tokens per prediction: {int(average_input_tokens)}")
print(f"Average output tokens per prediction: {int(average_output_tokens)}")

Average input tokens per prediction: 41
Average output tokens per prediction: 112


Estimate the cost for using GPT-4 on all of the generated predictions. 
- [GPT-4](https://platform.openai.com/docs/models/gpt-4)'s price per 1M tokens
- [GPT-OSS-120B](https://www.together.ai/pricing)'s price per 1M tokens

In [None]:
# per_input_token_cost = 30 / 1_000_000  # $30 per 1M input tokens
# per_output_token_cost = 60 / 1_000_000  # $60 per 1M output tokens

per_input_token_cost = 0.15 / 1e7  # $0.15 per 1M input tokens
per_output_token_cost = 0.60 / 1e7  # $0.60 per 1M output tokens

input_token_cost = num_input_tokens * per_input_token_cost
output_token_cost = num_output_tokens * per_output_token_cost
total_cost = input_token_cost + output_token_cost

print(
    f"Estimated input token cost for {num_api_calls} predictions: "
    f"${input_token_cost:.4f}"
)
print(
    f"Estimated output token cost for {num_api_calls} predictions: "
    f"${output_token_cost:.4f}"
)
print(f"Estimated total cost for {num_api_calls} predictions: ${total_cost:.4f}")

Estimated input token cost for 55 predictions: $0.0000
Estimated output token cost for 55 predictions: $0.0004
Estimated total cost for 55 predictions: $0.0004


Estimate the average cost of using GPT-4 on a single prediction.

In [21]:
# Estimated input token cost for one prediction
average_input_token_cost = average_input_tokens * per_input_token_cost

# Estimated output token cost for one prediction
average_output_token_cost = average_output_tokens * per_output_token_cost

# Estimated total cost for one prediction
average_total_cost = average_input_token_cost + average_output_token_cost

print(f"Estimated input token cost for one prediction: ${average_input_token_cost:3e}")
print(
    f"Estimated output token cost for one prediction: ${average_output_token_cost:3e}"
)
print(f"Estimated total cost for one prediction: ${average_total_cost:3e}")

Estimated input token cost for one prediction: $6.188182e-07
Estimated output token cost for one prediction: $6.748364e-06
Estimated total cost for one prediction: $7.367182e-06


Estimate the total cost of using GPT-4 on all samples across all three TaskBench datasets.

In [18]:
num_samples_map = {
    "dailylifeapis": 4318,
    "huggingface": 7458,
    "multimedia": 5555,
}

# Number of samples is the same as number of predictions
total_samples = sum(num_samples_map.values())
print(f"Total number of samples across all TaskBench datasets: {total_samples:,}")

total_input_token_cost = total_samples * average_input_token_cost
total_output_token_cost = total_samples * average_output_token_cost
total_cost = total_input_token_cost + total_output_token_cost

print(
    f"Estimated input token cost for {total_samples:,} predictions: "
    f"${total_input_token_cost:.2f}"
)
print(
    f"Estimated output token cost for {total_samples:,} predictions: "
    f"${total_output_token_cost:.2f}"
)
print(f"Estimated total cost for {total_samples:,} predictions: ${total_cost:.2f}")

Total number of samples across all TaskBench datasets: 17,331
Estimated input token cost for 17,331 predictions: $0.01
Estimated output token cost for 17,331 predictions: $0.12
Estimated total cost for 17,331 predictions: $0.13
