In [7]:
from utils.token_count import num_tokens_from_messages
import re
import os

# Matches one turn of an exported conversation: a bold `**Speaker**` header
# (no whitespace inside), three newlines, the message body (captured lazily),
# and a closing dashed separator line.
REGEX_PATTERN = r"(\*\*\S+\*\*)\n\n\n([\s\S]+?)\n------------------\n"

# System message placed in front of every prompt when pricing a call.
SYSTEM_PROMPT = {
    "role": "system",
    "content": (
        "You are ChatGPT, a large language model trained by OpenAI, "
        "based on the GPT-3.5 architecture. "
        "Knowledge cutoff: 2021-09 Current date: 2023-04-27"
    ),
}

# Per-model prices; evaluate_cost divides token counts by 1000 before applying
# them, so each figure is a price per 1,000 tokens.
COST_DICT = {
    "gpt-3.5-turbo": {"prompting": 0.002, "completion": 0.002},
    "gpt-4": {"prompting": 0.03, "completion": 0.06},
}

In [8]:
def get_messages_json(messages):
    """Parse an exported conversation into a list of chat-message dicts.

    Args:
        messages: Full text of one conversation export. Every turn must match
            REGEX_PATTERN (bold speaker header, body, dashed separator);
            text that does not match is ignored.

    Returns:
        A list of ``{"role": ..., "content": ...}`` dicts in document order.
        A turn whose header contains "you" (case-insensitive) gets the role
        "user"; every other turn gets the role "assistant".
    """
    return [
        {
            "role": "user" if "you" in speaker.lower() else "assistant",
            "content": body,
        }
        for speaker, body in re.findall(REGEX_PATTERN, messages)
    ]

def get_prompting_session(message_json):
    """Rebuild the sequence of API calls implied by a conversation.

    Messages are consumed in pairs: the message at each even index is treated
    as the newest prompt and the message right after it as the completion for
    that call. Each call's prompt is SYSTEM_PROMPT followed by the whole
    conversation up to and including that even-indexed message.

    Args:
        message_json: List of message dicts, as returned by get_messages_json.

    Returns:
        A list of ``{"prompt": [...], "completion": {...}}`` dicts, one per
        pair of messages.

    Raises:
        IndexError: If ``message_json`` has an odd number of entries, because
            the last message has no following message to use as completion.
    """
    calls = []
    for prompt_idx in range(0, len(message_json), 2):
        reply_idx = prompt_idx + 1
        history = [SYSTEM_PROMPT] + message_json[:reply_idx]
        calls.append({"prompt": history, "completion": message_json[reply_idx]})
    return calls

def evaluate_cost(prompting_session, model="gpt-4"):
    """Price a reconstructed session using the rates in COST_DICT.

    Args:
        prompting_session: List of ``{"prompt": [...], "completion": {...}}``
            dicts, as returned by get_prompting_session.
        model: Key into COST_DICT selecting the price pair to apply; it is
            also forwarded to num_tokens_from_messages.

    Returns:
        The summed cost of all calls; ``0`` for an empty session.

    Raises:
        KeyError: If ``model`` has no entry in COST_DICT.
    """
    rates = COST_DICT[model]
    prompt_rate, completion_rate = rates["prompting"], rates["completion"]

    running_total = 0
    for call in prompting_session:
        n_prompt = num_tokens_from_messages(call["prompt"], model=model)
        # The completion is a single message; wrap it in a list because the
        # counter is called with a list of messages.
        n_completion = num_tokens_from_messages([call["completion"]], model=model)
        prompt_part = prompt_rate * n_prompt / 1000
        completion_part = completion_rate * n_completion / 1000
        running_total += prompt_part + completion_part
    return running_total

In [9]:
# Estimate what every exported conversation under data/ would have cost if it
# had been run through the API, then compare the total against $20.
total_cost = 0

# os.listdir returns entries in arbitrary order; sorting keeps the processing
# order (and therefore the floating-point sum) reproducible between runs.
for filename in sorted(os.listdir("data")):
    path = os.path.join("data", filename)
    if not os.path.isfile(path):
        # Directories such as .ipynb_checkpoints cannot be opened as files.
        continue

    # Files with "gpt3" in their name are priced as gpt-3.5-turbo, all others as gpt-4.
    if "gpt3" in filename:
        model = "gpt-3.5-turbo"
    else:
        model = "gpt-4"

    # Read as UTF-8 explicitly so the token counts do not depend on the
    # platform's default encoding.
    with open(path, "r", encoding="utf-8") as f:
        raw_text = f.read()
    messages = get_messages_json(raw_text)

    try:
        prompting_session = get_prompting_session(messages)
    except IndexError:
        # Raised when the file parses to an odd number of messages. Skip the
        # file: falling through would price the previous file's session a
        # second time (or hit a NameError if this is the first file).
        print(f"Skipping {filename}: odd number of parsed messages, cannot pair prompts with completions")
        continue

    cost = evaluate_cost(prompting_session, model=model)
    total_cost += cost

print(f"Total cost of all sessions using API: ${total_cost:.2f}")
if total_cost > 20:
    print("Since the cost is greater than $20, you should use ChatGPT instead of the API")
else:
    print("Since the cost is less than $20, you should use the API instead of ChatGPT")

5434ecd6-b0ff-4c3d-b993-10042d65a368.md 0.38031
03534e08-2dc9-48fc-9607-6a2dbb4258fd.md 0.08019
3ca942b1-a32f-407a-8794-f5e228435590.md 2.8079400000000003
12593e1e-714f-47da-b372-76f9f57b6498.md 2.761199999999999
6ece9ae0-2d8f-4779-a7d5-9f6d0097e65b.md 0.19377
2b1ace56-1b32-4709-84eb-7dcf8883a502.md 3.2576999999999994
c78f5a5b-6acd-4bfe-a4cf-1e5952f502f1.md 1.4603699999999997
d03766d1-eeee-4d54-8cd5-8d61b8ffbd88.md 0.24348
075d6e24-816e-4e12-9efb-f824ba381b4d.md 7.292130000000001
8c38a2fe-d4ad-4c01-9355-d0727441ac91.md 0.03885
d09b67b8-7d80-4dfd-8830-12c51c01f6c9.md 0.28343999999999997
gpt3-74569f78-8bd4-4f64-90a6-2b85063b72be.md 0.44201599999999996
fa34130c-f775-43fb-b927-91209b5cb314.md 1.82484
608a429e-b503-484e-8fbd-fdc2bab9ecaf.md 1.2572699999999999
22.32351 USD
