# Tests for Cost Estimation Utilities

_Last Update:_ 2025.11.08


## Environment Setup


In [36]:
import os
import sys
from pathlib import Path
import pandas as pd
import warnings
warnings.filterwarnings("ignore")

# PROJECT ROOT to Python paths
ROOT_DIR = Path().resolve().parents[0]
if str(ROOT_DIR) not in sys.path:
    sys.path.insert(0, str(ROOT_DIR))
# RELOAD the custom script
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [None]:
from dotenv import load_dotenv

# Load environment variables from the local secrets .env file.
SECRETS_FILE = Path(r"C:\LocalSecrets\master.env")
# load_dotenv returns False when it found nothing to set (for example when the
# file does not exist), so surface that instead of continuing silently.
if not load_dotenv(str(SECRETS_FILE)):
    print(f"NOTE: no environment variables were found in {SECRETS_FILE}")

# ASSIGN the values of the environment variables.
# os.getenv returns None for an unset variable, and str(None) is the truthy
# literal "None", which cannot be told apart from a real key. Defaulting to ""
# keeps every key a str while making a missing key falsy and easy to detect.
openai_key = os.getenv("OPENAI_UIO24EMC_KEY", "")
pplx_key = os.getenv("PERPLEXITY_UIO24EMC_KEY", "")
claude_key = os.getenv("ANTHROPIC_API_KEY", "")

# Report missing variables by NAME only; the key values are never printed.
_missing_keys = [
    name
    for name, value in (
        ("OPENAI_UIO24EMC_KEY", openai_key),
        ("PERPLEXITY_UIO24EMC_KEY", pplx_key),
        ("ANTHROPIC_API_KEY", claude_key),
    )
    if not value
]
if _missing_keys:
    print("NOTE: unset API key variable(s): " + ", ".join(_missing_keys))

## Estimation Utilities
- `estimate_costs_for_models()`: Calculate token usage and costs for DataFrame inputs
- `create_cost_matrix()`: Format cost estimates into styled comparison tables
- `display_gpt_models_df()`: Display OpenAI model pricing and token limits

In [35]:
# 01 Basic view of model pricing: cost in $ per 1 million input and output tokens
from gptquery.estimation.cost_estimator import display_gpt_models_df
display_gpt_models_df()

Unnamed: 0,Model,Max Input Tokens,Max Output Tokens,Input Cost ($/1M),Output Cost ($/1M)
20,gpt-3.5-turbo,16385.0,4096.0,$1.500,$2.000
24,gpt-3.5-turbo-0125,16385.0,4096.0,$0.500,$1.500
21,gpt-3.5-turbo-0301,4097.0,4096.0,$1.500,$2.000
22,gpt-3.5-turbo-0613,4097.0,4096.0,$1.500,$2.000
23,gpt-3.5-turbo-1106,16385.0,4096.0,$1.000,$2.000
...,...,...,...,...,...
57,gpt-5-mini,400000.0,128000.0,$0.250,$2.000
62,gpt-5-mini-2025-08-07,400000.0,128000.0,$0.250,$2.000
58,gpt-5-nano,400000.0,128000.0,$0.050,$0.400
63,gpt-5-nano-2025-08-07,400000.0,128000.0,$0.050,$0.400


In [None]:
# IMPORT functions
from gptquery.estimation.cost_estimator import estimate_costs_for_models
from gptquery.tools.tool_eulaw_citations.validate_citations.prompts.default import (
    VALIDATION_SYSTEM_MESSAGE,
    prompt_validate_completeness,
)

# LOAD the reliability CSV
reliability_csv = ROOT_DIR / "data" / "uio24hlp" / "realiability_data_refs.csv"
df_reliab = pd.read_csv(reliability_csv)

# SELECT the columns of interest, drop duplicate rows, and RENAME columns so
# that they match the prompt function's @required_columns() decorator.
selected_columns = [
    "uoa_referral_id",
    "questions_text",
    "std_potential_citations",
    "nlang_potential_citations",
]
prompt_column_names = {
    "questions_text": "question_text",
    "nlang_potential_citations": "potential_citations",
}
df_ref_lvl = (
    df_reliab[selected_columns]
    .drop_duplicates()
    .reset_index(drop=True)
    .rename(columns=prompt_column_names)  # type: ignore
)

# MODELS to test
MODELS = ["gpt-4.1-mini", "gpt-5", "gpt-5-mini", "gpt-5-nano"]

# COST estimation
costs = estimate_costs_for_models(
    df_ref_lvl,
    prompt_validate_completeness,
    models=MODELS,
    system_msg=VALIDATION_SYSTEM_MESSAGE,
    expected_response_length="completed",
)


In [34]:
# 03 Cost matrix: tabulate the per-model estimates held in `costs`
from gptquery.estimation.cost_estimator import create_cost_matrix
create_cost_matrix(costs)

Unnamed: 0,Model,Total Cost ($),Cost Per Row ($),Total Tokens,Input Cost ($),Output Cost ($)
0,gpt-5-nano,$0.3699,$0.0001,7358179,$0.3679,$0.0020
1,gpt-5-mini,$1.8496,$0.0004,7358179,$1.8395,$0.0100
2,gpt-4.1-mini,$2.9513,$0.0006,7358179,$2.9433,$0.0080
3,gpt-5,$9.2480,$0.0018,7358179,$9.1977,$0.0502
