In [None]:
import os
from inspect_ai.log import read_eval_log
import pandas as pd

In [None]:
# Load the per-model price table used by the cost report further down.
# That report looks rows up by the "model_name" column and reads the
# "input_price" / "output_price" columns, which it treats as prices per
# 1,000,000 tokens.
pricing_df = pd.read_csv("../data/models_pricing.csv")

In [None]:
# Show the loaded pricing table as the notebook cell output.
pricing_df

In [None]:
import os
from termcolor import colored, cprint

# Cost report: walk ../outputs/eval_*/<model>/, read one Inspect ".eval" log
# per model folder, and print the token usage and estimated cost of every
# model recorded in that log, followed by the folder's total.
# Relies on `pricing_df` (columns: model_name, input_price, output_price;
# prices are applied per 1,000,000 tokens) being loaded beforehand.
root_dir = "../outputs"

for eval_dir in os.listdir(root_dir):
    eval_path = os.path.join(root_dir, eval_dir)
    # Only "eval_*" directories are evaluation runs; skip everything else,
    # including stray files that merely share the prefix.
    if not eval_dir.startswith("eval_") or not os.path.isdir(eval_path):
        continue

    for model_dir in os.listdir(eval_path):
        model_path = os.path.join(eval_path, model_dir)
        # A plain file here (e.g. .DS_Store) would make os.listdir raise.
        if not os.path.isdir(model_path):
            continue

        # HEADER: bold cyan to distinguish sections
        print("-" * 60)
        cprint(f"Evaluating {model_dir}", "cyan", attrs=["bold"])

        # os.listdir order is unspecified, so sort to make the choice
        # deterministic when a folder holds several logs.
        # NOTE(review): Inspect log names appear to start with a timestamp,
        # so the last sorted name is presumably the most recent run - confirm.
        eval_logs = sorted(
            name for name in os.listdir(model_path) if name.endswith(".eval")
        )
        if not eval_logs:
            cprint(f"  Warning: No .eval log found in {model_path}", "red")
            continue
        log_file = os.path.join(model_path, eval_logs[-1])

        # Read the inspect.ai evaluation log
        log = read_eval_log(log_file)

        total_cost = 0.0

        for model_name, usage in log.stats.model_usage.items():
            # Drop only the leading "<provider>/" segment. Splitting once
            # keeps later segments intact even if they repeat the provider
            # name; names without a slash are left unchanged.
            cleaned_model_name = model_name.split("/", 1)[-1]

            # Single lookup of the pricing row for this model.
            price_rows = pricing_df.loc[pricing_df["model_name"] == cleaned_model_name]
            if price_rows.empty:
                # Models without a price are reported and left out of the total.
                cprint(f"  Warning: No pricing found for {cleaned_model_name}", "red")
                continue
            input_cost_per_1m = price_rows["input_price"].iloc[0]
            output_cost_per_1m = price_rows["output_price"].iloc[0]

            in_tks = usage.input_tokens
            out_tks = usage.output_tokens
            reas_tks = usage.reasoning_tokens or 0  # None means "not reported"

            # For gpt-5 models the reasoning tokens are removed from the
            # output count so that the sum in the cost formula below does not
            # bill them twice - presumably because their reported output
            # count already includes reasoning tokens.
            # NOTE(review): verify how other providers report these counts.
            if "gpt-5" in cleaned_model_name:
                out_tks = out_tks - reas_tks

            # Calculate cost: reasoning tokens are billed at the output price.
            input_cost = in_tks / 1_000_000 * input_cost_per_1m
            output_cost = (out_tks + reas_tks) / 1_000_000 * output_cost_per_1m
            cost = input_cost + output_cost

            total_cost += cost

            # ROW ITEM: Model in Yellow, Cost in Green
            model_label = colored(model_name, "yellow")
            cost_label = colored(f"${cost:.4f}", "green", attrs=["bold"])
            print(f"  - {model_label} -> Cost: {cost_label}")

            # DETAILS: Dimmed/Grey for high-volume data
            details = (f"    (In: {in_tks:,}, Out: {out_tks:,}, "
                       f"Reasoning: {reas_tks:,})")
            cprint(details, "light_grey", attrs=["italic"])

        # SUMMARY: Bold Green for final tally
        print("")
        print("  Total: " + colored(f"${total_cost:.4f}", "green", attrs=["bold"]))
        print("")