In [19]:
import os, json, argparse
from collections import defaultdict


In [None]:
def iter_jsonl_files(folder):
    """Yield the path of every ``.jsonl`` file found under ``folder``.

    The directory tree is walked recursively with ``os.walk``. Sub-directories
    and file names are visited in sorted order, so the sequence of yielded
    paths is the same on every run and on every platform. Nothing is printed:
    this is a generator that may be consumed several times, and per-file
    output would flood the notebook.

    Args:
        folder: Root directory to search. A path that does not exist simply
            yields nothing, because ``os.walk`` ignores listing errors by
            default.

    Yields:
        str: ``os.path.join(root, name)`` for each file whose name ends in
        ``.jsonl``.
    """
    for root, dirs, files in os.walk(folder):
        # Sorting ``dirs`` in place fixes the order in which os.walk descends
        # into sub-directories (top-down traversal honours in-place edits).
        dirs.sort()
        for fn in sorted(files):
            if fn.endswith(".jsonl"):
                yield os.path.join(root, fn)

def sum_costs(log_folder="logs", task=None, model=None, conv_type=None, dataset_fn=None):
    """Add up the ``cost_usd`` values recorded in JSONL conversation logs.

    Every file returned by ``iter_jsonl_files(log_folder)`` is read line by
    line, and each line is parsed as one JSON document. Lines that are not
    valid JSON, or that do not decode to a JSON object, are skipped. For each
    remaining conversation the function sums ``cost_usd`` over the dict
    entries of its ``trace`` list, plus a ``cost_usd`` found at the top level
    of the conversation itself. Only real numbers are counted; booleans,
    strings and ``null`` are ignored.

    Args:
        log_folder: Directory searched for ``.jsonl`` files.
        task: If truthy, keep only conversations whose ``task`` equals it.
        model: If truthy, keep only conversations whose ``assistant_model``
            equals it.
        conv_type: If truthy, keep only conversations whose ``conv_type``
            equals it.
        dataset_fn: If truthy, keep only conversations whose ``dataset_fn``
            equals it.

    Returns:
        tuple: ``(total, breakdown, per_file)`` where

        * ``total`` is the sum of all counted costs as a float,
        * ``breakdown`` is a ``defaultdict(float)`` keyed by
          ``(assistant_model, task, conv_type)``; its values add up to
          ``total`` (apart from float rounding, since the additions are
          grouped differently),
        * ``per_file`` maps a file path to that file's sum, and only lists
          files whose sum is greater than zero.
    """
    total = 0.0
    breakdown = defaultdict(float)
    per_file = {}
    for fn in iter_jsonl_files(log_folder):
        file_sum = 0.0
        with open(fn, "r", encoding="utf-8") as f:
            for line in f:
                try:
                    conv = json.loads(line)
                except json.JSONDecodeError:
                    # Blank or malformed line: skip it, but let unrelated
                    # exceptions (e.g. KeyboardInterrupt) propagate.
                    continue
                if not isinstance(conv, dict):
                    # Valid JSON that is not an object has no fields to read.
                    continue
                if task and conv.get("task") != task:
                    continue
                if model and conv.get("assistant_model") != model:
                    continue
                if conv_type and conv.get("conv_type") != conv_type:
                    continue
                if dataset_fn and conv.get("dataset_fn") != dataset_fn:
                    continue

                # The breakdown key is constant for the whole conversation.
                key = (conv.get("assistant_model"), conv.get("task"), conv.get("conv_type"))

                # ``trace`` may be absent or null; anything but a list is
                # treated as having no entries.
                trace = conv.get("trace")
                entries = trace if isinstance(trace, list) else []

                # Trace-entry costs first, then the (rare) top-level cost.
                candidates = [e.get("cost_usd") for e in entries if isinstance(e, dict)]
                candidates.append(conv.get("cost_usd"))

                for cost in candidates:
                    # bool is a subclass of int, so exclude it explicitly.
                    if isinstance(cost, bool) or not isinstance(cost, (int, float)):
                        continue
                    total += cost
                    file_sum += cost
                    breakdown[key] += cost
        if file_sum > 0:
            per_file[fn] = file_sum

    return total, breakdown, per_file


In [21]:
# Filters handed to sum_costs; a value of None leaves that filter switched off.
task, model, conv_type = "data2text", "gpt-4o-mini", "sharded"
dataset_fn = None

# Directory that is searched recursively for .jsonl log files.
log_folder = "logs/data2text"


In [22]:
# Aggregate the costs of the logs selected by the configuration variables.
total, breakdown, per_file = sum_costs(log_folder, task, model, conv_type, dataset_fn)
print(f"Total USD across logs: ${total:.6f}")

# Most expensive (assistant_model, task, conv_type) combinations first.
print("\nBreakdown (assistant_model, task, conv_type) -> USD")
breakdown_desc = sorted(breakdown.items(), key=lambda pair: -pair[1])
for combo, usd in breakdown_desc:
    print(f"{combo} -> ${usd:.6f}")

# Files are ranked by cost; only the first 50 are listed.
print("\nPer-file totals (non-zero):")
files_desc = sorted(per_file.items(), key=lambda pair: -pair[1])
for path, usd in files_desc[:50]:
    print(f"{path}: ${usd:.6f}")

[]
Total USD across logs: $0.000000

Breakdown (assistant_model, task, conv_type) -> USD

Per-file totals (non-zero):
