In [2]:
# If you're in Colab, this will install tiktoken once.
!pip install -q tiktoken

import json
from textwrap import dedent
import tiktoken

# -------- Sample JSON (same as in the blog) --------
# Kept as raw text rather than a dict: this exact string (including the
# alignment padding inside each object) is what gets token-counted as the
# JSON payload. `.strip()` removes the newlines that the triple-quoted
# literal adds at both ends.
sample_json = dedent("""
{
  "products": [
    { "id": 1, "name": "Laptop",  "price": 3999.90 },
    { "id": 2, "name": "Mouse",   "price": 149.90  },
    { "id": 3, "name": "Headset", "price": 499.00  }
  ]
}
""").strip()

# -------- Format converters: JSON -> TOON / VSC --------
def json_to_toon(json_str, key="products", fields=("id", "name", "price")):
    """
    Convert a JSON object holding one array of flat records to TOON text.

    Input:
      {
        "products": [
          {"id": 1, "name": "Laptop", "price": 3999.90},
          ...
        ]
      }

    Output (numbers are rendered from the parsed Python values, so the
    trailing zero of 3999.90 is not preserved):

      products[3]{id,name,price}:
      1,Laptop,3999.9
      2,Mouse,149.9
      3,Headset,499.0

    Args:
        json_str: JSON text whose top level is an object containing `key`.
        key: Name of the top-level array to convert; it is also used as
            the label in the header line.
        fields: Record keys to emit, in column order.

    Returns:
        The header line followed by one comma-joined row per record,
        separated by newlines (no trailing newline).

    Raises:
        json.JSONDecodeError: If `json_str` is not valid JSON.
        KeyError: If `key`, or one of `fields` in any record, is missing.

    Values are written as-is: nothing is quoted or escaped, so a value
    that itself contains a comma or a newline produces an ambiguous row.
    """
    records = json.loads(json_str)[key]

    # Doubled braces are literal "{" / "}" around the column list.
    header = f"{key}[{len(records)}]{{{','.join(fields)}}}:"

    rows = [",".join(str(record[name]) for name in fields) for record in records]
    return "\n".join([header, *rows])


def json_to_vsc(json_str, key="products", fields=("name", "price")):
    """
    Convert a JSON object holding one array of flat records to plain VSC:
    one comma-joined line per record, with no header.

    For the sample products this yields (numbers are rendered from the
    parsed Python values, so trailing zeros such as 3999.90 are lost):

      Laptop,3999.9
      Mouse,149.9
      Headset,499.0

    The default `fields` intentionally drop `id` to keep it minimal.

    Args:
        json_str: JSON text whose top level is an object containing `key`.
        key: Name of the top-level array to convert.
        fields: Record keys to emit, in column order.

    Returns:
        The rows joined by newlines; an empty string when the array is empty.

    Raises:
        json.JSONDecodeError: If `json_str` is not valid JSON.
        KeyError: If `key`, or one of `fields` in any record, is missing.

    Values are written as-is: nothing is quoted or escaped, so a value
    that itself contains a comma or a newline produces an ambiguous row.
    """
    records = json.loads(json_str)[key]
    return "\n".join(
        ",".join(str(record[name]) for name in fields) for record in records
    )


# -------- Build all three payloads --------
# The JSON payload is the sample text itself; the other two are derived from it.
json_text = sample_json
toon_text = json_to_toon(sample_json)
vsc_text = json_to_vsc(sample_json)

# Echo each payload under its banner. Passing "\n" as a second argument to
# print() emits a space and a newline after the payload, then print's own
# newline, which leaves a blank line between sections.
for banner, payload in (
    ("=== JSON ===", json_text),
    ("=== TOON ===", toon_text),
    ("=== VSC ===", vsc_text),
):
    print(banner)
    print(payload, "\n")

# -------- Token counting with tiktoken --------
# Prefer the encoding tiktoken associates with the "gpt-4o" model name.
# If that lookup raises KeyError (presumably an installed tiktoken that does
# not know this model name -- confirm against the tiktoken docs), fall back
# to the cl100k_base encoding instead.
# NOTE(review): the fallback is a different encoding, so token counts taken
# on that path may differ from counts taken with the gpt-4o encoding.
try:
    encoding = tiktoken.encoding_for_model("gpt-4o")
except KeyError:
    encoding = tiktoken.get_encoding("cl100k_base")

def count_tokens(text: str) -> int:
    """Return how many tokens the module-level `encoding` produces for *text*."""
    token_ids = encoding.encode(text)
    return len(token_ids)

# Count each payload once, in JSON / TOON / VSC order.
json_tokens, toon_tokens, vsc_tokens = (
    count_tokens(payload) for payload in (json_text, toon_text, vsc_text)
)

# -------- Print comparison table --------
# (format name, token count) pairs; reused by the cost estimate further down.
rows = list(zip(("JSON", "TOON", "VSC"), (json_tokens, toon_tokens, vsc_tokens)))

# The first column is sized from the format names only.
# NOTE(review): the "Format" header is longer than every name in `rows`, so
# the header line comes out wider than the data lines beneath it.
max_name_len = len(max((label for label, _ in rows), key=len))
print("\nToken usage comparison (using", encoding.name, "):\n")
print(f"{'Format':<{max_name_len}} | Tokens | % vs JSON")
print("-" * (max_name_len + 22))

for name, tokens in rows:
    # JSON is the baseline; every other format is shown as a share of it.
    rel = "100%" if name == "JSON" else f"{tokens / json_tokens * 100:5.1f}%"
    print(f"{name:<{max_name_len}} | {tokens:>6} | {rel}")

# -------- Optional: rough cost estimate --------
price_per_million = 1.00  # example rate: $1 per 1M input tokens


def cost_for(tokens: int, calls: int = 10_000) -> float:
    """Return the cost of sending *tokens* input tokens on each of *calls* calls.

    The total token volume is expressed in millions and multiplied by the
    module-level `price_per_million` rate.
    """
    volume_in_millions = tokens * calls / 1_000_000
    return volume_in_millions * price_per_million

# One bullet per format, priced with cost_for's default call count.
# NOTE(review): the call count and rate in this heading are literal text;
# they are not read from cost_for's default or from price_per_million.
print("\nApproximate cost for 10,000 calls at $1 / 1M input tokens:")
for label, n_tokens in rows:
    print(f"- {label}: ${cost_for(n_tokens):.4f}")


=== JSON ===
{
  "products": [
    { "id": 1, "name": "Laptop",  "price": 3999.90 },
    { "id": 2, "name": "Mouse",   "price": 149.90  },
    { "id": 3, "name": "Headset", "price": 499.00  }
  ]
} 

=== TOON ===
products[3]{id,name,price}:
1,Laptop,3999.9
2,Mouse,149.9
3,Headset,499.0 

=== VSC ===
Laptop,3999.9
Mouse,149.9
Headset,499.0 


Token usage comparison (using o200k_base ):

Format | Tokens | % vs JSON
--------------------------
JSON |     81 | 100%
TOON |     35 |  43.2%
VSC  |     19 |  23.5%

Approximate cost for 10,000 calls at $1 / 1M input tokens:
- JSON: $0.8100
- TOON: $0.3500
- VSC: $0.1900
