# Asahi - Cost/Latency/Quality Analysis

This notebook analyzes the results of the Asahi inference optimizer benchmark.

In [None]:
import json
import os
import sys
sys.path.insert(0, os.path.abspath('..'))

# Load results
data_dir = os.path.join('..', 'data')

def load_json(filename):
    """Read and decode a JSON file located under ``data_dir``.

    Args:
        filename: File name (or path) joined onto ``data_dir``.

    Returns:
        The decoded JSON value, or ``None`` when nothing exists at the
        resulting path.

    Raises:
        json.JSONDecodeError: If the file exists but is not valid JSON.
    """
    path = os.path.join(data_dir, filename)
    if os.path.exists(path):
        # Decode explicitly as UTF-8 instead of relying on the platform's
        # default text encoding, which is not UTF-8 on every system.
        with open(path, encoding='utf-8') as f:
            return json.load(f)
    return None

# Read both benchmark result files (baseline first, then optimized).
baseline, optimized = (
    load_json(name)
    for name in ('baseline_results.json', 'optimized_results.json')
)

# Either result being falsy (e.g. None from load_json) means the
# benchmark still has to be run; tell the user how.
if not (baseline and optimized):
    print('Run: python main.py benchmark --mock  (from project root)')
else:
    print('Results loaded successfully')

: 

In [None]:
# Cost Comparison
if baseline and optimized:
    b_cost = baseline['total_cost']
    o_cost = optimized['total_cost']

    # Relative savings against the baseline; reported as 0 when the
    # baseline cost is not positive, which also avoids dividing by zero.
    if b_cost > 0:
        savings_pct = (b_cost - o_cost) / b_cost * 100
    else:
        savings_pct = 0

    # One print call, one report line per argument.
    print(
        f"Baseline (All GPT-4): ${b_cost:.4f}",
        f"Optimized (Smart):    ${o_cost:.4f}",
        f"Savings:              {savings_pct:.1f}%",
        f"Absolute Savings:     ${b_cost - o_cost:.4f}",
        sep="\n",
    )

In [None]:
# Model Usage Distribution
if optimized:
    print('\nModel Usage (Optimized):')
    usage = optimized.get('requests_by_model', {})
    for model, count in usage.items():
        # Models with no entry in the cost breakdown are shown with a cost of 0.
        per_model_costs = optimized.get('cost_by_model', {})
        cost = per_model_costs.get(model, 0)
        print(f'  {model}: {count} requests, ${cost:.4f} total cost')

    # Aggregate metrics; a missing key is displayed as 0.
    hit_rate = optimized.get("cache_hit_rate", 0)
    print(f'\nCache Hit Rate: {hit_rate:.1%}')
    latency_ms = optimized.get("avg_latency_ms", 0)
    print(f'Avg Latency:    {latency_ms:.0f}ms')

In [None]:
# Cost Per Request Comparison
# Uses b_cost and o_cost, which are assigned in the Cost Comparison cell.
if baseline and optimized:
    # max(1, ...) keeps the per-request division safe when a run recorded 0 requests.
    b_per = b_cost / max(1, baseline['requests'])
    o_per = o_cost / max(1, optimized['requests'])
    # Report 0% when the baseline per-request cost is not positive, using the
    # same guard as the savings percentage, instead of raising ZeroDivisionError.
    reduction_pct = (b_per - o_per) / b_per * 100 if b_per > 0 else 0
    print(f'Cost per request (Baseline): ${b_per:.6f}')
    print(f'Cost per request (Optimized): ${o_per:.6f}')
    print(f'Reduction: {reduction_pct:.1f}%')

In [None]:
# Latency Comparison
if baseline and optimized:
    # Same line format for both runs; a missing latency value is shown as 0.
    for heading, results in (
        ('Avg Latency (Baseline):  ', baseline),
        ('Avg Latency (Optimized): ', optimized),
    ):
        print(f'{heading}{results.get("avg_latency_ms", 0):.0f}ms')

In [None]:
# Summary Table
# Uses b_cost, o_cost and savings_pct, which are assigned in the Cost Comparison cell.
if baseline and optimized:
    # Build the header once so the separator rule matches its exact width.
    header = f"{'Metric':<30} {'Baseline':>12} {'Optimized':>12} {'Improvement':>12}"
    print(header)
    print('-' * len(header))
    # A leading '$' (or trailing '%') plus an 11-wide number fills a 12-wide column.
    print(f"{'Total Cost':<30} ${b_cost:>11.4f} ${o_cost:>11.4f} {savings_pct:>11.1f}%")
    print(f"{'Requests':<30} {baseline['requests']:>12} {optimized['requests']:>12} {'':>12}")
    print(f"{'Avg Latency (ms)':<30} {baseline.get('avg_latency_ms',0):>12.0f} {optimized.get('avg_latency_ms',0):>12.0f} {'':>12}")
    # The '%' presentation type counts the percent sign inside the field width,
    # so the value needs the full 12 characters to line up under 'Optimized'.
    print(f"{'Cache Hit Rate':<30} {'N/A':>12} {optimized.get('cache_hit_rate',0):>12.1%} {'':>12}")