In [1]:
import numpy as np
import os
import json
import tqdm
import re

In [2]:
# Root directory of the efficiency logs read by the cells below. Defaults to
# the original location; set the HELM_EFFICIENCY_LOGS environment variable to
# point the notebook at another copy of the logs without editing this cell.
LOGS_PATH = os.environ.get(
    "HELM_EFFICIENCY_LOGS", "/lfs/1/keshav2/helm-efficiency/logs"
)

In [3]:
# Build `data`: one (num_prompt_tokens, num_output_tokens, idealized_runtime)
# tuple per instance, gathered from every run directory under v7_subset.
_TRACKED_METRICS = (
    "num_prompt_tokens",
    "num_output_tokens",
    "inference_idealized_runtime",
)

data = []
datadir = os.path.join(LOGS_PATH, "v7_subset")
for run_dir in tqdm.tqdm(os.listdir(datadir)):
    stats_path = os.path.join(datadir, run_dir, "per_instance_stats.json")
    with open(stats_path, "r") as stats_file:
        per_instance = json.load(stats_file)

    for instance in per_instance:
        # Every tracked metric starts out missing (None) for this instance.
        found = dict.fromkeys(_TRACKED_METRICS)
        hits = 0
        for metric in instance["stats"]:
            metric_name = metric["name"]["name"]
            if metric_name not in _TRACKED_METRICS:
                continue
            found[metric_name] = metric["sum"]
            hits += 1
            # Stop scanning once three tracked entries have been seen.
            if hits == 3:
                break

        prompt_tokens = found["num_prompt_tokens"]
        output_tokens = found["num_output_tokens"]
        # Token counts are mandatory; the runtime may legitimately stay None.
        if prompt_tokens is not None and output_tokens is not None:
            data.append(
                (prompt_tokens, output_tokens, found["inference_idealized_runtime"])
            )

100%|██████████| 112/112 [00:36<00:00,  3.03it/s]


In [4]:
def api_cost(num_prompt_tokens, num_output_tokens, num_trials, per_token_costs):
    """Return the API charge for repeating one request `num_trials` times.

    Args:
        num_prompt_tokens: number of prompt tokens in the request.
        num_output_tokens: number of generated tokens in the response.
        num_trials: how many times the request is issued.
        per_token_costs: mapping with "input" and "output" entries, each a
            price per 1000 tokens.

    Returns:
        num_trials times the combined prompt and output charge.
    """
    prompt_rate = per_token_costs["input"]
    completion_rate = per_token_costs["output"]
    # Rates are quoted per 1000 tokens, hence the division.
    per_trial = (
        prompt_rate * num_prompt_tokens / 1000
        + completion_rate * num_output_tokens / 1000
    )
    return num_trials * per_trial

In [5]:
def local_cost(runtime, num_trials, hardware_cost):
    """Return the hardware charge for `num_trials` runs lasting `runtime` each.

    The product of the three arguments is divided by 60 twice, so with
    `runtime` in seconds `hardware_cost` acts as an hourly rate.
    """
    charged = runtime * num_trials * hardware_cost
    # Two successive divisions by 60: seconds -> minutes -> hours.
    return charged / 60 / 60

In [6]:
# Matches a decimal number, optionally with an exponent, followed by
# " seconds". A raw string keeps `\.` from being treated as an (invalid)
# string escape, and the exponent group stops values such as "5e-1 seconds"
# from being misread as "-1 seconds".
_SECONDS_PATTERN = re.compile(
    r"([-+]?[0-9]*\.?[0-9]+(?:[eE][-+]?[0-9]+)?) seconds"
)


def parse_calibration_log(path):
    """Sum the durations reported in a calibration log.

    Each line is searched for the first "<number> seconds" occurrence; lines
    without one are ignored.

    Args:
        path: path of the log file to read.

    Returns:
        The total of all matched durations as a float (0.0 if none match).

    Raises:
        OSError: if the file cannot be opened.
    """
    runtime = 0.0
    with open(path, "r") as f:
        for line in f:
            match = _SECONDS_PATTERN.search(line)
            if match is not None:
                runtime += float(match.group(1))
    return runtime

In [7]:
def denoised_speedup(data):
    """Print the cost of repeated API trials versus the denoised estimate.

    The baseline charges every instance for 10 API trials. The denoised
    figure charges one API trial per instance plus 50 local trials of the
    calibration run logged in ``<LOGS_PATH>/a100/openai_davinci.log``.

    Args:
        data: sequence of 3-tuples whose first two items are the prompt and
            output token counts; the third item is ignored here.

    Returns:
        None. The two costs and their ratio are printed.
    """
    api_trials, local_trials = 10, 50
    log_path = os.path.join(LOGS_PATH, "a100", "openai_davinci.log")
    calibration_seconds = parse_calibration_log(log_path)
    token_prices = {"input": 0.0015, "output": 0.002}
    hourly_hardware_rate = 40.96

    baseline = sum(
        api_cost(prompt, output, api_trials, token_prices)
        for prompt, output, _ in data
    )
    calibration_cost = local_cost(
        calibration_seconds, local_trials, hourly_hardware_rate
    )
    single_trial_cost = sum(
        api_cost(prompt, output, 1, token_prices) for prompt, output, _ in data
    )
    denoised = calibration_cost + single_trial_cost

    print(f"Baseline cost: ${baseline:.2f}")
    print(f"Denoised cost: ${denoised:.2f}")
    print(f"Savings: {baseline / denoised:.2f}x")

In [8]:
# Print the baseline vs. denoised cost comparison for the loaded instances.
denoised_speedup(data)

Baseline cost: $4176.34
Denoised cost: $711.97
Savings: 5.87x


In [9]:
def idealized_speedup(data):
    """Print the cost of per-instance local profiling versus calibration.

    Only instances that have an idealized runtime are considered. The
    baseline charges 50 local trials of each instance's idealized runtime;
    the idealized figure charges 50 local trials of the calibration run
    logged in ``<LOGS_PATH>/a100/openai_davinci.log`` instead. Both also pay
    for one API trial per instance.

    Args:
        data: sequence of ``(num_prompt_tokens, num_output_tokens,
            idealized_runtime)`` tuples; entries whose runtime is None are
            skipped.

    Returns:
        None. The two costs and their ratio are printed.
    """
    # Keep the caller's sequence untouched; work on the filtered copy.
    timed = [(x, y, z) for (x, y, z) in data if z is not None]
    num_local_trials = 50
    calibration_time = parse_calibration_log(
        os.path.join(LOGS_PATH, "a100", "openai_davinci.log")
    )
    davinci_costs = {"input": 0.0015, "output": 0.002}
    hardware_cost = 40.96

    # Both scenarios pay for exactly one API trial per instance, so the sum
    # is computed once and shared.
    api_total = sum(api_cost(x, y, 1, davinci_costs) for (x, y, _) in timed)

    baseline = (
        sum(local_cost(z, num_local_trials, hardware_cost) for (_, _, z) in timed)
        + api_total
    )
    idealized = (
        local_cost(calibration_time, num_local_trials, hardware_cost) + api_total
    )

    print(f"Baseline cost: ${baseline:.2f}")
    print(f"Idealized cost: ${idealized:.2f}")
    print(f"Savings: {baseline / idealized:.2f}x")

In [10]:
# Print the baseline vs. idealized cost comparison for the loaded instances.
idealized_speedup(data)

Baseline cost: $31128.26
Idealized cost: $545.24
Savings: 57.09x
