In [None]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
import pandas as pd
from econ_evals.utils.helper_functions import get_base_dir_path
from ast import literal_eval
from econ_evals.experiments.pricing.pricing_market_logic_multiproduct import (
    get_profits,
)

import os
import pickle

import numpy as np

# Load data

In [None]:
# Root directory holding the pricing experiment logs.
base_log_dir = get_base_dir_path() / "experiments" / "pricing" / "logs"

# Top-level log directories whose names start with "2025-".
# - sorted: os.listdir returns entries in arbitrary order, so sorting keeps the
#   downstream table order reproducible across machines.
# - isdir: every entry is listed again further down, which would fail on a
#   stray file that happens to share the prefix.
big_dirnames = sorted(
    dirname
    for dirname in os.listdir(base_log_dir)
    if dirname.startswith("2025-") and os.path.isdir(base_log_dir / dirname)
)

In [None]:
# Location of the pre-computed monopoly prices (maintained by
# precompute_pricing_opt.py, per the messages below).
monopoly_prices_path = (
    get_base_dir_path() / "experiments/pricing" / "dirname_to_monopoly_prices.pkl"
)

if not os.path.exists(monopoly_prices_path):
    print(
        "No monopoly prices pre-computed. Please run the script precompute_pricing_opt.py to update the file dirname_to_monopoly_prices.pkl."
    )

else:
    # NOTE(review): pickle.load can execute arbitrary code -- only load a file
    # that was generated locally by the pre-computation script.
    with open(monopoly_prices_path, "rb") as f:
        dirname_to_monopoly_prices = pickle.load(f)

    # Every run directory found under the selected top-level log directories.
    run_dirnames = {
        dirname
        for big_dirname in big_dirnames
        for dirname in os.listdir(base_log_dir / big_dirname)
    }

    # Compare by run name rather than by count: a count comparison reports a
    # false failure when the pickle holds entries for runs that are no longer
    # on disk, and a false success when the counts match but the names differ.
    missing_dirnames = run_dirnames - set(dirname_to_monopoly_prices)

    if missing_dirnames:
        print(
            "Not all monopoly prices pre-computed. Please run the script precompute_pricing_opt.py to update the file dirname_to_monopoly_prices.pkl."
        )
    else:
        print("Good to go -- all monopoly prices pre-computed.")

In [None]:
# Window of rounds (positional, end-exclusive) over which each run is scored.
START_ROUND_IDX = 50
END_ROUND_IDX = 100

# One record per scored run; converted to a DataFrame once the loop finishes.
table = []


for big_dirname in big_dirnames:
    for dirname in os.listdir(base_log_dir / big_dirname):
        run_dir = base_log_dir / big_dirname / dirname
        log_path = run_dir / "logs.csv"

        # global_params.csv is read as a single-row table; take that row as a dict.
        global_params = pd.read_csv(run_dir / "global_params.csv").to_dict(
            orient="records"
        )[0]

        seed = global_params["seed"]

        log_df = pd.read_csv(log_path)

        if len(log_df) == 0:
            print(f"Skipping {log_path}")
            continue

        # Keep only the log rows that recorded a market outcome (non-null prices).
        market_df = log_df.dropna(subset=["prices"])[
            ["prices", "quantities", "profits", "attempt_num"]
        ].reset_index(drop=True)

        # These columns are stored as Python-literal strings in the CSV.
        market_df["prices"] = market_df["prices"].apply(literal_eval)
        market_df["quantities"] = market_df["quantities"].apply(literal_eval)
        market_df["profits"] = market_df["profits"].apply(literal_eval)

        # First non-null entry of the "model" column identifies the LLM.
        model = log_df["model"].dropna().values[0]

        # Parameters forwarded to get_profits; the list/dict-valued ones are
        # stored as Python-literal strings.
        a0 = global_params["a0"]
        a_tuple = literal_eval(global_params["a_tuple"])
        alpha_list = literal_eval(global_params["alpha_list"])
        multiplier_list = literal_eval(global_params["multiplier_list"])
        costs = literal_eval(global_params["costs"])
        c = list(costs.values())
        mu = global_params["mu"]
        sigma = global_params["sigma"]
        group_idxs = literal_eval(global_params["group_idxs"])

        # Pre-computed monopoly prices for this run, one entry per element of
        # alpha_list / multiplier_list (zip stops at the shortest of the three).
        monopoly_prices = dirname_to_monopoly_prices[dirname]

        # Profits obtained at the monopoly prices, evaluated entry by entry.
        monopoly_profits = []
        for alpha, multiplier, monopoly_prices_i in zip(
            alpha_list, multiplier_list, monopoly_prices
        ):
            monopoly_profits_i = get_profits(
                p=monopoly_prices_i,
                a0=a0,
                a=a_tuple,
                c=c,
                mu=mu,
                alpha=alpha,
                sigma=sigma,
                group_idxs=group_idxs,
                multiplier=multiplier,
            )
            monopoly_profits.append(monopoly_profits_i)

        # Expand the per-attempt profits into long form:
        # one row per (attempt_num, product_id) with its profit.
        df_long = (
            market_df.set_index("attempt_num")["profits"]
            .apply(pd.Series)
            .stack()
            .rename("profit")
            .reset_index()
            .rename(columns={"level_1": "product_id"})
        )

        # Wide form: one row per attempt_num, one column per product_id.
        df_profit = df_long.pivot(
            index="attempt_num", columns="product_id", values="profit"
        )

        ## BENCHMARK QUANTITIES
        # Profit realised over the scoring window, summed across products.
        total_profits = df_profit.iloc[START_ROUND_IDX:END_ROUND_IDX].sum(axis=1).sum()

        # Profit at the monopoly prices over the same window.
        opt_profits = np.array(monopoly_profits[START_ROUND_IDX:END_ROUND_IDX]).sum()

        # Run name without its first "__"-separated component; the difficulty
        # label is read from the second component of what remains.
        short_log_subdirname = "__".join(dirname.split("__")[1:])

        table.append(
            {
                "dirname": dirname,
                "log_subdirname": big_dirname,
                "ratio": total_profits / opt_profits,
                "seed": seed,
                "model": model,
                "difficulty": short_log_subdirname.split("__")[1],
            }
        )

df_table = pd.DataFrame(table)

In [None]:
# Per-run results: one row per scored run with its profit ratio, seed, model
# and difficulty.
df_table

# Calculate pricing benchmark scores for each LLM and difficulty level

In [None]:
# Mean profit ratio per (model, difficulty) pair, scaled by 100, returned as a
# flat table with "model", "difficulty" and "ratio" columns.
(
    df_table.loc[:, ["model", "difficulty", "ratio"]]
    .groupby(["model", "difficulty"])
    .mean()
    .mul(100)
    .reset_index()
)