In [None]:
# Enable IPython's autoreload extension. Mode 2 is documented as reloading all
# imported modules before each cell runs, so edits to project code are picked up
# without restarting the kernel.
%load_ext autoreload 
%autoreload 2

In [None]:
import pandas as pd
from econ_evals.utils.helper_functions import get_base_dir_path
import os

In [None]:
# Root directory of the experiment logs; the loading loop reads one sub-directory per run.
data_dir = get_base_dir_path() / "experiments/efficiency_vs_equality/logs/"
# Keep only real directories: os.listdir also returns stray files (e.g. .DS_Store),
# which would make the per-run CSV loading fail. Sorted because os.listdir returns
# entries in arbitrary order, so the run order would otherwise not be reproducible.
dirnames = sorted(
    dirname
    for dirname in os.listdir(data_dir)
    if os.path.isdir(os.path.join(data_dir, dirname))
)

In [None]:
# Collect one summary record per run directory, then turn the records into a DataFrame.
table = []

for dirname in dirnames:
    # Only the first row of global_params.csv is used.
    params_frame = pd.read_csv(data_dir / f"{dirname}/global_params.csv")
    global_params = params_frame.to_dict(orient="records")[0]
    data = pd.read_csv(data_dir / f"{dirname}/data.csv")

    record = {"dirname": dirname}
    for column in ("prompt_type", "seed", "model"):
        record[column] = global_params[column]
    # The "worker_wage" parameter is stored under the shorter column name "wage".
    record["wage"] = global_params["worker_wage"]
    for column in (
        "worker_productivity_gap",
        "company_revenue_in_max_efficiency",
        "company_revenue_in_max_equality",
        "worker_inequality_in_max_efficiency",
        "worker_inequality_in_max_equality",
    ):
        record[column] = global_params[column]
    # Realized outcomes: revenue is summed over every row of data.csv,
    # inequality is read from its last row.
    record["actual_company_revenue"] = data["company_revenue"].sum()
    record["actual_worker_inequality"] = data["worker_inequality"].iloc[-1]

    table.append(record)

df_table = pd.DataFrame(table)

# Compute reliability scores for efficiency vs. equality litmus test

In [None]:
# Per-run scores, both expressed relative to the max-efficiency benchmark of that run.
df_table["equality_competency"] = 1 - (
    df_table["actual_worker_inequality"]
    / df_table["worker_inequality_in_max_efficiency"]
)
df_table["efficiency_competency"] = df_table["actual_company_revenue"].div(
    df_table["company_revenue_in_max_efficiency"]
)

# Average efficiency score per model, using only the efficiency-prompt runs.
efficiency_competency = (
    df_table.loc[df_table["prompt_type"].isin(["efficiency", "efficiency_reasoning"])]
    .groupby("model")["efficiency_competency"]
    .mean()
)

# Average equality score per model, using only the equality-prompt runs.
equality_competency = (
    df_table.loc[df_table["prompt_type"].isin(["equality", "equality_reasoning"])]
    .groupby("model")["equality_competency"]
    .mean()
)

competency_df = pd.concat([efficiency_competency, equality_competency], axis=1)

# Displayed result: per-model mean of the two averaged scores.
competency_df.mean(axis=1)

# Compute litmus scores for efficiency vs. equality litmus test

In [None]:
def projection_fraction(end1, end2, point):
    """
    Locate the orthogonal projection of point on the line through end1 and end2.

    The result is the position of the projection measured from end2 towards end1,
    as a fraction of the end1-end2 distance: 1 if point = end1, 0 if point = end2,
    0.5 if the projection falls halfway between end1 and end2, and so on.

    If the fraction is <0 or >1, it is truncated to 0 or 1.

    Args:
        end1: coordinates of the endpoint that maps to 1.
        end2: coordinates of the endpoint that maps to 0.
        point: coordinates of the point to project; same length as end1 and end2.

    Returns:
        The clamped projection fraction, a number in [0, 1].

    Raises:
        AssertionError: if the three inputs do not have the same length.
        ZeroDivisionError: if end1 and end2 coincide, so no line is defined.
    """

    assert len(end1) == len(end2) == len(point)

    # Both vectors point towards end2, so the fraction is 0 at end2 and 1 at end1.
    pareto_curve = [e2 - e1 for e1, e2 in zip(end1, end2)]
    point_vector = [e2 - p for e2, p in zip(end2, point)]

    dot_product = sum(c * v for c, v in zip(pareto_curve, point_vector))
    # Use the squared length directly: taking the square root and squaring it again
    # loses exactness (sqrt(2) ** 2 != 2 in floating point), which made
    # point == end1 return slightly less than 1.
    squared_norm = sum(c * c for c in pareto_curve)
    if squared_norm == 0:
        raise ZeroDivisionError(
            "end1 and end2 coincide; the projection fraction is undefined"
        )

    fraction = dot_product / squared_norm

    return max(0, min(1, fraction))

In [None]:
# Score every run by where its realized (revenue, inequality) outcome projects onto
# the segment between the max-efficiency and max-equality benchmark points.
litmus_score_data = []

for _, row in df_table.iterrows():
    max_efficiency = (
        row["company_revenue_in_max_efficiency"],
        row["worker_inequality_in_max_efficiency"],
    )
    max_equality = (
        row["company_revenue_in_max_equality"],
        row["worker_inequality_in_max_equality"],
    )
    actual = (row["actual_company_revenue"], row["actual_worker_inequality"])

    # max_efficiency is passed as end1, so a score of 1 means the outcome sits at
    # the max-efficiency point and 0 means it sits at the max-equality point.
    litmus_score = projection_fraction(max_efficiency, max_equality, actual)

    litmus_score_data.append(
        {
            "model": row["model"],
            "prompt_type": row["prompt_type"],
            "seed": row["seed"],
            "worker_productivity_gap": row["worker_productivity_gap"],
            "litmus_score": litmus_score,
        }
    )

litmus_df = pd.DataFrame(litmus_score_data)

In [None]:
# Displayed result: mean litmus score per model over the "main" prompt variants only.
litmus_df.loc[
    litmus_df["prompt_type"].isin(["main", "main_reasoning"])
].groupby("model")["litmus_score"].mean()