In [71]:
import polars as pl

# Read the results lazily so the aggregation is planned before any data is
# loaded; nothing is materialised until `.collect()`.
results_lazy = pl.scan_parquet("results.parquet")

# Share of games won by each distinct winner (normalised counts), as one
# struct per winner value.
winner_shares = pl.col("winner").value_counts(normalize=True)

# One row per (monkey_strategy, wolf_strategy, winner): the list of structs
# produced per pairing is exploded into rows, then each struct is unnested
# into plain columns.
df_counts = (
    results_lazy
    .group_by("monkey_strategy", "wolf_strategy")
    .agg(winner_shares)
    .explode("winner")
    .unnest("winner")
    .collect()
)
df_counts

monkey_strategy,wolf_strategy,winner,proportion
str,str,str,f64
"""metric_strength""","""metric_count""","""monkey""",0.579
"""metric_strength""","""metric_count""","""wolf""",0.421
"""metric_position""","""metric_count""","""monkey""",0.309
"""metric_position""","""metric_count""","""wolf""",0.691
"""random""","""metric_strength""","""wolf""",0.956
…,…,…,…
"""random_spot_win""","""metric_position""","""monkey""",0.3
"""random_spot_win""","""random_spot_win""","""monkey""",0.562
"""random_spot_win""","""random_spot_win""","""wolf""",0.438
"""metric_position""","""random_spot_win""","""wolf""",0.204


In [92]:
# Every strategy name seen on either side of a matchup, de-duplicated and
# sorted so later loops visit them in a stable order.
monkey_names = set(df_counts["monkey_strategy"])
wolf_names = set(df_counts["wolf_strategy"])
strategies = sorted(monkey_names | wolf_names)
strategies

['metric_count',
 'metric_position',
 'metric_strength',
 'random',
 'random_spot_win']

In [94]:
from collections import defaultdict


# Baseline subtracted from every win share, so 0 means an even matchup and
# positive values mean the strategy wins more often than it loses.
EVEN_ODDS = 0.5


def _win_proportion(
    counts: pl.DataFrame,
    monkey_strategy: str,
    wolf_strategy: str,
    winner: str,
) -> float:
    """Return the share of games `winner` won in one monkey-vs-wolf matchup.

    Args:
        counts: frame with `monkey_strategy`, `wolf_strategy`, `winner` and
            `proportion` columns (one row per matchup and winner).
        monkey_strategy: strategy played by the monkey side.
        wolf_strategy: strategy played by the wolf side.
        winner: value of the `winner` column to look up.

    Returns:
        The stored proportion, or 0.0 when the matchup exists but `winner`
        never won it. `value_counts` only emits rows for values that occur,
        so a swept matchup has no row for the losing side.

    Raises:
        ValueError: if the matchup has no rows at all, or has more than one
            row for `winner`.
    """
    matchup = counts.filter(
        pl.col("monkey_strategy") == monkey_strategy,
        pl.col("wolf_strategy") == wolf_strategy,
    )
    if matchup.height == 0:
        # A matchup that is entirely absent is a data problem, not a 0% share.
        raise ValueError(
            f"no results for monkey={monkey_strategy!r} vs wolf={wolf_strategy!r}"
        )

    winner_rows = matchup.filter(pl.col("winner") == winner)
    if winner_rows.height > 1:
        raise ValueError(
            f"expected at most one {winner!r} row for monkey={monkey_strategy!r} "
            f"vs wolf={wolf_strategy!r}, found {winner_rows.height}"
        )
    if winner_rows.height == 0:
        return 0.0
    return winner_rows["proportion"][0]


analysis_by_strategy: dict[str, pl.DataFrame] = {}

for strategy_a in strategies:
    # "m": strategy_a plays as the monkey against each opponent as the wolf.
    monkey_edges = [
        _win_proportion(df_counts, strategy_a, strategy_b, "monkey") - EVEN_ODDS
        for strategy_b in strategies
    ]
    # "w": strategy_a plays as the wolf against each opponent as the monkey.
    wolf_edges = [
        _win_proportion(df_counts, strategy_b, strategy_a, "wolf") - EVEN_ODDS
        for strategy_b in strategies
    ]

    analysis_by_strategy[strategy_a] = pl.DataFrame(
        data={
            "vs": list(strategies),
            "m": monkey_edges,
            "w": wolf_edges,
        }
    )

In [97]:
# Inspect one strategy's table: for each opponent ("vs"), its win proportion
# minus 0.5 when playing as the monkey ("m") and when playing as the wolf ("w").
analysis_by_strategy["metric_position"]

vs,m,w
str,f64,f64
"""metric_count""",-0.191,-0.232
"""metric_position""",0.044,-0.044
"""metric_strength""",-0.267,-0.302
"""random""",0.431,0.376
"""random_spot_win""",0.296,0.2
