In [2]:
import matplotlib.pyplot as plt
import numpy as np
import pickle 
import re
import os
import ast
import pandas as pd
import math


In [38]:
import ast
import pandas as pd
import math

def norm_cdf(x):
    """Return the standard normal cumulative probability at ``x``.

    Evaluated through the error function: Phi(x) = (1 + erf(x / sqrt(2))) / 2.
    """
    scaled = x / math.sqrt(2)
    return (1 + math.erf(scaled)) * 0.5

def _find_count(counts, name):
    """Return the value stored under ``name`` in ``counts``, tolerating key padding.

    The two spellings ``'  ' + name`` and ``name`` are tried first, in that
    order; after that any string key whose stripped form equals ``name``
    matches. Returns ``None`` when nothing matches.
    """
    for candidate in ('  ' + name, name):
        if candidate in counts:
            return counts[candidate]
    for key, value in counts.items():
        if isinstance(key, str) and key.strip() == name:
            return value
    return None


def load_distribution(path):
    """Parse a ``key: {...}`` distribution file into C/D counts.

    Blank lines and lines starting with ``#`` are skipped. Every other line
    must have the form ``<key>: <python dict literal>``; the literal has to
    contain a ``C`` and a ``D`` entry (whitespace around those key names is
    ignored).

    Args:
        path: Path of the text file to read.

    Returns:
        A dict mapping each stripped line key to ``{'C': int, 'D': int}``.
        If a key occurs on several lines, the last occurrence wins.

    Raises:
        ValueError: If a line has no ``:`` separator, its value is not a
            dict, or the ``C``/``D`` entry is missing. The message carries
            the file path and line number.
        Exception: Errors raised by ``ast.literal_eval`` for an unparsable
            literal, or by ``int()`` for a non-numeric count, propagate
            unchanged.
    """
    out = {}
    with open(path, 'r') as f:
        for lineno, raw in enumerate(f, start=1):
            line = raw.strip()
            if not line or line.startswith('#'):
                continue

            k, sep, v = line.partition(':')
            if not sep:
                raise ValueError(
                    f"{path}:{lineno}: expected '<key>: <dict literal>', got {line!r}"
                )

            # Strip before parsing: older interpreters reject a literal that
            # starts with whitespace.
            inner = ast.literal_eval(v.strip())
            if not isinstance(inner, dict):
                raise ValueError(
                    f"{path}:{lineno}: value for {k.strip()!r} is not a dict: {inner!r}"
                )

            c = _find_count(inner, 'C')
            d = _find_count(inner, 'D')
            if c is None or d is None:
                raise ValueError(
                    f"{path}:{lineno}: missing 'C' or 'D' count for {k.strip()!r}"
                )
            out[k.strip()] = {'C': int(c), 'D': int(d)}
    return out

def _two_proportion_stats(c_normal, d_normal, c_oos, d_oos):
    """Compare two cooperation ratios with an unpooled two-proportion z-test.

    Args:
        c_normal, d_normal: C and D counts of the normal condition.
        c_oos, d_oos: C and D counts of the OoS condition.

    Returns:
        A dict with the keys ``pn``, ``po`` (C ratios), ``diff`` (``po - pn``),
        ``se`` (unpooled standard error), ``pval`` (two-sided p-value) and
        ``ci`` (tuple ``(diff - 1.96*se, diff + 1.96*se)``).

    Raises:
        ValueError: If either condition has no observations.
    """
    n_normal = c_normal + d_normal
    n_oos = c_oos + d_oos
    if n_normal <= 0 or n_oos <= 0:
        raise ValueError(
            f"need at least one observation per condition "
            f"(normal N={n_normal}, OoS N={n_oos})"
        )

    pn = c_normal / n_normal
    po = c_oos / n_oos
    diff = po - pn
    se = math.sqrt(pn * (1 - pn) / n_normal + po * (1 - po) / n_oos)

    if se > 0:
        z = diff / se
    elif diff == 0:
        z = 0.0
    else:
        # Both ratios sit at 0 or 1 but differ (e.g. 0% vs 100%): the
        # statistic is unbounded, so the p-value must be 0, not 1.
        z = math.copysign(math.inf, diff)
    pval = 2 * (1 - norm_cdf(abs(z)))

    return {
        'pn': pn,
        'po': po,
        'diff': diff,
        'se': se,
        'pval': pval,
        'ci': (diff - 1.96 * se, diff + 1.96 * se),
    }


def main(llm_name):
    """Build the normal-vs-OoS cooperation table for one model and save it as LaTeX.

    Reads ``../results/{llm_name}_distribution.txt`` and
    ``../ood_game/{llm_name}_distribution.txt``, pairs every normal key
    ``<scenario>_<game>`` with the OoS key ``<scenario>_<game>_d``, computes
    the C ratios, their difference, standard error, 95% CI and two-sided
    p-value, and writes the formatted table to
    ``{llm_name}_results_table.tex`` in the working directory.

    Rows are ordered by the fixed scenario/game lists below; keys outside
    that product are not included, and combinations absent from the input
    are left out of the table.

    Args:
        llm_name: Model tag used in the input and output file names.

    Raises:
        KeyError: If a normal key has no ``_d`` counterpart in the OoS file.
        ValueError: If a key has no ``_`` separator or a row has zero
            observations in either condition.
    """
    normal = load_distribution(f'../results/{llm_name}_distribution.txt')
    oos    = load_distribution(f'../ood_game/{llm_name}_distribution.txt')

    records = []
    for key, row_n in normal.items():
        # map to the OoS key
        oos_key = key + "_d"
        if oos_key not in oos:
            raise KeyError(f"OoS key '{oos_key}' not found")
        row_o = oos[oos_key]

        scenario, game = key.split('_', 1)

        try:
            stats = _two_proportion_stats(row_n['C'], row_n['D'],
                                          row_o['C'], row_o['D'])
        except ValueError as err:
            # Re-raise with the offending key so the bad row can be located.
            raise ValueError(f"{key}: {err}") from err

        records.append({
            'scenario':       scenario,
            'game':           game,
            'Normal C Ratio': stats['pn'],
            'OoS C Ratio':    stats['po'],
            'Difference':     stats['diff'],
            '95% CI':         stats['ci'],
            'SE':             stats['se'],
            'p-value':        stats['pval'],
        })

    df = pd.DataFrame(records).set_index(['scenario','game'])
    # optional ordering
    scenarios = ['team','IR','friendsharing','biz','environment']
    games     = ['prison','delight','staghunt','snowdrift']
    df = df.reindex(pd.MultiIndex.from_product([scenarios,games],
                                               names=df.index.names))
    # The reindex inserts all-NaN rows for combinations missing from the
    # input; drop them so the tuple formatter below never sees a NaN.
    df = df.dropna(how='all')

    # formatting
    df['Normal C Ratio'] = df['Normal C Ratio'].apply(lambda x: f"{x:.2f}")
    df['OoS C Ratio']    = df['OoS C Ratio'].apply(lambda x: f"{x:.2f}")
    df['Difference']     = df['Difference'].apply(lambda x: f"{x:.2f}")
    df['SE']             = df['SE'].apply(lambda x: f"{x:.3f}")
    df['95% CI'] = df['95% CI'].apply(lambda x: f"[{x[0]:.3f}, {x[1]:.3f}]")
    df['p-value'] = df['p-value'].apply(lambda p: f"{p:.3f}{'*' if p<0.05 else ''}")

    # write LaTeX
    with open(f'{llm_name}_results_table.tex','w') as outf:
        outf.write(
            df.to_latex(column_format="llrrrrrr",
                        # escape=False writes headers verbatim, so '%' must be
                        # escaped by hand or LaTeX treats the rest of the
                        # header row as a comment.
                        header=["Normal C Ratio","OoS C Ratio","Difference",
                                r"95\% CI","SE","p-value"],
                        escape=False)
        )
    print(f"Results table for {llm_name} written to {llm_name}_results_table.tex")

# Script entry point: build the results table for a single model.
if __name__ == "__main__":
    # Model tag that main() interpolates into the input and output file names.
    llm_name = "70b"
    main(llm_name)


Results table for 70b written to 70b_results_table.tex
