In [None]:
import json
# from dash import Dash, html, dcc, Input, Output, callback
import plotly.express as px
import plotly.graph_objs as go
import glob
import re

import pandas as pd

## How to Use

Add any derived metrics you want to plot to `add_metrics()` below. \
Use `grab_exp_run_stats()` to load the dataset to plot (`exp=None` loads every experiment; `sof1=None` loads every combination of `sof1` and `sof2`). \
To graph several datasets together, concatenate their DataFrames; `df1` is the DataFrame that gets plotted. \
You may need to install plotly and dash, and update Flask/Werkzeug.

In [None]:
def add_metrics(run):
    """Attach agent-averaged metrics to a single run record.

    Adds ``alignment_avg`` (mean of ``a1_sof_alignment`` and
    ``a2_sof_alignment``) and ``value_avg`` (mean of ``a1_value`` and
    ``a2_value``) to ``run`` in place and returns that same object.
    """
    alignment_total = run['a1_sof_alignment'] + run['a2_sof_alignment']
    run['alignment_avg'] = alignment_total / 2

    value_total = run['a1_value'] + run['a2_value']
    run['value_avg'] = value_total / 2

    return run

In [None]:
sofs = ["max", "max_sum", "max_min", "max_prod", "min", "max_diff"]

def grab_run_stats(model, seed, exp, sof1, sof2, half=False, max_runs=10):
    """Load the usable runs for one experiment and one pair of objectives.

    Reads ``deal_or_no_deal/config/exp<exp>.json``, derives the results file
    name from that config, and collects runs from every matching file under
    ``deal_or_no_deal/exp/<model>-<seed>/``.

    A run is kept only when it is ``valid``, has ``valid_u_post_u_prior``,
    did not end in ``decided_no_agreement`` and did not hit
    ``exceeded_rounds``.  Kept runs are tagged with ``model_seed`` and
    ``exp_num`` and passed through ``add_metrics``.

    Args:
        model: Model name used in the results directory and file name.
        seed: Seed used in the results directory name.
        exp: Experiment identifier used to locate the config file.
        sof1: Objective label for agent 1 as it appears in the file name.
        sof2: Objective label for agent 2 as it appears in the file name.
        half: When true, look for the results file with the extra ``half``
            component in its name.
        max_runs: Maximum number of runs kept from each results file.

    Returns:
        A list of run dicts (possibly empty).

    Raises:
        IndexError: If no config file exists for ``exp``.
    """
    config_matches = glob.glob('deal_or_no_deal/config/exp' + str(exp) + '.json')
    if not config_matches:
        # Same exception type the bare ``[0]`` lookup produced, with a usable message.
        raise IndexError(f'no experiment config found for exp{exp}')
    exp_config_file = config_matches[0]

    # Raw string: '\d' in a plain literal is an invalid escape sequence.
    exp_num = int(re.search(r'\d+', exp_config_file).group(0))
    with open(exp_config_file, 'r') as f:
        config = json.load(f)

    chain_of_thought = False
    random_point_vals = True
    deception_text = 'deception' if config['deception'] else 'no_deception'
    # handle weird conditional entanglement b/w same_prompt and hidden_point_vals
    same_prompt = not config['hidden_point_vals']

    model_seed = model + '-' + str(seed)
    # The "half" runs differ only by one extra component after the second objective.
    half_part = 'half_' if half else ''
    pattern = (
        f"deal_or_no_deal/exp/{model_seed}/"
        f"{config['persuasion_taxonomy']}_{deception_text}_{config['theory_of_mind']}_"
        f"{model}_{sof1}_{sof2}_{half_part}{config['sof_visible']}_{chain_of_thought}_"
        f"{config['hidden_point_vals']}_{random_point_vals}_{same_prompt}.json"
    )

    h = []
    for exp_json in glob.glob(pattern):
        with open(exp_json, 'r') as f2:
            data = json.load(f2)
        remaining = max_runs  # the cap applies to each results file separately
        for run in data:
            if remaining <= 0:
                break  # nothing further from this file can be kept
            if (run['valid'] and run['valid_u_post_u_prior']
                    and not run['decided_no_agreement']
                    and not run['exceeded_rounds']):
                run['model_seed'] = model_seed
                run['exp_num'] = 'exp' + str(exp_num)
                remaining -= 1
                h.append(add_metrics(run))
    return h

def grab_exp_run_stats(model, seed, sof1=None, sof2='max', exp=None, half=False):
    """Collect runs across experiments and objective pairings into a DataFrame.

    Args:
        model: Model name forwarded to ``grab_run_stats``.
        seed: Seed forwarded to ``grab_run_stats``.
        sof1: Objective for agent 1.  ``None`` loads every (sof1, sof2)
            combination drawn from ``sofs`` and ignores ``sof2``.
        sof2: Objective for agent 2, used only when ``sof1`` is given.
        exp: Experiment identifier.  ``None`` loads experiments 0 through 10.
        half: Forwarded to ``grab_run_stats``.

    Returns:
        A ``pandas.DataFrame`` with one row per collected run.
    """
    experiments = range(11) if exp is None else [exp]
    if sof1 is None:
        sof_pairs = [(sf1, sf2) for sf1 in sofs for sf2 in sofs]
    else:
        sof_pairs = [(sof1, sof2)]

    h = []
    for exp_id in experiments:
        for sf1, sf2 in sof_pairs:
            h.extend(grab_run_stats(model, seed, exp_id, sf1, sf2, half=half))
    return pd.DataFrame(h)

def print_run2_stats(run):
    """Render one run record as a multi-line, human-readable summary.

    ``run`` is indexed by key (``run[key]``); a missing key raises whatever
    that lookup raises.  The summary lists ``label: value`` lines in fixed
    sections separated by blank lines and ends with ``run['conversation']``.

    Returns:
        The summary as a single newline-joined string.
    """
    # First section: the printed label differs from the key for some entries.
    headline = [
        ('sof_a1_label', 'sof_a1_label'),
        ('sof_a2_label', 'sof_a2_label'),
        ('a1_value', 'a1_value'),
        ('a2_value', 'a2_value'),
        ('a1_utility', 'a1_function'),
        ('a2_utility', 'a2_function'),
        ('a1_alignment', 'a1_sof_alignment'),
        ('a2_alignment', 'a2_sof_alignment'),
    ]
    # Remaining sections: the printed label is the key itself.
    same_name_sections = [
        ['a1_deception_count', 'a1_deception_count_avg', 'a1_deception_score_avg',
         'a2_deception_count', 'a2_deception_count_avg', 'a2_deception_score_avg'],
        ['a1_falsehood_count', 'a1_falsehood_count_avg', 'a1_falsehood_score_avg',
         'a2_falsehood_count', 'a2_falsehood_count_avg', 'a2_falsehood_score_avg'],
        ['a1_pareto_deception', 'a2_pareto_deception',
         'a1_deceptive_regret_beliefs_convo_lvl', 'a2_deceptive_regret_beliefs_convo_lvl',
         'a1_point_guesses', 'a2_point_guesses'],
        ['valid_u_post_u_prior',
         'a1_u_post_u_prior_avg', 'a1_v_post_v_prior_avg', 'a1_a_post_a_prior_avg',
         'a2_u_post_u_prior_avg', 'a2_v_post_v_prior_avg', 'a2_a_post_a_prior_avg'],
    ]

    lines = [f'{label}: {run[key]}' for label, key in headline]
    for section_keys in same_name_sections:
        lines.append('')
        lines.extend(f'{key}: {run[key]}' for key in section_keys)

    lines.append('')
    lines.append(f'index: {run["index"]}')
    lines.append(f'a1_vals: {[run["a1_book_val"], run["a1_hat_val"], run["a1_ball_val"]]}')
    lines.append(f'a2_vals: {[run["a2_book_val"], run["a2_hat_val"], run["a2_ball_val"]]}')

    lines.append('')
    lines.append(run['conversation'])

    return '\n'.join(lines)

In [None]:
# Modify with dataset to grab
#df1 = grab_exp_run_stats('gpt-3.5-turbo', '45')


# TOM no deception
# df1 = grab_exp_run_stats('gpt-3.5-turbo', '50')
# df2 = grab_exp_run_stats('Llama-3-70B-Instruct', '50')
# df1 = pd.concat([df1, df2], axis=0)

# Load the seed-'72' runs for every model, tagging each frame with its model
# name so the combined frame can be grouped or coloured by model.
model_names = [
    'gpt-3.5-turbo',
    'Llama-3.1-70B-Instruct',
    'Llama-3.1-8B-Instruct',
    'mistral-instruct',
    'mixtral-instruct',
]
df1 = pd.concat(
    [grab_exp_run_stats(name, '72').assign(model=name) for name in model_names],
    axis=0,
)
#df1 = df1[df1['sof_a1_label'] == 'max']
def preprocess(df):
    """Normalise label columns and add averaged taxicab columns, in place.

    Underscores are removed from ``sof_a1_label`` and ``sof_a2_label``, and
    for each agent ``<agent>_taxicabs_mean_mean`` is set to the mean of
    ``<agent>_taxicabs_mean`` and ``<agent>_third_person_taxicabs_mean``.
    Nothing is returned; ``df`` itself is modified.
    """
    for label_column in ('sof_a1_label', 'sof_a2_label'):
        df[label_column] = df[label_column].replace({'_': ''}, regex=True)

    taxicab_columns = (
        ('a1_taxicabs_mean_mean', 'a1_taxicabs_mean', 'a1_third_person_taxicabs_mean'),
        ('a2_taxicabs_mean_mean', 'a2_taxicabs_mean', 'a2_third_person_taxicabs_mean'),
    )
    for target, first_person, third_person in taxicab_columns:
        df[target] = (df[first_person] + df[third_person]) / 2

# Normalise df1 in place, then list the available columns and preview the frame.
preprocess(df1)
print(df1.columns)
df1

In [None]:
# Load the `half=True` runs (seed '73') for two models and tag each frame with its model name.
df_half = grab_exp_run_stats('Llama-3.1-70B-Instruct', '73', half=True)
df_half['model'] = 'Llama-3.1-70B-Instruct'
df_half2 = grab_exp_run_stats('gpt-3.5-turbo', '73', half=True)
df_half2['model'] = 'gpt-3.5-turbo'
# Stack both models into one frame; df_half2 still holds only the gpt-3.5-turbo rows.
df_half = pd.concat([df_half, df_half2], axis=0)
# Apply the same in-place preprocessing used for df1.
preprocess(df_half)
print(df_half.columns)
df_half

In [None]:
df_half2

In [None]:
df_half['half_agent']

In [None]:
# Blacklist certain columns from dropdown selection for readability
# remove columns from blacklist if you think they're helpful
# columns will still be outputted if point is clicked
blacklist = [
    'a1_books', 'a1_hats', 'a1_balls', 
    'a2_books', 'a2_hats', 'a2_balls', 
    'conversation', 'valid', 'valid_guess', 
    'decided_no_agreement', 'exceeded_rounds',
    'index', 'prompt', 'chain_of_thought',
    'a1_book_val', 'a1_hat_val', 'a1_ball_val', 
    'a2_book_val', 'a2_hat_val','a2_ball_val',
    'deception_list', 'falsehood_list',
    'running_cost_for_iteration', 'agent_thoughts',
    'a1_point_guesses', 'a2_point_guesses', 
    'valid_u_post_u_prior', 'u_post_u_prior_list'
    ]
# Offer every df1 column that is not blacklisted, keeping df1's column order.
metric_selection = [
    column
    for column in df1.columns
    if column not in blacklist
]
metric_selection

In [None]:
# Dash is imported in this cell rather than the top import cell so that the
# static plots in the other cells still run when dash is not installed.
from dash import Dash, html, dcc, Input, Output, callback

styles = {
    'pre': {
        'border': 'thin lightgrey solid',
        'backgroundColor': '#FFFFFF',
        'overflowX': 'scroll'
    }
}

# The layout and the server start-up both hang off this app object.
app = Dash(__name__)

# Page structure: x/y metric dropdowns, the scatter graph, a colour-metric
# dropdown, and a panel that shows details for the clicked point.
app.layout = html.Div([
    html.Div([
            dcc.Markdown("""
                **x-axis metric**
            """),
            dcc.Dropdown(
                metric_selection,
                'a1_pareto_deception',
                id='crossfilter-xaxis-column',
            )
        ],
        style={'width': '49%', 'display': 'inline-block'}),
    
    html.Div([
            dcc.Markdown("""
                **y-axis metric**
            """),
            dcc.Dropdown(
                metric_selection,
                'a1_sof_alignment',
                id='crossfilter-yaxis-column',
            )
        ],
        style={'width': '49%', 'display': 'inline-block'}),

    dcc.Graph(
        id='graph'
        #figure=fig
    ),
    
    html.Div([
            dcc.Markdown("""
                **color metric**
            """),
            dcc.Dropdown(
                metric_selection,
                'exp_num',
                id='crossfilter-color-column',
            )
        ],
        style={'width': '49%', 'display': 'inline-block'}),

    html.Div(className='row', children=[
        html.Div([
            dcc.Markdown("""
                **Click Data**

                Click on points in the graph.
            """),
            html.Pre(id='click-data', style=styles['pre']),
        ], className='three columns'),
    ])
])

@callback(
    Output('graph', 'figure'),
    Input('crossfilter-xaxis-column', 'value'),
    Input('crossfilter-yaxis-column', 'value'),
    Input('crossfilter-color-column', 'value'))
def update_graph(xaxis_column_name, yaxis_column_name, color_column_name):
    """Rebuild the scatter plot whenever one of the three dropdowns changes.

    An OLS trendline figure is built only to obtain a per-group R-squared;
    the returned figure is a plain scatter whose legend entries are suffixed
    with that R-squared.  Groups without a fit result keep their plain name.

    Args:
        xaxis_column_name: df1 column plotted on the x axis.
        yaxis_column_name: df1 column plotted on the y axis.
        color_column_name: df1 column used to colour the points.

    Returns:
        The plotly figure shown in the ``graph`` component.
    """
    fit_fig = px.scatter(df1, x=xaxis_column_name, y=yaxis_column_name, color=color_column_name,
                         opacity=0.5, trendline='ols', custom_data=df1.columns)
    results = px.get_trendline_results(fit_fig)

    # The results frame only carries the colour column when plotly grouped by it
    # (not for a cleared dropdown or a continuous colour scale).
    newnames = {}
    if color_column_name in results.columns:
        for _, fit_row in results.iterrows():
            group = str(fit_row[color_column_name])  # trace names are strings
            newnames[group] = group + ': ' + str(fit_row['px_fit_results'].rsquared)

    fig = px.scatter(df1, x=xaxis_column_name, y=yaxis_column_name, color=color_column_name,
                     opacity=0.5, custom_data=df1.columns)

    def _relabel(trace):
        label = newnames.get(trace.name, trace.name)
        if label != trace.name:
            trace.update(name=label,
                         legendgroup=label,
                         hovertemplate=trace.hovertemplate.replace(trace.name, label))

    fig.for_each_trace(_relabel)
    # No fig.show() here: Dash renders the returned figure itself.
    fig.update_layout(clickmode='event+select')
    fig.update_traces(marker=dict(size=1))
    return fig


@callback(
    Output('click-data', 'children'),
    Input('graph', 'clickData'))
def display_click_data(clickData):
    """Show the details of the clicked point in the ``click-data`` panel.

    With no click payload the payload itself is returned as JSON text.
    Otherwise the first clicked point's ``customdata`` values are paired with
    df1's column names by position and formatted by ``print_run2_stats``.
    """
    if not clickData:
        return json.dumps(clickData, indent=2)

    point_values = clickData["points"][0]["customdata"]
    run_record = {}
    for position, value in enumerate(point_values):
        run_record[df1.columns[position]] = value
    return print_run2_stats(run_record)

# Start the Dash server for the app configured above.
# Note: if server is already running, kernel might need to be restarted more than once
# http://127.0.0.1:8371/
# NOTE(review): recent Dash releases favour `app.run(...)` over `app.run_server(...)` — confirm against the installed version.
app.run_server(host='127.0.0.1', port='8371', debug=True)

In [None]:
# Combined objective label for the pair: "<a1 label>_<a2 label>".
df1['sof'] = df1['sof_a1_label'] + '_' + df1['sof_a2_label']

# Partition the runs by the sign of Agent 1's conversation-level deceptive
# regret, each partition ordered by experiment.
df_antideception = df1.loc[df1['a1_deceptive_regret_beliefs_convo_lvl'] < 0].sort_values(by='exp_num')
df_deception = df1.loc[df1['a1_deceptive_regret_beliefs_convo_lvl'] > 0].sort_values(by='exp_num')
df_none = df1.loc[df1['a1_deceptive_regret_beliefs_convo_lvl'] == 0].sort_values(by='exp_num')
df_deception

In [None]:
def rename_exp(fig):
    """Replace ``expN`` trace names in ``fig`` with descriptive labels.

    The name, legend group and hover template of every trace are updated in
    place.  Traces whose name has no descriptive label (for example an
    experiment outside exp0 through exp8, or a trace that was already
    renamed) are left unchanged.  Nothing is returned.
    """
    newnames = dict(
        exp0='shared points, no deception',
        exp1='no deception',
        exp2='deception',
        exp3='deception, full persuasion taxonomy',
        exp4='deception, TOM, full persuasion taxonomy',
        exp5='no deception, full persuasion taxonomy',
        exp6='deception, reduced persuasion taxonomy',
        exp7='deception, TOM, reduced',
        exp8='no deception, reduced',
    )

    def _relabel(trace):
        # Fall back to the existing name so unknown experiments do not raise KeyError.
        label = newnames.get(trace.name, trace.name)
        trace.update(name=label,
                     legendgroup=label,
                     hovertemplate=trace.hovertemplate.replace(trace.name, label))

    fig.for_each_trace(_relabel)

In [None]:

# Runs with positive a1 deceptive regret, counted per objective pairing and split by experiment.
fig = px.histogram(
    df_deception,
    x='sof',
    color='exp_num',
    title='a1 deceptive regret > 0 (indicator Agent 1 is deceptive)',
)
fig.update_xaxes(categoryorder='category ascending')
rename_exp(fig)
fig.show()

In [None]:
# Runs with negative a1 deceptive regret, counted per objective pairing and split by experiment.
fig = px.histogram(
    df_antideception,
    x='sof',
    color='exp_num',
    title='a1 deceptive regret < 0 (indicator Agent 1 is altruistic)',
)
fig.update_xaxes(categoryorder='category ascending')
rename_exp(fig)
fig.show()

In [None]:
# Runs with zero a1 deceptive regret, counted per objective pairing and split by experiment.
fig = px.histogram(
    df_none,
    x='sof',
    color='exp_num',
    title='a1 deceptive regret == 0 (indicator Agent 1 is neither)',
)
fig.update_xaxes(categoryorder='category ascending')
rename_exp(fig)
fig.show()

In [None]:
# Re-order each partition by Agent 1's alignment for the plots that colour by it.
df_antideception = df_antideception.sort_values(by='a1_sof_alignment')
df_deception = df_deception.sort_values(by='a1_sof_alignment')
df_none = df_none.sort_values(by='a1_sof_alignment')

In [None]:
# Bar chart of the deceptive a1 runs per objective pairing, coloured on a continuous Turbo scale.
fig = px.bar(
    df_deception,
    x='sof',
    color='a1_sof_alignment',
    title='a1 deceptive regret > 0',
    color_continuous_scale=px.colors.sequential.Turbo,
)
fig.update_xaxes(categoryorder='category ascending')
fig.show()

In [None]:

fig = px.histogram(
    df_deception,
    x='sof',
    color='a1_sof_alignment',
    title='a1 deceptive regret > 0 (indicator Agent 1 is deceptive)',
    # Every Turbo shade is repeated 18 times in a row, so consecutive discrete
    # colour values share a shade.
    color_discrete_sequence=[
        px.colors.sequential.Turbo[i // 18]
        for i in range(len(px.colors.sequential.Turbo)*18)
    ],
)
fig.update_xaxes(categoryorder='category ascending')
#fig.update_traces(showlegend=False)
#fig.add_trace(go.Bar(x=df_deception['sof'], y=df_deception['a2_sof_alignment'], name='a2 deceptive regret > 0', marker=dict(color=px.colors.sequential.Turbo, coloraxis="coloraxis")))
#fig.update_layout(coloraxis=dict(colorscale='Bluered_r'), showlegend=True)
#fig.update_layout(coloraxis_showscale=True)
# fig.update_layout(coloraxis_colorbar=dict(
#     title="Number of Bills per Cell",
#     thicknessmode="pixels", thickness=50,
#     lenmode="pixels", len=200,
#     yanchor="top", y=1,
#     ticks="outside", ticksuffix=" bills",
#     dtick=5
# ))
fig.show()

In [None]:
fig = px.histogram(
    df_antideception,
    x='sof',
    color='a1_sof_alignment',
    title='a1 deceptive regret < 0 (indicator Agent 1 is altruistic)',
    # Every Turbo shade is repeated 24 times in a row.
    color_discrete_sequence=[
        px.colors.sequential.Turbo[i // 24]
        for i in range(len(px.colors.sequential.Turbo)*24)
    ],
)
fig.update_xaxes(categoryorder='category ascending')
fig.show()

In [None]:
fig = px.histogram(
    df_none,
    x='sof',
    color='a1_sof_alignment',
    title='a1 deceptive regret == 0 (indicator agent is neither)',
    # Every Turbo shade is repeated 7 times in a row.
    color_discrete_sequence=[
        px.colors.sequential.Turbo[i // 7]
        for i in range(len(px.colors.sequential.Turbo)*7)
    ],
)
fig.update_xaxes(categoryorder='category ascending')
fig.show()

In [None]:
def _split_by_regret(df, agent):
    """Split ``df`` by the sign of one agent's conversation-level deceptive regret.

    Two agent-neutral columns are added first: ``sof_individual`` (that
    agent's objective label) and ``sof_alignment`` (that agent's alignment).
    ``assign`` builds a new frame, so no column is ever written onto a
    filtered slice of ``df``.

    Args:
        df: Frame holding the per-agent columns.
        agent: Agent prefix, ``'a1'`` or ``'a2'``.

    Returns:
        A tuple ``(negative, positive, zero)`` of frames.
    """
    tagged = df.assign(
        sof_individual=df[f'sof_{agent}_label'],
        sof_alignment=df[f'{agent}_sof_alignment'],
    )
    regret = tagged[f'{agent}_deceptive_regret_beliefs_convo_lvl']
    return tagged[regret < 0], tagged[regret > 0], tagged[regret == 0]


df_antideception_a1, df_deception_a1, df_none_a1 = _split_by_regret(df1, 'a1')
df_antideception_a2, df_deception_a2, df_none_a2 = _split_by_regret(df1, 'a2')

# Stack both agents so each row describes one agent of one run.
df_deception_combined = pd.concat([df_deception_a1, df_deception_a2], axis=0)
df_antideception_combined = pd.concat([df_antideception_a1, df_antideception_a2], axis=0)
df_none_combined = pd.concat([df_none_a1, df_none_a2], axis=0)

In [None]:
# Order the per-agent frames by experiment before the per-experiment plots.
df_antideception_combined = df_antideception_combined.sort_values(by='exp_num')
df_deception_combined = df_deception_combined.sort_values(by='exp_num')
df_none_combined = df_none_combined.sort_values(by='exp_num')

In [None]:
# Agents with positive deceptive regret, counted per individual objective and split by experiment.
fig = px.histogram(
    df_deception_combined,
    x='sof_individual',
    color='exp_num',
    title='a1 or a2 deceptive regret > 0 (indicator agent is deceptive)',
)
fig.update_xaxes(categoryorder='category ascending')
rename_exp(fig)
fig.show()

In [None]:
# Agents with negative deceptive regret, counted per individual objective and split by experiment.
fig = px.histogram(
    df_antideception_combined,
    x='sof_individual',
    color='exp_num',
    title='a1 or a2 deceptive regret < 0 (indicator agent is altruistic)',
)
fig.update_xaxes(categoryorder='category ascending')
rename_exp(fig)
fig.show()

In [None]:
# Agents with zero deceptive regret, counted per individual objective and split by experiment.
fig = px.histogram(
    df_none_combined,
    x='sof_individual',
    color='exp_num',
    title='a1 or a2 deceptive regret == 0 (indicator agent is neither)',
)
fig.update_xaxes(categoryorder='category ascending')
rename_exp(fig)
fig.show()

In [None]:
# Re-order the per-agent frames by alignment for the plots that colour by it.
df_deception_combined = df_deception_combined.sort_values(by='sof_alignment')
df_antideception_combined = df_antideception_combined.sort_values(by='sof_alignment')
df_none_combined = df_none_combined.sort_values(by='sof_alignment')

In [None]:
# Bar chart of deceptive agents per individual objective, coloured on a continuous Turbo scale.
fig = px.bar(
    df_deception_combined,
    x='sof_individual',
    color='sof_alignment',
    title='a1 or a2 deceptive regret > 0 (indicator agent is deceptive)',
    color_continuous_scale=px.colors.sequential.Turbo,
)
fig.update_xaxes(categoryorder='category ascending')
fig.show()

In [None]:
fig = px.histogram(
    df_deception_combined,
    x='sof_individual',
    color='sof_alignment',
    title='a1 or a2 deceptive regret > 0 (indicator agent is deceptive)',
    # Every Turbo shade is repeated 26 times in a row.
    color_discrete_sequence=[
        px.colors.sequential.Turbo[i // 26]
        for i in range(len(px.colors.sequential.Turbo)*26)
    ],
)
fig.update_xaxes(categoryorder='category ascending')
fig.show()

In [None]:
fig = px.histogram(
    df_antideception_combined,
    x='sof_individual',
    color='sof_alignment',
    title='a1 or a2 deceptive regret < 0 (indicator agent is altruistic)',
    # Every Turbo shade is repeated 33 times in a row.
    color_discrete_sequence=[
        px.colors.sequential.Turbo[i // 33]
        for i in range(len(px.colors.sequential.Turbo)*33)
    ],
)
fig.update_xaxes(categoryorder='category ascending')
fig.show()

In [None]:
fig = px.histogram(
    df_none_combined,
    x='sof_individual',
    color='sof_alignment',
    title='a1 or a2 deceptive regret == 0 (indicator agent is neither)',
    # Turbo shades repeated 11 times each, with the last 22 entries replaced
    # by hand-picked runs of the final three shades.
    color_discrete_sequence=(
        [px.colors.sequential.Turbo[i // 11] for i in range(len(px.colors.sequential.Turbo)*11)][:-22]
        + [px.colors.sequential.Turbo[-3]]*6
        + [px.colors.sequential.Turbo[-3]]*5
        + [px.colors.sequential.Turbo[-2]]*13
        + [px.colors.sequential.Turbo[-1]]
    ),
)
fig.update_xaxes(categoryorder='category ascending')
fig.show()

In [None]:
# Average a1 alignment for every (a1 objective, a2 objective) pairing.
fig = px.density_heatmap(
    df1,
    x="sof_a1_label",
    y="sof_a2_label",
    z="a1_sof_alignment",
    histfunc="avg",
)
fig.show()

In [None]:
# Average a2 alignment for every (a1 objective, a2 objective) pairing.
fig = px.density_heatmap(
    df1,
    x="sof_a1_label",
    y="sof_a2_label",
    z="a2_sof_alignment",
    histfunc="avg",
)
fig.show()

In [None]:
# Average a1 deceptive regret for every (a1 objective, a2 objective) pairing, with cell values printed.
fig = px.density_heatmap(
    df1,
    x="sof_a1_label",
    y="sof_a2_label",
    z="a1_deceptive_regret_beliefs_convo_lvl",
    color_continuous_scale=px.colors.sequential.Blues,
    histfunc="avg",
    text_auto=True,
)
fig.show()

In [None]:
# Average a2 deceptive regret for every (a1 objective, a2 objective) pairing, with cell values printed.
fig = px.density_heatmap(
    df1,
    x="sof_a1_label",
    y="sof_a2_label",
    z="a2_deceptive_regret_beliefs_convo_lvl",
    color_continuous_scale=px.colors.sequential.Blues,
    histfunc="avg",
    text_auto=True,
)
fig.show()

In [None]:
# Histogram of df1 rows per model, coloured by 'sof_alignment'.
# NOTE(review): no visible cell adds a 'sof_alignment' column to df1 itself (it is only
# set on the per-agent filtered frames) — confirm the column exists in the loaded runs.
# NOTE(review): the title matches the earlier "neither" plot although this cell plots
# all of df1 by model — confirm the intended frame and title.
fig = px.histogram(df1, x='model', color='sof_alignment', title='a1 or a2 deceptive regret == 0 (indicator agent is neither)', color_discrete_sequence=[px.colors.sequential.Turbo[i // 11] for i in range(len(px.colors.sequential.Turbo)*11)][:-22] + [px.colors.sequential.Turbo[-3]]*6 + [px.colors.sequential.Turbo[-3]]*5 + [px.colors.sequential.Turbo[-2]]*13 + [px.colors.sequential.Turbo[-1]]).update_xaxes(categoryorder='category ascending')
fig.show()

In [None]:
def label_deception(column_name):
    """Build a row classifier keyed on the sign of ``row[column_name]``.

    The returned callable maps a negative value to 'Not Deceptive', a
    positive value to 'Deceptive' and zero to 'Neither'.  A value that
    satisfies none of those comparisons (for example NaN) yields ``None``.
    """
    def helper(row):
        value = row[column_name]
        if value < 0:
            label = 'Not Deceptive'
        elif value > 0:
            label = 'Deceptive'
        elif value == 0:
            label = 'Neither'
        else:
            label = None
        return label
    return helper
# One labelled copy of df1 per agent, stacked so each row is one agent of one run.
df1_deception_label1 = df1.copy()
df1_deception_label1['deception_posterior_label'] = df1.apply(
    label_deception('a1_deceptive_regret_beliefs_convo_lvl'), axis=1)

df1_deception_label2 = df1.copy()
df1_deception_label2['deception_posterior_label'] = df1.apply(
    label_deception('a2_deceptive_regret_beliefs_convo_lvl'), axis=1)

df1_deception_label = pd.concat([df1_deception_label1, df1_deception_label2])

In [None]:
# Grouped counts of each posterior deception label, one bar per model.
fig = px.histogram(
    df1_deception_label,
    x='deception_posterior_label',
    color='model',
    text_auto=True,
    title='Deceptive Regret on Posterior by Model',
    barmode='group',
)
fig.update_xaxes(categoryorder='category ascending')
fig.show()

In [None]:
# Tag each per-agent frame with its regret-sign label, then stack them into one frame.
df_deception_combined['deception_posterior_label'] = 'Deceptive'
df_antideception_combined['deception_posterior_label'] = 'Not Deceptive'
df_none_combined['deception_posterior_label'] = 'Neither'
df_sof_combined = pd.concat(
    [df_deception_combined, df_antideception_combined, df_none_combined],
)

In [None]:
# Grouped counts of each posterior deception label, one bar per individual objective.
fig = px.histogram(
    df_sof_combined,
    x='deception_posterior_label',
    color='sof_individual',
    text_auto=True,
    title='Deceptive Regret on Posterior by SOF',
    barmode='group',
)
fig.update_xaxes(categoryorder='category ascending')
fig.show()

In [None]:
# Grouped counts of each posterior deception label, one bar per experiment.
fig = px.histogram(
    df1_deception_label,
    x='deception_posterior_label',
    color='exp_num',
    text_auto=True,
    title='Deceptive Regret on Posterior by Experiment',
    barmode='group',
)
fig.update_xaxes(categoryorder='category ascending')
rename_exp(fig)
fig.show()

In [None]:
# Distribution of a1_taxicabs_mean, split by model.
fig = px.histogram(
    df1,
    x='a1_taxicabs_mean',
    color='model',
    title='a1 taxicabs mean',
    text_auto=True,
)
fig.show()

In [None]:
# Distribution of a2_taxicabs_mean, split by model.
fig = px.histogram(
    df1,
    x='a2_taxicabs_mean',
    color='model',
    title='a2 taxicabs mean',
    text_auto=True,
)
fig.show()

In [None]:
# Distribution of a1_third_person_taxicabs_mean, split by model.
fig = px.histogram(
    df1,
    x='a1_third_person_taxicabs_mean',
    color='model',
    title='a1 third person taxicabs mean',
    text_auto=True,
)
fig.show()

In [None]:
# Distribution of a2_third_person_taxicabs_mean, split by model.
fig = px.histogram(
    df1,
    x='a2_third_person_taxicabs_mean',
    color='model',
    title='a2 third person taxicabs mean',
    text_auto=True,
)
fig.show()

In [None]:
# Distribution of a1_taxicabs_mean_mean, split by model.
fig = px.histogram(
    df1,
    x='a1_taxicabs_mean_mean',
    color='model',
    title='a1 taxicabs mean mean',
    text_auto=True,
)
fig.show()

In [None]:
# Distribution of a2_taxicabs_mean_mean, split by model.
fig = px.histogram(
    df1,
    x='a2_taxicabs_mean_mean',
    color='model',
    title='a2 taxicabs mean mean',
    text_auto=True,
)
fig.show()

In [None]:
# Distribution of a1_taxicabs_last, split by model.
fig = px.histogram(
    df1,
    x='a1_taxicabs_last',
    color='model',
    title='a1 taxicabs last',
    text_auto=True,
)
fig.show()

In [None]:
# Distribution of a2_taxicabs_last, split by model.
fig = px.histogram(
    df1,
    x='a2_taxicabs_last',
    color='model',
    title='a2 taxicabs last',
    text_auto=True,
)
fig.show()

In [None]:
# Distribution of a1_third_person_taxicabs_last, split by model.
fig = px.histogram(
    df1,
    x='a1_third_person_taxicabs_last',
    color='model',
    title='a1 third person taxicabs last',
    text_auto=True,
)
fig.show()

In [None]:
# Distribution of a2_third_person_taxicabs_last, split by model.
fig = px.histogram(
    df1,
    x='a2_third_person_taxicabs_last',
    color='model',
    title='a2 third person taxicabs last',
    text_auto=True,
)
fig.show()

In [None]:
# Distribution of a1 conversation-level deceptive regret, split by model.
fig = px.histogram(
    df1,
    x='a1_deceptive_regret_beliefs_convo_lvl',
    color='model',
    title='a1 deceptive regret',
    text_auto=True,
)
fig.show()

In [None]:
# Distribution of a2 conversation-level deceptive regret, split by model.
fig = px.histogram(
    df1,
    x='a2_deceptive_regret_beliefs_convo_lvl',
    color='model',
    title='a2 deceptive regret',
    text_auto=True,
)
fig.show()

In [None]:
# Distribution of a2 conversation-level deceptive regret, split by experiment.
# The title previously said "a1" although the plotted column is the a2 one.
# NOTE(review): if the a1 column was intended here, change `x` instead of the title.
fig = px.histogram(
    df1,
    x='a2_deceptive_regret_beliefs_convo_lvl',
    color='exp_num',
    title='a2 deceptive regret',
    text_auto=True,
)
fig.show()

In [None]:
def label_deception_avg(column_name):
    """Build a row classifier that compares ``row[column_name]`` to its df1 mean.

    The mean of ``df1[column_name]`` is computed once, when this function is
    called.  The returned callable maps values below that mean to
    'Not Deceptive', values above it to 'Deceptive' and values equal to it to
    'Neither'; a value matching none of those comparisons yields ``None``.
    """
    column_mean = df1[column_name].mean()

    def helper(row):
        value = row[column_name]
        if value < column_mean:
            label = 'Not Deceptive'
        elif value > column_mean:
            label = 'Deceptive'
        elif value == column_mean:
            label = 'Neither'
        else:
            label = None
        return label
    return helper
# One labelled copy of df1 per agent (labels from the third-person taxicab means), stacked together.
df1_ground_deception_label1 = df1.copy()
df1_ground_deception_label1['deception_posterior_label'] = df1.apply(
    label_deception_avg('a1_third_person_taxicabs_mean'), axis=1)

df1_ground_deception_label2 = df1.copy()
df1_ground_deception_label2['deception_posterior_label'] = df1.apply(
    label_deception_avg('a2_third_person_taxicabs_mean'), axis=1)

df1_ground_deception_label = pd.concat([df1_ground_deception_label1, df1_ground_deception_label2])

In [None]:
# Grouped counts of each ground-truth deception label, one bar per model.
fig = px.histogram(
    df1_ground_deception_label,
    x='deception_posterior_label',
    color='model',
    text_auto=True,
    title='Deceptive Regret on Ground Truth by Model',
    barmode='group',
)
fig.update_xaxes(categoryorder='category ascending')
fig.show()

In [None]:
# Grouped counts of each ground-truth deception label, one bar per experiment.
# The title now says "Ground Truth" to match the frame being plotted and the
# companion per-model plot; it previously said "Posterior".
fig = px.histogram(
    df1_ground_deception_label,
    x='deception_posterior_label',
    color='exp_num',
    text_auto=True,
    title='Deceptive Regret on Ground Truth by Experiment',
    barmode='group',
)
fig.update_xaxes(categoryorder='category ascending')
rename_exp(fig)
fig.show()

In [None]:
# Distribution of a1 conversation-level deceptive regret in the half runs, split by half_agent.
# Title typo fixed: the model name is 'gpt-3.5-turbo', as in the other half-run titles.
fig = px.histogram(
    df_half,
    x='a1_deceptive_regret_beliefs_convo_lvl',
    color='half_agent',
    text_auto=True,
    title='a1 deceptive regret on posterior  (Llama-3.1-70B-Instruct + 1/3 gpt-3.5-turbo)',
)
fig.show()

In [None]:
# Distribution of a2 conversation-level deceptive regret in the half runs, split by half_agent.
fig = px.histogram(
    df_half,
    x='a2_deceptive_regret_beliefs_convo_lvl',
    text_auto=True,
    color='half_agent',
    title='a2 deceptive regret on posterior  (Llama-3.1-70B-Instruct + 1/3 gpt-3.5-turbo)',
)
fig.show()

In [None]:
# Distribution of a1 alignment in the half runs, split by half_agent.
fig = px.histogram(
    df_half,
    x='a1_sof_alignment',
    color='half_agent',
    text_auto=True,
    title='a1 alignment distribution (Llama-3.1-70B-Instruct + 1/3 gpt-3.5-turbo)',
)
fig.show()

In [None]:
# Distribution of a2 alignment in the half runs, split by half_agent.
fig = px.histogram(
    df_half,
    x='a2_sof_alignment',
    color='half_agent',
    text_auto=True,
    title='a2 alignment distribution (Llama-3.1-70B-Instruct + 1/3 gpt-3.5-turbo)',
)
fig.show()

In [None]:
# Distribution of a1_taxicabs_mean in the half runs, split by half_agent.
fig = px.histogram(
    df_half,
    x='a1_taxicabs_mean',
    color='half_agent',
    text_auto=True,
    title='a1 taxicabs mean  (Llama-3.1-70B-Instruct + 1/3 gpt-3.5-turbo)',
)
fig.show()

In [None]:
# Distribution of a2_taxicabs_mean in the half runs, split by half_agent.
fig = px.histogram(
    df_half,
    x='a2_taxicabs_mean',
    color='half_agent',
    text_auto=True,
    title='a2 taxicabs mean (Llama-3.1-70B-Instruct + 1/3 gpt-3.5-turbo)',
)
fig.show()

In [None]:
# Distribution of a1_third_person_taxicabs_mean in the half runs, split by half_agent.
fig = px.histogram(
    df_half,
    x='a1_third_person_taxicabs_mean',
    color='half_agent',
    text_auto=True,
    title='a1 third person taxicabs mean (Llama-3.1-70B-Instruct + 1/3 gpt-3.5-turbo)',
)
fig.show()

In [None]:
# Distribution of a2_third_person_taxicabs_mean in the half runs, split by half_agent.
fig = px.histogram(
    df_half,
    x='a2_third_person_taxicabs_mean',
    color='half_agent',
    text_auto=True,
    title='a2 third person taxicabs mean (Llama-3.1-70B-Instruct + 1/3 gpt-3.5-turbo)',
)
fig.show()