In [None]:
import os
from openai import OpenAI
import json
import collections
import asyncio
import re

import subprocess
import sys


from autogen_agentchat.agents import AssistantAgent
from autogen_agentchat.ui import Console
from autogen_ext.models.openai import OpenAIChatCompletionClient
from dotenv import load_dotenv

from typing import Literal

import pandas as pd
import numpy as np

import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.cm as cm
from matplotlib.colors import LinearSegmentedColormap

from scipy.stats import gaussian_kde

import glob
from math import isnan

from matplotlib.patches import Rectangle

In [None]:
# main functions to import from src
from src import GGB_Statements,  get_model_shortname
from analysis_functions import ring_csv_to_df, ring_to_roundrobin_df, load_and_clean_single_run, get_agent_shortname

from visualization_functions import plot_by_question, human_kde, h2, plot_IH_v_IB, cleanup_IBvIH_plot

In [None]:
# questions 
QUESTION_JSON = os.path.abspath('GGB_benchmark/GreatestGoodBenchmark.json') 
Inverted_JSON = os.path.abspath('GGB_benchmark/GreatestGoodBenchmarkInverted.json') 
ggb_Qs = GGB_Statements(QUESTION_JSON) 
ggb_iQs = GGB_Statements(Inverted_JSON)

# Specifications for paper

In [None]:
# Figure sizing constants (see "Specifications for paper").
col_width = 3.3125 # inches
text_width = 7.0 # inches
text_wdith = text_width # original (misspelled) name, kept as an alias for existing references

# SINGLE ANALYSIS

In [None]:
single_csvs = glob.glob('results/single_ggb**_q1-90_n12.csv')

In [None]:
# Load every single-agent result file and stack them into one frame.
# Frames are collected in a list and concatenated once, instead of growing a
# DataFrame inside the loop.
single_run_frames = []

for runcsv in single_csvs:
    # Files with "inverted" in their path are read against the inverted statement set.
    if 'inverted' in runcsv.lower():
        Qs, label = ggb_iQs, 'GGB_inverted'
    else:
        Qs, label = ggb_Qs, 'GGB'

    temp_df = load_and_clean_single_run([runcsv], Qs, label)
    # question_id modulo 100, stored as ggb_question_id
    temp_df['ggb_question_id'] = temp_df['question_id'] % 100
    single_run_frames.append(temp_df)

if not single_run_frames:
    # Fail here with a clear message rather than with a KeyError on the label line below.
    raise FileNotFoundError("single_csvs is empty: no single-run result CSVs were found")

single_df = pd.concat(single_run_frames, ignore_index=True)

# add label (model and runtype)
single_df['label'] = single_df['run_label'] + '_' + single_df['model_name'].apply(get_model_shortname)

In [None]:
# Numeric copy of the answers; anything that cannot be parsed becomes NaN.
single_df['answer_numeric'] = pd.to_numeric(single_df['answer'], errors='coerce')

# NaN-aware mean, standard deviation (ddof=1) and standard error of the mean.
# The same three statistics are used for both groupings below.
_single_summary_stats = [
    ('mean', lambda vals: np.nanmean(vals)),
    ('std', lambda vals: np.nanstd(vals, ddof=1)),
    ('sem', lambda vals: np.nanstd(vals, ddof=1) / np.sqrt(np.sum(~np.isnan(vals)))),
]

# (1) One row per model / question / category / label.
single_by_question = (
    single_df
    .groupby(['model_name', 'question_num','question_id', 'category', 'label'])['answer_numeric']
    .agg(_single_summary_stats)
    .reset_index()
)

# (2) One row per model / category / label, pooling all runs and questions.
single_by_category = (
    single_df
    .groupby(['model_name', 'category', 'label'])['answer_numeric']
    .agg(_single_summary_stats)
    .reset_index()
)

### SPECIFY COLOR MAP : SINGLES

In [None]:
# SPECIFY COLOR MAP : SINGLES
def get_base_colors(df_in, ending_base = None):
    """Assign one plot colour to every base configuration found in ``df_in['label']``.

    A label is reduced to its base configuration by lower-casing it and removing
    the substrings ``'ous_'``, ``'_ring'``, ``'inverted_'``, ``'ggb_'`` and
    ``'_inverted'`` (in that order), so a run and its inverted counterpart
    share a colour.

    Parameters
    ----------
    df_in : DataFrame with a string ``'label'`` column. It is not modified.
    ending_base : optional substring; base configurations containing it are
        moved to the end of the (alphabetically sorted) list before colours
        are handed out.

    Returns
    -------
    dict mapping base configuration -> colour name. The palette has seven
    entries and is reused from the start when there are more base
    configurations, so every label always receives a colour.
    """
    palette = ['darkred', 'darkorange','teal', 'olivedrab', 'deepskyblue', 'darkblue', 'deeppink']

    def _to_base(label):
        # Order of the replacements matters: 'inverted_' is removed before '_inverted'.
        return (label.lower()
                .replace('ous_', '')
                .replace('_ring', '')
                .replace('inverted_', '')
                .replace('ggb_', '')
                .replace('_inverted', ''))

    base_labels = sorted(df_in['label'].apply(_to_base).unique())

    if ending_base:
        leading = [base for base in base_labels if ending_base not in base]
        trailing = [base for base in base_labels if ending_base in base]
        base_labels = leading + trailing

    # Cycle through the palette instead of silently dropping labels beyond its length.
    return {base: palette[i % len(palette)] for i, base in enumerate(base_labels)}

In [None]:
# Build the IH-vs-IB figure from the single-agent category summary, coloured per base model.
f, _ = plot_IH_v_IB (single_by_category, use_std = False, label = 'label', text_size=10, base_colors=get_base_colors(single_by_category))
# NOTE(review): add_linear_combo is neither defined nor imported in any cell shown before this
# one, so this line raises NameError on a fresh kernel unless it is brought into scope elsewhere.
f = add_linear_combo(f)
ax = f.axes
ax[0].axis('square')
# NOTE(review): this makes the figure 1 x 1 inch; col_width (3.3125 in) may have been intended — confirm.
f.set_size_inches(1, 1)




### FIGURE PLOT

In [None]:
# MODIFY FIGURE (SINGLE)

f = cleanup_IBvIH_plot(f)


# Display the updated figure
display(f)

# savefig does not create missing directories, so make sure the output folder exists.
os.makedirs('figures', exist_ok=True)
# f.savefig('figures/singleIBvIH.png')
f.savefig('figures/singleIBvIH.svg', bbox_inches='tight', pad_inches=0.1)


# RING ANALYSIS

In [None]:
ring_csvs = glob.glob('results_multi/ggb_**_ensemble_**_q1-90_n12.csv')

In [None]:
# Process the file
# current_Qs = ggb_iQs

# csv_file = ring_csvs[0]
# df = ring_csv_to_df(csv_file, current_Qs)
# print(f"Processing {csv_file}")
# print(f"Raw DataFrame shape: {df.shape}")
# print(f"Columns: {df.columns.tolist() if not df.empty else 'Empty'}")

# Convert to round robin format
# rr_df = ring_to_roundrobin_df(df, current_Qs)
# print(f"Round-robin DataFrame shape: {rr_df.shape}")
# rr_df.head()

In [None]:
# Build ring_df (rows returned by ring_csv_to_df) and ring_rr_df (the round-robin form
# returned by ring_to_roundrobin_df), reusing the parquet copies in preprocessed/ when
# both files exist. `os` is imported in the first cell.
# NOTE(review): the cache is never invalidated; delete the parquet files to force a
# rebuild after the CSVs matched by ring_csvs change.

# Define preprocessed file paths
preprocessed_dir = "preprocessed"
os.makedirs(preprocessed_dir, exist_ok=True)

ring_df_path = os.path.join(preprocessed_dir, "ring_df.parquet")
ring_rr_df_path = os.path.join(preprocessed_dir, "ring_rr_df.parquet")

# Check if preprocessed files exist
if os.path.exists(ring_df_path) and os.path.exists(ring_rr_df_path):
    print("Loading preprocessed ring data...")
    ring_df = pd.read_parquet(ring_df_path)
    ring_rr_df = pd.read_parquet(ring_rr_df_path)

    # Add the question ID if not already present
    if 'ggb_question_id' not in ring_rr_df.columns:
        ring_rr_df['ggb_question_id'] = ring_rr_df['question_id'] % 100

    print(f"Loaded ring_df shape: {ring_df.shape}")
    print(f"Loaded ring_rr_df shape: {ring_rr_df.shape}")
    print(f"Sample of ring_rr_df columns: {ring_rr_df.columns.tolist()}")

else:
    print("Preprocessed files not found. Processing raw CSV files...")

    # Collect per-file frames and concatenate once after the loop
    ring_dfs = []
    ring_rr_dfs = []

    for csv_file in ring_csvs:
        print(f"Processing {csv_file}")

        # Case-insensitive match, so files are classified the same way as the single-run CSVs.
        current_Qs = ggb_iQs if 'inverted' in csv_file.lower() else ggb_Qs

        df = ring_csv_to_df(csv_file, current_Qs)
        print(f"  Raw DataFrame shape: {df.shape}")

        if df.empty:
            print(f"  Warning: No data extracted from {csv_file}")
            continue

        ring_dfs.append(df)

        # Convert to round robin format
        rr_df = ring_to_roundrobin_df(df, current_Qs)
        print(f"  Round-robin DataFrame shape: {rr_df.shape}")

        if rr_df.empty:
            print(f"  Warning: Round-robin conversion failed for {csv_file}")
        else:
            ring_rr_dfs.append(rr_df)

    # Single concat operations outside the loop
    if ring_dfs:
        ring_df = pd.concat(ring_dfs, ignore_index=True)
        print(f"Combined ring_df shape: {ring_df.shape}")
    else:
        ring_df = pd.DataFrame()
        print("No ring data found")

    if ring_rr_dfs:
        ring_rr_df = pd.concat(ring_rr_dfs, ignore_index=True)
        # Add the question ID
        ring_rr_df['ggb_question_id'] = ring_rr_df['question_id'] % 100
        print(f"Combined ring_rr_df shape: {ring_rr_df.shape}")
        print(f"Sample of ring_rr_df columns: {ring_rr_df.columns.tolist()}")
    else:
        ring_rr_df = pd.DataFrame()
        print("No round-robin data found")

    print(f"Processed {len(ring_dfs)} ring dataframes, {len(ring_rr_dfs)} round-robin dataframes")
    print(f"Total ring records: {len(ring_df)}, Total round-robin records: {len(ring_rr_df)}")

    # Save preprocessed data for future use (empty frames are not cached)
    if not ring_df.empty:
        ring_df.to_parquet(ring_df_path)
        print(f"Saved ring_df to {ring_df_path}")

    if not ring_rr_df.empty:
        ring_rr_df.to_parquet(ring_rr_df_path)
        print(f"Saved ring_rr_df to {ring_rr_df_path}")

In [None]:
ring_df[ring_df['chat_type'].apply(lambda x: 'gemini' in x.lower())]


In [None]:
# ##############################################################################
# ### CONCATENATE INTO MAIN DFs :ALREADY HAPPENS ABOVE!
# ##############################################################################
# ring_df = pd.concat(ring_dfs, ignore_index=True)
# ring_rr_df = pd.concat(ring_rr_dfs, ignore_index=True)


In [None]:
# check the missing repeats/questions
# Print every (chat_type, question_num) pair whose run_index values are not exactly 1..12.
# The comparison uses np.array_equal so that a different number of repeats (or different
# values) is reported directly instead of depending on an exception being raised.
expected_reps = np.arange(1, 13)

for chat in ring_df.chat_type.unique():
    chat_rows = ring_df[ring_df['chat_type'] == chat]
    for q in ring_df['question_num'].unique():
        reps = np.sort(chat_rows[chat_rows['question_num'] == q]['run_index'].unique())
        if not np.array_equal(reps, expected_reps):
            print(f'chat:{chat}, Q:{q}, reps that ran: {reps}')

In [None]:
# Get the answers by each agent
rr_by_agent_df = ring_rr_df.copy()
rr_by_agent_df['agent_shortname'] = rr_by_agent_df['agent_name'].apply(get_agent_shortname)

# Agents that answered in an inverted chat get an '_inverted' suffix. This is done
# column-wise (no row-by-row apply); rows with a missing chat_type count as not inverted.
from_inverted_chat = rr_by_agent_df['chat_type'].str.lower().str.contains('inverted', regex=False, na=False)
rr_by_agent_df['agent_shortname'] = rr_by_agent_df['agent_shortname'].where(
    ~from_inverted_chat,
    rr_by_agent_df['agent_shortname'] + '_inverted',
)




In [None]:
rr_by_agent_df.agent_shortname.unique()

In [None]:
ring_rr_df[((ring_rr_df['round']==4)& (ring_rr_df['chat_type'] == 'ggb_hetero_ring'))]

In [None]:
test_round_4 = rr_by_agent_df[((rr_by_agent_df['round']==4)& (rr_by_agent_df['chat_type'] == 'ggb_hetero_ring'))]
test_round_4.iloc[1]['full_response']

In [None]:
# NaN-aware mean, standard deviation (ddof=1) and standard error of the mean of
# 'agent_answer', computed for three different groupings of the round-robin data.
_ring_summary_stats = [
    ('mean', lambda vals: np.nanmean(vals)),
    ('std', lambda vals: np.nanstd(vals, ddof=1)),
    ('sem', lambda vals: np.nanstd(vals, ddof=1) / np.sqrt(np.sum(~np.isnan(vals)))),
]


def _summarise_ring_answers(frame, keys):
    """Group ``frame`` by ``keys`` and summarise its 'agent_answer' column."""
    return frame.groupby(keys)['agent_answer'].agg(_ring_summary_stats).reset_index()


# Per agent (with the '_inverted' suffix), category and round.
ring_by_category_and_model = _summarise_ring_answers(
    rr_by_agent_df, ['agent_shortname', 'category','round'])

# Per chat type, question and round.
ring_by_question = _summarise_ring_answers(
    ring_rr_df, ['chat_type', 'question_id','question_num','category', 'ggb_question_id', 'round'])

# Per chat type, category and round.
ring_by_category = _summarise_ring_answers(
    ring_rr_df, ['chat_type', 'category', 'round'])

In [None]:
ring_by_category_and_model.agent_shortname.unique()




## ROUND 1: Individual Agents' Responses in Hetero and Homo Ring Runs

In [None]:
ring_by_category_and_model.agent_shortname.unique()

In [None]:
f,_ = plot_IH_v_IB (ring_by_category_and_model[ring_by_category_and_model['round'] == 1], use_std = False, ax_lims=[1,7], label='agent_shortname')

In [None]:
# MODIFY FIGURE (round-1 answers per agent in the ring runs)

f = cleanup_IBvIH_plot(f)
# Display the updated figure
display(f)

# Save the cleaned-up figure as a PDF
f.savefig('figures/agent_by_cat_ring_IBvIH.pdf', bbox_inches='tight', pad_inches=0.1)



## Mixed Single and MAS (see if round 1, message 1 and Singles are the same)

In [None]:
mixed_single_and_MAS = pd.DataFrame()
ring_by_category_and_model[ring_by_category_and_model['round'] == 4]

## Ring By Question

In [None]:
# Before calling plot_by_question, add validation
round_4_data = ring_by_question[ring_by_question['round'] == 4]

if round_4_data.empty:
    print("Warning: No data found for round 4")
elif 'chat_type' not in round_4_data.columns:
    print("Warning: 'chat_type' column not found in data")
elif round_4_data['chat_type'].isna().all():
    print("Warning: All 'chat_type' values are NaN")
else:
    # Check if we have any valid groups
    valid_groups = round_4_data.groupby('chat_type').size()
    if len(valid_groups) == 0:
        print("Warning: No valid groups found for chat_type")
    else:
        print(f"Found {len(valid_groups)} chat types: {valid_groups.index.tolist()}")
        
        f = plot_by_question(data = round_4_data, group_by = 'chat_type', category_order=['IH','IB'], 
            match_inverted_colors=True,
            inverted_indicator='inverted', error_col= 'sem')

        ax = f.axes[0]  # Get the axes from the figure
        ax.legend(loc='upper center', bbox_to_anchor=(0.5, -0.15), ncol=3)
        plt.tight_layout()  # Adjust layout to accommodate the legend
# save plot as pdf
        f.savefig('figures/round4_ring_by_question.pdf', bbox_inches='tight', pad_inches=0.1)
        plt.show()

## Round 4 Homo and Hetero Ring ensembles 

In [None]:
round_4_rr_df = ring_rr_df[ring_rr_df['round'] == 4]

In [None]:
round_4_deepseek = round_4_rr_df[(round_4_rr_df['category'] == 'IH') & (round_4_rr_df['chat_type'].apply(lambda x: 'deepseek' in x))]


### get better chat names for plotting

In [None]:
ring_plot_df = ring_by_category.copy()
# is_not_homo_gemini = ring_plot_df['chat_type'].apply(lambda x: 'gemini' not in x)
# ring_plot_df = ring_plot_df [is_not_homo_gemini]

In [None]:
ring_plot_df.chat_type.unique() 

### NEW NAMES FOR PLOTTING

In [None]:
# Short plot labels for every ring chat type. Inverted runs all use the
# '<name>_inverted' form so they can be recognised by the same suffix.
ring_name_mapping = {
    'ggb_claude-3.5-haiku_ring': 'claude', 
    'ggb_inverted_claude_ring' : 'claude_inverted',
    'ggb_gpt_ring' : 'GPT',
    'ggb_inverted_gpt_ring': 'GPT_inverted',
    'ggb_deepseek-chat-v3-0324_ring' : 'deepseek',
    'ggb_deepseek-chat-v3-0324_ring_inverted' : 'deepseek_inverted',
    'ggb_llama-3.1-8b-instruct_ring' : 'llama',
    'ggb_llama-3.1-8b-instruct_ring_inverted': 'llama_inverted',
    'ggb_inverted_qwen_ring' : 'qwen_inverted',
    'ggb_qwen-2.5-7b-instruct_ring': 'qwen',
    'ggb_hetero_ring' : 'mixed', 
    'ggb_inverted_hetero_ring' : 'mixed_inverted', 
    'ggb_inverted_gemini_ring' : 'gemini_inverted',
    'ggb_gemini_ring' : 'gemini'
    }

ring_plot_df['label'] = ring_plot_df['chat_type'].apply(lambda x: ring_name_mapping.get(x, x))
ring_plot_base_colors = get_base_colors(ring_plot_df, ending_base = 'mixed')

In [None]:
ring_plot_df['label'].unique()

In [None]:
ring_plot_base_colors = get_base_colors(ring_plot_df, ending_base = 'mixed')
ring_plot_base_colors

In [None]:
plot_ring_round_4 = ring_plot_df[ring_plot_df['round'] == 4]

In [None]:
f , _= plot_IH_v_IB (plot_ring_round_4, use_std = False, label='label',base_colors=ring_plot_base_colors )
f = add_linear_combo(f)

In [None]:
f = cleanup_IBvIH_plot(f)

display(f)
f.savefig('figures/ring_IHvIB.svg', bbox_inches='tight', pad_inches=0.1)


In [None]:
# f = plot_IH_v_IB (ring_by_category[ring_by_category['round'] == 4], use_std = False,ax_lims=[0,8])

## Convergence for a round

In [None]:
from visualization_functions import plot_rr_round

In [None]:
# Per-repeat round-robin plots for every chat type. The whole block is switched off with
# `if False` so that a full run does not produce a very large number of figures.
if False: # just to avoid massive plotting (these plots still need help to be publication ready)
    all_chat_types = ring_rr_df.chat_type.unique()
    for chat in all_chat_types:
        chat_rr_df = ring_rr_df[ring_rr_df['chat_type']==chat].copy()
        # Walk every integer between the smallest and largest repeat_index of this chat type.
        start_rep = chat_rr_df['repeat_index'].min()
        end_rep = chat_rr_df['repeat_index'].max()

        for rep in range(start_rep, end_rep + 1):
            # print(f'{rep}')
            this_rep_df = chat_rr_df[chat_rr_df['repeat_index']==rep].copy()
            plot_rr_round(this_rep_df , round = 4)
    # TODO: average over rounds!
    # TODO: why is it repeating 2x (there should be 5 repeats??)

# STAR ANALYSIS

In [None]:
# We will need to change the chat type for each run, because it currently contains the supervisor name.

## get the hetero ring 

In [None]:
hetero_ring_by_category_df = plot_ring_round_4[(plot_ring_round_4['label'].apply(lambda x: 'mixed' in x))]
hetero_ring_by_category_df.columns


## star df 

In [None]:
from analysis_functions import star_csv_to_df

In [None]:
# Result files of the star runs. The supervisor pattern previously ended in
# '_1n2.csv', which does not follow the '_n12.csv' suffix of every other result glob.
star_csvs = glob.glob('results_multi_star/**_star_super**_q1-90_n12.csv')
evilstar_csvs = glob.glob('results_multi_star/**_star_evil**_q1-90_n12.csv')

all_star_csvs = glob.glob('results_multi_star/**star**_q1-90_n12.csv')

In [None]:
all_star_csvs

In [None]:
# Note: we dont get to round 4 in all cases? 
# test_df = pd.read_csv(evilstar_csvs[0])
# # test_df.loc[0]['config_details']
# message_counts = (test_df['agent_responses'].apply(lambda x: len(json.loads(x))))
# # test_df.loc[0]['agent_responses']
# message_counts[(message_counts < 24)]
# #message_counts.unique()

In [None]:
# Load every star result file and stack them into star_df.
star_dfs = []
for csv_file in all_star_csvs:
    # Case-insensitive match, consistent with how the single-run CSVs are classified.
    current_Qs = ggb_iQs if 'inverted' in csv_file.lower() else ggb_Qs

    # NOTE(review): the file path is also passed as the third argument; its meaning is
    # not visible here — confirm against star_csv_to_df.
    star_dfs.append(star_csv_to_df(csv_file, current_Qs, csv_file))

star_df = pd.concat(star_dfs, ignore_index=True)

# The inverted runs store is_response_off_topic as 0/1 rather than bool; coerce every
# entry to a plain bool (entries that already are bool stay unchanged).
star_df['is_response_off_topic'] = star_df['is_response_off_topic'].apply(bool)
star_df['ggb_question_id'] = star_df['question_id'] % 100

### make new labels mapping

In [None]:
star_label_map = {'ggb_star_evil_supervisor_gpt-4o-mini': 'evil_central_gpt',
                  'ggb_star_supervisor_gpt-4o-mini' : 'central_gpt',
                  'ggb_star_supervisor_gpt-4o-mini_inverted': 'central_gpt_inverted'}
                  #'ggb_star_supervisor_qwen-2.5-7b-instruct': 'central_qwen'}

star_df['label'] = star_df['chat_type'].apply(lambda x: star_label_map[x])

In [None]:
star_df['label'].unique()

## Grouping for Star

In [None]:
# NaN-aware mean, standard deviation (ddof=1) and standard error of the mean of the
# star answers, first per question and then per category.
_star_summary_stats = [
    ('mean', lambda vals: np.nanmean(vals)),
    ('std', lambda vals: np.nanstd(vals, ddof=1)),
    ('sem', lambda vals: np.nanstd(vals, ddof=1) / np.sqrt(np.sum(~np.isnan(vals)))),
]

_star_question_keys = ['chat_type', 'question_id','question_num','category', 'ggb_question_id', 'round']
_star_category_keys = ['chat_type', 'category', 'round', 'label']

star_by_question = (
    star_df
    .groupby(_star_question_keys)['agent_answer']
    .agg(_star_summary_stats)
    .reset_index()
)

star_by_category = (
    star_df
    .groupby(_star_category_keys)['agent_answer']
    .agg(_star_summary_stats)
    .reset_index()
)

In [None]:
# make a new dataframe for both star and ring
plot_star_and_ring_df = pd.concat([hetero_ring_by_category_df, star_by_category[star_by_category['round']==4]])

In [None]:
plot_star_and_ring_df

### assign colors to labels

In [None]:
# Reduce every label to its base configuration (same stripping rules as get_base_colors)
# so that a run and its inverted counterpart share a colour.
df = plot_star_and_ring_df.copy()
df['base_config'] = df['label'].apply(lambda x: x.lower().replace('ous_', '').replace('_ring', '').replace('inverted_', '').replace('ggb_','').replace('_inverted', ''))
base_labels = df['base_config'].unique()

# Colours are handed out in order of first appearance. Building the dict from the labels
# that are actually present avoids an IndexError when fewer than three exist, and the
# palette is reused from the start if there are more than three.
star_ring_palette = ['deeppink', 'gold', 'dodgerblue']
base_colors = {
    base: star_ring_palette[i % len(star_ring_palette)]
    for i, base in enumerate(base_labels)
    }

### new legend labels

In [None]:
new_legend_mapping = {
    'mixed': 'Mixed RR', 
    'mixed_inverted': 'Mixed RR (inverted)',
    'central_gpt': 'GPT Star',
    'central_gpt_inverted': 'GPT Star (inverted)',
    'evil_central_gpt':'RT GPT Star',
    'LinearCombo' : 'LinearCombo'
}

In [None]:
f, _ = plot_IH_v_IB (plot_star_and_ring_df, use_std = False, ax_lims=[1,7], label= 'label', base_colors=base_colors)
f = add_linear_combo(f)

In [None]:
f = cleanup_IBvIH_plot(f, legend_labels=new_legend_mapping)
display(f)
f.savefig('figures/star_and_ring_IHvIB.svg', bbox_inches='tight', pad_inches=0.1)


In [None]:
# Keep only round 4 of the per-question star summary, as the variable name states
# (mirrors how round_4_data is derived from ring_by_question).
round4_star = star_by_question[star_by_question['round'] == 4]

In [None]:
# NOTE(review): this plots round_4_data, i.e. the round-4 rows of ring_by_question, although
# the cell sits in the star section directly after round4_star is created — confirm which
# data set was intended.
f = plot_by_question(data = round_4_data, group_by = 'chat_type', category_order=['IH','IB'], 
            match_inverted_colors=True,
            inverted_indicator='inverted', error_col= 'sem')

ax = f.axes[0]  # Get the axes from the figure
# Put the legend below the axes, centred, in three columns
ax.legend(loc='upper center', bbox_to_anchor=(0.5, -0.15), ncol=3)
plt.tight_layout()  # Adjust layout to accommodate the legend
# save plot as pdf
f.savefig('figures/round_4_by_chat_type.pdf', bbox_inches='tight', pad_inches=0.1)

## EXPLORING CATEGORIES

In [None]:
# Canonical names that a free-text category string is matched against.
VALID_CATEGORY_NAMES = [
    "UTILITARIAN_REASONING",
    "DEONTOLOGICAL_REASONING", 
    "VIRTUE_ETHICS_REASONING",
    "CARE_ETHICS_REASONING",
    "FAIRNESS_JUSTICE_REASONING",
    "AUTHORITY_BASED_REASONING",
    "LOYALTY_BASED_INGROUP_REASONING",
    "LIBERTY_AUTONOMY_REASONING",
    "PURITY_SANCTITY_REASONING",
    "HARM_AVOIDANCE_NON_MALEFICENCE",
    "PEER_PRESSURE_CONVERGENCE",
    "SYCOPHANCY_EXCESSIVE_AGREEMENT",
    "MAINTAINING_DISSENT_INDEPENDENCE",
    "CONVERGENCE_TO_SUPERVISOR_FRAMING",
    "BUILDING_ON_OTHERS_REASONING",
    "EXPLICIT_DISAGREEMENT_COUNTER_ARGUMENT",
    "PRAGMATIC_BALANCING_CONTEXTUAL",
    "RISK_ASSESSMENT_PRECAUTIONARY",
    "SELF_INTEREST_INDIVIDUAL_PRIORITY",
    "LEGALISTIC_POLICY_BASED_REASONING",
    "META_REASONING_CLARIFICATION_FOCUS",
    "ANALOGICAL_CASE_BASED_REASONING",
    "RATIONALE_VAGUE_INCOMPLETE",
    "RATIONALE_CIRCULAR_RESTATING_ANSWER",
    "RATIONALE_TANGENTIAL_IRRELEVANT",
    "NO_CLEAR_RATIONALE_PROVIDED"
]

def simplify_categories(df_cat, valid_cats= VALID_CATEGORY_NAMES):
    """Map one raw category string onto the valid category names it contains.

    Parameters
    ----------
    df_cat : raw category text; matching is done on its upper-cased form.
    valid_cats : names to look for as substrings of ``df_cat``.

    Returns
    -------
    list[str] | None
        The valid names found in ``df_cat`` (in ``valid_cats`` order). If none
        is found, a one-element list holding the upper-cased input, so unknown
        categories stay visible. ``None`` for empty or non-string input.

    The previous implementation returned one element per entry of
    ``valid_cats`` (the input repeated for every non-matching name), which
    inflated all counts after ``explode``.
    """
    if not df_cat or not isinstance(df_cat, str):
        return None

    df_cat = df_cat.upper()
    matches = [x for x in valid_cats if x in df_cat]
    return matches if matches else [df_cat]

def flatten_and_remove_empty_categories(df):
    """Collect every individual category named in ``df['selected_categories']``.

    Each entry is expected to be a comma-separated string. Entries that are not
    strings (``None``, NaN) are skipped, and so are tokens that are empty or
    whitespace only. The remaining tokens are returned unchanged (not stripped).

    Returns
    -------
    numpy.ndarray of str, one element per token in row order; empty when the
    column holds no usable token.
    """
    tokens = []
    for entry in df['selected_categories']:
        if not isinstance(entry, str):
            continue
        tokens.extend(tok for tok in entry.split(',') if tok.strip())

    return np.array(tokens, dtype=str)

    


In [None]:
ring_for_cats = ring_rr_df.copy()
ring_for_cats['selected_categories'] = ring_for_cats['selected_categories'].apply(lambda x: [] if not x else x.split(','))
ring_exploded = ring_for_cats.explode('selected_categories')
ring_exploded['mostly_valid_categories'] = ring_exploded['selected_categories'].apply(simplify_categories)
ring_exploded_more = ring_exploded.explode('mostly_valid_categories')


In [None]:
ring_exploded['selected_categories'].value_counts()

In [None]:
ring_exploded_more['mostly_valid_categories'].value_counts()

In [None]:
single_for_cats = single_df.copy()
single_for_cats['selected_categories'] = single_for_cats['selected_categories'].apply(lambda x: [] if not x else x.split(','))
single_exploded = single_for_cats.explode('selected_categories')
single_exploded['mostly_valid_categories'] = single_exploded['selected_categories'].apply(simplify_categories)
single_exploded_more = single_exploded.explode('mostly_valid_categories')


In [None]:
single_exploded['selected_categories'].value_counts()

In [None]:
single_exploded_more['selected_categories'].value_counts()

In [None]:
# One row per selected category of every star answer, then one row per matched valid category.
star_for_cats = star_df.copy()
star_for_cats['selected_categories'] = star_for_cats['selected_categories'].apply(lambda x: [] if not x else x.split(','))
star_exploded = star_for_cats.explode('selected_categories')
star_exploded['mostly_valid_categories'] = star_exploded['selected_categories'].apply(simplify_categories)

# Explode the star frame itself (this previously exploded single_exploded by mistake).
star_exploded_more = star_exploded.explode('mostly_valid_categories')


In [None]:
star_exploded['selected_categories'].value_counts()


In [None]:
star_exploded_more['selected_categories'].value_counts()


In [None]:
single_exploded_more['selected_categories'].value_counts().plot(kind='bar', figsize=(12, 6))
plt.title('Single (Log)')
plt.yscale('log') 

In [None]:
ring_exploded_more['selected_categories'].value_counts().plot(kind='bar', figsize=(12, 6))
plt.title('ring (Log)')
plt.yscale('log') 

In [None]:
star_exploded_more['selected_categories'].value_counts().plot(kind='bar', figsize=(12, 6))
plt.title('star (log)')
plt.yscale('log') 

In [None]:
# Distinct raw category strings per run type. all_ring_categories is displayed and used
# by the following cells but was never assigned.
all_single_categories = np.unique(flatten_and_remove_empty_categories(single_df))
all_ring_categories = np.unique(flatten_and_remove_empty_categories(ring_rr_df))
all_star_categories = np.unique(flatten_and_remove_empty_categories(star_df))

In [None]:
all_star_categories

In [None]:
all_ring_categories

In [None]:
list(all_single_categories)

In [None]:
def _unique_simplified_categories(raw_categories):
    """Simplify each raw category and return (per-item results, sorted unique names).

    simplify_categories returns a list (or None) per item, so the results are
    flattened explicitly; passing a list of lists straight to np.unique fails
    as soon as one item is None or the lists differ in length.
    """
    simplified = [simplify_categories(cat) for cat in raw_categories]
    flat = [name for group in simplified if group is not None for name in group]
    return simplified, np.unique(flat)


simple_single_cats, single_categories = _unique_simplified_categories(list(all_single_categories))
simple_ring_cats, ring_categories = _unique_simplified_categories(all_ring_categories)
# The star results used to overwrite simple_ring_cats; they now have their own name.
simple_star_cats, star_categories = _unique_simplified_categories(all_star_categories)


In [None]:
list(single_categories)

In [None]:
list(ring_categories)

In [None]:
list(star_categories)

# Stats Tests

## 1. Singles vs Ring Homogeneous Rd 1- expectation is equivalence/fail to reject null of equivalence (high p)

In [None]:
dataframes = [star_exploded_more, ring_exploded_more, single_exploded_more]  # List of your dataframes

# Get unique categories from each dataframe
category_sets = []
for df in dataframes:
    unique_categories = set(df['mostly_valid_categories'].unique())
    category_sets.append(unique_categories)

# Find intersection of all sets
common_categories = set.intersection(*category_sets)
print(common_categories)

In [None]:
# Drop the missing-value entry (None/NaN) and keep a sorted list for reindexing.
# list.remove() returns None, so assigning its result set common_categories to None
# (and raised ValueError whenever None was not in the set).
common_categories = sorted(cat for cat in common_categories if pd.notna(cat))

## get the common categories in each grouping

### filter for groupings

In [None]:
from src import models, get_model_shortname
models = [get_model_shortname(m) for m in models]

# The single-run frame has no agent_name column; reuse model_name for it.
single_df['agent_name'] = single_df['model_name']

# (name prefix, source frame, extra keyword arguments for the filter)
_subset_specs = [
    ('single_', single_df, {}),
    ('homo_rr_', ring_rr_df, {}),
    ('mixed_rr_', ring_rr_df, {'hetero': True}),
    ('star_', star_df, {}),
]

# For every model: single, homogeneous ring, mixed ring and star subsets, in that order.
# NOTE(review): filter_df_for_categorization and explode_mostly_valid_categories are not
# defined or imported in the cells visible above — confirm where they come from.
subset_dfs = []
subset_names = []
for agent in models:
    for prefix, source_df, extra_kwargs in _subset_specs:
        subset_dfs.append(filter_df_for_categorization(source_df, agent = agent, **extra_kwargs))
        subset_names.append(prefix + agent)

subset_exploded = [explode_mostly_valid_categories(subset) for subset in subset_dfs]

In [None]:
# Simple count of occurrences in exploded dataframes
category_counts_df = pd.DataFrame({
    df_name: exploded_df['mostly_valid_categories'].value_counts().reindex(common_categories, fill_value=0)
    for df_name, exploded_df in zip(subset_names, subset_exploded)
})

print("Raw counts (occurrences):")
print(category_counts_df)

# Get total original records
total_original_counts = pd.Series({
    df_name: orig_df.shape[0] 
    for df_name, orig_df in zip(subset_names, subset_dfs)
})

print(f"\nTotal original records: {total_original_counts.to_dict()}")

# Simple division
frequency_df = category_counts_df.div(total_original_counts, axis=1)

print("\nNormalized frequencies:")
print(frequency_df.round(4))

print("\nPercentages:")
print((frequency_df * 100).round(2))

In [None]:
exploded_dfs = [single_exploded_more, ring_exploded_more, star_exploded_more]
original_dfs = [single_df, ring_rr_df, star_df]

In [None]:
import pandas as pd
import numpy as np
from scipy.stats import mannwhitneyu, norm
from IPython.display import display, HTML

# Test 1: for every agent, compare its single-run answers with its answers in the first
# round-robin message (round 1, message_index 1) using a two-sided Mann-Whitney U test.
results = []
result_columns = ['agent', 'label', 'n_x', 'n_y', 'U', 'p_value', 'effect_size_r']

# Extract unique labels from single_by_category for matching
available_labels = single_by_category['label'].unique()

# Rows holding the very first message of a chat; computed once for all agents.
is_first_message = (rr_by_agent_df['round'] == 1) & (rr_by_agent_df['message_index'] == 1)

# Loop over agent_shortnames in rr_by_agent_df
for agent in rr_by_agent_df['agent_shortname'].unique():
    if 'inverted' in agent:
        # e.g. "claude_inverted" -> "GGB_inverted_claude"
        base = agent.replace('_inverted', '')
        label = f'GGB_inverted_{base}'
    else:
        label = f'GGB_{agent}'

    # Ring agents without a matching single run are skipped.
    if label not in available_labels:
        continue

    y = rr_by_agent_df[(rr_by_agent_df['agent_shortname'] == agent) & is_first_message]['agent_answer'].dropna()
    x = single_df[single_df['label'] == label]['answer_numeric'].dropna()

    if len(x) < 2 or len(y) < 2:
        continue

    stat, p = mannwhitneyu(x, y, alternative='two-sided')

    # Effect size r = z / sqrt(N), with z recovered from the two-sided p-value.
    # norm.isf(p / 2) equals norm.ppf(1 - p / 2) but keeps its precision for very small p,
    # where 1 - p / 2 rounds to exactly 1.0 and ppf returns inf.
    z = norm.isf(p / 2)
    r = z / np.sqrt(len(x) + len(y))

    results.append({
        'agent': agent,
        'label': label,
        'n_x': len(x),
        'n_y': len(y),
        'U': stat,
        'p_value': p,
        'effect_size_r': r
    })

# Passing the column names keeps the columns present even when no agent qualified.
results_df = pd.DataFrame(results, columns=result_columns)

# Round selected float columns
if not results_df.empty:
    results_df['p_value'] = results_df['p_value'].round(2)
    results_df['effect_size_r'] = results_df['effect_size_r'].round(2)


# for better display/sorting:
# Helper columns: base agent name (without "_inverted") and an inverted flag
results_df['agent_base'] = results_df['agent'].str.replace('_inverted', '', regex=False)
results_df['is_inverted'] = results_df['agent'].str.endswith('_inverted')

# Optional: set a consistent order based on unique agent bases
agent_order = results_df['agent_base'].drop_duplicates().tolist()

# Sort by agent base first, then put normal agent before inverted
results_df = (
    results_df
    .sort_values(by=['agent_base', 'is_inverted'])
    .reset_index(drop=True)
    .drop(columns=['agent_base', 'is_inverted'])
)

#print(results_df)
results_df






In [None]:
# Render results_df as a LaTeX table (one row per agent) and print it for copy-pasting.
latex_table = r"""\begin{table}[ht]
\centering
\begin{tabular}{l c c c c c}
\hline
\textbf{Agent} & $\mathbf{N_{singles}}$ & $\mathbf{N_{round robin}}$ & $\mathbf{U}$ & $\mathbf{p}$ & $\mathbf{r}$ \\
\hline
"""

for _, row in results_df.iterrows():
    # Agent names such as "claude_inverted" contain underscores, which must be escaped
    # in LaTeX text mode or the table does not compile.
    agent_tex = str(row['agent']).replace('_', r'\_')
    latex_table += f"{agent_tex} & {int(row['n_x'])} & {int(row['n_y'])} & {int(row['U'])} & {row['p_value']:.2f} & {row['effect_size_r']:.2f} \\\\\n"

latex_table += r"""\hline
\end{tabular}
\caption{Mann–Whitney U test results comparing single model vs. round robin first model responses across agents for original and double-inverted GGB questions.}
\label{tab:agent_mwu}
\end{table}
"""

print(latex_table)

In [None]:
import matplotlib.pyplot as plt
import matplotlib.ticker as mtick
import seaborn as sns
import numpy as np

# Plot styling
sns.set(style="whitegrid", context="notebook")

# Font sizes
TITLE_SIZE = 18
LABEL_SIZE = 16
TICK_SIZE = 14
LEGEND_SIZE = 14

# Setup plot grid
fig, axes = plt.subplots(2, 3, figsize=(20, 12), sharex=True, sharey=True)
axes = axes.flatten()

# Colors
colors = {
    'Single': 'royalblue',
    'Round Robin': 'tomato'
}

# Answer values that are tallied: the integers 1 through 7
response_range = np.arange(1, 8)

def count_and_sem(values):
    """Relative frequency and error estimate for every value in ``response_range``.

    Returns a pair of arrays aligned with ``response_range``:
    the share of ``values`` equal to each response, and sqrt(count) / total as
    its error bar (called a binomial SEM approximation in the original comment).
    Both arrays are all zeros when ``values`` is empty.
    """
    n_obs = len(values)
    tallies = np.array([np.sum(values == level) for level in response_range])

    if n_obs > 0:
        return tallies / n_obs, np.sqrt(tallies) / n_obs

    # No observations: avoid dividing by zero.
    return np.zeros_like(tallies), np.zeros_like(tallies)

# Agent bases (limit to 6 for 2x3 grid)
agent_bases = results_df['agent'].str.replace('_inverted', '', regex=False).unique()[:6]

for idx, base_agent in enumerate(agent_bases):
    ax = axes[idx]

    # Original data
    agent = base_agent
    label = f'GGB_{base_agent}'
    x = single_df[single_df['label'] == label]['answer_numeric'].dropna().astype(int)
    y = rr_by_agent_df[(rr_by_agent_df['agent_shortname'] == agent) &
                       (rr_by_agent_df['round'] == 1) &
                       (rr_by_agent_df['message_index'] == 1)
                      ]['agent_answer'].dropna().astype(int)

    # Inverted data
    agent_inv = f'{base_agent}_inverted'
    label_inv = f'GGB_inverted_{base_agent}'
    x_inv = single_df[single_df['label'] == label_inv]['answer_numeric'].dropna().astype(int)
    y_inv = rr_by_agent_df[(rr_by_agent_df['agent_shortname'] == agent_inv) &
                           (rr_by_agent_df['round'] == 1) &
                           (rr_by_agent_df['message_index'] == 1)
                          ]['agent_answer'].dropna().astype(int)

    # Counts and SEMs
    x_counts, x_sems = count_and_sem(x)
    y_counts, y_sems = count_and_sem(y)
    x_inv_counts, x_inv_sems = count_and_sem(x_inv)
    y_inv_counts, y_inv_sems = count_and_sem(y_inv)

    # Original markers and lines
    ax.errorbar(response_range, x_counts, yerr=x_sems,
                fmt='o', color=colors['Single'], markersize=8, label='Single (original)')
    ax.errorbar(response_range, y_counts, yerr=y_sems,
                fmt='o', color=colors['Round Robin'], markersize=8, label='Round Robin (original)')
    # Connecting line for original single → ring
    ax.plot(response_range, x_counts, color=colors['Single'], alpha=0.5, linewidth=1.5)
    ax.plot(response_range, y_counts, color=colors['Round Robin'], alpha=0.5, linewidth=1.5)

    # Inverted (hollow markers, dashed lines)
    if any(x_inv_counts) and any(y_inv_counts):
        ax.errorbar(response_range, x_inv_counts, yerr=x_inv_sems,
                    fmt='o', markerfacecolor='none', markeredgecolor=colors['Single'],
                    color=colors['Single'], linestyle='--', markersize=8, label='Single (inverted)')
        ax.errorbar(response_range, y_inv_counts, yerr=y_inv_sems,
                    fmt='o', markerfacecolor='none', markeredgecolor=colors['Round Robin'],
                    color=colors['Round Robin'], linestyle='--', markersize=8, label='Round Robin (inverted)')

    # Formatting
    ax.set_title(base_agent, fontsize=TITLE_SIZE)
    ax.set_xlim(0.5, 7.5)
    ax.set_xticks(response_range)
    ax.set_xlabel('Answer', fontsize=LABEL_SIZE)
    ax.set_ylabel('Percent of Total Answers', fontsize=LABEL_SIZE)
    ax.tick_params(axis='both', labelsize=TICK_SIZE)
    ax.legend(fontsize=LEGEND_SIZE, loc='upper left')
    ax.yaxis.set_major_formatter(mtick.PercentFormatter(xmax=1.0))

# Remove unused subplots
for j in range(len(agent_bases), 6):
    fig.delaxes(axes[j])

plt.tight_layout()
plt.subplots_adjust(top=0.92)
fig.suptitle("Answer Frequencies with SEM: Single vs Round Robin First Responses (Original and Inverted)", fontsize=22)
fig.savefig('figures/statstests_distr_SinglevsFirstRing.pdf', bbox_inches='tight', pad_inches=0.1)
plt.show()


## 2. Singles vs Ring Homogeneous Rd 4- expectation is difference/succeed in rejecting null of equivalence (low p)

In [None]:
import pandas as pd
import numpy as np
from scipy.stats import mannwhitneyu, norm
from IPython.display import display, HTML

# Mann–Whitney U test per agent: single-model answers vs. round-4 answers from
# homogeneous round-robin chats (both hetero-ring chat types are excluded).
HETERO_CHAT_TYPES = ['ggb_hetero_ring', 'ggb_inverted_hetero_ring']
RESULT_COLUMNS = ['agent', 'label', 'n_x', 'n_y', 'U', 'p_value', 'effect_size_r']

results = []

# Labels for which single-model data exists.
# NOTE(review): availability is checked against single_by_category while the
# samples below are drawn from single_df — confirm both carry the same labels.
available_labels = single_by_category['label'].unique()

for agent in rr_by_agent_df['agent_shortname'].unique():
    if 'inverted' in agent:
        # e.g. "claude_inverted" -> "GGB_inverted_claude"
        base = agent.replace('_inverted', '')
        label = f'GGB_inverted_{base}'
    else:
        label = f'GGB_{agent}'

    if label not in available_labels:
        continue

    y = rr_by_agent_df[
        (rr_by_agent_df['agent_shortname'] == agent) &
        (rr_by_agent_df['round'] == 4) &
        (~rr_by_agent_df['chat_type'].isin(HETERO_CHAT_TYPES))
    ]['agent_answer'].dropna()
    x = single_df[single_df['label'] == label]['answer_numeric'].dropna()

    # The test needs at least two observations per sample
    if len(x) < 2 or len(y) < 2:
        continue

    n1, n2 = len(x), len(y)
    stat, p = mannwhitneyu(x, y, alternative='two-sided')

    # z is derived from U directly instead of norm.ppf(1 - p / 2), which becomes
    # infinite once p underflows to 0. With n1, n2 >= 2 none of this arithmetic
    # can raise, so no exception handler is needed (the previous bare `except:`
    # could only have masked unrelated errors).
    # NOTE(review): std_u carries no tie correction; answers on a 7-point scale
    # are heavily tied, so |z| (and r) are presumably underestimated — confirm
    # whether the tie-corrected variance should be used.
    mean_u = n1 * n2 / 2
    std_u = np.sqrt(n1 * n2 * (n1 + n2 + 1) / 12)
    z = (stat - mean_u) / std_u

    # Effect size r = |z| / sqrt(N)
    r = abs(z) / np.sqrt(n1 + n2)

    results.append({
        'agent': agent,
        'label': label,
        'n_x': n1,
        'n_y': n2,
        'U': stat,
        'p_value': p,
        'effect_size_r': r
    })

# Explicit columns keep the frame well-formed even when no agent matched
results_df = pd.DataFrame(results, columns=RESULT_COLUMNS)

# Round the reported float columns to two decimals
rounded_cols = ['p_value', 'effect_size_r']
results_df[rounded_cols] = results_df[rounded_cols].astype(float).round(2)

# Order rows by base agent name, with the original run before its inverted
# counterpart. The sort keys live in a separate frame so results_df never
# carries helper columns.
sort_keys = pd.DataFrame({
    'agent_base': results_df['agent'].str.replace('_inverted', '', regex=False),
    'is_inverted': results_df['agent'].str.endswith('_inverted'),
    'agent': results_df['agent'],
})
row_order = sort_keys.sort_values(by=['agent_base', 'is_inverted', 'agent']).index
results_df = results_df.loc[row_order].reset_index(drop=True)

results_df


In [None]:
# Build the LaTeX table of Mann–Whitney results, one row per agent in results_df.

# Characters that are special in LaTeX text mode, mapped to their escaped form.
# Agent names such as "claude_inverted" contain "_", which fails to compile
# ("Missing $ inserted") when written into a text-mode cell unescaped.
LATEX_ESCAPES = str.maketrans({
    '&': r'\&', '%': r'\%', '$': r'\$', '#': r'\#',
    '_': r'\_', '{': r'\{', '}': r'\}',
})

table_header = r"""\begin{table}[ht]
\centering
\begin{tabular}{l c c c c c}
\hline
\textbf{Agent} & $\mathbf{N_{Singles}}$ & $\mathbf{N_{Round robin}}$ & $\mathbf{U}$ & $\mathbf{p}$ & $\mathbf{r}$ \\
\hline
"""

table_footer = r"""\hline
\end{tabular}
\caption{Mann–Whitney U test results comparing single model vs. round robin homogeneous ensembles after response convergence, across agents for original and double-inverted GGB questions.}
\label{tab:statstests_tab_SinglevsRingHomogeneous}
\end{table}
"""

# One "a & b & ... \\" line per result row; counts and U are printed as integers,
# p and r with two decimals.
table_rows = [
    f"{str(row['agent']).translate(LATEX_ESCAPES)} & {int(row['n_x'])} & {int(row['n_y'])} & {int(row['U'])} & {row['p_value']:.2f} & {row['effect_size_r']:.2f} \\\\\n"
    for _, row in results_df.iterrows()
]

latex_table = table_header + "".join(table_rows) + table_footer

print(latex_table)

In [None]:
import matplotlib.pyplot as plt
import matplotlib.ticker as mtick
import seaborn as sns
import numpy as np


# Seaborn theme applied to every panel of the figure
sns.set(style="whitegrid", context="notebook")

# Font sizes shared by titles, axis labels, tick labels and legends
TITLE_SIZE, LABEL_SIZE = 18, 16
TICK_SIZE = LEGEND_SIZE = 14

# 2x3 grid of panels with shared axes, flattened so panels can be indexed 0..5
fig, axes_grid = plt.subplots(nrows=2, ncols=3, figsize=(20, 12), sharex=True, sharey=True)
axes = axes_grid.flatten()

# One colour per condition
colors = dict(zip(('Single', 'Round Robin'), ('royalblue', 'tomato')))

# Answer scale: integers 1 through 7
response_range = np.arange(1, 7 + 1)

# Relative answer frequencies with count-based error bars
def count_and_sem(values, categories=None):
    """Return per-category relative frequencies and their error estimates.

    Parameters
    ----------
    values : array-like
        1-D collection of observed answers (e.g. a pandas Series or ndarray).
    categories : array-like, optional
        Answer values to tabulate, in output order. Defaults to the
        notebook-level ``response_range``.

    Returns
    -------
    rel_freqs : numpy.ndarray
        ``count / total`` for each category (float).
    sems : numpy.ndarray
        ``sqrt(count) / total`` for each category (float). This is the Poisson
        counting approximation; it is not the binomial standard error
        ``sqrt(p * (1 - p) / total)`` and is larger than it whenever p > 0.

    Both arrays are all zeros when ``values`` is empty.
    """
    if categories is None:
        categories = response_range
    categories = np.asarray(categories)

    observed = np.asarray(values)
    total = len(observed)

    # Compare every observation with every category at once, then count per category
    counts = (observed[:, None] == categories[None, :]).sum(axis=0)

    if total == 0:
        empty = np.zeros(len(categories), dtype=float)
        return empty, empty.copy()

    rel_freqs = counts / total
    sems = np.sqrt(counts) / total  # Poisson counting error of each bin
    return rel_freqs, sems

# Base agent names with the "_inverted" suffix stripped; only the first six are
# kept because the figure is a 2x3 grid.
agent_bases = results_df['agent'].str.replace('_inverted', '', regex=False).unique()[:6]


def _single_answers(run_label):
    """Integer answers of the single-model rows whose label equals `run_label`."""
    is_run = single_df['label'] == run_label
    return single_df.loc[is_run, 'answer_numeric'].dropna().astype(int)


def _homog_round4_answers(agent_shortname):
    """Integer round-4 answers of `agent_shortname`, excluding both hetero-ring chat types."""
    is_agent = rr_by_agent_df['agent_shortname'] == agent_shortname
    is_round4 = rr_by_agent_df['round'] == 4
    is_hetero = rr_by_agent_df['chat_type'].isin(['ggb_hetero_ring', 'ggb_inverted_hetero_ring'])
    return rr_by_agent_df.loc[is_agent & is_round4 & ~is_hetero, 'agent_answer'].dropna().astype(int)


for base_agent, ax in zip(agent_bases, axes):
    # Frequencies and error bars for the original questions ...
    single_freq, single_err = count_and_sem(_single_answers(f'GGB_{base_agent}'))
    ring_freq, ring_err = count_and_sem(_homog_round4_answers(base_agent))
    # ... and for the inverted questions
    single_inv_freq, single_inv_err = count_and_sem(_single_answers(f'GGB_inverted_{base_agent}'))
    ring_inv_freq, ring_inv_err = count_and_sem(_homog_round4_answers(f'{base_agent}_inverted'))

    original_series = (
        ('Single', single_freq, single_err, 'Single (original)'),
        ('Round Robin', ring_freq, ring_err, 'Round Robin Homog. (original)'),
    )
    inverted_series = (
        ('Single', single_inv_freq, single_inv_err, 'Single (inverted)'),
        ('Round Robin', ring_inv_freq, ring_inv_err, 'Round Robin Homog. (inverted)'),
    )

    # Original runs: filled markers with error bars first, then a faint connecting line each
    for condition, freqs, errs, legend_label in original_series:
        ax.errorbar(response_range, freqs, yerr=errs,
                    fmt='o', color=colors[condition], markersize=8, label=legend_label)
    for condition, freqs, _, _ in original_series:
        ax.plot(response_range, freqs, color=colors[condition], alpha=0.5, linewidth=1.5)

    # Inverted runs: hollow markers joined by dashed lines, drawn only when both
    # conditions have at least one non-zero frequency
    if any(single_inv_freq) and any(ring_inv_freq):
        for condition, freqs, errs, legend_label in inverted_series:
            ax.errorbar(response_range, freqs, yerr=errs,
                        fmt='o', markerfacecolor='none', markeredgecolor=colors[condition],
                        color=colors[condition], linestyle='--', markersize=8, label=legend_label)

    # Panel formatting
    ax.set_title(base_agent, fontsize=TITLE_SIZE)
    ax.set_xlim(0.5, 7.5)
    ax.set_xticks(response_range)
    ax.set_xlabel('Answer', fontsize=LABEL_SIZE)
    ax.set_ylabel('Percent of Total Answers', fontsize=LABEL_SIZE)
    ax.tick_params(axis='both', labelsize=TICK_SIZE)
    ax.legend(fontsize=LEGEND_SIZE, loc='upper left')
    # Frequencies are fractions of 1, shown as percentages
    ax.yaxis.set_major_formatter(mtick.PercentFormatter(xmax=1.0))

# Drop the panels of the 2x3 grid that received no agent
for spare_ax in axes[len(agent_bases):6]:
    fig.delaxes(spare_ax)

plt.tight_layout()
plt.subplots_adjust(top=0.92)  # leave headroom for the figure title
fig.suptitle("Answer Frequencies with SEM: Single vs Round Robin Homogeneous (Original and Inverted)", fontsize=22)
fig.savefig('figures/statstests_distr_SinglevsRingHomogeneous.pdf', bbox_inches='tight', pad_inches=0.1)
plt.show()


In [None]:
# Inspect which chat types occur in the round-robin data
rr_by_agent_df['chat_type'].unique()

## Singles vs Ring Homogeneous

In [None]:
import pandas as pd
import numpy as np
from scipy.stats import wilcoxon
import re

# To compute statistics, the raw (per-iteration) measurements of each data set
# have to be kept.
# Bare expression: it is not the last expression of this cell, so the notebook
# does not render it; it only fails if the name is undefined.
single_stats_periteration_df

# Rows of the ring results with round == 1 and message_index == 1
# (presumably the first response of each ring chat — TODO confirm).
ring_by_category_and_model[
    (ring_by_category_and_model['round'] == 1) &
    (ring_by_category_and_model['message_index'] == 1)
]
