In [1]:
import codenamesLLM
import pandas as pd
import openpyxl
from tqdm import tqdm
import xlsxwriter

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
codenamesLLM.play_game_mixed_guessers(red_guesser_model=["gpt-3.5-turbo", "gpt-4o-mini", "claude-3-5-sonnet-latest"], verbose=True, masterverbose=True)

-----------------------------------
[1;33mGAME PARAMETERS:[0m
language = eng, 
number of cards = 25, 
number of killer cards = 1, 

[1;31mRED TEAM PARAMETERS:[0m
Red team starts first, number of red cards = 8, 
red spymaster model = gpt-4o-mini, 
red guesser model = ['gpt-3.5-turbo', 'gpt-4o-mini', 'claude-3-5-sonnet-latest'], 

-----------------------------------
The actual board is: 


0,1,2,3,4
TIME,WELL,ROOT,SHARK,HIMALAYAS
HOOD,HOOK,HOSPITAL,BUFFALO,STADIUM
PASTE,JUPITER,DRAFT,DANCE,TIE
BAR,TAIL,MOON,MATCH,CHINA
CHAIR,CRASH,DISEASE,TRACK,HORSE


it is [1;31mRED[0m team turn: 

The [1;31mRED[0m spymaster's clue is: ACCIDENT (2). 
 

Team [1;31mRED[0m guesser 0 thinks: I think words like CRASH, TRACK, and ACCIDENT could be related to the clue "ACCIDENT." Let's start with CRASH. Any objections or other suggestions, team?.
Team [1;31mRED[0m guesser 1 thinks: For the clue "ACCIDENT," I think "CRASH" stands out immediately as it's directly related to accidents. Let's start with that word. What do you all think?.
Team [1;31mRED[0m guesser 2 thinks: I think CRASH is the most obvious connection to ACCIDENT. We often hear about car crashes or accidents. I vote for CRASH as our first guess..
Do you want to talk? [True, True, True]
Team [1;31mRED[0m guesser 0 said: Let's go with CRASH as our first guess for the clue ACCIDENT..
Team [1;31mRED[0m guesser 1 said: Let's guess CRASH..
Team [1;31mRED[0m guesser 2 said: Agree with everyone - CRASH is the clear choice here. Lets go for it..
Do you want to talk? [True, False, False

KeyboardInterrupt: 

# PLAY AGGREGATE GAMES BEST MODEL

In [None]:
# Path to your Excel file
file_path = r"experiment_data\best_model\best_model.xlsx" #compy the model_tournament_input to use one new

# Read the Excel file into a DataFrame
df = pd.read_excel(file_path)

# Iterate through the rows using titled columns
for index, row in tqdm(df.iterrows(), total=len(df)):
    # Access values using column titles
    red_team = row['red_model']  # Replace with the actual column title for the red team
    blue_team = row['blue_model']  # Replace with the actual column title for the blue team

    if red_team == "grok-claude":
        red_master_model = "grok-beta"
        red_guesser_model = "claude-3-5-sonnet-latest"
    else:
        red_master_model = red_team
        red_guesser_model = red_team

    if blue_team == "grok-claude":
        blue_master_model = "grok-beta"
        blue_guesser_model = "claude-3-5-sonnet-latest"
    else:
        blue_master_model = blue_team
        blue_guesser_model = blue_team

    playable = True
    already_played = pd.notna(row['winner'])

    if ((pd.notna(red_team) and pd.notna(blue_team)) and not(already_played)) and playable:  # Check if both values are not NaN
        try:
            # Call your function and get the result
            print(f"playing {red_team} vs {blue_team}...")
            result = codenamesLLM.play_game(red_master_model=red_master_model,
                                            red_guesser_model=red_guesser_model,
                                            blue_master_model=blue_master_model,
                                            blue_guesser_model=blue_guesser_model)
            
            print(result)
            red_stats = codenamesLLM.analyze_team_guesses(result[3], "RED")
            blue_stats = codenamesLLM.analyze_team_guesses(result[3], "BLUE")

            df.at[index, 'red_model'] = red_team
            df.at[index, 'blue_model'] = blue_team
            df.at[index, 'winner'] = result[0]
            df.at[index, 'red_avg_words_2guess'] = red_stats['average_expected_guesses']
            df.at[index, 'blue_avg_words_2guess'] = blue_stats['average_expected_guesses']
            df.at[index, 'red_avg_words_guessed'] = red_stats['average_correct_guesses']
            df.at[index, 'blue_avg_words_guessed'] = blue_stats['average_correct_guesses']
            df.at[index, 'reason'] = result[1]
            df.at[index, 'red_turns'] = red_stats['total_hints']
            df.at[index, 'blue_turns'] = blue_stats['total_hints']
            df.at[index, 'red_cib'] = result[4]
            df.at[index, 'blue_cib'] = result[5]

        except Exception as e:
            print(f"skipped game: {e}")

    # Write the updated DataFrame back to the same Excel file
    df.to_excel(file_path, index=False)

# TOURNAMENT DATA PROCESS

In [3]:
import pandas as pd

def process_tournament_data(input_path, output_path):
    # Load the data
    df = pd.read_excel(input_path)

    # Function to calculate metrics
    def calculate_metrics(group):
        wins = group['winner'] == group['role'].str.upper()
        losses = ~wins
        card_finished = group['reason'] == 'cards finished'
        killer_word = group['reason'] == 'killer word selected'

        return {
            "model_name": group["model"].iloc[0],
            "games_played": len(group),
            "wins": wins.sum(),
            "win_percentage": 100 * wins.sum() / len(group),
            "win_by_cards_finished": (wins & card_finished).sum(),
            "wins_by_killer_words": (wins & killer_word).sum(),
            "losses_by_card_finished": (losses & card_finished).sum(),
            "losses_by_killer_words": (losses & killer_word).sum(),
            "average_word_to_guess": group['avg_words_2guess'].mean(),
            "average_word_to_guess_when_wins": group.loc[wins, 'avg_words_2guess'].mean(),
            "average_word_to_guess_when_lose": group.loc[losses, 'avg_words_2guess'].mean(),
            "average_word_to_guess_when_wins_by_ending_cards": group.loc[wins & card_finished, 'avg_words_2guess'].mean(),
            "average_word_to_guess_when_loses_by_ending_cards": group.loc[losses & card_finished, 'avg_words_2guess'].mean(),
            "average_word_to_guess_when_wins_by_killer_card": group.loc[wins & killer_word, 'avg_words_2guess'].mean(),
            "average_word_to_guess_when_loses_by_killer_card": group.loc[losses & killer_word, 'avg_words_2guess'].mean(),
            "average_word_guessed": group['avg_words_guessed'].mean(),
            "average_word_guessed_when_wins": group.loc[wins, 'avg_words_guessed'].mean(),
            "average_word_guessed_when_lose": group.loc[losses, 'avg_words_guessed'].mean(),
            "average_word_guessed_when_wins_by_ending_cards": group.loc[wins & card_finished, 'avg_words_guessed'].mean(),
            "average_word_guessed_when_loses_by_ending_cards": group.loc[losses & card_finished, 'avg_words_guessed'].mean(),
            "average_word_guessed_when_wins_by_killer_card": group.loc[wins & killer_word, 'avg_words_guessed'].mean(),
            "average_word_guessed_when_loses_by_killer_card": group.loc[losses & killer_word, 'avg_words_guessed'].mean(),
            "average_turns": group['turns'].mean(),
            "average_turns_when_wins": group.loc[wins, 'turns'].mean(),
            "average_turns_when_lose": group.loc[losses, 'turns'].mean(),
            "average_turns_when_wins_by_ending_cards": group.loc[wins & card_finished, 'turns'].mean(),
            "average_turns_when_loses_by_ending_cards": group.loc[losses & card_finished, 'turns'].mean(),
            "average_turns_when_wins_by_killer_cards": group.loc[wins & killer_word, 'turns'].mean(),
            "average_turns_when_loses_by_killer_cards": group.loc[losses & killer_word, 'turns'].mean(),
            "total_cib": group['cib'].sum()
        }

    # Reshape the dataset to treat roles equivalently
    red_df = df.rename(columns=lambda x: x.replace('red_', '')).assign(role='red', model=df['red_model'])
    blue_df = df.rename(columns=lambda x: x.replace('blue_', '')).assign(role='blue', model=df['blue_model'])
    combined_df = pd.concat([red_df, blue_df], ignore_index=True)

    # Metrics for models as a whole
    overall_metrics = combined_df.groupby("model").apply(calculate_metrics).apply(pd.Series)

    # Metrics for models playing as Red
    red_metrics = red_df.groupby("model").apply(calculate_metrics).apply(pd.Series)
    red_metrics['role'] = 'red'

    # Metrics for models playing as Blue
    blue_metrics = blue_df.groupby("model").apply(calculate_metrics).apply(pd.Series)
    blue_metrics['role'] = 'blue'

    # Save the results to an Excel file with three sheets
    with pd.ExcelWriter(output_path, engine='xlsxwriter') as writer:
        overall_metrics.to_excel(writer, sheet_name='Overall', index=False)
        red_metrics.to_excel(writer, sheet_name='Red', index=False)
        blue_metrics.to_excel(writer, sheet_name='Blue', index=False)


In [None]:
for model in ["vsclaude", "vsgpt", "vsgrok"]:
    input_path = r"C:\Users\miche\OneDrive\Desktop\codenames data backup\bestmodel\{}.xlsx".format(model)
    output_path = r"C:\Users\miche\OneDrive\Desktop\codenames data backup\bestmodel\{}_stats.xlsx".format(model)

    process_tournament_data(input_path, output_path)

# COT DATA PROCESS

In [20]:
# Load the data from the uploaded file
file_path = 'experiment_data\cot_data\cot_data_input.xlsx'
df = pd.read_excel(file_path)
i = 0
# Loop through each configuration in the data file
for index, row in df.iterrows():
    i += 1
    red_cot = row['red_cot']
    blue_cot = row['blue_cot']
    playable = True
    already_played = pd.notna(row['winner'])
    # Call the play_game function with parameters from the row

    if not already_played and playable:  # Check if both values are not NaN
        try:

            print(f"playing game {i}")
            result = codenamesLLM.play_game(lang="eng", n_cards=25, coloured_cards=7, k_cards=1, verbose=False, 
              red_master_model="gpt-4o-mini", red_guesser_model=False, 
              red_cot=red_cot, red_cot_guesser=False, 
              blue_master_model="gpt-4o-mini", blue_guesser_model=False, 
              blue_cot=blue_cot, blue_cot_guesser=False, 
              red_agents=3, blue_agents=3, masterverbose=False)

            red_stats = codenamesLLM.analyze_team_guesses(result[3], "RED")
            blue_stats = codenamesLLM.analyze_team_guesses(result[3], "BLUE")

            df.at[index, 'winner'] = result[0]
            df.at[index, 'red_avg_words_2guess'] = red_stats['average_expected_guesses']
            df.at[index, 'blue_avg_words_2guess'] = blue_stats['average_expected_guesses']
            df.at[index, 'red_avg_words_guessed'] = red_stats['average_correct_guesses']
            df.at[index, 'blue_avg_words_guessed'] = blue_stats['average_correct_guesses']
            df.at[index, 'reason'] = result[1]
            df.at[index, 'red_turns'] = red_stats['total_hints']
            df.at[index, 'blue_turns'] = blue_stats['total_hints']
            df.at[index, 'red_cib'] = result[4]
            df.at[index, 'blue_cib'] = result[5]    
        except Exception as e:
            print(f"skipped game: {e}")
    df.to_excel('experiment_data\cot_data\cot_data_fra.xlsx', index=False)

print(f"Results saved to experiment_data\cot_data\cot_data_fra.xlsx")

  file_path = 'experiment_data\cot_data\cot_data_input.xlsx'
  df.to_excel('experiment_data\cot_data\cot_data_fra.xlsx', index=False)
  print(f"Results saved to experiment_data\cot_data\cot_data_fra.xlsx")


playing game 1


  df.at[index, 'winner'] = result[0]
  df.at[index, 'reason'] = result[1]


playing game 2
playing game 3
playing game 4
playing game 5
playing game 6
playing game 7
playing game 8
playing game 9
playing game 10
playing game 11
playing game 12
playing game 13
playing game 14
playing game 15
playing game 16
playing game 17
playing game 18
playing game 19
playing game 20
playing game 21
playing game 22
playing game 23
playing game 24
playing game 25
playing game 26
playing game 27
playing game 28
playing game 29
playing game 30
playing game 31
playing game 32
playing game 33
playing game 34
playing game 35
playing game 36
playing game 37
playing game 38
playing game 39
playing game 40
playing game 41
playing game 42
playing game 43
playing game 44
playing game 45
playing game 46
playing game 47
playing game 48
playing game 49
playing game 50
playing game 51
playing game 52
playing game 53
playing game 54
playing game 55
playing game 56
playing game 57
playing game 58
playing game 59
playing game 60
playing game 61
playing game 62
playing game 63
playing game 64


In [10]:
import pandas as pd

def process_tournament_data_by_cot(input_path, output_path):
    # Load the data
    df = pd.read_excel(input_path)

    # Function to calculate metrics
    def calculate_metrics(group):
        wins = group['winner'] == group['role'].str.upper()
        losses = ~wins
        card_finished = group['reason'] == 'cards finished'
        killer_word = group['reason'] == 'killer word selected'

        return {
            "cot": group["cot"].iloc[0],
            "games_played": len(group),
            "wins": wins.sum(),
            "win_percentage": 100 * wins.sum() / len(group),
            "win_by_cards_finished": (wins & card_finished).sum(),
            "wins_by_killer_words": (wins & killer_word).sum(),
            "losses_by_card_finished": (losses & card_finished).sum(),
            "losses_by_killer_words": (losses & killer_word).sum(),
            "average_word_to_guess": group['avg_words_2guess'].mean(),
            "average_word_to_guess_when_wins": group.loc[wins, 'avg_words_2guess'].mean(),
            "average_word_to_guess_when_lose": group.loc[losses, 'avg_words_2guess'].mean(),
            "average_word_guessed": group['avg_words_guessed'].mean(),
            "average_word_guessed_when_wins": group.loc[wins, 'avg_words_guessed'].mean(),
            "average_word_guessed_when_lose": group.loc[losses, 'avg_words_guessed'].mean(),
            "average_turns": group['turns'].mean(),
            "average_turns_when_wins": group.loc[wins, 'turns'].mean(),
            "average_turns_when_lose": group.loc[losses, 'turns'].mean(),
            "total_cib": group['cib'].sum()
        }

    # Reshape the dataset to unify red and blue roles
    red_df = df.rename(columns=lambda x: x.replace('red_', '')).assign(role='red', cot=df['red_cot'])
    blue_df = df.rename(columns=lambda x: x.replace('blue_', '')).assign(role='blue', cot=df['blue_cot'])
    combined_df = pd.concat([red_df, blue_df], ignore_index=True)

    # Metrics for cot
    overall_metrics = combined_df.groupby("cot").apply(calculate_metrics).apply(pd.Series)

    # Metrics for cot in Red role
    red_metrics = red_df.groupby("cot").apply(calculate_metrics).apply(pd.Series)
    red_metrics['role'] = 'red'

    # Metrics for cot in Blue role
    blue_metrics = blue_df.groupby("cot").apply(calculate_metrics).apply(pd.Series)
    blue_metrics['role'] = 'blue'

    # Save the results to an Excel file with three sheets
    with pd.ExcelWriter(output_path, engine='xlsxwriter') as writer:
        overall_metrics.to_excel(writer, sheet_name='Overall', index=False)
        red_metrics.to_excel(writer, sheet_name='Red', index=False)
        blue_metrics.to_excel(writer, sheet_name='Blue', index=False)


In [8]:
# Example usage:
input_path = 'experiment_data\cot_data\cot_data_input.xlsx'
output_path = 'experiment_data\cot_data\cot_data_fra.xlsx'
process_tournament_data_by_cot(input_path, output_path)

  input_path = 'experiment_data\cot_data\cot_data_input.xlsx'
  output_path = 'experiment_data\cot_data\cot_data_fra.xlsx'
  overall_metrics = combined_df.groupby("cot").apply(calculate_metrics).apply(pd.Series)
  red_metrics = red_df.groupby("cot").apply(calculate_metrics).apply(pd.Series)
  blue_metrics = blue_df.groupby("cot").apply(calculate_metrics).apply(pd.Series)


# AGENTS NUMBER DATA PROCESS


In [None]:
import pandas as pd

def process_agents_data(input_path, output_path):
    # Load the data
    df = pd.read_excel(input_path)

    # Function to calculate metrics
    def calculate_metrics(group, role):
        prefix = 'red_' if role == 'red' else 'blue_'
        wins = (group['winner'] == role.upper())
        losses = ~wins
        card_finished = group['reason'] == 'cards finished'
        killer_word = group['reason'] == 'killer word selected'

        return {
            "number_of_agents": group[f"number_{role}_agents"].iloc[0],
            "games_played": len(group),
            "wins": wins.sum(),
            "win_percentage": 100 * wins.sum() / len(group),
            "win_by_cards_finished": (wins & card_finished).sum(),
            "wins_by_killer_words": (wins & killer_word).sum(),
            "losses_by_card_finished": (losses & card_finished).sum(),
            "losses_by_killer_words": (losses & killer_word).sum(),
            "average_word_to_guess": group[f'{prefix}avg_words_2guess'].mean(),
            "average_word_to_guess_when_wins": group.loc[wins, f'{prefix}avg_words_2guess'].mean(),
            "average_word_to_guess_when_lose": group.loc[losses, f'{prefix}avg_words_2guess'].mean(),
            "average_word_to_guess_when_wins_by_ending_cards": group.loc[wins & card_finished, f'{prefix}avg_words_2guess'].mean(),
            "average_word_to_guess_when_loses_by_ending_cards": group.loc[losses & card_finished, f'{prefix}avg_words_2guess'].mean(),
            "average_word_to_guess_when_wins_by_killer_card": group.loc[wins & killer_word, f'{prefix}avg_words_2guess'].mean(),
            "average_word_to_guess_when_loses_by_killer_card": group.loc[losses & killer_word, f'{prefix}avg_words_2guess'].mean(),
            "average_word_guessed": group[f'{prefix}avg_words_guessed'].mean(),
            "average_word_guessed_when_wins": group.loc[wins, f'{prefix}avg_words_guessed'].mean(),
            "average_word_guessed_when_lose": group.loc[losses, f'{prefix}avg_words_guessed'].mean(),
            "average_word_guessed_when_wins_by_ending_cards": group.loc[wins & card_finished, f'{prefix}avg_words_guessed'].mean(),
            "average_word_guessed_when_loses_by_ending_cards": group.loc[losses & card_finished, f'{prefix}avg_words_guessed'].mean(),
            "average_word_guessed_when_wins_by_killer_card": group.loc[wins & killer_word, f'{prefix}avg_words_guessed'].mean(),
            "average_word_guessed_when_loses_by_killer_card": group.loc[losses & killer_word, f'{prefix}avg_words_guessed'].mean(),
            "average_turns": group[f'{prefix}turns'].mean(),
            "average_turns_when_wins": group.loc[wins, f'{prefix}turns'].mean(),
            "average_turns_when_lose": group.loc[losses, f'{prefix}turns'].mean(),
            "average_turns_when_wins_by_ending_cards": group.loc[wins & card_finished, f'{prefix}turns'].mean(),
            "average_turns_when_loses_by_ending_cards": group.loc[losses & card_finished, f'{prefix}turns'].mean(),
            "average_turns_when_wins_by_killer_cards": group.loc[wins & killer_word, f'{prefix}turns'].mean(),
            "average_turns_when_loses_by_killer_cards": group.loc[losses & killer_word, f'{prefix}turns'].mean(),
            "total_cib": group[f'{prefix}cib'].sum()
        }

    # Metrics for agents playing as Red
    red_metrics = df.groupby("number_red_agents").apply(lambda g: calculate_metrics(g, "red")).apply(pd.Series)
    red_metrics['role'] = 'red'

    # Metrics for agents playing as Blue
    blue_metrics = df.groupby("number_blue_agents").apply(lambda g: calculate_metrics(g, "blue")).apply(pd.Series)
    blue_metrics['role'] = 'blue'

    # Combine and calculate overall metrics
    overall_metrics = (
        pd.concat([red_metrics, blue_metrics])
        .groupby("number_of_agents")
        .agg({
            "games_played": "sum",
            "wins": "sum",
            "win_percentage": "mean",
            "win_by_cards_finished": "sum",
            "wins_by_killer_words": "sum",
            "losses_by_card_finished": "sum",
            "losses_by_killer_words": "sum",
            "average_word_to_guess": "mean",
            "average_word_to_guess_when_wins": "mean",
            "average_word_to_guess_when_lose": "mean",
            "average_word_to_guess_when_wins_by_ending_cards": "mean",
            "average_word_to_guess_when_loses_by_ending_cards": "mean",
            "average_word_to_guess_when_wins_by_killer_card": "mean",
            "average_word_to_guess_when_loses_by_killer_card": "mean",
            "average_word_guessed": "mean",
            "average_word_guessed_when_wins": "mean",
            "average_word_guessed_when_lose": "mean",
            "average_word_guessed_when_wins_by_ending_cards": "mean",
            "average_word_guessed_when_loses_by_ending_cards": "mean",
            "average_word_guessed_when_wins_by_killer_card": "mean",
            "average_word_guessed_when_loses_by_killer_card": "mean",
            "average_turns": "mean",
            "average_turns_when_wins": "mean",
            "average_turns_when_lose": "mean",
            "average_turns_when_wins_by_ending_cards": "mean",
            "average_turns_when_loses_by_ending_cards": "mean",
            "average_turns_when_wins_by_killer_cards": "mean",
            "average_turns_when_loses_by_killer_cards": "mean",
            "total_cib": "sum"
        })
        .reset_index()
    )

    # Save the results to an Excel file with three sheets
    with pd.ExcelWriter(output_path, engine='xlsxwriter') as writer:
        overall_metrics.to_excel(writer, sheet_name='Overall', index=False)
        red_metrics.to_excel(writer, sheet_name='Red', index=False)
        blue_metrics.to_excel(writer, sheet_name='Blue', index=False)

In [8]:
# Define input and output file paths
input_path = r'experiment_data\n_agents_data\agents_data.xlsx'  # Replace with your input file path
output_path = r'experiment_data\n_agents_data\agents_data_stats.xlsx'  # Replace with your output file path

# Run the process
process_agents_data(input_path, output_path)

  red_metrics = df.groupby("number_red_agents").apply(lambda g: calculate_metrics(g, "red")).apply(pd.Series)
  blue_metrics = df.groupby("number_blue_agents").apply(lambda g: calculate_metrics(g, "blue")).apply(pd.Series)
