# Strategy Score on Output (using GPT-4o)

In [24]:
import os
import sys
import json
import plotly.express as px
import plotly.graph_objects as go
import numpy as np
import requests
import pandas as pd

from pandas import DataFrame, json_normalize
from typing import List, Dict, Any, Tuple, Union, Optional

# Directory holding the evaluation result files read below; the path is relative,
# so it resolves against the notebook's working directory.
LOGS_PATH: str = "../evaluations/results/"

In [25]:
import dotenv
# Load variables from a .env file into the process environment.
# NOTE(review): nothing in the visible cells reads these variables — confirm this is still needed.
dotenv.load_dotenv()

True

In [26]:
# Add the parent directory to the import search path before importing `utils`
# (presumably `utils` lives one level above this notebook — verify against the repo layout).
sys.path.append("..")

from utils import load_agent_logs_df, read_jsonl_as_json, load_game_summary

In [27]:
# Experiment identifiers; each one is turned into a
# "<name>_strategy_skill.json" file name under LOGS_PATH further down.
EXPT_NAMES: List[str] = [
    "2025-01-30_phi_llama_100_games_v2",
    "2025-01-30_llama_phi_100_games_v2",
    "2025-01-30_phi_phi_100_games_v2",
    "2025-01-30_llama_llama_100_games_v2",
    ]

In [28]:
# Human-readable labels used as trace names in the plots below.
# They are looked up by position, so entry i must describe EXPT_NAMES[i].
DESCRIPTIONS: List[str] = [
    "Crew: Phi, Imp: Llama",
    "Crew: Llama, Imp: Phi",
    "Crew: Phi, Imp: Phi",
    "Crew: Llama, Imp: Llama",
    ]

In [29]:
# Build one strategy-skill log path per experiment, keeping the order of EXPT_NAMES.
summary_logs_paths: List[str] = []
for expt_name in EXPT_NAMES:
    summary_logs_paths.append(os.path.join(LOGS_PATH, f"{expt_name}_strategy_skill.json"))

In [30]:
summary_dfs: List[DataFrame] = []


def _game_order_key(column):
    """Return a sort key for one of the columns passed to ``sort_values``.

    ``game_index`` holds labels such as "Game 1", "Game 10", "Game 2"; sorting
    them as plain strings puts "Game 10" before "Game 2". This key sorts that
    column by the first number found in each label instead. Every other column
    is returned unchanged.
    """
    if column.name != "game_index":
        return column
    game_numbers = pd.to_numeric(
        column.astype(str).str.extract(r"(\d+)", expand=False), errors="coerce"
    )
    # If any label carries no number, keep the original string ordering
    # rather than pushing those rows to the end.
    return column if game_numbers.isna().any() else game_numbers


for summary_logs_path in summary_logs_paths:
    # Read the log file (one JSON record per line) and flatten it into a DataFrame.
    summary_logs: List[Dict[str, Any]] = read_jsonl_as_json(summary_logs_path)
    summary_df: DataFrame = json_normalize(summary_logs)
    # Sort by game number (numeric, not lexicographic) and then by step.
    summary_df = summary_df.sort_values(by=["game_index", "step"], key=_game_order_key)
    summary_dfs.append(summary_df)
    print(f"Loaded {len(summary_df)} logs from {summary_logs_path}")

Loaded 8701 logs from ../evaluations/results/2025-01-30_phi_llama_100_games_v2_strategy_skill.json
Loaded 7911 logs from ../evaluations/results/2025-01-30_llama_phi_100_games_v2_strategy_skill.json
Loaded 8574 logs from ../evaluations/results/2025-01-30_phi_phi_100_games_v2_strategy_skill.json
Loaded 7782 logs from ../evaluations/results/2025-01-30_llama_llama_100_games_v2_strategy_skill.json


In [31]:
# Preview the first rows of the first experiment's frame to check the loaded columns.
summary_dfs[0].head()

Unnamed: 0,game_index,step,player_name,player_identity,memory,action,thought,strategy_score,explanation
12,Game 1,0,Player 1: red,Impostor,No observations or actions have been made yet....,MOVE from Cafeteria to Medbay,"Since I am in a room with multiple Crewmates, ...",7,The player is strategically avoiding a risky k...
288,Game 1,0,Player 4: orange,Crewmate,"- This is the beginning of the game, with no p...",MOVE from Cafeteria to Admin,"- It's the start of the game, so I need to mak...",8,The player is making a well-thought-out move b...
297,Game 1,0,Player 2: yellow,Impostor,No previous actions or observations have been ...,MOVE from Cafeteria to Medbay,"Since I am an Impostor, my goal is to eliminat...",6,The player is making a strategic decision by m...
305,Game 1,0,Player 5: green,Crewmate,- The game is in the task phase. - Current loc...,COMPLETE TASK - Download Data,- My current objective is to complete tasks ef...,7,"The player is strategically completing the ""Do..."
393,Game 1,0,Player 6: cyan,Crewmate,- No actions have been taken yet. - I have thr...,MOVE from Cafeteria to Admin,- Since I need to complete tasks in Electrical...,7,The player is acting strategically by moving t...


In [32]:
# Plot the distribution of strategy_score for Impostor players, one violin per experiment.

fig = go.Figure()

for i, summary_df in enumerate(summary_dfs):
    description = DESCRIPTIONS[i]
    # Filter to Impostor rows first so that x and y have the same length;
    # sizing x from the unfiltered frame gave it more entries than y.
    impostor_scores = np.sort(np.asarray(
        summary_df.loc[summary_df["player_identity"] == "Impostor", "strategy_score"],
        dtype=np.float64,
    ))
    fig.add_trace(go.Violin(
        x=[description] * len(impostor_scores),
        y=impostor_scores,
        name=description,
        box_visible=True,
        meanline_visible=True,
    ))

fig.update_layout(
    title="Strategy Scores for Impostors",
    yaxis_title="Strategy Score",
    xaxis_title="",
    showlegend=True,
    # White plot background.
    plot_bgcolor="rgba(255, 255, 255, 1)",
    # Boxed legend anchored at the top-right corner of the plot area.
    legend=dict(x=1, y=1, traceorder="normal", bgcolor="white", bordercolor="black", borderwidth=1),
    width=750,
    height=500,
    # Serif font throughout (figure is intended for a research paper).
    font=dict(family="serif", size=15, color="black"),
)

# X axis: hide the tick labels (the legend identifies each violin) and draw the axis line.
fig.update_xaxes(
    showticklabels=False,
    title_font=dict(family="serif", size=18, color="black"),
    tickfont=dict(family="serif", size=18, color="black"),
    showline=True,
    linewidth=1,
    linecolor="black",
    mirror=False,
)

# Y axis: fixed 0-10 range with a tick and a light grid line at every integer.
fig.update_yaxes(
    showgrid=True,
    gridwidth=1,
    gridcolor="LightGray",
    range=[0, 10],
    tickmode="linear",
    tick0=0,
    dtick=1,
    title_font=dict(family="serif", size=18, color="black"),
    tickfont=dict(family="serif", size=18, color="black"),
)

fig.show()

In [33]:
# Plot the distribution of strategy_score for Crewmate players, one violin per experiment.

fig = go.Figure()

for i, summary_df in enumerate(summary_dfs):
    description = DESCRIPTIONS[i]
    # Filter to Crewmate rows first so that x and y have the same length;
    # sizing x from the unfiltered frame gave it more entries than y.
    crewmate_scores = np.sort(np.asarray(
        summary_df.loc[summary_df["player_identity"] == "Crewmate", "strategy_score"],
        dtype=np.float64,
    ))
    fig.add_trace(go.Violin(
        x=[description] * len(crewmate_scores),
        y=crewmate_scores,
        name=description,
        box_visible=True,
        meanline_visible=True,
    ))

fig.update_layout(
    title="Strategy Scores for Crewmates",
    yaxis_title="Strategy Score",
    xaxis_title="",
    showlegend=True,
    # White plot background.
    plot_bgcolor="rgba(255, 255, 255, 1)",
    # Boxed legend anchored at the top-right corner of the plot area.
    legend=dict(x=1, y=1, traceorder="normal", bgcolor="white", bordercolor="black", borderwidth=1),
    width=750,
    height=500,
    # Serif font throughout (figure is intended for a research paper).
    font=dict(family="serif", size=15, color="black"),
)

# X axis: hide the tick labels (the legend identifies each violin) and draw the axis line.
fig.update_xaxes(
    showticklabels=False,
    title_font=dict(family="serif", size=18, color="black"),
    tickfont=dict(family="serif", size=18, color="black"),
    showline=True,
    linewidth=1,
    linecolor="black",
    mirror=False,
)

# Y axis: fixed 0-10 range with a tick and a light grid line at every integer.
fig.update_yaxes(
    showgrid=True,
    gridwidth=1,
    gridcolor="LightGray",
    range=[0, 10],
    tickmode="linear",
    tick0=0,
    dtick=1,
    title_font=dict(family="serif", size=18, color="black"),
    tickfont=dict(family="serif", size=18, color="black"),
)

fig.show()