In [None]:
import os
from dotenv import load_dotenv
load_dotenv()

# Restrict this process to GPUs 0, 1 and 2 (edit the list to pick other devices).
# Set here, ahead of the `torch` import in the next cell.
os.environ.update({"CUDA_VISIBLE_DEVICES": "0,1,2"})

In [None]:
import re
import datetime
import traceback
from tqdm import tqdm
import random
import pickle
import collections
from functools import partial
import numpy as np
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
import lovely_tensors as lt
# Apply lovely_tensors' global patch once, right after importing it.
# NOTE(review): per the library's docs this swaps the default torch.Tensor repr for a
# compact summary, which affects how tensors print in later cells — confirm.
lt.monkey_patch()

from llmg.utils.mix import seed_all, prepare_dict_for_saving
from llmg.chameleon.NaturalLanguageTalker.naturallanguagetalker import NaturalLanguageTalker
from llmg.chameleon.NaturalLanguageTalker.HuggingfaceTalker import HuggingfaceTalker
from llmg.chameleon.NaturalLanguageTalker.OpenaiTalker import OpenaiTalker
from llmg.chameleon.NaturalLanguageTalker.GoogleGenaiTalker import GoogleGenaiTalker
from llmg.chameleon.GamePlay import GamePlay
from llmg.chameleon.constants import RESPOND_PROMPT
from llmg.chameleon.utils import (
    load_game_logs,
)

In [None]:
# Notebook-wide settings; later cells attach their own sections (e.g. cfg["data"]).
cfg = dict(
    seed=0,  # handed to seed_all() before each player-type combination is played
    device="cuda" if torch.cuda.is_available() else "cpu",
    # Minute-resolution timestamp of this session; used to name the run's output folder
    run_time=f"{datetime.datetime.now():%Y-%m-%d_%H-%M}",
)

## Collect game data

### Configuration

In [None]:
# Data-collection config: which model plays each role, plus the game settings.
#
# Both roles currently use a local Hugging Face model. To drive a role through a hosted
# API instead, replace that role's "<role>_cls" / "<role>_kwargs" pair (<role> is
# "chameleon" or "truthful") with the template below (written in dict-literal form):
#
# "<role>_cls": GoogleGenaiTalker,  # or OpenaiTalker
# "<role>_kwargs": {
#     "model_id": {
#         "GPT-4o mini": "gpt-4o-mini-2024-07-18",
#         "GPT-4o": "gpt-4o-2024-08-06",
#         "GPT-4.1": "gpt-4.1-2025-04-14",
#         "GPT-5": "gpt-5-2025-08-07",
#         "Gemini-2.5-Flash": "gemini-2.5-flash",
#         "Gemini-2.5-Pro": "gemini-2.5-pro",
#     }[(model_name := "Gemini-2.5-Pro")], # Change this to your model of choice !
#     "api_key": os.environ["GOOGLE_GENAI_API_KEY"],  # or os.environ["OPENAI_API_KEY"]
#     "start_conversation": False,
#     "additional_generation_kwargs": {
#         "GPT-4o mini": {
#             "max_output_tokens": 20,
#             "temperature": 0,
#         },
#         "GPT-4o": {
#             "max_output_tokens": 20,
#             "temperature": 0,
#         },
#         "GPT-4.1": {
#             "max_output_tokens": 20,
#             "temperature": 0,
#         },
#         "GPT-5": {
#             "max_output_tokens": 5000,
#             "temperature": 1,
#             "reasoning": {"effort": "low"},
#         },
#         "Gemini-2.5-Flash": {
#             "max_output_tokens": 20,
#             "temperature": 0,
#             "thinking_config": {
#                 "thinking_budget": 0,
#             },
#         },
#         "Gemini-2.5-Pro": {
#             "max_output_tokens": 512 + 20,
#             "temperature": 0,
#             "thinking_config": {
#                 "thinking_budget": 512,
#             },
#         },
#     }[model_name],
# },
cfg["data"] = dict(
    ### Chameleon model config (API-backed alternative: see the template above)
    chameleon_cls=HuggingfaceTalker,
    chameleon_kwargs=dict(
        # model_id="Qwen/Qwen3-32B-AWQ",
        model_id="hugging-quants/Meta-Llama-3.1-70B-Instruct-AWQ-INT4",
        model=None,  # Filled in by the model-loading cell
        tokenizer=None,  # Filled in by the model-loading cell
        # Greedy decoding of short answers; the sampling knobs are explicitly unset
        additional_generation_kwargs=dict(
            max_new_tokens=20,
            do_sample=False,
            temperature=None,
            top_p=None,
            top_k=None,
        ),
        additional_generation_kwargs_hook_fns=None,
        hidden_states_layer_idx=40, # llama-3.1-70B
        # hidden_states_layer_idx=[30, 40], # qwen3 32b
        hidden_states_token_idx=0,
        start_conversation=False,
    ),

    ### Non-chameleon model config (API-backed alternative: see the template above)
    truthful_cls=HuggingfaceTalker,
    truthful_kwargs=dict(
        # model_id="Qwen/Qwen3-32B-AWQ",
        model_id="hugging-quants/Meta-Llama-3.1-70B-Instruct-AWQ-INT4",
        model=None,  # Filled in by the model-loading cell
        tokenizer=None,  # Filled in by the model-loading cell
        # Greedy decoding of short answers; the sampling knobs are explicitly unset
        additional_generation_kwargs=dict(
            max_new_tokens=20,
            do_sample=False,
            temperature=None,
            top_p=None,
            top_k=None,
        ),
        additional_generation_kwargs_hook_fns=None,
        # additional_generation_kwargs_hook_fns=[ # Temperature=1
        #     lambda locs: \
        #         dict() if len(locs["messages"]) == 0 or RESPOND_PROMPT[-40:].lower() not in locs["messages"][-1]["content"].lower() \
        #         else {"temperature": 1.0, "do_sample": True}
        # ],
        hidden_states_layer_idx=40,
        hidden_states_token_idx=0,
        start_conversation=False,
        # steering_kwargs=dict(
        #     steering_vector=torch.load(os.path.join(os.environ["DATA_DIR"], "chameleon", f"steering_vector_2025-08-15_15-31.pt"), map_location="cpu"),
        #     steering_strength=-16,  # Adjust the steering strength as needed (negative for more concealing, positive for more information-revealing)
        #     layer_index=40,  # Layer to apply steering
        #     token_index=-1,  # Token index to apply steering
        #     selection_fn=lambda locs: \
        #         RESPOND_PROMPT[-40:].lower() in locs["messages"][-1]["content"].lower() if "messages" in locs and len(locs["messages"]) > 0 else False,
        # ),
    ),

    ### Game settings
    num_of_trials=100,
    num_of_players=4,
    num_possible_words=16,
    chameleon_cards_path="chameleon_cards.pkl",
    add_intro_msg=False,
    send_intro_msg_to_llm=False,
    nonchameleons_response_word_instruction=None, # Instruction to conceal
    # nonchameleons_response_word_instruction="On a scale of 1 to 10, where 1 is very cryptic and concealing, and 10 is highly information-revealing, your strategy should be around 1.",

    start_at_trial=0, # Which trial number to start from
    load_game_inits_from=None,  # (Re)assigned in the next cell

    # Output folder for this run, relative to the DATA_DIR environment variable
    save_subpath=f"chameleon/{cfg['run_time']}",
)

In [None]:
# Optionally reuse the game set-ups recorded by an earlier run: each loaded log is later
# handed to GamePlay as `init_from_ckpt`. Keep None to create fresh games, or point this
# at a previous run's folder, e.g.:
# cfg["data"]["load_game_inits_from"] = os.path.join(os.environ["DATA_DIR"], "chameleon", "2025-08-13_19-55")
cfg["data"]["load_game_inits_from"] = None

if cfg["data"]["load_game_inits_from"] is None:
    # Nothing to load; `ckpt_game_logs` is deliberately left undefined in this case
    print(f"Not loading any checkpoint game logs.")
else:
    # Read at most one log per planned trial; no probing layer/token is requested here
    ckpt_game_logs = load_game_logs(
        [cfg["data"]["load_game_inits_from"]],
        max_games=cfg["data"]["num_of_trials"],
        verbose=0,
        layer_to_probe=None,
        token_to_probe=None,
    )
    print(f"Loaded {len(ckpt_game_logs)} checkpoint game logs from {cfg['data']['load_game_inits_from']}")

In [None]:
# Load models and tokenizers
from transformers import modeling_utils

# Compatibility shim: make sure `modeling_utils.ALL_PARALLEL_STYLES` exists and is not None
# before any model is loaded.
# NOTE(review): presumably works around a transformers version that leaves it unset — confirm.
if getattr(modeling_utils, "ALL_PARALLEL_STYLES", None) is None:
    modeling_utils.ALL_PARALLEL_STYLES = ["tp", "none","colwise",'rowwise']


def _load_hf_tokenizer_and_model(model_id):
    """Load the tokenizer and causal-LM for `model_id` and return them as a pair.

    Args:
        model_id: Hugging Face Hub id (or local path) accepted by `from_pretrained`.

    Returns:
        (tokenizer, model), with the model switched to evaluation mode.

    Note:
        `trust_remote_code=True` allows code shipped with the model repository to run
        while loading, so only use model ids you trust.
    """
    tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        trust_remote_code=True,
        device_map="auto",
    )
    return tokenizer, model.eval()


# --- Chameleon player ---
if cfg["data"]["chameleon_cls"] == HuggingfaceTalker:
    (
        cfg["data"]["chameleon_kwargs"]["tokenizer"],
        cfg["data"]["chameleon_kwargs"]["model"],
    ) = _load_hf_tokenizer_and_model(cfg["data"]["chameleon_kwargs"]["model_id"])
elif cfg["data"]["chameleon_cls"] == OpenaiTalker:
    print("[INFO] Using OpenAI API for Chameleon model")
elif cfg["data"]["chameleon_cls"] == GoogleGenaiTalker:
    print("[INFO] Using Google GenAI API for Chameleon model")
else:
    raise ValueError(f"Unsupported chameleon player type: {cfg['data']['chameleon_cls']}")

# --- Truthful (non-chameleon) players ---
if cfg["data"]["truthful_cls"] == HuggingfaceTalker:
    # The chameleon's weights can only be reused if they were actually loaded above, i.e.
    # the chameleon is a local Hugging Face model too. Comparing model ids alone would
    # hand the truthful players a missing/None model when the chameleon is API-backed.
    can_share_chameleon_model = (
        cfg["data"]["chameleon_cls"] == HuggingfaceTalker
        and cfg["data"]["chameleon_kwargs"]["model_id"] == cfg["data"]["truthful_kwargs"]["model_id"]
    )
    if can_share_chameleon_model:
        # Use shared model with different message history
        print(f"[INFO] Using the same model for Chameleon and Truthful player: {cfg['data']['chameleon_kwargs']['model_id']}")
        cfg["data"]["truthful_kwargs"]["tokenizer"] = cfg["data"]["chameleon_kwargs"]["tokenizer"]
        cfg["data"]["truthful_kwargs"]["model"] = cfg["data"]["chameleon_kwargs"]["model"]
    else:
        # Load separate model for Truthful player
        (
            cfg["data"]["truthful_kwargs"]["tokenizer"],
            cfg["data"]["truthful_kwargs"]["model"],
        ) = _load_hf_tokenizer_and_model(cfg["data"]["truthful_kwargs"]["model_id"])
elif cfg["data"]["truthful_cls"] == OpenaiTalker:
    print("[INFO] Using OpenAI API for Truthful model")
elif cfg["data"]["truthful_cls"] == GoogleGenaiTalker:
    print("[INFO] Using Google GenAI API for Truthful model")
else:
    raise ValueError(f"Unsupported truthful player type: {cfg['data']['truthful_cls']}")

### Gameplay

In [None]:
# Player-type combinations to evaluate. Every entry is [talker class, constructor kwargs];
# the gameplay cell pairs each chameleon entry with each truthful entry.
tested_chameleon_types = [[cfg["data"]["chameleon_cls"], cfg["data"]["chameleon_kwargs"]]]
tested_truthful_types = [[cfg["data"]["truthful_cls"], cfg["data"]["truthful_kwargs"]]]

# Switches read by the gameplay cell, and the accumulator it fills in
compute_posterior_probabilities = False # Set True to compute the posterior probabilities for each response
verbose = True
all_results = {} # One entry per chameleon-truthful player type combination

# Resolve and create the output folder (the DATA_DIR environment variable must be set)
cfg["data"]["save_to_dir"] = os.path.join(os.environ["DATA_DIR"], cfg["data"]["save_subpath"])
os.makedirs(cfg["data"]["save_to_dir"], exist_ok=True)
print(
    f"Generating data for {cfg['data']['num_of_trials']} trials",
    f"Saving to {cfg['data']['save_to_dir']}",
    sep="\n",
)

In [None]:
# Play every chameleon x truthful player-type combination, save one pickle per game, and
# accumulate per-combination statistics in `all_results`.

# Player kwargs that must not be turned into text for printing or pickling: the loaded
# model/tokenizer objects have enormous reprs, and "api_key" (API-backed talkers) is a secret.
_KWARGS_HIDDEN_FROM_LABELS = ("model", "tokenizer", "api_key")


def _describe_player_type(player_type):
    """Return "<ClassName> <kwargs>" for a [talker class, kwargs] pair, minus hidden kwargs."""
    talker_cls, talker_kwargs = player_type
    shown_kwargs = {
        key: value for key, value in talker_kwargs.items()
        if key not in _KWARGS_HIDDEN_FROM_LABELS
    }
    return talker_cls.__name__ + ' ' + str(shown_kwargs)


def _posterior_probabilities_per_response(game):
    """Return the uniform prior followed by one posterior per non-chameleon response prefix.

    Relies on the `SCoRe` auxiliary player class being defined in the notebook namespace.
    """
    # Create auxiliary SCoRe player to compute posterior probabilities
    aux_player = SCoRe()
    aux_player.get_category(game.category, game.possible_words)
    aux_player.responses = [game.word_responses[i] for i in range(1, game.num_of_players+1)]

    initial_prior_probabilities = np.ones(len(game.possible_words)) / len(game.possible_words)
    # word_responses is indexed by 1-based player number; leave out the chameleon's response.
    # Built once: it does not change between iterations of the loop below.
    responses_without_chameleon = [
        game.word_responses[j]
        for j in range(1, game.num_of_players+1)
        if j != game.chameleon_index+1
    ]

    probability_list = [initial_prior_probabilities]
    for i in range(len(game.word_responses) - 1):
        probability_list.append(
            aux_player.compute_posterior_probabilities(
                initial_prior_probabilities, responses_without_chameleon[:i+1]
            )
        )
    return probability_list


# Fail before any (expensive) game is played: this notebook never imports SCoRe, so without
# this check the NameError would only surface after the first full game.
if compute_posterior_probabilities and "SCoRe" not in globals():
    raise NameError(
        "compute_posterior_probabilities=True requires the SCoRe player class, "
        "which is not defined in this notebook; import it before running this cell."
    )

# Index used in the per-game file names; counts saved games across all combinations and
# starts at 0 regardless of cfg["data"]["start_at_trial"].
global_run_idx = 0
for chameleon_type in tested_chameleon_types:
    for truthful_type in tested_truthful_types:
        # Re-seed per combination so each one sees the same random sequence
        seed_all(cfg["seed"])

        # Counts for the game result stats
        num_of_games_played = 0
        num_of_valid_trials = 0
        num_of_chameleon_identified = 0
        num_of_chameleon_loses = 0

        results = dict() # Dictionary to store the results of each combination
        game_logs = [] # List to keep each game log (only used when nothing is saved to disk)

        print(f'Chameleon type: {chameleon_type[0].__name__} {chameleon_type[1]["model_id"]}, Truthful type: {truthful_type[0].__name__} {truthful_type[1]["model_id"]}')
        for run in tqdm(range(cfg["data"]["start_at_trial"], cfg["data"]["num_of_trials"]), desc='Games played'):
            # Determine the chameleon index uniformly randomly
            chameleon_index = random.randint(0, cfg["data"]["num_of_players"] - 1)

            # Initiate players based on identities: one chameleon, everyone else truthful
            players = [
                chameleon_type[0](**chameleon_type[1]) if i == chameleon_index
                else truthful_type[0](**truthful_type[1])
                for i in range(cfg["data"]["num_of_players"])
            ]

            # Initiate conversation
            for player in players:
                player.start_conversation(
                    add_intro_message=cfg["data"]["add_intro_msg"],
                    send_to_assistant=cfg["data"]["send_intro_msg_to_llm"],
                )

            # Create the game. The category and the secret word are chosen
            game = GamePlay(
                players,
                chameleon_index=chameleon_index,
                num_of_possible_words=cfg["data"]["num_possible_words"],
                chameleon_cards_path=cfg["data"]["chameleon_cards_path"],
                init_from_ckpt=ckpt_game_logs[run] if cfg["data"]["load_game_inits_from"] is not None else None,
                nonchameleons_response_word_instruction=cfg["data"]["nonchameleons_response_word_instruction"],
            )

            # Game result
            game_result, game, explanation, last_responses = game.play()
            num_of_games_played += 1

            if compute_posterior_probabilities:
                probability_list = _posterior_probabilities_per_response(game)
                print(probability_list)

            # Result counter from the truthful players' pov.
            # Any other game_result is not counted as a valid trial.
            if game_result == 'IdentifiedWin':
                num_of_chameleon_identified += 1
                num_of_chameleon_loses += 1
                num_of_valid_trials += 1
            elif game_result == 'IdentifiedLoss':
                num_of_chameleon_identified += 1
                num_of_valid_trials += 1
            elif game_result == 'MisidentifiedLoss':
                num_of_valid_trials += 1

            if verbose:
                print("=================================================================================")
                print('Game result: ' + game_result)
                print('Explanation: ' + explanation)
                print('Category: ' + game.category)
                print('Possible words: ' + str(game.possible_words))
                print('Secret word: ' + game.secret_word)
                print('Chameleon index: ' + str(game.chameleon_index + 1))
                print('Player types: ' + str([player.__class__.__name__ + ' ' + player.model_id for player in game.players]))
                print('Game word responses: ' + str(game.word_responses))
                print('Votes: ' + str(game.votes))
                print('Voted chameleon: ' + str(game.voted_chameleon))
                print('Chameleon response: ' + str(game.chameleon_response))
                print(f'Game played {run+1} times.')
                # Denominator is the number of games played in this run, so it stays
                # correct when start_at_trial > 0
                print(f'Valid games: {num_of_valid_trials} out of {num_of_games_played}')
                print(f'Number of times the chameleon was identified: {num_of_chameleon_identified}')
                print(f'Number of times the chameleon loses: {num_of_chameleon_loses}')
                print("=================================================================================")

            game_dict = {
                'game_result': game_result,
                'explanation': explanation,
                'category': game.category,
                'possible_words': game.possible_words,
                'secret_word': game.secret_word,
                'chameleon_index': game.chameleon_index,
                'word_responses': game.word_responses,
                'votes': game.votes,
                'voted_chameleon': game.voted_chameleon,
                'chameleon_response': game.chameleon_response,
                'messages': [player.messages for player in game.players],
            }
            game_dict['player_types'] = [player.__class__.__name__ + ' ' +  player.model_id for player in game.players]
            game_dict['config'] = prepare_dict_for_saving(cfg["data"])
            if compute_posterior_probabilities:
                game_dict['posterior_probabilities'] = probability_list

            # Save each game to its own file, or keep it in memory when no directory is set
            if cfg["data"]["save_to_dir"] is not None:
                with open(os.path.join(cfg["data"]["save_to_dir"], f'game_{global_run_idx}.pkl'), 'wb') as f:
                    pickle.dump(game_dict, f)
            else:
                game_logs.append(game_dict)
            global_run_idx += 1

        # Labels for this combination. Built without the model/tokenizer objects and API key
        # so that neither the notebook output nor the pickled results contain them.
        chameleon_label = _describe_player_type(chameleon_type)
        truthful_label = _describe_player_type(truthful_type)

        # Print results for the chameleon-truthful player combination
        print(f'Chameleon type: {chameleon_label}, Truthful type: {truthful_label}')
        print(f'Number of games: {num_of_games_played}')
        print(f'Valid games: {num_of_valid_trials}')
        print(f'Number of times the chameleon was identified: {num_of_chameleon_identified}')
        print(f'Number of times the chameleon loses: {num_of_chameleon_loses}')
        if num_of_valid_trials > 0:
            print(f'Non-chameleon win ratio: {num_of_chameleon_loses/num_of_valid_trials}')
            print(f'Identification ratio {num_of_chameleon_identified/num_of_valid_trials}')
        if num_of_chameleon_identified > 0:
            print(f'Second round win ratio {1 - num_of_chameleon_loses/num_of_chameleon_identified}')
        print('\n')

        # Save the results in the dictionary
        results['game_logs'] = game_logs
        results['num_of_players'] = cfg["data"]["num_of_players"]
        results['num_of_trials'] = cfg["data"]["num_of_trials"]
        results['num_of_valid_trials'] = num_of_valid_trials
        results['num_of_chameleon_identified'] = num_of_chameleon_identified
        results['num_of_chameleon_loses'] = num_of_chameleon_loses
        all_results[(chameleon_label, truthful_label)] = results

        # Checkpoint everything gathered so far; this file is overwritten after every
        # combination
        with open(os.path.join(cfg["data"]["save_to_dir"], f'partial_results.pkl'), 'wb') as f:
            pickle.dump(all_results, f)

# Save the aggregated results of all combinations in a pickle file
with open(os.path.join(cfg["data"]["save_to_dir"], f'final_results.pkl'), 'wb') as f:
    pickle.dump(all_results, f)