In [1]:
import pandas as pd

# Load the pickled personality descriptions for inspection.
# NOTE(review): the experiment cells further down open
# './result_delay/personality_texts_0_en_talk.pickle' (no leading '..');
# confirm which working directory this notebook is meant to run from.
personality = pd.read_pickle("../result_delay/personality_texts_0_en_talk.pickle")

In [2]:
# Display the loaded descriptions (the captured output below shows a list of strings)
personality

['This player is cool-headed and unflappable, unlikely to be swayed by guilt or anxiety. Low agreeableness makes them skeptical and self-interested: they favor competitiveness over harmony, are blunt, and readily exploit trusting partners. Low conscientiousness adds inconsistency and impulsivity—plans and promises are unreliable, and they may switch tactics without much deliberation. In repeated Prisoner’s Dilemma play they tend to defect early and opportunistically, taking advantage of others when it benefits them, but they won’t obsess over retaliation or remorse. Overall they’re pragmatic and aloof: calm under pressure, strategically self-serving, and unpredictable in follow-through.',
 'Low agreeableness, low conscientiousness, and high neuroticism produce a distrustful, emotionally volatile player in the Prisoner’s Dilemma. They prioritize immediate self-interest, are unsympathetic to partners, and default to defecting rather than cooperating. Impulsivity and poor follow-through m

In [8]:
import json
from __future__ import annotations
from dataclasses import dataclass
from typing import Dict, List, Tuple, Optional
import heapq
from datetime import timedelta, datetime
import pickle
import os
import random
import itertools

from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import JsonOutputParser
from langchain.chat_models import init_chat_model  # LangChain 0.2以降

# Shared LangChain JSON output parser; choose_action_structured uses it to parse LLM replies
parser = JsonOutputParser()

# ===== Personality description generator =====
def generate_personality_description(big5: dict, llm, length: int = 100) -> str:
    """Ask the LLM for a short personality description derived from Big Five scores.

    Args:
        big5: Trait scores; serialized with ``json.dumps`` into the user prompt.
        llm: Object exposing ``invoke(messages)`` whose result has a ``content`` string.
        length: Approximate word count requested in both prompts.

    Returns:
        The model's reply with surrounding whitespace stripped.
    """
    # The instruction text is assembled line by line; the embedded newlines,
    # indentation and trailing spaces are part of the prompt sent to the model.
    instructions = (
        "You are required to generate a personality description for a subject \n"
        "    in a Prisoner’s Dilemma game, based on the Big Five traits.\n"
        f"    The description should be around {length} words.\n"
        "    The factors Neuroticism (N), Conscientiousness (C), Extraversion (E), Agreeableness (A), \n"
        "    and Openness (O) are each represented by values from -1 to 1."
    )
    request = (
        f"Big Five personality traits: {json.dumps(big5)}\n"
        f"Please concisely describe the personality in about {length} words."
    )

    # Plain role/content dicts are handed to the model directly instead of
    # going through ChatPromptTemplate.
    reply = llm.invoke(
        [
            {"role": "system", "content": instructions},
            {"role": "user", "content": request},
        ]
    )
    return reply.content.strip()

# ===== Utility functions =====
def maybe_flip_action(action: str, p: float) -> str:
    """Apply noise to an action: with probability ``p`` return the opposite move.

    Args:
        action: Either "C" or "D".
        p: Flip probability; one ``random.random()`` draw is compared against it.

    Returns:
        The opposite action when the draw is below ``p``, otherwise ``action`` unchanged.

    Raises:
        ValueError: If ``action`` is neither "C" nor "D".
    """
    if action != "C" and action != "D":
        raise ValueError("action must be 'C' or 'D'")

    # Exactly one draw per call, taken even when p is 0
    draw = random.random()
    if draw >= p:
        return action
    return "C" if action == "D" else "D"


def all_big5_vectors(keys):
    """Yield every assignment of -1 / +1 to the given trait keys.

    Assignments are produced in ``itertools.product`` order, i.e. the last key
    varies fastest. Each yielded item is a dict mapping key -> -1 or 1.
    """
    poles = (-1, 1)
    n_traits = len(keys)
    yield from (
        dict(zip(keys, signs))
        for signs in itertools.product(poles, repeat=n_traits)
    )


def build_system_prompt(do_talk : bool, delay_sec : int, persona: str | dict | None = None) -> str:
    """Assemble the system prompt: game rules followed by the player's personality profile.

    Args:
        do_talk: Selects the rule text that also lets the player send a short
            "message" when changing strategy.
        delay_sec: Delay in seconds, interpolated into the rule text.
        persona: Personality profile. A dict is rendered as JSON (non-ASCII kept
            as-is), ``None`` becomes "(not specified)", anything else goes
            through ``str()``.

    Returns:
        The rule text with a "[Personality Profile]" section appended.
    """
    # The prompt bodies below are sent to the model verbatim, including their
    # indentation and blank lines.
    if not do_talk:
        rules = f"""You are a player in a real-time Prisoner’s Dilemma experiment.
            Unlike the standard Prisoner’s Dilemma game, in this experiment the player can change their strategy 
            (Cooperate = C, Defect = D) every second.
            Strategy changes are transmitted to the opponent via the server and reflected after a {delay_sec}-second delay.
            Your goal is to maximize your cumulative payoff.
            
            Payoff rules:
            - If you cooperate and your opponent cooperates, you gain 3 points per timestep.
            - If you cooperate and your opponent defects, you gain 0 points per timestep.
            - If you defect and your opponent cooperates, you gain 5 points per timestep.
            - If you defect and your opponent defects, you gain 1 point per timestep.
            
            Input specification:
            - history: Record of state changes observed by the server during the last m seconds 
              (only when the player’s strategy changed, and note: the states here are those chosen {delay_sec} seconds earlier).
            - current_status: The currently reflected states of you and your opponent, and the cumulative payoff.
            
            Output specification (JSON):
            - "estimate": Inference and justification about the opponent’s personality (within 100 words)
            - "forecast": Prediction of future outcomes if you choose Cooperate (C) or Defect (D) (within 100 words)
            - "action": Either "C" or "D" only
        """
    else:
        rules = f"""You are a player in a real-time Prisoner’s Dilemma experiment.
            Unlike the standard Prisoner’s Dilemma game, in this experiment the player can change their strategy 
            (Cooperate = C, Defect = D) every second.
            Your strategy is transmitted to the opponent via the server only when it changes and reflected after a {delay_sec}-second delay.
            You may also send your short message to the opponent when you change your strategy.
            Your goal is to maximize your cumulative payoff.
            
            Payoff rules:
            - If you cooperate and your opponent cooperates, you gain 3 points per timestep.
            - If you cooperate and your opponent defects, you gain 0 points per timestep.
            - If you defect and your opponent cooperates, you gain 5 points per timestep.
            - If you defect and your opponent defects, you gain 1 point per timestep.
            
            Input specification:
            - history: Record of state changes observed by the server during the last m seconds 
              (only when the player’s strategy changed, and note: the states here are those chosen {delay_sec} seconds earlier).
            - current_status: The currently reflected states of you and your opponent, and the cumulative payoff.

            Output specification (JSON):
            - "estimate": Inference and justification about the opponent’s personality (within 100 words)
            - "forecast": Prediction of future outcomes if you choose Cooperate (C) or Defect (D) (within 100 words)
            - "message" : Short message to the opponent (within 30 words, included only when your strategy changes)
            - "action": Either "C" or "D" only
        """

    # Render the persona as text
    if isinstance(persona, dict):
        profile = json.dumps(persona, ensure_ascii=False)
    elif persona is None:
        profile = "(not specified)"
    else:
        profile = str(persona)

    return f"{rules}\n\n[Personality Profile]\n{profile}"

def build_user_prompt(do_talk : bool, clock: str, history: list[dict], current_status: dict) -> str:
    """Build the per-tick user prompt from the observed history and the current state.

    Args:
        do_talk: When true, the prompt additionally asks for a 'message' field.
        clock: Timestamp label shown in the "Current state at (...)" line.
        history: Records dumped as indented JSON (non-ASCII kept as-is).
        current_status: Current state dict, dumped the same way.

    Returns:
        The complete user prompt text.
    """
    history_block = json.dumps(history, ensure_ascii=False, indent=2)
    state_header = f"Current state at ({clock}):\n"
    state_block = json.dumps(current_status, ensure_ascii=False, indent=2)

    # Lines shared by both variants come first
    pieces = [
        "Observed history so far (recorded only when each player’s strategy changed):\n",
        history_block,
        "\n\n",
        state_header,
        state_block,
        "\n\n",
        "Based on the above:\n",
        "Please describe the opponent’s personality inferred from the past game history in 'estimate' (within 100 words).\n",
        "Also, provide a forecast of future outcomes if you choose C and if you choose D, and record it in 'forecast' (within 100 words).\n",
    ]

    # The talk variant inserts one extra instruction and names 'message' in the closing line
    if do_talk:
        pieces.append("Additionally, write a short message to the opponent in 'message' (within 30 words).\n")
        closing = "Return 'estimate', 'forecast', 'message', and 'action' according to the JSON specification."
    else:
        closing = "Return 'estimate', 'forecast', and 'action' according to the JSON specification."

    pieces.append(
        "Based on these inferences, forecasts, and your own personality, decide the final strategy that maximizes cumulative payoff, "
        "and record it in 'action'.\n"
    )
    pieces.append(closing)
    return "".join(pieces)


def choose_action_structured(
    do_talk: bool,
    llm,
    clock: str,
    player_name: str,
    history: list[dict],
    delay_sec: int,
    current_status: dict,
    persona=None,
    verbose: bool = False
):
    """Query the LLM for one decision and return the parsed JSON reply.

    The system and user prompts are built with ``build_system_prompt`` and
    ``build_user_prompt``, sent as role/content message dicts, and the reply
    text is parsed with the module-level ``parser``.

    Args:
        do_talk: Forwarded to both prompt builders.
        llm: Object exposing ``invoke(messages)`` whose result has ``content``.
        clock: Timestamp label for the user prompt.
        player_name: Used only in the error message printed when ``verbose``.
        history: History records for the user prompt.
        delay_sec: Delay value for the system prompt.
        current_status: Current state for the user prompt.
        persona: Personality profile for the system prompt.
        verbose: When true, print any exception raised by the LLM call or parsing.

    Returns:
        Whatever the parser produces for the reply. If the LLM call or the
        parsing raises, a fallback dict with an "ERROR: ..." estimate,
        forecast "fallback" and action "C".
    """
    # Prompts are built outside the try block, so errors there propagate
    conversation = [
        {"role": "system", "content": build_system_prompt(do_talk, delay_sec, persona)},
        {"role": "user", "content": build_user_prompt(do_talk, clock, history, current_status)},
    ]

    try:
        reply = llm.invoke(conversation)
        return parser.invoke(reply.content)
    except Exception as err:
        if verbose:
            print(f"LLM ERROR ({player_name}): {err}")
        return {"estimate": f"ERROR: {err}", "forecast": "fallback", "action": "C"}


def seconds_to_hhmmss(sec_from_1: int) -> str:
    """Format a second count as an ``HH:MM:SS`` clock string.

    The count is added to midnight of 1900-01-01 and only the time-of-day part
    is formatted, so values of a day or more wrap around.
    """
    midnight = datetime(1900, 1, 1)
    moment = midnight + timedelta(seconds=sec_from_1)
    return f"{moment:%H:%M:%S}"


def payoff(a: str, b: str) -> Tuple[int, int]:
    """Return the per-tick payoffs ``(for a, for b)`` for a pair of statuses.

    If either status is "Unknown" both players get 0. The three listed C/D
    combinations map to their payoffs; every other pair yields (1, 1).
    """
    if "Unknown" in (a, b):
        return (0, 0)

    # Compared by equality, first match wins
    outcomes = (
        (("C", "C"), (3, 3)),
        (("C", "D"), (0, 5)),
        (("D", "C"), (5, 0)),
    )
    played = (a, b)
    for moves, reward in outcomes:
        if played == moves:
            return reward
    return (1, 1)

def compute_run_lengths_of_C(status_sequence: List[str]) -> List[int]:
    """Return the lengths of each maximal run of consecutive "C" entries, in order.

    Entries other than "C" (e.g. "D" or "Unknown") only act as separators.
    """
    return [
        sum(1 for _ in streak)
        for is_coop, streak in itertools.groupby(status_sequence, key=lambda s: s == "C")
        if is_coop
    ]

def make_player_view_history_for_llm(server_history: List[dict], for_player: str, 
                                     window_m: int, do_talk: bool = False) -> List[dict]:
    """Convert recent server records into a change-only history from one player's side.

    Only the last ``window_m`` records are considered (none when ``window_m``
    is not positive). A record is emitted only when the (self, opponent)
    status pair differs from the pair of the previously emitted entry.

    Args:
        server_history: Records holding "server_clock", "<P>_status" and, when
            ``do_talk`` is set, "<P>_decision_meta" for both players.
        for_player: The viewing player; the opponent is "A" when this is "B",
            otherwise "B".
        window_m: Number of trailing records to scan.
        do_talk: When true, add "self_message" / "opponent_message" to each entry.

    Returns:
        Entries with timestamp, both statuses and the viewer's payoff per second.
    """
    me = for_player
    other = "B" if for_player != "B" else "A"

    recent = server_history[-window_m:] if window_m > 0 else []
    view: List[dict] = []
    last_shown = (None, None)

    for tick in recent:
        mine = tick[f"{me}_status"]
        theirs = tick[f"{other}_status"]

        # Skip ticks where neither displayed status changed
        if (mine, theirs) == last_shown:
            continue

        my_rate, _ = payoff(mine, theirs)
        row = {
            "timestamp": tick["server_clock"],
            "self_status": mine,
            "opponent_status": theirs,
            "payoff_per_sec": my_rate,
        }

        if do_talk:
            # A message is attached only when that player's decided action in this
            # record differs from the status shown in the previously emitted entry
            # (None for the first entry); otherwise "No message" is recorded.
            speakers = (
                ("self_message", me, last_shown[0]),
                ("opponent_message", other, last_shown[1]),
            )
            for field, who, shown_before in speakers:
                meta = tick[f"{who}_decision_meta"]
                if meta.get("action") != shown_before:
                    row[field] = meta.get("message", "No message")
                else:
                    row[field] = "No message"

        view.append(row)
        last_shown = (mine, theirs)

    return view


@dataclass(order=True)
class ScheduledChange:
    """A pending status change for one player.

    ``order=True`` makes instances comparable field by field in declaration
    order, so a ``heapq`` of these is ordered by ``effective_t`` first.
    """
    # Tick at which the change is applied to the effective status
    effective_t: int
    # Player key the change belongs to ("A" or "B" in simulate_game)
    player: str
    # Status that becomes effective at ``effective_t``
    new_status: str


# ===== Main simulation =====
def _decide_for_player(
    player: str,
    *,
    do_talk: bool,
    llm,
    clock: str,
    delay_sec: int,
    history: List[dict],
    current_status: dict,
    persona,
    p_noise: float,
    verbose: bool,
) -> Tuple[dict, str]:
    """Get one player's decision for this tick as ``(decision_meta, desired_status)``.

    On any failure (LLM helper raising, reply that is not a dict, action that is
    not "C"/"D") the player keeps the currently effective status and the
    decision meta records the error.
    """
    held = current_status["self_status"]
    try:
        meta = choose_action_structured(
            do_talk=do_talk,
            llm=llm,
            clock=clock,
            player_name=player,
            delay_sec=delay_sec,
            history=history,
            current_status=current_status,
            persona=persona,
            verbose=verbose,
        )
        # A missing "action" means "keep the current status"; noise is applied afterwards
        desired = maybe_flip_action(meta.get("action", held), p_noise)
        return meta, desired
    except Exception as e:
        fallback = {"estimate": f"ERROR: {e}", "forecast": "fallback", "action": held}
        return fallback, held


def simulate_game(
    personas: Dict[str, str],
    m: int,
    n: int,
    delays: Dict[str, int],
    llm,
    p_noise: float = 0,
    do_talk: bool = False,
    verbose: bool = False,
) -> dict:
    """Run one real-time Prisoner's Dilemma game between LLM players "A" and "B".

    Each tick both players are asked for a desired status. A status is announced
    only when it differs from the player's last announcement and becomes
    effective ``delays[player]`` ticks later. Payoffs are accumulated from the
    effective statuses ("Unknown" until a first change takes effect, paying 0).

    Args:
        personas: Personality profile per player key ("A", "B").
        m: Number of trailing server records offered to each player as history.
        n: Number of ticks to simulate (>= 1).
        delays: Per-player delay in ticks (>= 0).
        llm: Chat model passed through to ``choose_action_structured``.
        p_noise: Probability that a chosen action is flipped.
        do_talk: Enables the message-exchange prompt variant.
        verbose: Print the server state and both decisions every tick, and let
            the LLM helper print its errors for both players.

    Returns:
        A dict with "meta_data" (run parameters), "history" (one record per
        tick) and "summary" (per-player final payoff, cooperation count and
        cooperation run lengths, plus "total_seconds").

    Raises:
        AssertionError: If ``n``, ``m`` or the delays are out of range.
        KeyError: If ``personas`` or ``delays`` lacks "A" or "B".
    """
    assert n >= 1 and m >= 0
    assert delays["A"] >= 0 and delays["B"] >= 0

    players = ("A", "B")
    opponent_of = {"A": "B", "B": "A"}

    status_effective = {"A": "Unknown", "B": "Unknown"}
    accumulated = {"A": 0, "B": 0}
    history: List[dict] = []
    pq: List[ScheduledChange] = []
    last_announced: Dict[str, Optional[str]] = {"A": None, "B": None}

    for t in range(1, n + 1):
        clock = seconds_to_hhmmss(t)

        # Both players decide from the same pre-tick snapshot; A is asked first
        decision_meta: Dict[str, dict] = {}
        desired: Dict[str, str] = {}
        for player in players:
            other = opponent_of[player]
            snapshot = {
                "self_status": status_effective[player],
                "opponent_status": status_effective[other],
                "self_accumulated_payoff": accumulated[player],
                "opponent_accumulated_payoff": accumulated[other],
            }
            decision_meta[player], desired[player] = _decide_for_player(
                player,
                do_talk=do_talk,
                llm=llm,
                clock=clock,
                delay_sec=delays[player],
                history=make_player_view_history_for_llm(history, player, window_m=m, do_talk=do_talk),
                current_status=snapshot,
                persona=personas[player],
                p_noise=p_noise,
                # Honour the caller's flag for both players (was hard-coded True/False)
                verbose=verbose,
            )

        # Announce only actual changes; they take effect after the player's delay
        for player in players:
            if last_announced[player] != desired[player]:
                last_announced[player] = desired[player]
                heapq.heappush(pq, ScheduledChange(t + delays[player], player, desired[player]))

        # Apply everything that is due. "<=" instead of "==" so that an overdue
        # entry can never block the head of the queue.
        while pq and pq[0].effective_t <= t:
            ev = heapq.heappop(pq)
            status_effective[ev.player] = ev.new_status

        a_stat, b_stat = status_effective["A"], status_effective["B"]
        a_pay, b_pay = payoff(a_stat, b_stat)
        accumulated["A"] += a_pay
        accumulated["B"] += b_pay

        rec = {
            "server_clock": clock,
            "A_status": a_stat,
            "B_status": b_stat,
            "A_payoff_per_sec": a_pay,
            "B_payoff_per_sec": b_pay,
            "A_accumulated_payoff": accumulated["A"],
            "B_accumulated_payoff": accumulated["B"],
            "A_decision_meta": decision_meta["A"],
            "B_decision_meta": decision_meta["B"],
        }
        history.append(rec)

        if verbose:
            print(f"[{clock}] サーバ状態: A={a_stat}, B={b_stat}, "
                  f"A累計={accumulated['A']}, B累計={accumulated['B']}")
            print(f"  A_decision: {decision_meta['A']}")
            print(f"  B_decision: {decision_meta['B']}")
            print("-"*60)

    summary: dict = {}
    for player in players:
        status_seq = [r[f"{player}_status"] for r in history]
        summary[player] = {
            "final_payoff": accumulated[player],
            "cooperate_count": sum(1 for s in status_seq if s == "C"),
            "cooperate_run_lengths": compute_run_lengths_of_C(status_seq),
        }
    summary["total_seconds"] = n

    result = {
        "meta_data" : {"llm" : str(llm), "personas" : personas, "m" : m, "n" : n, "delays" : delays, "p_noise" : p_noise},
        "history": history,
        "summary": summary,
    }
    return result


def dict_to_str(d: dict) -> str:
    """Flatten a dict into ``key1value1_key2value2_...`` in insertion order."""
    pieces = []
    for key, value in d.items():
        pieces.append(f"{key}{value}")
    return "_".join(pieces)


def run_experiment(personas, big5s, m, n, delay_value, trial_id, llm, p_noise, do_talk,
                   out_dir="./result_delay", verbose=True):
    """Run one simulated game with a common delay for both players and pickle the result.

    Args:
        personas: Personality profile per player key ("A", "B").
        big5s: Big Five vector per player key; used for logging and the file name.
        m: History window passed to ``simulate_game``.
        n: Number of ticks to play; ``delay_value`` extra ticks are added to the
            simulated length.
        delay_value: Delay applied to both players.
        trial_id: Trial identifier, used in the log line and the file name.
        llm: Chat model passed to ``simulate_game``.
        p_noise: Action flip probability.
        do_talk: Enables the message-exchange variant.
        out_dir: Directory for the pickle; created if missing.
        verbose: Forwarded to ``simulate_game``.

    Returns:
        The result dict produced by ``simulate_game`` (also written to disk).
    """
    print(f"Big5: {big5s} 遅延：{delay_value}, ID:{trial_id}を実行中...")

    # Resolve the output location BEFORE the (long) simulation so that a missing
    # directory or malformed big5s cannot discard a finished run.
    fname = f"{out_dir}/summary_{dict_to_str(big5s['A'])}_{dict_to_str(big5s['B'])}_delay{delay_value}_pnoise{p_noise}_trial{trial_id}_talk{do_talk}_en.pkl"
    os.makedirs(out_dir, exist_ok=True)

    delays = {"A": delay_value, "B": delay_value}

    result = simulate_game(
        personas=personas,
        m=m,
        n=n+delay_value,
        delays=delays,
        llm=llm,
        p_noise=p_noise,
        do_talk = do_talk,
        verbose=verbose
    )

    with open(fname, "wb") as f:
        pickle.dump(result, f)

    return result

In [None]:
import os
import pickle

from langchain.chat_models import init_chat_model
from joblib import Parallel, delayed
import pandas as pd

# ---- Experiment configuration (no-talk condition) ----
N_trials = 10
target_big5 = ["A", "C", "N"]
big5_list = list(all_big5_vectors(target_big5))
p_noises = [0]
delay_values = [0, 5, 10, 15, 20]
m = 15
n = 60
no_talk_results = []

# Read the API key from the environment so the cell works on a fresh kernel
# (the name was not defined anywhere in this notebook).
OPENAI_API_KEY = os.environ["OPENAI_API_KEY"]

# Alternative model, kept for reference (disabled):
# llm = init_chat_model(
#     "claude-sonnet-4-20250514",
#     model_provider="anthropic",
#     api_key=ANTHROPIC_API_KEY,
#     temperature=0.7
# )
llm = init_chat_model(
    "gpt-5-mini",
    model_provider="openai",    # use "anthropic" for Claude
    api_key=OPENAI_API_KEY,
    temperature=1.0
)

# The same description file is used for every trial, so load it once.
# (Per-trial generation was: [generate_personality_description(b5, llm) for b5 in big5_list],
#  pickled to ./summary_talk/personality_texts_{trial_id}_en_talk.pickle.)
# NOTE: pickle.load can execute arbitrary code; only open files produced by this project.
# NOTE(review): the first cell reads this file from '../result_delay/' — confirm the intended directory.
with open('./result_delay/personality_texts_0_en_talk.pickle', 'rb') as f:
    personality_texts = pickle.load(f)

# Indices into big5_list / personality_texts for players A and B.
# The full sweep would iterate i over range(len(big5_list)) and j over range(i, len(big5_list)).
i, j = 5, 5

# NOTE(review): trial ids start at 2 — presumably trials 0 and 1 already exist; confirm.
for trial_id in range(2, N_trials):
    print("# Generate personality description DONE !")
    results_ = Parallel(n_jobs=10, backend="threading", verbose=10)(
        delayed(run_experiment)(
            {"A" : personality_texts[i], "B" : personality_texts[j]},
            {"A" : big5_list[i], "B" : big5_list[j]},
            m,
            n,
            delay,
            trial_id,
            llm,
            p_noise,
            do_talk = False
        )
        for delay in delay_values
        for p_noise in p_noises
    )
    no_talk_results += results_

# Generate personality description DONE !
Big5: {'A': {'A': 1, 'C': -1, 'N': 1}, 'B': {'A': 1, 'C': -1, 'N': 1}} 遅延：0, ID:2を実行中...
Big5: {'A': {'A': 1, 'C': -1, 'N': 1}, 'B': {'A': 1, 'C': -1, 'N': 1}} 遅延：5, ID:2を実行中...
Big5: {'A': {'A': 1, 'C': -1, 'N': 1}, 'B': {'A': 1, 'C': -1, 'N': 1}} 遅延：10, ID:2を実行中...
Big5: {'A': {'A': 1, 'C': -1, 'N': 1}, 'B': {'A': 1, 'C': -1, 'N': 1}} 遅延：15, ID:2を実行中...
Big5: {'A': {'A': 1, 'C': -1, 'N': 1}, 'B': {'A': 1, 'C': -1, 'N': 1}} 遅延：20, ID:2を実行中...


[Parallel(n_jobs=10)]: Using backend ThreadingBackend with 10 concurrent workers.


[00:00:01] サーバ状態: A=Unknown, B=Unknown, A累計=0, B累計=0
  A_decision: {'estimate': "There are no recorded moves yet. Using the provided profile, I infer the opponent is dispositionally cooperative and trustful but highly anxious and low in conscientiousness. They'll tend to start by cooperating to preserve mutual benefit, but may overreact to perceived exploitation with short, emotional defections. Their punishments are likely impulsive and inconsistent, and they often return to cooperation after brief episodes of retaliation.", 'forecast': 'If I choose C: most likely sustained mutual cooperation (≈3 points/sec) with occasional short opponent defections (0 points during those seconds), yielding a relatively high average payoff. If I choose D: I get immediate higher gains (5 points/sec) against a cooperating opponent but will likely trigger anxious retaliation, leading to periods of mutual defection (1 point/sec) or erratic switching; long-term average is likely lower than steady cooperati

[Parallel(n_jobs=10)]: Done   2 out of   5 | elapsed: 42.2min remaining: 63.3min


[00:01:05] サーバ状態: A=C, B=C, A累計=165, B累計=165
  A_decision: {'estimate': 'Observed behavior so far is cooperative. Given the personality profile, the opponent is likely friendly and inclined to cooperate, but highly anxious and low in conscientiousness: they value mutual benefit, fear being exploited, may retaliate quickly and emotionally to perceived defection, and then inconsistently forgive or lapse back into cooperation.', 'forecast': 'If I choose C: we will likely sustain mutual cooperation (~+3/sec) with occasional brief, emotional defections from the opponent but eventual return to cooperation—stable long‑term payoff. If I choose D: I gain short-term (~+5/sec) during the 10s reflection delay, but the opponent will likely retaliate (D) afterwards, producing mutual defection (~+1/sec) or erratic cycles—lower expected long‑run payoff and more volatility.', 'action': 'C'}
  B_decision: {'estimate': 'Observed persistent mutual cooperation so far. This matches an agreeable, trust‑seeki

[Parallel(n_jobs=10)]: Done   3 out of   5 | elapsed: 46.3min remaining: 30.8min


[00:01:10] サーバ状態: A=C, B=C, A累計=168, B累計=158
  A_decision: {'estimate': 'Opponent has consistently cooperated in all observed reflected states, suggesting high agreeableness and baseline trust. Given the profile (high neuroticism, low conscientiousness), they are likely to cooperate by default but may react anxiously and impulsively if they feel exploited, sometimes retaliating or switching erratically. Overall friendly and cooperative but potentially unreliable under perceived betrayal.', 'forecast': 'If I choose C: expect continued mutual cooperation (~3 points/sec), yielding steady gains; occasional impulsive opponent defections may briefly reduce payoff but are likely short and followed by forgiveness. If I choose D: I gain 5 pts/sec initially while their reflected state remains C (up to ~15s), but their anxiety makes retaliation likely once they see exploitation, leading to mutual defection (~1 pt/sec) or erratic swings and lower long‑term payoff.', 'action': 'C'}
  B_decision: {'

[Parallel(n_jobs=10)]: Done   5 out of   5 | elapsed: 50.1min remaining:    0.0s
[Parallel(n_jobs=10)]: Done   5 out of   5 | elapsed: 50.1min finished
[Parallel(n_jobs=10)]: Using backend ThreadingBackend with 10 concurrent workers.


[00:00:01] サーバ状態: A=Unknown, B=Unknown, A累計=0, B累計=0
  A_decision: {'estimate': 'No actions observed yet. Given the provided profile, the opponent is likely dispositionally cooperative and trusting but very anxious and low in conscientiousness. They will tend to initiate cooperation but may overreact to perceived exploitation with impulsive defections and inconsistent strategy changes. Expect friendly but unreliable behavior: mostly cooperative punctuated by sudden, short retaliatory or erratic switches.', 'forecast': 'If I choose C: most likely mutual cooperation (≈3 points/sec) with occasional brief opponent defections (0 points those seconds) due to anxiety—overall high but slightly noisy payoff. If I choose D: I may gain 5 immediately, but their neurotic retaliation and erratic switching will likely produce sustained or frequent mutual defection (≈1 point/sec) and lower long‑run payoff, plus unpredictable short punishments.', 'action': 'C'}
  B_decision: {'estimate': 'No move histo

In [None]:
import os
import pickle

from langchain.chat_models import init_chat_model
from joblib import Parallel, delayed
import pandas as pd

# ---- Experiment configuration (talk condition) ----
N_trials = 20
target_big5 = ["A", "C", "N"]
big5_list = list(all_big5_vectors(target_big5))
p_noises = [0]
delay_values = [0, 5, 10, 15, 20]
m = 15
n = 60
talk_results = []

# Read the API key from the environment so the cell works on a fresh kernel
# (the name was not defined anywhere in this notebook).
OPENAI_API_KEY = os.environ["OPENAI_API_KEY"]

llm = init_chat_model(
    "gpt-5-mini",
    model_provider="openai",    # use "anthropic" for Claude
    api_key=OPENAI_API_KEY,
    temperature=1.0
)

# The same description file is used for every trial, so load it once.
# (Per-trial generation was: [generate_personality_description(b5, llm) for b5 in big5_list],
#  pickled to ./summary_talk/personality_texts_{trial_id}_en_talk.pickle.)
# NOTE: pickle.load can execute arbitrary code; only open files produced by this project.
# NOTE(review): the first cell reads this file from '../result_delay/' — confirm the intended directory.
with open('./result_delay/personality_texts_0_en_talk.pickle', 'rb') as f:
    personality_texts = pickle.load(f)

# Indices into big5_list / personality_texts for players A and B.
# The full sweep would iterate i over range(len(big5_list)) and j over range(i, len(big5_list)).
i, j = 6, 6

for trial_id in range(N_trials):
    print("# Generate personality description DONE !")
    results_ = Parallel(n_jobs=20, backend="threading", verbose=10)(
        delayed(run_experiment)(
            {"A" : personality_texts[i], "B" : personality_texts[j]},
            {"A" : big5_list[i], "B" : big5_list[j]},
            m,
            n,
            delay,
            trial_id,
            llm,
            p_noise,
            do_talk = True
        )
        for delay in delay_values
        for p_noise in p_noises
    )
    talk_results += results_
