In [1]:
from langchain_openai import ChatOpenAI
from langchain_core.messages import HumanMessage, AIMessage, SystemMessage
from langgraph.checkpoint.memory import MemorySaver
from langgraph.prebuilt import create_react_agent
from dotenv import load_dotenv
from langchain_community.tools.tavily_search import TavilySearchResults
from langchain_community.chat_models import ChatOllama

In [2]:
class GameAgent:
    """LLM-backed player that answers one game prompt per call.

    Wraps a ReAct agent built with ``create_react_agent`` from an OpenAI chat
    model, a Tavily search tool and an in-memory checkpointer.
    """

    def __init__(self, system_message: str):
        """Create the model, tools, agent executor and initial chat history.

        Args:
            system_message: Text of the system message that is placed first in
                the message list sent with every request.
        """
        self.model = ChatOpenAI(model="gpt-3.5-turbo", temperature=0.5)
        self.memory = MemorySaver()
        self.tools = [TavilySearchResults(max_results=3)] # replace this with more relevant tool later if needed!
        # Build the agent graph exactly once and keep it on the instance.
        self.agent_executor = create_react_agent(self.model, self.tools, checkpointer=self.memory)
        # Initialize chat history with system message
        self.chat_history = [SystemMessage(content=system_message)]
        # NOTE(review): the thread id is hard-coded. Every instance creates its
        # own MemorySaver, so two agents presumably do not share checkpoints,
        # but the checkpointer may still retain earlier rounds for this thread
        # even though get_agent_response() removes the prompt from
        # chat_history -- confirm this is the intended memory model.
        self.config = {"configurable": {"thread_id": "abc123"}}

    def add_prompt_message(self, content: str):
        """Append ``content`` to the chat history as a human message."""
        self.chat_history.append(HumanMessage(content=content))

    def get_agent_response(self, prompt: str):
        """Send ``prompt`` to the agent and return its reply, trimmed and upper-cased.

        The prompt is added to the chat history only for the duration of the
        call; it is removed again afterwards, including when streaming fails,
        so the history always goes back to its pre-call contents.

        Args:
            prompt: Task prompt for the current round.

        Returns:
            The content of the first message in the last streamed chunk's
            ``'agent'`` entry, with surrounding whitespace removed and
            converted to upper case.

        Raises:
            RuntimeError: If the agent stream yields no chunks at all.
        """
        self.add_prompt_message(prompt)
        print("prompt: ", prompt)
        last_chunk = None
        try:
            for chunk in self.agent_executor.stream({"messages": self.chat_history}, self.config):
                last_chunk = chunk
        finally:
            # remove last message from chat history, even on failure, so a
            # failed round cannot leave a stale prompt behind
            self.chat_history.pop()
        if last_chunk is None:
            raise RuntimeError("Agent stream produced no output for the prompt.")
        # Trim whitespace so replies such as "C\n" compare equal to "C".
        return last_chunk['agent']['messages'][0].content.strip().upper()

In [3]:
def get_system_prompt():
    sys_prompt = (
        "You are a rational agent playing against another agent. In each round, you have two choices: "
        "Cooperate (C) or Defect (D). Your decision will affect both your score and the other agent's score.\n\n"
        "Payoff Matrix:\n"
        "- If both you and the other agent cooperate, you each receive 3.\n"
        "- If you cooperate and the other agent defects, you receive 0 and the other agent receives 5.\n"
        "- If you defect and the other agent cooperates, you receive 5 and the other agent receives 0.\n"
        "- If both defect, you each receive 1.\n\n"
        "Your goal is to maximize your total score over multiple rounds.\n\n"
    )
    return sys_prompt

def get_task_prompt(your_past_moves, your_score, opponent_past_moves, opponent_score):
    """Build the per-round prompt from one player's point of view.

    Args:
        your_past_moves: Moves made so far by the player being prompted.
        your_score: That player's accumulated score.
        opponent_past_moves: Moves made so far by the other player.
        opponent_score: The other player's accumulated score.

    Returns:
        A prompt string that lists the history and scores, then asks for a
        single-character answer, 'C' or 'D'.
    """
    # Empty strings become the blank lines between the prompt's sections.
    prompt_lines = [
        "Previous Interactions:",
        "",
        f"Your past moves: {your_past_moves}",
        f"Your Score: {your_score}",
        f"Opponent's past moves: {opponent_past_moves}",
        f"Opponent's Score: {opponent_score}",
        "",
        "Current Situation:",
        "",
        "For this round, you must choose either to Cooperate (C) or Defect (D).",
        "Do not use any of your tools or external resources to make this decision.",
        "",
        "Output Instructions: Your response should be exactly one character, either 'C' or 'D'. Do not include any additional text or explanation.",
    ]
    return "\n".join(prompt_lines)

In [4]:
class PrisonersDilemmaGame:
    """Iterated Prisoner's Dilemma played between two ``GameAgent`` players.

    Tracks each player's move history, running score and the running score
    recorded after every round.
    """

    # (agent 1 move, agent 2 move) -> (agent 1 payoff, agent 2 payoff)
    PAYOFF_MATRIX = {
        ('C', 'C'): (3, 3),
        ('C', 'D'): (0, 5),
        ('D', 'C'): (5, 0),
        ('D', 'D'): (1, 1)
    }

    def __init__(self, num_rounds):
        """Create both agents and reset all histories and scores.

        Args:
            num_rounds: Number of rounds ``play`` will run.
        """
        self.agent1 = GameAgent(system_message=get_system_prompt())
        self.agent2 = GameAgent(system_message=get_system_prompt())
        self.num_rounds = num_rounds
        self.history1 = []
        self.history2 = []
        self.score1 = 0
        self.score2 = 0
        self.scores_round1 = []
        self.scores_round2 = []

    @staticmethod
    def _normalize_decision(raw_decision):
        """Reduce a raw agent reply to the canonical move 'C' or 'D'.

        Surrounding whitespace, quotes, brackets and sentence punctuation are
        ignored and case is folded, so replies such as ``" c\\n"``, ``"'D'"``
        or ``"Cooperate."`` are accepted.

        Raises:
            ValueError: If the reply cannot be read as cooperate or defect.
        """
        cleaned = str(raw_decision).strip(" \t\r\n'\"`.,!;:()[]").upper()
        if cleaned in ('C', 'COOPERATE'):
            return 'C'
        if cleaned in ('D', 'DEFECT'):
            return 'D'
        raise ValueError(
            f"Unrecognized decision {raw_decision!r}; expected 'C' or 'D'."
        )

    def play(self):
        """Run ``num_rounds`` rounds, printing each round's decisions and scores.

        Each round both agents are prompted with the state as seen from their
        own side. Their replies are normalized to 'C'/'D' before being stored
        in the histories, so later prompts only ever show canonical moves.

        Raises:
            ValueError: If an agent's reply is not a recognizable move.
        """
        for round_number in range(1, self.num_rounds + 1):
            print(f"Round {round_number}")

            # Generate prompts for both agents
            agent1_prompt = get_task_prompt(self.history1, self.score1, self.history2, self.score2)
            agent2_prompt = get_task_prompt(self.history2, self.score2, self.history1, self.score1)

            # Get decisions from both agents
            decision1 = self._normalize_decision(self.agent1.get_agent_response(agent1_prompt))
            decision2 = self._normalize_decision(self.agent2.get_agent_response(agent2_prompt))

            # Update histories
            self.history1.append(decision1)
            self.history2.append(decision2)

            # Update scores
            self.update_scores(decision1, decision2)

            # Store scores for each round
            self.scores_round1.append(self.score1)
            self.scores_round2.append(self.score2)

            # Print the results of the round
            print(f"Agent 1 Decision: {decision1}, Agent 2 Decision: {decision2}")
            print(f"Scores -> Agent 1: {self.score1}, Agent 2: {self.score2}\n")

    def update_scores(self, decision1, decision2):
        """Add one round's payoffs to both running scores.

        Both decisions are validated before either score is touched, so an
        invalid reply never leaves the scores half-updated.

        Args:
            decision1: Agent 1's move ('C' or 'D', tolerant of stray
                whitespace, quotes and punctuation).
            decision2: Agent 2's move, same format.

        Raises:
            ValueError: If either decision is not a recognizable move.
        """
        move1 = self._normalize_decision(decision1)
        move2 = self._normalize_decision(decision2)
        payoff1, payoff2 = self.PAYOFF_MATRIX[(move1, move2)]
        self.score1 += payoff1
        self.score2 += payoff2

    def get_scores(self):
        """Returns the accumulated scores per round for both agents."""
        return self.scores_round1, self.scores_round2


In [5]:
# Example usage: play a ten-round match, then report each agent's final total
game = PrisonersDilemmaGame(num_rounds=10)
game.play()
final_scores = game.get_scores()
# get_scores() returns the per-round running totals; the last entry of each
# list is that agent's final score.
print(
    "Final Scores:",
    f"Agent 1: {final_scores[0][-1]}",
    f"Agent 2: {final_scores[1][-1]}",
    sep="\n",
)

Round 1
prompt:  Previous Interactions:

Your past moves: []
Your Score: 0
Opponent's past moves: []
Opponent's Score: 0

Current Situation:

For this round, you must choose either to Cooperate (C) or Defect (D).
Do not use any of your tools or external resources to make this decision.

Output Instructions: Your response should be exactly one character, either 'C' or 'D'. Do not include any additional text or explanation.
prompt:  Previous Interactions:

Your past moves: []
Your Score: 0
Opponent's past moves: []
Opponent's Score: 0

Current Situation:

For this round, you must choose either to Cooperate (C) or Defect (D).
Do not use any of your tools or external resources to make this decision.

Output Instructions: Your response should be exactly one character, either 'C' or 'D'. Do not include any additional text or explanation.
Agent 1 Decision: C, Agent 2 Decision: C
Scores -> Agent 1: 3, Agent 2: 3

Round 2
prompt:  Previous Interactions:

Your past moves: ['C']
Your Score: 3
Opp