# In [5]:
# Required imports for the simulation
import openai  # OpenAI's API for GPT models
import os  # Operating system interfaces; used below to delete stale output files
import re
import random  # To introduce randomness in decisions
import matplotlib.pyplot as plt  # For plotting the results
from langchain.llms import OpenAI  # LangChain wrapper for easier use of OpenAI's language models
import csv
import time  # Import the time module

# OpenAI API key handed to every Agent and used by describe_strategy().
# It is read from the OPENAI_API_KEY environment variable so the secret never
# has to be written into the source; an empty string means "not configured".
api_key = os.environ.get("OPENAI_API_KEY", "")
class Agent:
    """
    A player in the iterated Prisoner's Dilemma.

    An agent named 'Agent A' plays the scripted ``strategy_func``; an agent named
    'Agent B' asks an OpenAI chat model for its move every round.

    :param name: Identifier of the agent; also the key under which this agent's move
                 is looked up in each round-result dictionary of ``history``.
    :param llm_api_key: OpenAI API key used when the agent queries the language model.
    :param strategy_func: Optional callable that receives the list of the opponent's
                          past actions and returns 'Cooperate' or 'Defect'.
    """

    # Payoff for this agent, keyed by (this agent cooperated, other agent cooperated).
    _PAYOFFS = {
        (True, True): 3,    # Reward for mutual cooperation
        (True, False): 0,   # Penalty for being exploited
        (False, True): 5,   # Reward for exploiting the other
        (False, False): 1,  # Small reward for mutual defection
    }

    def __init__(self, name, llm_api_key, strategy_func=None):
        self.name = name
        self.llm_api_key = llm_api_key
        self.strategy_func = strategy_func
        # Round results (dicts keyed by 'Agent A' / 'Agent B'); appended by the caller.
        self.history = []
        # Cumulative payoff, increased by update_score().
        self.score = 0

    def decide_action(self):
        """
        Chooses this agent's action for the next round.

        'Agent A' delegates to ``strategy_func`` with Agent B's past actions.
        'Agent B' prompts the language model, extracts 'Cooperate' or 'Defect' from the
        reply (choosing randomly when neither word is found or the request failed) and
        appends the prompt, raw reply and chosen action to ``llm_log.txt``.

        :return: 'Cooperate' or 'Defect'; None for any other agent name or for an
                 'Agent A' that has no strategy function.
        """
        if self.name == 'Agent A' and self.strategy_func:
            opponent_history = [round_result['Agent B'] for round_result in self.history]
            return self.strategy_func(opponent_history)

        if self.name == 'Agent B':
            prompt = f"You are Agent B in a game of Prisoner's Dilemma.  The history of the game so far is {self.history}. What would you choose to do.  Respond by only using the word 'Cooperate' or 'Defect'.  Then explain your decision"

            response = self._query_llm(prompt)
            llmresp = self._extract_action(response)

            # Log the results of each LLM action to a file for analysis.
            # UTF-8 is used because the model's reply may contain any Unicode text.
            with open("llm_log.txt", 'a', encoding="utf-8") as log_file:
                log_file.write(f"{prompt}\n{response}\n{llmresp}\n-----------------------------------------------------------------------\n")

            return llmresp

        # No decision rule applies to this agent.
        return None

    def _query_llm(self, prompt):
        """
        Sends ``prompt`` to the chat model and returns the text of its reply.

        :param prompt: The user message to send.
        :return: The stripped reply text, or an empty string if the request or the
                 extraction of the reply failed (the error is printed).
        """
        # Initialize OpenAI with the provided API key
        openai.api_key = self.llm_api_key

        try:
            completion = openai.ChatCompletion.create(
                model="gpt-3.5-turbo-0125",  # alternatives: "gpt-4-turbo-preview", "gpt-3.5-turbo"
                messages=[
                    {"role": "system", "content": "You are a wise oracle. Please provide guidance."},
                    {"role": "user", "content": prompt}
                ]
            )
            return completion['choices'][0]['message']['content'].strip()
        except Exception as e:
            # Best effort: a failed request must not abort the whole simulation, so
            # report it and let the caller fall back to a random action.
            print(f"Error: {e}")
            return ""

    @staticmethod
    def _extract_action(response):
        """
        Extracts the first 'Cooperate' or 'Defect' found in the model's reply.

        :param response: Reply text from the model (may be empty or None).
        :return: The matched word, or a random choice between the two if none is found.
        """
        match = re.search(r'Cooperate|Defect', response or "")
        if match:
            return match.group()

        # If no match is found, randomly choose one.
        print("Random selection made")
        return random.choice(["Cooperate", "Defect"])

    def update_score(self, other_agent_action):
        """
        Updates the agent's score based on the outcome of the round.

        The agent's own action is read from the most recent entry of ``history``, so the
        round result must have been appended before this method is called.

        :param other_agent_action: A string representing the other agent's last action ('Cooperate' or 'Defect').
        """
        # Any action other than 'Cooperate' is scored as a defection.
        i_cooperated = self.history[-1][self.name] == 'Cooperate'
        they_cooperated = other_agent_action == 'Cooperate'
        self.score += self._PAYOFFS[(i_cooperated, they_cooperated)]

def tit_for_tat(opponent_history):
    """
    'Tit for Tat': open with cooperation, then echo whatever the opponent did last.

    :param opponent_history: List of the opponent's past actions.
    :return: The opponent's most recent action, or 'Cooperate' when there is no history yet.
    """
    if opponent_history:
        return opponent_history[-1]
    return 'Cooperate'

def tit_for_two_tats(opponent_history):
    """
    Forgiving variant of Tit for Tat: retaliates only after two consecutive defections.

    :param opponent_history: List of the opponent's past actions.
    :return: 'Defect' when the opponent's two most recent actions were both 'Defect';
             'Cooperate' in every other case (including fewer than two rounds played).
    """
    two_defections_in_a_row = (
        len(opponent_history) >= 2
        and opponent_history[-2:] == ['Defect', 'Defect']
    )
    if two_defections_in_a_row:
        return 'Defect'
    return 'Cooperate'

def generous_tit_for_tat(opponent_history):
    """
    Tit for Tat with forgiveness: a defection is answered with a defection, except
    that roughly one time in ten it is forgiven.

    :param opponent_history: List of the opponent's past actions.
    :return: 'Cooperate' on the first move or after the opponent cooperated; after a
             defection, 'Cooperate' with probability 0.1 and 'Defect' otherwise.
    """
    if not opponent_history:
        return 'Cooperate'
    if opponent_history[-1] == 'Cooperate':
        return 'Cooperate'

    # A random number is drawn only when the opponent's last move was not cooperation.
    forgiven = random.random() < 0.1
    return 'Cooperate' if forgiven else 'Defect'

def grim_trigger(opponent_history):
    """
    Grim Trigger: cooperates until the opponent's first defection, then defects forever.

    :param opponent_history: List of the opponent's past actions.
    :return: 'Defect' if the opponent has ever defected; otherwise 'Cooperate'.
    """
    if 'Defect' in opponent_history:
        return 'Defect'
    return 'Cooperate'

def pavlov(opponent_history):
    """
    Pavlov (win-stay, lose-shift): cooperates if both agents made the same choice in
    the last round, otherwise defects. Cooperates on the first move.

    Only the opponent's actions are passed in, so this agent's own actions are
    reconstructed by replaying the rule from the opening 'Cooperate'. The
    reconstruction is exact as long as this strategy has chosen every one of the
    agent's moves since the first round.

    :param opponent_history: List of the opponent's past actions.
    :return: 'Cooperate' if this agent's and the opponent's actions matched in the
             last round (or no round has been played); otherwise, 'Defect'.
    """
    my_action = 'Cooperate'  # Opening move
    for opponent_action in opponent_history:
        # my_action is what this agent played in the round being replayed; the rule
        # turns it into the action for the following round.
        my_action = 'Cooperate' if my_action == opponent_action else 'Defect'
    return my_action

def adaptive_strategy(opponent_history):
    """
    Follows the opponent's majority behaviour so far.

    Cooperates when the opponent has cooperated in strictly more than half of the past
    rounds; defects otherwise, which includes an exact tie. Any recorded action other
    than 'Cooperate' is counted as a defection.

    :param opponent_history: List of the opponent's past actions.
    :return: 'Cooperate' on the first move or when cooperations are in the majority;
             otherwise 'Defect'.
    """
    if not opponent_history:
        # Nothing to adapt to yet: open with cooperation.
        return 'Cooperate'

    rounds_played = len(opponent_history)
    cooperations = opponent_history.count('Cooperate')
    # cooperations > (rounds_played - cooperations), written as a majority test.
    mostly_cooperative = 2 * cooperations > rounds_played
    return 'Cooperate' if mostly_cooperative else 'Defect'

def suspicious_tit_for_tat(opponent_history):
    """
    Tit for Tat with a distrustful opening: defects first, then copies the opponent.

    :param opponent_history: List of the opponent's past actions.
    :return: 'Defect' when there is no history yet; otherwise the opponent's last action.
    """
    return opponent_history[-1] if opponent_history else 'Defect'

def random_strategy(_):
    """
    Ignores the opponent's history and picks an action at random.

    :param _: The opponent's past actions; unused.
    :return: 'Cooperate' or 'Defect', each chosen with equal probability.
    """
    possible_actions = ['Cooperate', 'Defect']
    return random.choice(possible_actions)

def tester(opponent_history):
    """
    Starts by cooperating, defects on the next turn, then mimics the opponent's last action if they defect.
    
    :param opponent_history: List of the opponent's past actions.
    :return: 'Defect' if opponent defected last; otherwise, 'Cooperate'.
    """
    if len(opponent_history) < 1:
        return 'Cooperate'
    if len(opponent_history) == 1:
        return 'Defect'
    return 'Cooperate' if opponent_history[-1] == 'Defect' else 'Defect'

def gradual(opponent_history):
    """
    Cooperates unless the opponent's defections outnumber all of its other actions.

    :param opponent_history: List of the opponent's past actions.
    :return: 'Defect' if the count of 'Defect' entries is strictly greater than the
             count of the remaining entries; otherwise 'Cooperate' (ties and an empty
             history included).
    """
    defect_total = opponent_history.count('Defect')
    other_total = len(opponent_history) - defect_total
    return 'Defect' if defect_total > other_total else 'Cooperate'


def simulate_prisoners_dilemma(agent_a_strategy, iterations, delay=1.5):
    """
    Simulates the Prisoner's Dilemma game for a specified number of iterations using a strategy for Agent A.

    Agent A plays ``agent_a_strategy``; Agent B is created without a strategy function
    and therefore decides through the language model. One line per round is appended
    to ``strat_log.txt``.

    :param agent_a_strategy: A function representing the strategy Agent A will use.
    :param iterations: The number of rounds the simulation will run.
    :param delay: Seconds to pause after every round (default 1.5, the value that was
                  previously hard-coded); pass 0 to disable the pause.
    :return: A tuple ``(results, scores_a, scores_b, agent_a, agent_b)``: the outcome
             counts, the cumulative score of each agent after every round, and the
             two agent instances.
    """

    # Initialize the results dictionary to keep track of all possible game outcomes
    results = {"Cooperate-Cooperate": 0, "Cooperate-Defect": 0, "Defect-Cooperate": 0, "Defect-Defect": 0}

    # Create instances of Agent A and B; only Agent A receives the strategy function
    agent_a = Agent('Agent A', llm_api_key=api_key, strategy_func=agent_a_strategy)
    agent_b = Agent('Agent B', llm_api_key=api_key)

    # Cumulative scores of both agents after each round
    scores_a, scores_b = [], []

    for round_number in range(1, iterations + 1):
        # Both agents decide before either sees the other's choice for this round
        agent_a_choice = agent_a.decide_action()
        agent_b_choice = agent_b.decide_action()

        # The same round-result dictionary is stored in both agents' histories
        round_result = {'Agent A': agent_a_choice, 'Agent B': agent_b_choice}
        agent_a.history.append(round_result)
        agent_b.history.append(round_result)

        # update_score reads the agent's own action from the history entry just added
        agent_a.update_score(agent_b_choice)
        agent_b.update_score(agent_a_choice)

        scores_a.append(agent_a.score)
        scores_b.append(agent_b.score)

        # Count the joint outcome, keyed as "<A's choice>-<B's choice>"
        game_outcome = f"{agent_a_choice}-{agent_b_choice}"
        results[game_outcome] += 1

        # The log is reopened every round so each line reaches the disk even if a
        # later round fails.
        with open("strat_log.txt", 'a') as log_file:
            log_file.write(f"{agent_a_strategy.__name__}, {round_number}, Agent A (Score:{agent_a.score}) chose {agent_a_choice}, Agent B (LLM) (Score:{agent_b.score}) chose {agent_b_choice} \n")

        if delay > 0:
            time.sleep(delay)

    # Return the simulation results, scores for both agents, and the agent instances
    return results, scores_a, scores_b, agent_a, agent_b


def describe_strategy(agent):
    """
    Asks the OpenAI chat model to explain, in its own words, the strategy behind the
    game history recorded on ``agent``.

    :param agent: The agent object whose ``history`` is embedded in the prompt.
    :return: The model's stripped reply, or "No response generated" if the request or
             the extraction of the reply raised an exception (the error is printed).
    """
    # The complete game history is embedded verbatim in the user message.
    user_message = {
        "role": "user",
        "content": f"Throughout the entire Prisoner's Dilemma game you are Agent B and your actions were as follows: {agent.history}. Based on this history, describe your strategy and decision-making process.",
    }
    system_message = {"role": "system", "content": "You are a wise oracle. Please provide guidance."}

    # The module-level API key is used here, not the agent's own key.
    openai.api_key = api_key

    try:
        completion = openai.ChatCompletion.create(
            model="gpt-4-turbo-preview",
            messages=[system_message, user_message],
        )
        first_choice = completion['choices'][0]
        return first_choice['message']['content'].strip()
    except Exception as e:
        print(f"Error: {e}")
        return "No response generated"


# Define the number of iterations for the game simulation and a list of strategies to be evaluated
iterations = 200
strategies = [tit_for_tat, tit_for_two_tats, generous_tit_for_tat, grim_trigger, pavlov, adaptive_strategy, suspicious_tit_for_tat, random_strategy, tester, gradual]

# Delete output files left over from previous simulations to start fresh.
# The score file is 'strat_score.csv' (the name written below), not 'strat_score.txt'.
for stale_file in ("strat_desc.txt", "strat_log.txt", "llm_log.txt", "strat_score.csv"):
    if os.path.exists(stale_file):
        os.remove(stale_file)

# Write the header row outside of the loop to ensure it's only written once
with open('strat_score.csv', 'w', newline='') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(['Strategy Name', 'Agent A Score', 'Agent B (LLM) Score'])

# Iterate over each strategy to simulate the Prisoner's Dilemma game and analyze the results
for strategy in strategies:
    # Simulate the game using the current strategy and collect results and scores
    game_results, scores_a, scores_b, agent_a, agent_b = simulate_prisoners_dilemma(strategy, iterations)

    # Plot the cumulative score of both agents after every iteration
    plt.figure(figsize=(10, 6))
    plt.plot(range(1, iterations + 1), scores_a, label='Agent A', color='blue')
    plt.plot(range(1, iterations + 1), scores_b, label='Agent B', color='red')
    plt.xlabel('Iteration')  # Label for the x-axis
    plt.ylabel('Score')  # Label for the y-axis
    # Set the title of the plot to include the name of the current strategy being evaluated
    title = f"Scores of Agent A and Agent B (LLM) over Iterations for {strategy.__name__}"
    plt.title(title)
    plt.legend()  # Display a legend for the plot
    plt.grid(True)  # Enable grid lines for better readability

    # Save the plot before showing it, under a name derived from the strategy
    image_file_name = f"plot_{strategy.__name__}.png"
    plt.savefig(image_file_name)

    plt.show()  # Display the plot
    plt.close()  # Release the figure so figures do not pile up across strategies

    # Request a description of Agent B's strategy from the language model
    strategy_description_b = describe_strategy(agent_b)
    print(f"LLM strategy description:\n{strategy_description_b}\n")  # Print the generated strategy description

    # Append the strategy name and its description to the strategy description file.
    # UTF-8 is used because the model's text may contain any Unicode character.
    with open("strat_desc.txt", 'a', encoding="utf-8") as log_file:
        log_file.write(f"----------------------------------------------------------------------\n")
        log_file.write(f"\nStrategy Name: {strategy.__name__}\n\n")
        log_file.write(f"LLM strategy description:\n{strategy_description_b}\n")

    # scores_a / scores_b hold cumulative scores, so the last entry is the final score
    with open('strat_score.csv', 'a', newline='') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow([strategy.__name__, scores_a[-1], scores_b[-1]])




