<a href="https://colab.research.google.com/github/mdob367/pris_dilem/blob/main/2023_02_05_iterated_prisoners_dilemma.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 2023-02-05 Iterated prisoner's dilemma

starlog

2/26 Stephanie: added agent "Stephanie"

2/26 Vishal: added TitForTwoTats, GrimTrigger

2/11 Mac: added two agents and some rough code for a tournament. Altered one bit of pre-existing code so the score report shows the average score per round instead of the total.

In [None]:
from __future__ import annotations

import abc
import enum
import random
from typing import Any, Optional, Tuple

import attrs
import numpy as np
import pandas as pd

In [None]:
@enum.unique
class Decision(enum.Enum):
  """A single move in one round of the prisoner's dilemma."""
  # The integer values are also used as row/column indices into
  # `BinomialState.decision_matrix`, so they must stay 0 and 1.
  COOPERATE = 0
  DEFECT = 1

In [None]:
# Opaque per-agent memory; every agent chooses its own representation.
AgentState = Any


class Agent(abc.ABC):
  """Interface implemented by every iterated-prisoner's-dilemma strategy."""

  @property
  @abc.abstractmethod
  def initial_state(self) -> AgentState:
    """The state passed to the first `make_decision` call of a match."""

  @abc.abstractmethod
  def make_decision(
      self,
      other_agents_decisions: Tuple[Decision, ...],
      previous_state: AgentState,
  ) -> Tuple[Decision, AgentState]:
    """Choose this round's move.

    Args:
      other_agents_decisions: The opponent's moves so far in this match,
        newest first (index 0 is the previous round). Empty on the first
        round.
      previous_state: The state this agent returned from its previous call,
        or `initial_state` on the first round.

    Returns:
      A `(decision, state)` pair: the move for this round and the state to
      hand back to the next `make_decision` call.
    """

In [None]:
def payoff(
    your_decision: Decision,
    other_decision: Decision,
) -> int:
  """Return your score for a single round.

  Args:
    your_decision: The move you played.
    other_decision: The move the opponent played.

  Returns:
    3 for mutual cooperation, 0 if you cooperated and were defected against,
    5 if you defected against a cooperator, 1 for mutual defection.

  Raises:
    ValueError: If either argument is not a `Decision` member. Previously
      this case fell through and silently returned `None`.
  """
  # Keyed by (your move, opponent's move). Built inside the function so the
  # block does not depend on module-level evaluation order.
  scores = {
      (Decision.COOPERATE, Decision.COOPERATE): 3,
      (Decision.COOPERATE, Decision.DEFECT): 0,
      (Decision.DEFECT, Decision.COOPERATE): 5,
      (Decision.DEFECT, Decision.DEFECT): 1,
  }
  try:
    return scores[(your_decision, other_decision)]
  except (KeyError, TypeError):
    raise ValueError(
        f"Unsupported decisions: {your_decision!r}, {other_decision!r}"
    ) from None


def play_iterated_prisoners_dilemma(
    agent_1: Agent,
    agent_2: Agent,
    expeted_number_of_interactions: float = 200.0,
    rng: Optional[np.random.Generator] = None,
) -> Tuple[float, float]:
  """Play one match and return each agent's average score per round.

  Args:
    agent_1: The first player.
    agent_2: The second player.
    expeted_number_of_interactions: Mean of the Poisson distribution from
      which the number of rounds is drawn. (The misspelled name is kept so
      existing keyword callers continue to work.)
    rng: Generator used to draw the number of rounds. A fresh, unseeded
      generator is created when omitted.

  Returns:
    `(agent_1_average, agent_2_average)`, each rounded to two decimals. If
    zero rounds are drawn the result is `(0.0, 0.0)` instead of a
    division-by-zero error.
  """
  if rng is None:
    rng = np.random.default_rng()
  # Convert to a plain int so the averages below are plain Python floats.
  num_interactions = int(rng.poisson(expeted_number_of_interactions))
  if num_interactions == 0:
    # Nothing was played, so there is no average to compute.
    return 0.0, 0.0

  score_1 = 0
  score_2 = 0
  agent_1_state = agent_1.initial_state
  agent_2_state = agent_2.initial_state
  # Histories are kept newest-first, as `Agent.make_decision` expects.
  agent_1_decisions = ()
  agent_2_decisions = ()
  for _ in range(num_interactions):
    # Both agents decide from the history *before* this round, so neither
    # sees the other's current move.
    agent_1_decision, agent_1_state = agent_1.make_decision(
        other_agents_decisions=agent_2_decisions,
        previous_state=agent_1_state,
    )
    agent_2_decision, agent_2_state = agent_2.make_decision(
        other_agents_decisions=agent_1_decisions,
        previous_state=agent_2_state,
    )

    agent_1_decisions = (agent_1_decision,) + agent_1_decisions
    agent_2_decisions = (agent_2_decision,) + agent_2_decisions

    score_1 += payoff(
        your_decision=agent_1_decision,
        other_decision=agent_2_decision,
    )
    score_2 += payoff(
        your_decision=agent_2_decision,
        other_decision=agent_1_decision,
    )
  return (
      round(score_1 / num_interactions, 2),
      round(score_2 / num_interactions, 2),
  )

In [None]:
class TitForTat(Agent):
  """Cooperates on the first round, then copies the opponent's last move."""

  @property
  def initial_state(self) -> AgentState:
    """This strategy keeps no state."""
    return None

  def make_decision(
      self,
      other_agents_decisions: Tuple[Decision, ...],
      previous_state: AgentState,
  ) -> Tuple[Decision, AgentState]:
    """Mirror the opponent's most recent move (index 0 of the history)."""
    next_move = (
        other_agents_decisions[0]
        if other_agents_decisions
        else Decision.COOPERATE
    )
    return next_move, None

In [None]:
@attrs.frozen
class Random(Agent):
  """Picks each move at random, ignoring the opponent's history."""

  # Seed for the generator created at the start of every match.
  random_seed: int

  @property
  def initial_state(self) -> AgentState:
    """A new NumPy generator seeded with `random_seed`."""
    return np.random.default_rng(self.random_seed)

  def make_decision(
      self,
      other_agents_decisions: Tuple[Decision, ...],
      previous_state: AgentState,
  ) -> Tuple[Decision, AgentState]:
    """Draw a `Decision` from the generator carried in the state."""
    generator = previous_state
    # The generator advances in place, so the same object is handed back.
    return generator.choice(Decision), generator

In [None]:
class Mac(Agent):
  """Tit-for-tat that occasionally forgives a long run of defections.

  State is a two-item list `[serial_defections, cynicism]` that is mutated in
  place: the opponent's current streak of consecutive defections, and a
  threshold that grows each time forgiveness has been offered and ignored.
  """

  @property
  def initial_state(self) -> AgentState:
    """Start with no defections seen and a cynicism level of 2."""
    return [0, 2]

  def cynicism(agent, state):
    """Decide whether to offer cooperation regardless of the last move.

    Returns `(hope, state)`. `hope` is True when the defection streak equals
    `cynicism ** 2` or `cynicism ** 2 + 1`; in the latter case the streak is
    reset to 0 and the cynicism level is raised by one.
    """
    streak, level = state
    threshold = level ** 2
    if streak == threshold:
      return True, state
    if streak == threshold + 1:
      state[0] = 0
      state[1] = level + 1
      return True, state
    return False, state

  def make_decision(
      self,
      other_agents_decisions: Tuple[Decision, ...],
      previous_state: AgentState,
  ) -> Tuple[Decision, AgentState]:
    """Cooperate first; afterwards mirror the opponent unless feeling hopeful."""
    if not other_agents_decisions:
      return Decision.COOPERATE, previous_state

    last_move = other_agents_decisions[0]
    # Track consecutive defections; any cooperation resets the streak.
    if last_move == Decision.DEFECT:
      previous_state[0] += 1
    else:
      previous_state[0] = 0

    hopeful, updated_state = self.cynicism(previous_state)
    chosen = Decision.COOPERATE if hopeful else last_move
    return chosen, updated_state

In [None]:
class Cynic(Agent):
  """Defects on every round, whatever the opponent does."""

  def make_decision(
      self,
      other_agents_decisions: Tuple[Decision, ...],
      previous_state: AgentState,
  ) -> Tuple[Decision, AgentState]:
    """Always defect; the history and state are ignored."""
    return Decision.DEFECT, None

  @property
  def initial_state(self) -> AgentState:
    """This strategy keeps no state."""
    return None

In [None]:
class Rube(Agent):
  """Cooperates on every round, whatever the opponent does."""

  def make_decision(
      self,
      other_agents_decisions: Tuple[Decision, ...],
      previous_state: AgentState,
  ) -> Tuple[Decision, AgentState]:
    """Always cooperate; the history and state are ignored."""
    return Decision.COOPERATE, None

  @property
  def initial_state(self) -> AgentState:
    """This strategy keeps no state."""
    return None

In [None]:
class Stephanie(Agent):
  """Unconditional cooperator (currently the same behaviour as `Rube`)."""

  def make_decision(
      self,
      other_agents_decisions: Tuple[Decision, ...],
      previous_state: AgentState,
  ) -> Tuple[Decision, AgentState]:
    """Always cooperate; the history and state are ignored."""
    return Decision.COOPERATE, None

  @property
  def initial_state(self) -> AgentState:
    """This strategy keeps no state."""
    return None

In [None]:
class Troll(Agent):
  """Probes with two defections, then exploits pushovers or plays grim trigger.

  State is a two-item list of 0/1 flags, `[troll, grim trigger]`, mutated in
  place. Once either flag is set the agent defects for the rest of the match.
  """

  @property
  def initial_state(self) -> AgentState:
    """Both flags start cleared."""
    return [0, 0]

  def make_decision(
      self,
      other_agents_decisions: Tuple[Decision, ...],
      previous_state: AgentState,
  ) -> Tuple[Decision, AgentState]:
    """Pick a move from the round number, the history and the two flags."""
    # Either flag being set means permanent defection.
    if 1 in previous_state:
      return Decision.DEFECT, previous_state

    rounds_played = len(other_agents_decisions)

    # Opening probe: defect on the first two rounds.
    if rounds_played < 2:
      return Decision.DEFECT, previous_state

    # The opponent cooperated through both probes: exploit them from now on.
    if rounds_played == 2 and Decision.DEFECT not in other_agents_decisions:
      previous_state[0] = 1
      return Decision.DEFECT, previous_state

    # Otherwise offer peace until four rounds have been played.
    if rounds_played < 4:
      return Decision.COOPERATE, previous_state

    # From then on cooperate until the opponent's latest move is a defection.
    if other_agents_decisions[0] == Decision.COOPERATE:
      return Decision.COOPERATE, previous_state
    previous_state[1] = 1
    return Decision.DEFECT, previous_state

In [None]:
class TitForTwoTats(Agent):
  """Defects only after the opponent has defected twice in a row."""

  @property
  def initial_state(self) -> AgentState:
    """This strategy keeps no state."""
    return None

  def make_decision(
      self,
      other_agents_decisions: Tuple[Decision, ...],
      previous_state: AgentState,
  ) -> Tuple[Decision, AgentState]:
    """Retaliate when the two most recent opponent moves were both defections."""
    latest_two = other_agents_decisions[:2]
    # With fewer than two rounds of history there is nothing to punish yet.
    provoked = len(latest_two) == 2 and all(
        move == Decision.DEFECT for move in latest_two
    )
    return (Decision.DEFECT if provoked else Decision.COOPERATE), None

In [None]:
class GrimTrigger(Agent):
  """Cooperates until the opponent defects once, then defects forever."""

  @property
  def initial_state(self) -> AgentState:
    """No state is kept; the full history is re-scanned every round."""
    return None

  def make_decision(
      self,
      other_agents_decisions: Tuple[Decision, ...],
      previous_state: AgentState,
  ) -> Tuple[Decision, AgentState]:
    """Defect if the opponent has ever defected, otherwise cooperate."""
    betrayed = Decision.DEFECT in other_agents_decisions
    return (Decision.DEFECT if betrayed else Decision.COOPERATE), None

In [None]:
class EricTheEvil(Agent):
  """Defects with probability equal to the opponent's cooperation rate.

  The more the opponent has cooperated, the more likely this agent is to
  exploit them; against a habitual defector it mostly cooperates.
  """

  @property
  def initial_state(self) -> AgentState:
    """This strategy keeps no state."""
    return None

  def make_decision(
      self,
      other_agents_decisions: Tuple[Decision, ...],
      previous_state: AgentState,
  ) -> Tuple[Decision, AgentState]:
    """Randomly defect, weighted by how cooperative the opponent has been.

    Args:
      other_agents_decisions: The opponent's moves so far, newest first.
      previous_state: Unused.

    Returns:
      `(decision, None)`. The decision is DEFECT when the opponent's observed
      defection rate is below a fresh uniform draw from `random.random()`.
    """
    if not other_agents_decisions:
      # An empty history is treated as a defection rate of 0, which the rule
      # below maps to DEFECT. This also avoids dividing by zero.
      # NOTE(review): confirm the intended first-round move with the author.
      return Decision.DEFECT, None

    defections = sum(
        move == Decision.DEFECT for move in other_agents_decisions
    )
    defection_rate = defections / len(other_agents_decisions)
    if defection_rate < random.random():
      return Decision.DEFECT, None
    return Decision.COOPERATE, None


In [None]:
import operator

In [None]:
@attrs.frozen
class BinomialState:
  """Immutable record of what the `Binomial` agent has observed so far.

  Both update methods return a new `BinomialState` instead of modifying this
  one.
  """

  # This agent's own moves, oldest first.
  my_decisions: list[Decision]
  # 2x2 counts: entry [i, j] is how often the opponent played the move with
  # value j in the round after this agent played the move with value i.
  decision_matrix: np.ndarray

  def add_decision(
      self,
      my_decision: Decision,
  ) -> BinomialState:
    """Return a copy of the state with `my_decision` appended to the history."""
    extended_history = [*self.my_decisions, my_decision]
    return BinomialState(
        my_decisions=extended_history,
        decision_matrix=self.decision_matrix,
    )

  def update_decision_matrix(
      self,
      other_decision: Decision,
  ) -> BinomialState:
    """Return a copy of the state with the opponent's latest reaction counted.

    The opponent's latest move is attributed to this agent's second-to-last
    move, since that is the newest one the opponent could have seen when
    choosing. Nothing is counted until this agent has made two moves.
    """
    counts = self.decision_matrix.copy()
    if len(self.my_decisions) >= 2:
      provoking_move = self.my_decisions[-2]
      counts[provoking_move.value, other_decision.value] += 1
    return BinomialState(
        my_decisions=self.my_decisions,
        decision_matrix=counts,
    )


class Binomial(Agent):
  """Tit-for-tat that switches to defection against seemingly random opponents."""

  @property
  def initial_state(self) -> AgentState:
    """An empty move history and an all-zero 2x2 reaction-count matrix."""
    return BinomialState(
        my_decisions=[],
        decision_matrix=np.zeros((2, 2), dtype=int),
    )

  @staticmethod
  def other_is_random(decision_matrix: np.ndarray) -> bool:
    """Heuristically test whether the opponent's reactions look like coin flips.

    For each row of `decision_matrix` (one per possible previous move of this
    agent) the opponent's defection frequency and its standard error are
    estimated. The result is true only if, for every row, the frequency plus
    or minus two standard errors stays strictly inside (0, 1) and 0.5 lies
    within two standard errors of the frequency. Rows with fewer than two
    observations make the result False.
    """
    samples_per_row = decision_matrix.sum(axis=1)
    if (samples_per_row < 2).any():
      return False

    defect_rate = decision_matrix[:, 1] / samples_per_row
    std_devs = np.sqrt(defect_rate * (1 - defect_rate) / (samples_per_row - 1))

    below_one = defect_rate + 2 * std_devs < 1
    above_zero = defect_rate - 2 * std_devs > 0
    near_half = np.abs(defect_rate - 0.5) < 2 * std_devs
    return (below_one & above_zero & near_half).all()

  def make_decision(self, other_agents_decisions: Tuple[Decision, ...], previous_state: AgentState) -> Tuple[Decision, AgentState]:
    """Cooperate first; then mirror the opponent, or defect if they seem random."""
    if other_agents_decisions:
      last_move = other_agents_decisions[0]
      observed = previous_state.update_decision_matrix(last_move)
      if self.other_is_random(observed.decision_matrix):
        chosen = Decision.DEFECT
      else:
        chosen = last_move
    else:
      # First round: nothing has been observed yet, so open by cooperating.
      observed = previous_state
      chosen = Decision.COOPERATE
    return chosen, observed.add_decision(chosen)

In [None]:
# Round-robin tournament: every agent plays one match against every agent,
# including itself.
# NOTE(review): Stephanie, TitForTwoTats, GrimTrigger and EricTheEvil are
# defined in this notebook but are not entered here.
agents = [TitForTat(), Mac(), Cynic(), Random(random_seed=1), Rube(), Troll(), Binomial()]
num_agents = len(agents)
# Take the labels from the classes so they cannot drift from `agents`.
agent_names = [type(agent).__name__ for agent in agents]

# results[i][j] holds the (row agent, column agent) average scores for the
# match in which agents[i] plays first against agents[j].
results = [
    [
        play_iterated_prisoners_dilemma(
            agent_1=first_agent,
            agent_2=opponent,
        )
        for opponent in agents
    ]
    for first_agent in agents
]
print(results)
df = pd.DataFrame(results, columns=agent_names, index=agent_names)
df

[[(3.0, 3.0), (3.0, 3.0), (1.0, 1.02), (2.21, 2.21), (3.0, 3.0), (2.98, 2.98), (3.0, 3.0)], [(3.0, 3.0), (3.0, 3.0), (0.93, 1.27), (2.18, 2.24), (3.0, 3.0), (2.99, 2.99), (3.0, 3.0)], [(1.02, 1.0), (1.27, 0.93), (1.0, 1.0), (2.91, 0.52), (5.0, 0.0), (1.04, 0.99), (1.02, 0.99)], [(2.21, 2.21), (2.27, 2.19), (0.52, 2.91), (1.95, 1.95), (4.05, 1.43), (0.56, 2.91), (0.94, 2.74)], [(3.0, 3.0), (3.0, 3.0), (0.0, 5.0), (1.42, 4.05), (3.0, 3.0), (0.0, 5.0), (3.0, 3.0)], [(2.98, 2.98), (2.99, 2.99), (0.99, 1.04), (2.9, 0.56), (5.0, 0.0), (2.98, 2.98), (2.99, 2.99)], [(3.0, 3.0), (3.0, 3.0), (1.0, 1.02), (2.73, 0.95), (3.0, 3.0), (2.98, 2.98), (3.0, 3.0)]]


Unnamed: 0,TitForTat,Mac,Cynic,Random,Rube,Troll,Binomial
TitForTat,"(3.0, 3.0)","(3.0, 3.0)","(1.0, 1.02)","(2.21, 2.21)","(3.0, 3.0)","(2.98, 2.98)","(3.0, 3.0)"
Mac,"(3.0, 3.0)","(3.0, 3.0)","(0.93, 1.27)","(2.18, 2.24)","(3.0, 3.0)","(2.99, 2.99)","(3.0, 3.0)"
Cynic,"(1.02, 1.0)","(1.27, 0.93)","(1.0, 1.0)","(2.91, 0.52)","(5.0, 0.0)","(1.04, 0.99)","(1.02, 0.99)"
Random,"(2.21, 2.21)","(2.27, 2.19)","(0.52, 2.91)","(1.95, 1.95)","(4.05, 1.43)","(0.56, 2.91)","(0.94, 2.74)"
Rube,"(3.0, 3.0)","(3.0, 3.0)","(0.0, 5.0)","(1.42, 4.05)","(3.0, 3.0)","(0.0, 5.0)","(3.0, 3.0)"
Troll,"(2.98, 2.98)","(2.99, 2.99)","(0.99, 1.04)","(2.9, 0.56)","(5.0, 0.0)","(2.98, 2.98)","(2.99, 2.99)"
Binomial,"(3.0, 3.0)","(3.0, 3.0)","(1.0, 1.02)","(2.73, 0.95)","(3.0, 3.0)","(2.98, 2.98)","(3.0, 3.0)"


In [None]:
# Single match: TitForTat against a Random agent seeded with 1. No `rng` is
# passed, so the number of rounds comes from an unseeded generator and the
# averages can differ from run to run.
play_iterated_prisoners_dilemma(
    agent_1=TitForTat(),
    agent_2=Random(random_seed=1),
)

(2.2, 2.22)

In [None]:
# Single match: TitForTat against another TitForTat.
play_iterated_prisoners_dilemma(
    agent_1=TitForTat(),
    agent_2=TitForTat(),
)

(3.0, 3.0)

In [None]:
# Single match: TitForTat against a Random agent seeded with 0.
play_iterated_prisoners_dilemma(
    agent_1=TitForTat(),
    agent_2=Random(random_seed=0),
)

(2.15, 2.15)

In [None]:
# Same pairing as the earlier seed-1 match; only the unseeded draw of the
# number of rounds differs between runs.
play_iterated_prisoners_dilemma(
    agent_1=TitForTat(),
    agent_2=Random(random_seed=1),
)

(2.2, 2.23)