In [2]:
!pip3 install -q agent-exchange

You should consider upgrading via the 'pip install --upgrade pip' command.


In [1]:
from exchange import PrisonersDilemmaExchange
from agent import PrisonersDilemmaBaseAgent, Actions
from utils import BufferList

In [2]:
class Altruist(PrisonersDilemmaBaseAgent):
    """Agent that unconditionally cooperates: every move is ``Actions.NO_DEFECT``."""

    def __init__(self) -> None:
        # No extra state; only the base-class initialisation is run.
        super().__init__()

    def get_action(self, exchange_state):
        """Record and return ``Actions.NO_DEFECT``.

        ``exchange_state`` is accepted for interface compatibility but is
        never read by this agent.
        """
        # `historical_actions` is not created in this class -- presumably the
        # base class sets it up; verify against PrisonersDilemmaBaseAgent.
        self.historical_actions.append(Actions.NO_DEFECT)
        return Actions.NO_DEFECT

In [3]:
class Antagonist(PrisonersDilemmaBaseAgent):
    """Agent that unconditionally defects: every move is ``Actions.DEFECT``."""

    def get_action(self, exchange_state):
        """Record and return ``Actions.DEFECT``.

        ``exchange_state`` is accepted for interface compatibility but is
        never read by this agent.
        """
        # `historical_actions` is not created in this class -- presumably the
        # base class sets it up; verify against PrisonersDilemmaBaseAgent.
        self.historical_actions.append(Actions.DEFECT)
        return Actions.DEFECT

In [4]:
class TitForTat(PrisonersDilemmaBaseAgent):
    """Agent that cooperates first, then defects whenever the most recently
    stored round info contains ``Actions.DEFECT``."""

    def __init__(self) -> None:
        super().__init__()
        # Capacity 1: the strategy only ever consults the latest entry.
        self.adversary_historical_actions = BufferList(1)

    def get_action(self, exchange_state):
        """Choose, record and return this round's action.

        Returns ``Actions.NO_DEFECT`` while nothing has been stored yet;
        afterwards returns ``Actions.DEFECT`` if it is found in the entry
        returned by ``peek(0)``, otherwise ``Actions.NO_DEFECT``.
        ``exchange_state`` is not read.
        """
        seen = self.adversary_historical_actions
        # Short-circuit keeps peek(0) from being called on an empty buffer.
        retaliate = len(seen) != 0 and Actions.DEFECT in seen.peek(0)
        action = Actions.DEFECT if retaliate else Actions.NO_DEFECT
        self.historical_actions.append(action)
        return action

    def action_results_update(self, new_exchange_state, reward, done, info):
        """Store ``info`` from the finished round for the next decision.

        Only ``info`` is used; the other arguments are ignored.
        """
        # NOTE(review): `info` is presumably a collection holding the
        # opponent's action(s) -- confirm against the exchange implementation.
        self.adversary_historical_actions.append(info)

## Run Simple Test

In [5]:
def simple_reward_fn(twoPlayersActions):
    r"""Return the reward pair for one round of a simple prisoner's dilemma.

    Payoffs are ``(player 1 reward, player 2 reward)``; player 1 picks the
    row and player 2 picks the column:

    1 \ 2       defect      no defect
    defect      (-7, -7)    (0, -10)
    no defect   (-10, 0)    (-3, -3)

    Args:
        twoPlayersActions: iterable of exactly two actions, player 1's
            first. Actions are compared by equality with each other and
            with ``Actions.DEFECT``.

    Returns:
        A 2-tuple of ints taken from the matrix above.

    Raises:
        ValueError: if ``twoPlayersActions`` does not unpack into exactly
            two items.
    """
    # The docstring is a raw string because the "1 \ 2" table header would
    # otherwise be an invalid escape sequence (a warning on recent Pythons).
    first, second = twoPlayersActions
    first_defects = first == Actions.DEFECT
    if first == second:
        # Both players made the same choice.
        return (-7, -7) if first_defects else (-3, -3)
    # Choices differ: the defector gets 0, the other player gets -10.
    return (0, -10) if first_defects else (-10, 0)

In [6]:
def simulate(players, steps):
    """Run a prisoner's dilemma exchange between ``players``.

    Builds a ``PrisonersDilemmaExchange`` from ``players`` and
    ``simple_reward_fn``, then calls its ``simulate_steps(steps)``.
    Nothing is returned.
    """
    PrisonersDilemmaExchange(players, simple_reward_fn).simulate_steps(steps)

In [7]:
# Baseline: two unconditional cooperators; every round pays (-3, -3).
couple_of_altruists = [Altruist(), Altruist()]
simulate(players=couple_of_altruists, steps=10)

The rewards were (-3, -3) in round 0.
The rewards were (-3, -3) in round 1.
The rewards were (-3, -3) in round 2.
The rewards were (-3, -3) in round 3.
The rewards were (-3, -3) in round 4.
The rewards were (-3, -3) in round 5.
The rewards were (-3, -3) in round 6.
The rewards were (-3, -3) in round 7.
The rewards were (-3, -3) in round 8.
The rewards were (-3, -3) in round 9.


In [8]:
# Baseline: two unconditional defectors; every round pays (-7, -7).
couple_of_antagonists = [Antagonist(), Antagonist()]
simulate(players=couple_of_antagonists, steps=10)

The rewards were (-7, -7) in round 0.
The rewards were (-7, -7) in round 1.
The rewards were (-7, -7) in round 2.
The rewards were (-7, -7) in round 3.
The rewards were (-7, -7) in round 4.
The rewards were (-7, -7) in round 5.
The rewards were (-7, -7) in round 6.
The rewards were (-7, -7) in round 7.
The rewards were (-7, -7) in round 8.
The rewards were (-7, -7) in round 9.


In [9]:
# TitForTat never sees a defection from the Altruist, so both keep
# cooperating and every round pays (-3, -3).
altruist_and_titfortat = [Altruist(), TitForTat()]
simulate(players=altruist_and_titfortat, steps=10)

The rewards were (-3, -3) in round 0.
The rewards were (-3, -3) in round 1.
The rewards were (-3, -3) in round 2.
The rewards were (-3, -3) in round 3.
The rewards were (-3, -3) in round 4.
The rewards were (-3, -3) in round 5.
The rewards were (-3, -3) in round 6.
The rewards were (-3, -3) in round 7.
The rewards were (-3, -3) in round 8.
The rewards were (-3, -3) in round 9.


In [10]:
# TitForTat cooperates in round 0 and is exploited (0, -10); from round 1
# on it defects too, so the remaining rounds pay (-7, -7).
antagonist_and_titfortat = [Antagonist(), TitForTat()]
simulate(players=antagonist_and_titfortat, steps=10)

The rewards were (0, -10) in round 0.
The rewards were (-7, -7) in round 1.
The rewards were (-7, -7) in round 2.
The rewards were (-7, -7) in round 3.
The rewards were (-7, -7) in round 4.
The rewards were (-7, -7) in round 5.
The rewards were (-7, -7) in round 6.
The rewards were (-7, -7) in round 7.
The rewards were (-7, -7) in round 8.
The rewards were (-7, -7) in round 9.


In [12]:
# Both TitForTat agents open with NO_DEFECT and never see a defection,
# so every round pays (-3, -3).
titfortat_and_titfortat = [TitForTat(), TitForTat()]
simulate(players=titfortat_and_titfortat, steps=10)

The rewards were (-3, -3) in round 0.
The rewards were (-3, -3) in round 1.
The rewards were (-3, -3) in round 2.
The rewards were (-3, -3) in round 3.
The rewards were (-3, -3) in round 4.
The rewards were (-3, -3) in round 5.
The rewards were (-3, -3) in round 6.
The rewards were (-3, -3) in round 7.
The rewards were (-3, -3) in round 8.
The rewards were (-3, -3) in round 9.
