## iterated prisoner's dilemma

In [4]:
import numpy as np

In [103]:
def play_round(game, p0, p1, history):
    """
    Play a single round of `game` between the strategies `p0` and `p1`.

    Each strategy is called as ``strategy(history, player=index)`` and
    returns the action it plays this round. `game` is indexed as
    ``game[own_action, opponent_action]`` to obtain a player's payoff.

    The pair of actions is appended to `history` in place, and the same
    list is returned together with the tuple ``(p0_payoff, p1_payoff)``.
    """
    # both strategies decide before the round is recorded
    moves = (p0(history, player=0), p1(history, player=1))
    history.append(moves)

    # payoffs are looked up from each player's own point of view
    move0, move1 = moves
    payoffs = (game[move0, move1], game[move1, move0])

    return history, payoffs


def play(game, p0, p1, initial_play, nrounds=1):
    """
    Play `nrounds` rounds of `game` with players p0 and p1, starting
    from the tuple `initial_play`.

    Returns ``(history, payoffs)``: `history` is the list of action
    pairs, beginning with `initial_play`, and `payoffs` is the list of
    per-round ``(p0_payoff, p1_payoff)`` tuples (one per round played).
    """
    history = [initial_play]
    payoffs = []

    # iterate over the `nrounds` argument (the loop previously referred
    # to an undefined name `n`)
    for _ in range(nrounds):
        history, payoff = play_round(game, p0, p1, history)
        payoffs.append(payoff)

    return history, payoffs


##### some strategies

def always_defect(history, player=0):
    """
    Unconditional defector: ignores `history` and `player` and
    always plays 1 (defect).
    """
    defect = 1
    return defect


def always_cooperate(history, player=0):
    """
    Unconditional cooperator: ignores `history` and `player` and
    always plays 0 (cooperate).
    """
    cooperate = 0
    return cooperate


def tit_for_tat(history, player=0):
    """
    Plays the opponent's last move.

    `history` is a list of ``(p0_action, p1_action)`` tuples and
    `player` is this player's index (0 or 1). The history is only read,
    never modified. With an empty history the strategy cooperates (0).
    """
    if not history:
        return 0

    # index of the other player within each (p0, p1) tuple
    opponent = 0 if player else 1

    # read the last round without popping it: the history list is shared
    # with the other player and with later rounds
    return history[-1][opponent]


def tit_for_two_tats(history, player=0):
    """
    Defect if the opponent defected in both of the last two rounds,
    otherwise cooperate.

    `history` is a list of ``(p0_action, p1_action)`` tuples and
    `player` is this player's index (0 or 1). The history is only read,
    never modified. When fewer than two rounds have been played the
    strategy cooperates (0).
    """
    if len(history) < 2:
        return 0

    # index of the other player within each (p0, p1) tuple
    opponent = 0 if player else 1

    # slice instead of popping so the shared history stays intact
    last_two = history[-2:]
    return int(all(moves[opponent] == 1 for moves in last_two))


def grudger(history, player=0):
    """
    Cooperates (0) as long as the opponent has never defected;
    once a defection (1) appears anywhere in the opponent's past
    moves, defects (1) from then on.
    """
    # index of the other player within each (p0, p1) tuple
    opponent = 0 if player else 1
    opponent_moves = [moves[opponent] for moves in history]

    has_defected = 1 in opponent_moves
    return 1 if has_defected else 0


def random(history, player=0):
    """
    Coin-flip strategy: ignores `history` and `player` and picks
    0 or 1 with a freshly created NumPy random generator.
    """
    rng = np.random.default_rng()
    outcomes = [0, 1]

    return rng.choice(outcomes)

In [115]:
# a payoff matrix for the prisoner's dilemma
# rows: player actions
# columns: opponent actions
# actions: 0 = cooperate, 1 = defect
# The entries must satisfy T > R > P > S, where
#   R = [0, 0] (mutual cooperation), S = [0, 1] (cooperate vs. defect),
#   T = [1, 0] (defect vs. cooperate), P = [1, 1] (mutual defection);
# otherwise mutual defection is not worse than mutual cooperation and
# there is no dilemma.
prisoners_dilemma = np.array([[2, 0],
                              [3, 1]])

# both players cooperate initially
history = [(0, 0)]

# play a single round: a coin-flipping player 0 against an
# unconditional cooperator as player 1
history, payoff = play_round(prisoners_dilemma, random, always_cooperate, history)
print('history: ', history)
print('payoff: ', payoff)

history:  [(0, 0), (1, 0)]
payoff:  (3, 0)
