In [1]:
!python -m pip install nashpy # install nashpy library



In [2]:
import nashpy as nash
import numpy as np
# c.f. https://nashpy.readthedocs.io/en/stable/index.html

## Example

Rock Paper Scizors

In [6]:

# payoff matrix for the first player
rock_paper_scizors_matrix = np.array([
    #R  #P  #S
    [0, -1, 1], # R
    [1, 0, -1], # P
    [-1, 1, 0]  # S
])

# The game is a two-player zero-sum game so we only need to feed the first player payoff matrix
game = nash.Game(rock_paper_scizors_matrix)
print(game)


policy_a = np.array([1, 0, 0]) # probability for each action, R, P, S
policy_b = np.array([0, 1, 0])
# Utility for each player, if they follow the above policies
print("Utility if first player chooses Rock and second player chooses Paper", game[policy_a, policy_b])
print("Best responses ?", game.is_best_response(policy_a, policy_b))

policy_a = np.ones(3, dtype=np.float32) / 3 # random policy
# calculate the best response
nash_eq = game.support_enumeration()
print("Nash equilibrium:")
print(list(nash_eq)) # nash equilibrium is fully random policies for both sides (1/3 probability for each action)

Zero sum game with payoff matrices:

Row player:
[[ 0 -1  1]
 [ 1  0 -1]
 [-1  1  0]]

Column player:
[[ 0  1 -1]
 [-1  0  1]
 [ 1 -1  0]]
Utility if first player chooses Rock and second player chooses Paper [-1  1]
Best responses ? (False, True)
Nash equilibrium:
[(array([0.33333333, 0.33333333, 0.33333333]), array([0.33333333, 0.33333333, 0.33333333]))]


# 1) A Medieval Duel (Zero-Sum Game)

#### Define the following 2-player zero-sum game where:
There is 4 actions:

1. Hide
2. Charge with a sword
3. Shoot an arrow 
4. Scoot
    
#### Description:
You engage in a fight with your opponent, which is taking place in the middle of a forest. Both of you own a bow with a couple arrows, and a sword:
- If you hide, your opponent cannot find you, unless he is scooting.
- Charging costs some energy, and shooting spends an arrow.
- Scooting makes you defenseless if your opponent is charging or shooting.

Goals:
- Define the payoff (your choice) for the first player for each action, in function of the action of the opponent.
- Construct the game matrix.
- Compute the nash equilibriums of your game.

In [None]:

# For each action, define their payoff when the other player picks their own action 


game_matrix = np.array([
    [...],
    [...],
    [...],
    [...]
])

game = nash.Game(game_matrix)
nash_eq = game.linear_program()
print("Nash equilibrium:")
print(list(nash_eq))
# Can you interpret the nash equilibrium found ?

# General-Sum Games

# 2) Chicken
The Chicken Dilemma: https://en.wikipedia.org/wiki/Chicken_(game)

Goal: Define the Chicken Dilemma game matrices and determine its nash equilibria for different car costs!


In [None]:
game_matrix_1 = np.array([
    [0, 1],
    [-1, -c],
])
game_matrix_2 = np.array([
    [0, -1],
    [1, -c],
])

# This is a general sum game, we need to feed the two matrices for each player
game = nash.Game(game_matrix_1, game_matrix_2)
nash_eq = list(game.support_enumeration())
print(nash_eq)

# 3) Prisoner's Dilemma

The Prisoner's Dilemma: https://en.wikipedia.org/wiki/Prisoner%27s_dilemma

Goal: Define the Prisoner's Dilemma game matrices and determine its nash equilibria.

# 4) Stag Hunt

The Stag Hunt problem: https://en.wikipedia.org/wiki/Stag_hunt

Similarly to the Prisoner's Dilemma, define the game matrix and determine its nash equilibria

In [20]:

np.random.uniform(size=2)

array([0.41217476, 0.43437682])

In [62]:
## Input: game matrix with form
## game[player, p1move, p2move, .... pnmove]
def regret_minimiser(game, lr, T):
    n_players = game.shape[0]
    n_actions1 = game.shape[1]
    n_actions2 = game.shape[2]
    actions = np.zeros([T, 2])
    policy = [np.random.uniform(size=n_actions1), np.random.uniform(size=n_actions2)]
    for i in range(2): policy[i] /= sum(policy[i])

    reward = [0,0]
    for t in range(T):
        #print(policy)
        action1 = np.random.choice(n_actions1, p = policy[0])
        action2 = np.random.choice(n_actions2, p = policy[1])
        for a in range(n_actions1):
            policy[0][a] *= np.exp(lr * game[0, a, action2])
        policy[0] /= sum(policy[0])
        for a in range(n_actions2):
            policy[1][a] *= np.exp(lr * game[1, action1, a])
        policy[1] /= sum(policy[1])
        actions[t,:] = [action1, action2]
        reward = game[:, action1, action2]
    return actions

In [66]:
rock_paper_scizors_matrix = np.array([
    #R  #P  #S
    [0, -1, 1], # R
    [1, 0, -1], # P
    [-1, 1, 0]  # S
])
A = regret_minimiser(np.stack([rock_paper_scizors_matrix, -rock_paper_scizors_matrix]), 0.01, 10000)
np.mean(A[0,:]==2)

0.0