In [1]:
# Plotting, the PathsGrid module (provides Player and Grid), numpy, and the
# nashpy solver used for the equilibrium computation further down.
import matplotlib.pyplot as plt
import PathsGrid as p
import warnings
# Silence every warning for the rest of the session (no category filter is given).
warnings.filterwarnings('ignore')
import numpy as np
import nashpy as nash
import sys
# Print arrays in full instead of summarising long ones.
np.set_printoptions(threshold=sys.maxsize)

In [2]:
# Starting cells of the two players, followed by the 3x3 grid they share.
player1, player2 = p.Player([0, 0]), p.Player([2, 0])

grid = p.Grid(
    length=3,
    width=3,
    players=[player1, player2],
    obstacle_coordinates=[],
    reward_coordinates=[1, 2],
    reward_value=100,
    collision_penalty=-10,
)

In [3]:
def create_strategies(movements=None, horizon=3):
    """Enumerate every fixed sequence of moves a player can commit to.

    Args:
        movements: Iterable of the individual moves available at each step.
            Defaults to ``player1.movements`` (the notebook-level player).
        horizon: Number of consecutive moves in one strategy. Defaults to 3.

    Returns:
        A list of strategies, each a list of ``horizon`` moves, ordered with
        the last move varying fastest (the order nested loops would produce).
    """
    from itertools import product

    if movements is None:
        movements = player1.movements
    return [list(sequence) for sequence in product(movements, repeat=horizon)]

In [4]:
def get_payoff(strategie1, strategie2, through1, through2):
    """Play two move sequences against each other on a fresh 3x3 grid.

    A new pair of players (starting at [0, 0] and [2, 0]) and a new grid are
    built on every call, so the notebook-level objects are never touched.

    Args:
        strategie1: Sequence of moves for player 1; its length sets the
            number of steps simulated.
        strategie2: Sequence of moves for player 2, indexed in step with
            ``strategie1``.
        through1: Passed unchanged to ``player1.move`` on every step.
        through2: Passed unchanged to ``player2.move`` on every step.

    Returns:
        Tuple ``(total_1, total_2)``: each player's summed per-step rewards
        minus that player's ``barrier_bonus``.
    """
    first = p.Player([0, 0])
    second = p.Player([2, 0])
    positions = [first.position, second.position]
    board = p.Grid(
        length=3,
        width=3,
        players=[first, second],
        obstacle_coordinates=[],
        reward_coordinates=[1, 2],
        reward_value=100,
        collision_penalty=-10,
    )
    blocked = board.identify_walls()

    earned_1 = []
    earned_2 = []
    for step, move_1 in enumerate(strategie1):
        # NOTE(review): `positions` is a pair of positions while the grid was
        # given a single reward coordinate, so this early exit looks like it
        # can never fire -- confirm the intended condition.
        if positions == board.reward_coordinates:
            break

        # A [move, position] pair listed by identify_walls() keeps the player
        # where it is; otherwise the player object supplies the destination.
        target_1 = positions[0] if [move_1, positions[0]] in blocked else first.move(move_1, through1)

        move_2 = strategie2[step]
        target_2 = positions[1] if [move_2, positions[1]] in blocked else second.move(move_2, through2)

        distinct_cells = target_1 != target_2
        both_on_reward = [target_1, target_2] == [board.reward_coordinates, board.reward_coordinates]

        if distinct_cells or both_on_reward:
            # Destinations are accepted (sharing a cell is only allowed on
            # the reward cell): commit them and score the transition.
            first.position = target_1
            second.position = target_2
            earned_1.append(board.compute_reward(positions[0], target_1, move_1))
            earned_2.append(board.compute_reward(positions[1], target_2, move_2))
            positions = [target_1, target_2]
        else:
            # Same non-reward destination for both: `positions` is left
            # unchanged and each player is scored for a transition from its
            # current cell to itself.
            # NOTE(review): move() has already been called above; if it
            # updates the player's own position, that is not rolled back
            # here -- confirm against PathsGrid.
            earned_1.append(board.compute_reward(positions[0], positions[0], move_1))
            earned_2.append(board.compute_reward(positions[1], positions[1], move_2))

    # The barrier bonus accumulated on each player is charged once at the end.
    earned_1.append(-first.barrier_bonus)
    earned_2.append(-second.barrier_bonus)

    first.barrier_counter = 0
    second.barrier_counter = 0

    first.barrier_bonus = 0
    second.barrier_bonus = 0

    return sum(earned_1), sum(earned_2)

In [5]:
def utility_function(strategie1, strategie2, payoff=None):
    """Average both players' payoffs over every combination of ``through`` flags.

    Each step of each strategy gets a flag of 0 or 1. The payoff function is
    evaluated once per combination (player 2's last flag varying fastest) and
    the results are averaged with equal weight. For two 3-step strategies
    this is 64 evaluations.

    Args:
        strategie1: Move sequence of player 1.
        strategie2: Move sequence of player 2.
        payoff: Callable ``(strategie1, strategie2, through1, through2)``
            returning ``(payoff_1, payoff_2)``. Defaults to ``get_payoff``.

    Returns:
        Tuple ``(utility_1, utility_2)`` of the averaged payoffs.
    """
    from itertools import product

    if payoff is None:
        payoff = get_payoff

    steps_1 = len(strategie1)
    steps_2 = len(strategie2)
    # One binary flag per step and per player, so 2**(total steps) scenarios.
    scenario_count = 2 ** (steps_1 + steps_2)

    utility_1 = 0
    utility_2 = 0
    for flags in product(range(2), repeat=steps_1 + steps_2):
        ut1, ut2 = payoff(strategie1, strategie2, list(flags[:steps_1]), list(flags[steps_1:]))
        utility_1 += ut1 / scenario_count
        utility_2 += ut2 / scenario_count
    return utility_1, utility_2

In [6]:
def strategie_payoff_matrix(strategies=None, utility=None):
    """Build both players' payoff matrices over all strategy pairs.

    Args:
        strategies: Strategies used for both the rows (player 1) and the
            columns (player 2). Defaults to ``create_strategies()``.
        utility: Callable ``(strategy_1, strategy_2)`` returning
            ``(payoff_1, payoff_2)``. Defaults to ``utility_function``.

    Returns:
        Tuple ``(payoff1, payoff2)`` of square arrays; entry ``[i, j]`` holds
        the payoff when player 1 plays ``strategies[i]`` and player 2 plays
        ``strategies[j]``.
    """
    if strategies is None:
        strategies = create_strategies()
    if utility is None:
        utility = utility_function

    size = len(strategies)
    # Denominator for the progress report, derived from the actual number of
    # strategies rather than assumed.
    total_pairs = size * size
    payoff1 = np.zeros((size, size))
    payoff2 = np.zeros((size, size))

    evaluated = 0
    for i, row_strategy in enumerate(strategies):
        for j, column_strategy in enumerate(strategies):
            payoff1[i, j], payoff2[i, j] = utility(row_strategy, column_strategy)
            evaluated += 1
        # Progress is reported once per completed row.
        print(f"Percentage: {100*evaluated/total_pairs}%")
    return payoff1, payoff2

In [7]:
# Notebook-level list of all strategies; later cells index into it by position.
strategies = create_strategies()

In [8]:
# Fill both payoff matrices (player 1, player 2) for every strategy pair.
# Each pair is averaged over 64 get_payoff simulations, so this cell does the
# bulk of the notebook's computation; progress is printed once per row.
payoff_matrix_1, payoff_matrix_2 = strategie_payoff_matrix()

Percentage: 0.8%
Percentage: 1.6%
Percentage: 2.4%
Percentage: 3.2%
Percentage: 4.0%
Percentage: 4.8%
Percentage: 5.6%
Percentage: 6.4%
Percentage: 7.2%
Percentage: 8.0%
Percentage: 8.8%
Percentage: 9.6%
Percentage: 10.4%
Percentage: 11.2%
Percentage: 12.0%
Percentage: 12.8%
Percentage: 13.6%
Percentage: 14.4%
Percentage: 15.2%
Percentage: 16.0%
Percentage: 16.8%
Percentage: 17.6%
Percentage: 18.4%
Percentage: 19.2%
Percentage: 20.0%
Percentage: 20.8%
Percentage: 21.6%
Percentage: 22.4%
Percentage: 23.2%
Percentage: 24.0%
Percentage: 24.8%
Percentage: 25.6%
Percentage: 26.4%
Percentage: 27.2%
Percentage: 28.0%
Percentage: 28.8%
Percentage: 29.6%
Percentage: 30.4%
Percentage: 31.2%
Percentage: 32.0%
Percentage: 32.8%
Percentage: 33.6%
Percentage: 34.4%
Percentage: 35.2%
Percentage: 36.0%
Percentage: 36.8%
Percentage: 37.6%
Percentage: 38.4%
Percentage: 39.2%
Percentage: 40.0%
Percentage: 40.8%
Percentage: 41.6%
Percentage: 42.4%
Percentage: 43.2%
Percentage: 44.0%
Percentage: 44.8%
Perc

In [9]:
# Bimatrix game: rows are player 1's strategies, columns are player 2's.
game = nash.Game(payoff_matrix_1, payoff_matrix_2)
# Lemke-Howson started from dropped label 9. The result is materialised as a
# list whose elements 0 and 1 are read below as player 1's and player 2's
# strategy vectors.
equilibria = list(game.lemke_howson(initial_dropped_label=9))

In [11]:
def _pure_strategy_index(probabilities):
    """Return the index that is played with probability 1 in a strategy vector.

    Args:
        probabilities: Sequence of per-strategy probabilities.

    Returns:
        The position of the single entry that is (numerically) equal to 1.

    Raises:
        ValueError: If there is not exactly one such entry, i.e. the vector
            describes a mixed strategy rather than a pure one.
    """
    # Compare with a tolerance: the solver returns floats, and an exact
    # `== 1.` test would miss a value such as 0.9999999999.
    certain = np.flatnonzero(np.isclose(probabilities, 1.0))
    if certain.size != 1:
        raise ValueError(
            f"Expected a pure strategy (exactly one entry equal to 1), got {probabilities!r}"
        )
    return int(certain[0])


# equilibria[0] / equilibria[1] are the players' probability vectors over
# `strategies`; translate each back into the move sequence it selects.
player1_strategy = strategies[_pure_strategy_index(equilibria[0])]
player2_strategy = strategies[_pure_strategy_index(equilibria[1])]

print(f"The strategy of player 1 in the equilbrium is {player1_strategy}")
print(f"The strategy of player 2 in the equilbrium is {player2_strategy}")

The strategy of player 1 in the equilbrium is ['up', 'up', 'right']
The strategy of player 2 in the equilbrium is ['left', 'up', 'up']


In [12]:
def get_best_response(strategy, payoff_matrix=None, strategies=None):
    """Return player 2's highest-payoff strategy against a fixed player-1 strategy.

    Args:
        strategy: Row index of player 1's strategy in the payoff matrix.
        payoff_matrix: Player 2's payoffs, indexed
            ``[player-1 strategy][player-2 strategy]``. Defaults to the
            notebook-level ``payoff_matrix_2``.
        strategies: List mapping column indices to strategies. Defaults to
            ``create_strategies()``.

    Returns:
        The strategy whose payoff in the selected row is largest. Ties are
        resolved in favour of the lowest index.

    Raises:
        ValueError: If the selected row is empty.
    """
    if payoff_matrix is None:
        payoff_matrix = payoff_matrix_2
    if strategies is None:
        strategies = create_strategies()

    # argmax needs no sentinel "very low" starting value, so rows whose
    # payoffs are all hugely negative are handled too. Like a strict `>`
    # scan, it returns the first maximum.
    best_response = int(np.argmax(payoff_matrix[strategy]))
    return strategies[best_response]

In [13]:
# Positions of four hand-picked move sequences within `strategies`.
# NOTE(review): the names look like s<strategy number><player number>; s12 and
# s22 are not used in the cells visible here -- confirm whether they are needed.
s11 = strategies.index(['up', 'up', 'right'])
s21 = strategies.index(['right', 'up', 'up'])
s12 = strategies.index(['left', 'up', 'up'])
s22 = strategies.index(['up', 'up', 'left'])

In [14]:
# For each chosen player-1 strategy, report player 2's best reply.
for chosen in (s11, s21):
    print(f'The best response for strategy {strategies[chosen]} of player 1 is for player 2 to play {get_best_response(chosen)}')

The best response for strategy ['up', 'up', 'right'] of player 1 is for player 2 to play ['left', 'up', 'up']
The best response for strategy ['right', 'up', 'up'] of player 1 is for player 2 to play ['up', 'up', 'left']
