# Task 2: 'Learning & Dynamics' in OpenSpiel
Documentation: https://github.com/deepmind/open_spiel/blob/master/open_spiel/matrix_game.h

In [103]:
%autoreload 2
from part2_utils import *

import numpy as np
import pyspiel

from open_spiel.python.algorithms import random_agent

### Biased Rock-Paper-Scissors
-> zero-sum game

#### Initialization of Biased RPS

In [100]:
# Payoff tables for biased Rock-Paper-Scissors, indexed [row action][column action].
# The game is zero-sum, so the column player's table is the negation of the row player's.
row_player_utils = [
    [0, -0.25, 0.5],
    [0.25, 0, -0.05],
    [-0.5, 0.05, 0],
]
col_player_utils = [[-payoff for payoff in payoffs] for payoffs in row_player_utils]

# Identifiers and action labels shown when the game/state is printed.
short_name = 'biased_rps'
long_name = 'Biased Rock-Paper-Scissors'
row_names = ['P1_Rock', 'P1_Paper', 'P1_Scissors']
col_names = ['P2_Rock', 'P2_Paper', 'P2_Scissors']

# Build the matrix game from the names and the two payoff tables.
biased_rps_game = pyspiel.create_matrix_game(
    short_name,
    long_name,
    row_names,
    col_names,
    row_player_utils,
    col_player_utils,
)

# Show the game object itself.
print('-------GAME---------', biased_rps_game, sep='\n')
print('--------------------', end='\n\n')

# Create the initial (non-terminal) state and show it, including the utility matrix.
state = biased_rps_game.new_initial_state()
print('-------STATE--------', state, sep='\n')
print('--------------------', end='\n\n')

-------GAME---------
biased_rps()
--------------------

-------STATE--------
Terminal? false
Row actions: P1_Rock P1_Paper P1_Scissors 
Col actions: P2_Rock P2_Paper P2_Scissors 
Utility matrix:
0,0 -0.25,0.25 0.5,-0.5 
0.25,-0.25 0,0 -0.05,0.05 
-0.5,0.5 0.05,-0.05 0,0 

--------------------



#### Independent learning in Biased RPS

In [109]:
# Train on Biased RPS and keep the two returned agents.
# `train` is not defined in this notebook — presumably it comes from `part2_utils`
# via the star import (confirm). It logs an "Episodes: N" line while it runs.
# NOTE: q_learned_agent1 / q_learned_agent2 are rebound by every later training cell,
# so re-run this cell before playing Biased RPS with them.
q_learned_agent1, q_learned_agent2 = train(biased_rps_game)

Episodes: 0
Episodes: 1000
Episodes: 2000
Episodes: 3000
Episodes: 4000
Episodes: 5000
Episodes: 6000
Episodes: 7000
Episodes: 8000
Episodes: 9000
Episodes: 10000
Episodes: 11000
Episodes: 12000
Episodes: 13000
Episodes: 14000
Episodes: 15000
Episodes: 16000
Episodes: 17000
Episodes: 18000
Episodes: 19000
Episodes: 20000
Episodes: 21000
Episodes: 22000
Episodes: 23000
Episodes: 24000
Done!


#### Play game against random agent

In [113]:
# Let the first learned agent play Biased RPS against a random agent acting as
# player 1. Each player has three actions in this game, hence num_actions=3.
play_game(
    biased_rps_game,
    q_learned_agent1,
    random_agent.RandomAgent(player_id=1, num_actions=3),
)
# Alternative: let the two learned agents play each other.
#play_game(biased_rps_game, q_learned_agent1, q_learned_agent2)

[0.0, 0.0]
[0.25, -0.25]
[0.0, 0.0]
[-0.05, 0.05]
[0.25, -0.25]
[0.0, 0.0]
[0.0, 0.0]
[0.25, -0.25]
[-0.05, 0.05]
[0.0, 0.0]


### Dispersion Game
-> anti-coordination game (both players receive the same payoff and are rewarded for choosing different actions)

In [96]:
# Payoff tables for the Dispersion Game, indexed [row action][column action].
# Both players receive the same payoff: +1 when their actions differ, -1 when they match.
row_player_utils = [
    [-1, 1],
    [1, -1],
]
col_player_utils = [
    [-1, 1],
    [1, -1],
]

# Identifiers and action labels shown when the game/state is printed.
short_name = 'dg'
long_name = 'Dispersion Game'
row_names = ['P1_A', 'P1_B']
col_names = ['P2_A', 'P2_B']

# Build the matrix game from the names and the two payoff tables.
dg_game = pyspiel.create_matrix_game(
    short_name,
    long_name,
    row_names,
    col_names,
    row_player_utils,
    col_player_utils,
)

# Show the game object itself.
print('-------GAME---------', dg_game, sep='\n')
print('--------------------', end='\n\n')

# Create the initial (non-terminal) state and show it, including the utility matrix.
state = dg_game.new_initial_state()
print('-------STATE--------', state, sep='\n')
print('--------------------', end='\n\n')

-------GAME---------
dg()
--------------------

-------STATE--------
Terminal? false
Row actions: P1_A P1_B 
Col actions: P2_A P2_B 
Utility matrix:
-1,-1 1,1 
1,1 -1,-1 

--------------------



#### Independent learning in Dispersion Game

In [116]:
# Train on the Dispersion Game and keep the two returned agents.
# `train` is not defined in this notebook — presumably it comes from `part2_utils`
# via the star import (confirm). It logs an "Episodes: N" line while it runs.
# NOTE: this rebinds q_learned_agent1 / q_learned_agent2, replacing the agents
# trained on any other game; re-run this cell before playing the Dispersion Game.
q_learned_agent1, q_learned_agent2 = train(dg_game)

Episodes: 0
Episodes: 1000
Episodes: 2000
Episodes: 3000
Episodes: 4000
Episodes: 5000
Episodes: 6000
Episodes: 7000
Episodes: 8000
Episodes: 9000
Episodes: 10000
Episodes: 11000
Episodes: 12000
Episodes: 13000
Episodes: 14000
Episodes: 15000
Episodes: 16000
Episodes: 17000
Episodes: 18000
Episodes: 19000
Episodes: 20000
Episodes: 21000
Episodes: 22000
Episodes: 23000
Episodes: 24000
Done!


#### Play game against another agent

In [124]:
# Alternative: play against a random agent instead. The Dispersion Game has two
# actions per player, so the action count is taken from the game.
#play_game(dg_game, q_learned_agent1, random_agent.RandomAgent(player_id=1, num_actions=dg_game.num_distinct_actions()))

# Self-play: the two agents trained on the Dispersion Game play each other.
play_game(
    dg_game,
    q_learned_agent1,
    q_learned_agent2,
)

[1.0, 1.0]
[1.0, 1.0]
[1.0, 1.0]
[1.0, 1.0]
[1.0, 1.0]
[1.0, 1.0]
[1.0, 1.0]
[1.0, 1.0]
[1.0, 1.0]
[1.0, 1.0]


### Battle of the Sexes
-> coordination game

In [126]:
# Payoff tables for Battle of the Sexes, indexed [row action][column action].
# Players only earn something when they pick the same option; the row player
# prefers (O, O) and the column player prefers (M, M).
row_player_utils = [
    [3, 0],
    [0, 2],
]
col_player_utils = [
    [2, 0],
    [0, 3],
]

# Identifiers and action labels shown when the game/state is printed.
short_name = 'bots'
long_name = 'Battle of the Sexes'
row_names = ['P1_O', 'P1_M']
col_names = ['P2_O', 'P2_M']

# Build the matrix game from the names and the two payoff tables.
bots_game = pyspiel.create_matrix_game(
    short_name,
    long_name,
    row_names,
    col_names,
    row_player_utils,
    col_player_utils,
)

# Show the game object itself.
print('-------GAME---------', bots_game, sep='\n')
print('--------------------', end='\n\n')

# Create the initial (non-terminal) state and show it, including the utility matrix.
state = bots_game.new_initial_state()
print('-------STATE--------', state, sep='\n')
print('--------------------', end='\n\n')

-------GAME---------
bots()
--------------------

-------STATE--------
Terminal? false
Row actions: P1_O P1_M 
Col actions: P2_O P2_M 
Utility matrix:
3,2 0,0 
0,0 2,3 

--------------------



#### Independent learning in Battle of the Sexes

In [127]:
# Train on Battle of the Sexes and keep the two returned agents.
# `train` is not defined in this notebook — presumably it comes from `part2_utils`
# via the star import (confirm). It logs an "Episodes: N" line while it runs.
# NOTE: this rebinds q_learned_agent1 / q_learned_agent2, replacing the agents
# trained on any other game; re-run this cell before playing Battle of the Sexes.
q_learned_agent1, q_learned_agent2 = train(bots_game)

Episodes: 0
Episodes: 1000
Episodes: 2000
Episodes: 3000
Episodes: 4000
Episodes: 5000
Episodes: 6000
Episodes: 7000
Episodes: 8000
Episodes: 9000
Episodes: 10000
Episodes: 11000
Episodes: 12000
Episodes: 13000
Episodes: 14000
Episodes: 15000
Episodes: 16000
Episodes: 17000
Episodes: 18000
Episodes: 19000
Episodes: 20000
Episodes: 21000
Episodes: 22000
Episodes: 23000
Episodes: 24000
Done!


#### Play game against another agent

In [129]:
# Let the first learned agent play Battle of the Sexes against a random agent
# acting as player 1.
# The random agent's action count is read from the game rather than hard-coded:
# Battle of the Sexes has two actions per player, so a literal 3 would not match it.
play_game(
    bots_game,
    q_learned_agent1,
    random_agent.RandomAgent(
        player_id=1,
        num_actions=bots_game.num_distinct_actions(),
    ),
)
# Alternative: let the two learned agents play each other.
#play_game(bots_game, q_learned_agent1, q_learned_agent2)

[0.0, 0.0]
[0.0, 0.0]
[0.0, 0.0]
[0.0, 0.0]
[0.0, 0.0]
[3.0, 2.0]
[0.0, 0.0]
[3.0, 2.0]
[3.0, 2.0]
[3.0, 2.0]


### Subsidy Game

In [132]:
#configure game
row_player_utils = [[10, 0], [11, 12]]
col_player_utils = [[10, 11], [0, 12]]
short_name = 'sg'
long_name = 'Subsidy Game'
row_names, col_names = ['P1_S1', 'P1_S2'], ['P2_S1', 'P2_S2']

#initialize game
sg_game = pyspiel.create_matrix_game(short_name, long_name, row_names, col_names, row_player_utils, col_player_utils)

#display game
print('-------GAME---------'+'\n'+str(sg_game))
print('--------------------'+'\n')

#initialize first state of game
state = sg_game.new_initial_state()

#display state
print('-------STATE--------'+'\n'+str(state))
print('--------------------'+'\n')

-------GAME---------
sg()
--------------------

-------STATE--------
Terminal? false
Row actions: P1_S1 P1_S2 
Col actions: P2_S1 P2_S2 
Utility matrix:
10,10 0,11 
11,0 12,12 

--------------------



#### Independent learning in Subsidy Game

In [133]:
# Train on the Subsidy Game and keep the two returned agents.
# `train` is not defined in this notebook — presumably it comes from `part2_utils`
# via the star import (confirm). It logs an "Episodes: N" line while it runs.
# NOTE: this rebinds q_learned_agent1 / q_learned_agent2, replacing the agents
# trained on any other game; re-run this cell before playing the Subsidy Game.
q_learned_agent1, q_learned_agent2 = train(sg_game)

Episodes: 0
Episodes: 1000
Episodes: 2000
Episodes: 3000
Episodes: 4000
Episodes: 5000
Episodes: 6000
Episodes: 7000
Episodes: 8000
Episodes: 9000
Episodes: 10000
Episodes: 11000
Episodes: 12000
Episodes: 13000
Episodes: 14000
Episodes: 15000
Episodes: 16000
Episodes: 17000
Episodes: 18000
Episodes: 19000
Episodes: 20000
Episodes: 21000
Episodes: 22000
Episodes: 23000
Episodes: 24000
Done!


#### Play game against another agent

In [137]:
# Let the first learned agent play the Subsidy Game against a random agent
# acting as player 1.
# The random agent's action count is read from the game rather than hard-coded:
# the Subsidy Game has two actions per player, so a literal 3 would not match it.
play_game(
    sg_game,
    q_learned_agent1,
    random_agent.RandomAgent(
        player_id=1,
        num_actions=sg_game.num_distinct_actions(),
    ),
)
# Alternative: let the two learned agents play each other.
#play_game(sg_game, q_learned_agent1, q_learned_agent2)

[12.0, 12.0]
[11.0, 0.0]
[12.0, 12.0]
[12.0, 12.0]
[11.0, 0.0]
[12.0, 12.0]
[11.0, 0.0]
[11.0, 0.0]
[12.0, 12.0]
[11.0, 0.0]
