<a href="https://colab.research.google.com/github/krvicky/open_spiel/blob/main/Openspiel_CFR.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [12]:
# Install required dependencies
!pip install open-spiel  # This might take some time

# Import necessary libraries
from open_spiel.python.algorithms import cfr
from open_spiel.python.algorithms import exploitability
from open_spiel.python.algorithms import expected_game_score
import pyspiel



In [13]:


# Define configuration parameters directly
my_iterations = 100
my_game = "kuhn_poker"
players = 2
print_freq = 10

# Load the game using the specified game name
game = pyspiel.load_game(my_game, {"players": players})
cfr_solver = cfr.CFRSolver(game)

# Run your iterations
for i in range(my_iterations):
    cfr_solver.evaluate_and_update_policy()
    if i % print_freq == 0:
        conv = exploitability.exploitability(game, cfr_solver.average_policy())
        print("Iteration {} exploitability {}".format(i, conv))


Iteration 0 exploitability 0.45833333333333326
Iteration 10 exploitability 0.06046924690611866
Iteration 20 exploitability 0.039914275345009825
Iteration 30 exploitability 0.024167348753902612
Iteration 40 exploitability 0.020517348345035824
Iteration 50 exploitability 0.014479024570810684
Iteration 60 exploitability 0.014003542854660017
Iteration 70 exploitability 0.011778275229671564
Iteration 80 exploitability 0.010102437916053336
Iteration 90 exploitability 0.00983365028965788


In [7]:
average_policy = cfr_solver.average_policy()

In [17]:
game.action_to_string(0,0)
game.get_parameters()

{'players': 2}

In [22]:
game.action_to_string(0,1)

'Action(id=1, player=0)'

In [23]:


# Load the game
game = pyspiel.load_game("kuhn_poker")

# Create a CFR solver
cfr_solver = cfr.CFRSolver(game)
iterations = 1000

# Run CFR iterations
for i in range(iterations):
    cfr_value = cfr_solver.evaluate_and_update_policy()
    print("Game util at iteration {}: {}".format(i, cfr_value))

# Compute and print player 0's value using the average policy
average_policy = cfr_solver.average_policy()
average_policy_values = expected_game_score.policy_value(
    game.new_initial_state(), [average_policy] * 2)
print("Computed player 0 value: {}".format(average_policy_values[0]))
print("Expected player 0 value: {}".format(-1 / 18))


Game util at iteration 0: None
Game util at iteration 1: None
Game util at iteration 2: None
Game util at iteration 3: None
Game util at iteration 4: None
Game util at iteration 5: None
Game util at iteration 6: None
Game util at iteration 7: None
Game util at iteration 8: None
Game util at iteration 9: None
Game util at iteration 10: None
Game util at iteration 11: None
Game util at iteration 12: None
Game util at iteration 13: None
Game util at iteration 14: None
Game util at iteration 15: None
Game util at iteration 16: None
Game util at iteration 17: None
Game util at iteration 18: None
Game util at iteration 19: None
Game util at iteration 20: None
Game util at iteration 21: None
Game util at iteration 22: None
Game util at iteration 23: None
Game util at iteration 24: None
Game util at iteration 25: None
Game util at iteration 26: None
Game util at iteration 27: None
Game util at iteration 28: None
Game util at iteration 29: None
Game util at iteration 30: None
Game util at itera

#Exploring the state variables

Running one complete iteration of the game and printing the different state variables

In [85]:
import random
import pyspiel
import numpy as np

game = pyspiel.load_game("kuhn_poker")
state = game.new_initial_state()
while not state.is_terminal():

  print()
  print("Move number... ", state.move_number())
  print("Start of loop....")
  print("is chance node? ",state.is_chance_node())
  print("is player node? ",state.is_player_node())
  legal_actions = state.legal_actions()
  print("Current player is....",state.current_player())
  print("Legal actions are....", legal_actions)
  print("Information state string for 0...",state.information_state_string(0))
  print("Observation string for 0...",state.observation_string(0))
  print("Information state string for 1...",state.information_state_string(1))
  print("Observation string for 1...",state.observation_string(1))

  if state.is_chance_node():
    # Sample a chance event outcome.
    outcomes_with_probs = state.chance_outcomes()
    print(outcomes_with_probs)
    action_list, prob_list = zip(*outcomes_with_probs)
    action = np.random.choice(action_list, p=prob_list)
    print("Action is... ", action)
    state.apply_action(action)
  else:
    # The algorithm can pick an action based on an observation (fully observable
    # games) or an information state (information available for that player)
    # We arbitrarily select the first available action as an example.
    action = legal_actions[state.current_player()]
    print("Action is... ", action)
    state.apply_action(action)

print()
print("Entire history is ",state.history())
print("Returns is... ",state.returns())
print("Rewards is....",state.rewards())


Move number...  0
Start of loop....
is chance node?  True
is player node?  False
Current player is.... -1
Legal actions are.... [0, 1, 2]
Information state string for 0... 
Observation string for 0... 
Information state string for 1... 
Observation string for 1... 
[(0, 0.3333333333333333), (1, 0.3333333333333333), (2, 0.3333333333333333)]
Action is...  0

Move number...  1
Start of loop....
is chance node?  True
is player node?  False
Current player is.... -1
Legal actions are.... [1, 2]
Information state string for 0... 0
Observation string for 0... 011
Information state string for 1... 
Observation string for 1... 
[(1, 0.5), (2, 0.5)]
Action is...  1

Move number...  2
Start of loop....
is chance node?  False
is player node?  True
Current player is.... 0
Legal actions are.... [0, 1]
Information state string for 0... 0
Observation string for 0... 011
Information state string for 1... 1
Observation string for 1... 111
Action is...  0

Move number...  3
Start of loop....
is chance no

'211'