<a href="https://colab.research.google.com/github/krvicky/open_spiel/blob/main/Openspiel_CFR.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Install required dependencies
!pip install open-spiel  # This might take some time

# Import necessary libraries
from open_spiel.python.algorithms import cfr
from open_spiel.python.algorithms import exploitability
from open_spiel.python.algorithms import expected_game_score
import pyspiel

Collecting open-spiel
  Downloading open_spiel-1.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (5.4 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m5.4/5.4 MB[0m [31m23.7 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: open-spiel
Successfully installed open-spiel-1.3


# Exploring the state variables

Run one complete episode of the game and print the state variables at every step.

In [4]:
import random
import pyspiel
import numpy as np

# Play one episode of Kuhn poker from the initial state to a terminal state,
# printing the state variables before every move and the outcome at the end.
game = pyspiel.load_game("kuhn_poker")
state = game.new_initial_state()
while not state.is_terminal():

  print()
  print("Move number... ", state.move_number())
  print("Start of loop....")
  print("is chance node? ",state.is_chance_node())
  print("is player node? ",state.is_player_node())
  legal_actions = state.legal_actions()
  print("Current player is....",state.current_player())
  print("Legal actions are....", legal_actions)
  print("Information state string for 0...",state.information_state_string(0))
  print("Observation string for 0...",state.observation_string(0))
  print("Information state string for 1...",state.information_state_string(1))
  print("Observation string for 1...",state.observation_string(1))

  if state.is_chance_node():
    # Sample a chance event outcome according to the chance distribution.
    outcomes_with_probs = state.chance_outcomes()
    print(outcomes_with_probs)
    action_list, prob_list = zip(*outcomes_with_probs)
    action = np.random.choice(action_list, p=prob_list)
    print("Action is... ", action)
    state.apply_action(action)
  else:
    # The algorithm can pick an action based on an observation (fully observable
    # games) or an information state (information available for that player).
    # As an example, each player picks the legal action whose position in the
    # list equals their player id, so the two players act differently.
    # Player ids and action positions are unrelated index spaces, so the index
    # is wrapped with the list length: it can never run past the end of
    # `legal_actions` if the game is swapped for one with more players than
    # actions. For two-player Kuhn poker the wrap changes nothing.
    action = legal_actions[state.current_player() % len(legal_actions)]
    print("Action is... ", action)
    state.apply_action(action)

# Summary of the finished episode.
print()
print("Entire history is ",state.history())
print("Returns is... ",state.returns())
print("Rewards is....",state.rewards())


Move number...  0
Start of loop....
is chance node?  True
is player node?  False
Current player is.... -1
Legal actions are.... [0, 1, 2]
Information state string for 0... 
Observation string for 0... 
Information state string for 1... 
Observation string for 1... 
[(0, 0.3333333333333333), (1, 0.3333333333333333), (2, 0.3333333333333333)]
Action is...  2

Move number...  1
Start of loop....
is chance node?  True
is player node?  False
Current player is.... -1
Legal actions are.... [0, 1]
Information state string for 0... 2
Observation string for 0... 211
Information state string for 1... 
Observation string for 1... 
[(0, 0.5), (1, 0.5)]
Action is...  1

Move number...  2
Start of loop....
is chance node?  False
is player node?  True
Current player is.... 0
Legal actions are.... [0, 1]
Information state string for 0... 2
Observation string for 0... 211
Information state string for 1... 1
Observation string for 1... 111
Action is...  0

Move number...  3
Start of loop....
is chance no

**CFR implementation**

In [5]:
from open_spiel.python.algorithms import cfr

In [6]:
# Settings for the short CFR demonstration run.
my_game, players = "kuhn_poker", 2
my_iterations, print_freq = 100, 25

# Build the game with the requested number of players and a CFR solver for it.
game = pyspiel.load_game(my_game, {"players": players})
cfr_solver = cfr.CFRSolver(game)

# Run CFR; every `print_freq` iterations report the exploitability of the
# solver's current average policy.
for step in range(my_iterations):
    cfr_solver.evaluate_and_update_policy()
    if step % print_freq:
        # Not a reporting iteration.
        continue
    current_average_policy = cfr_solver.average_policy()
    print("Iteration {} exploitability {}".format(
        step, exploitability.exploitability(game, current_average_policy)))

Iteration 0 exploitability 0.45833333333333326
Iteration 25 exploitability 0.029921334314481435
Iteration 50 exploitability 0.014479024570810684
Iteration 75 exploitability 0.009339085767700456


# Implementing CFR and checking results

Algorithm is in this [Miro board](https://miro.com/welcomeonboard/VmRQRU5kc2wybjBacTdJUHBhSkN6RW5wOGUwbjRDSWlHd2dtRkhNTjd3Mmh5amMwUjNsMWI4Rnl0a3hnWFQyeHwzNDU4NzY0NTY2NDM5NzE3OTkwfDI=?share_link_id=754191260450)

In [75]:
#Define the parameters
# Shared settings read by the later cells of this section.
episodes = 10000  # number of evaluation episodes played in the head-to-head loop
my_iterations = 100  # NOTE(review): not read by the later cells shown here; the training cell sets its own `iterations`
my_game = "kuhn_poker"  # game name passed to pyspiel.load_game in the evaluation loop
players = 2  # NOTE(review): not read by the later cells shown here; the evaluation loop loads the game without a players parameter
print_freq = 1000  # the CFR training loop prints exploitability every `print_freq` iterations

In [43]:
def action_for_player(state):
  """Uniform-random strategy, used for player 0.

  Args:
    state: game state exposing `legal_actions()`.

  Returns:
    One of the actions legal at `state`, each chosen with equal probability.
  """
  return random.choice(state.legal_actions())

In [61]:
#Strategy function for player 1
#follows CFR strategy

#Train a CFR model
from open_spiel.python.algorithms import cfr
cfr_game = pyspiel.load_game("kuhn_poker")
# Create the CFR solver and run CFR iterations to compute a strategy
cfr_solver = cfr.CFRSolver(cfr_game)
iterations = 10000
for i in range(iterations):
    cfr_solver.evaluate_and_update_policy()
    # `print_freq` comes from the parameters cell.
    if i % print_freq == 0:
        # Evaluate on `cfr_game`, the game the solver was built on, rather than
        # on whatever `game` object another cell happened to leave in the
        # kernel namespace.
        conv = exploitability.exploitability(cfr_game, cfr_solver.average_policy())
        print("Iteration {} exploitability {}".format(i, conv))

# Obtain the average policy from CFR; `cfr_strategy` reads this global.
average_policy = cfr_solver.average_policy()

Iteration 0 exploitability 0.45833333333333326
Iteration 1000 exploitability 0.0009701106073763677
Iteration 2000 exploitability 0.0005293432339987247
Iteration 3000 exploitability 0.0004150519969013944
Iteration 4000 exploitability 0.0002712135609054256
Iteration 5000 exploitability 0.0001806005218573381
Iteration 6000 exploitability 0.00020676484354964497
Iteration 7000 exploitability 0.0001306464473274649
Iteration 8000 exploitability 0.00016224104352607904
Iteration 9000 exploitability 0.0001315032124148685


In [68]:
#Get next action from the trained CFR strategy
def cfr_strategy(state, sample=False):
    """Choose an action for the current player from the trained CFR policy.

    Reads the module-level `average_policy` produced by the CFR training cell
    and looks up the action distribution for `state.current_player()`.

    Args:
        state: game state at a player decision node.
        sample: when False (the default, and the original behaviour) return the
            single most probable action; ties go to the first action in the
            mapping's iteration order. When True, draw the action at random
            according to the policy's probabilities, which is how a mixed
            strategy is actually played. Always taking the most probable
            action collapses a mixed strategy into a pure one.

    Returns:
        The chosen action id.
    """
    action_with_prob = average_policy.action_probabilities(state, state.current_player())

    if sample:
        actions = list(action_with_prob)
        probabilities = [action_with_prob[a] for a in actions]
        # int(...) converts the NumPy scalar back to a plain Python int.
        return int(np.random.choice(actions, p=probabilities))

    chosen_action = max(action_with_prob, key=action_with_prob.get)
    return chosen_action

In [76]:
#Loop to run through different episodes
# Plays `episodes` games of `my_game`. Player 0 uses the uniform-random test
# strategy (`action_for_player`), player 1 uses the trained CFR strategy
# (`cfr_strategy`). Win counts and cumulative returns are tracked per player.
game = pyspiel.load_game(my_game)
test_player_returns = 0
cfr_player_returns = 0
test_player_wins = 0
cfr_player_wins = 0
tied_episodes = 0

for i in range(episodes):
  state = game.new_initial_state()

  #Run a single episode
  while not state.is_terminal():
    if state.is_chance_node():
      # Sample a chance event outcome.
      outcomes_with_probs = state.chance_outcomes()
      action_list, prob_list = zip(*outcomes_with_probs)
      action = np.random.choice(action_list, p=prob_list)
    elif state.current_player() == 0:
      #This is the player with suboptimal strategy
      action = action_for_player(state)
    else:
      #This is the player with CFR strategy
      action = cfr_strategy(state)
    state.apply_action(action)

  #Accumulate the returns for each player
  episode_return = state.returns()
  if episode_return[0] > episode_return[1]:
    test_player_wins += 1
  elif episode_return[1] > episode_return[0]:
    cfr_player_wins += 1
  else:
    # Equal returns are a tie, not a CFR win.
    tied_episodes += 1
  test_player_returns += episode_return[0]
  cfr_player_returns += episode_return[1]

print("Total number of episodes is ", episodes)
print("Number of wins for test player is ", test_player_wins)
print("Number of wins for cfr player is ", cfr_player_wins)
test_player_win_rate = (test_player_wins / episodes) * 100
cfr_player_win_rate = (cfr_player_wins / episodes) * 100
print("Test player win rate is: {:.2f}%".format(test_player_win_rate))
print("CFR player win rate is: {:.2f}%".format(cfr_player_win_rate))
if tied_episodes:
  print("Number of tied episodes is ", tied_episodes)
# Win rate ignores how much is won or lost per episode, so also report the
# average return that was being accumulated above.
print("Test player average return per episode is: {:.4f}".format(test_player_returns / episodes))
print("CFR player average return per episode is: {:.4f}".format(cfr_player_returns / episodes))



Total number of episodes is  10000
Number of wins for test player is  5828
Number of wins for cfr player is  4172
Test player win rate is: 58.28%
CFR player win rate is: 41.72%
