## Train Deep-CFR Agent for Leduc

In [1]:
import time, sys
from os.path import dirname, abspath

# Make the local Deep-CFR checkout importable for the cells below.
# The membership guard keeps this cell idempotent: re-running it no longer
# appends a duplicate entry to sys.path each time.
# NOTE(review): this is a machine-specific absolute path; anyone running the
# notebook elsewhere must point it at their own Deep-CFR checkout.
_DEEP_CFR_ROOT = "/home/leduc/Deep-CFR/"
if _DEEP_CFR_ROOT not in sys.path:
    sys.path.append(_DEEP_CFR_ROOT)

In [7]:
import numpy as np

from PokerRL.game.games import StandardLeduc  # or any other game

from DeepCFR.EvalAgentDeepCFR import EvalAgentDeepCFR
from DeepCFR.TrainingProfile import TrainingProfile
from DeepCFR.workers.driver.Driver import Driver

def generate_cfr_agent(n_iters=100):
    """Train a Deep-CFR / SD-CFR agent on StandardLeduc and export it.

    Builds a ``TrainingProfile`` named ``"SD-CFR_LEDUC_EXAMPLE_<n_iters>"``,
    passes it to a non-distributed ``Driver`` set up for ``n_iters``
    iterations, runs the driver, exports the agent at step
    ``ctrl.n_iterations`` and prints a confirmation message.

    Args:
        n_iters: Forwarded to ``Driver`` as ``n_iterations`` and appended to
            the training-profile name so that runs with different budgets
            get distinct names.

    Returns:
        None. The function is used for its side effects (training, export,
        and the printed message).
    """
    profile_name = "SD-CFR_LEDUC_EXAMPLE_" + str(n_iters)

    # You can specify one or both modes. Choosing both is useful to compare them.
    algo_eval_modes = (
        EvalAgentDeepCFR.EVAL_MODE_SINGLE,  # SD-CFR
        EvalAgentDeepCFR.EVAL_MODE_AVRG_NET,  # Deep CFR
    )

    training_profile = TrainingProfile(
        name=profile_name,
        game_cls=StandardLeduc,
        nn_type="feedforward",
        DISTRIBUTED=False,
        eval_modes_of_algo=algo_eval_modes,
        eval_agent_export_freq=20,  # export API to play against the agent
        n_traversals_per_iter=1500,

        # Advantage-network settings.
        max_buffer_size_adv=3e6,
        n_batches_adv_training=750,
        n_merge_and_table_layer_units_adv=64,
        n_units_final_adv=64,
        mini_batch_size_adv=2048,
        init_adv_model="last",
        use_pre_layers_adv=False,

        # Average-network settings.
        n_batches_avrg_training=2000,
        n_merge_and_table_layer_units_avrg=64,
        n_units_final_avrg=64,
        mini_batch_size_avrg=2048,
        init_avrg_model="last",
        use_pre_layers_avrg=False,
    )

    # NOTE(review): "br" presumably selects best-response evaluation and 3 its
    # frequency in iterations -- confirm against the Driver implementation.
    evaluation_schedule = {"br": 3}

    ctrl = Driver(
        t_prof=training_profile,
        eval_methods=evaluation_schedule,
        n_iterations=n_iters,
    )
    ctrl.run()

    # Per the original author's note, the agent is saved under poker_ai_data.
    ctrl.chief_handle.export_agent(step=ctrl.n_iterations)
    print("Saved agent.")

In [None]:
# Train and export one agent per iteration budget, smallest first, so agents
# trained for different numbers of iterations are all available afterwards.
for n_iters in (2, 5, 10, 30, 100):
    generate_cfr_agent(n_iters)

 ************************** Initing args for:  SD-CFR_LEDUC_EXAMPLE_2   **************************
Creating Chief...
Creating BR Evaluator...
Tree with stack size [13, 13] has 464 nodes out of which 190 are non-terminal.
Creating LAs...
Creating Parameter Servers...
Created and initialized Workers
Setting stuff up...
Starting Training...
Iteration:  0
Training Average Nets...
Evaluating vs. BR
Exporting agent
Generating Data...
Training Advantage Net...
Pushing new net to chief...
Generating Data...
Training Advantage Net...
Pushing new net to chief...
Synchronizing...
Generating Data:  30.351865530014038s.   ||  Trained ADV 29.27119541168213s.   ||  Synced ADV 2.386425495147705s. 

Trained AVRG 0.0s.   ||  Synced AVRG 0.0s. 

Iteration:  1
Generating Data...
Training Advantage Net...
Pushing new net to chief...
Generating Data...
Training Advantage Net...
Pushing new net to chief...
Synchronizing...
Generating Data:  21.42894148826599s.   ||  Trained ADV 15.914569854736328s.   ||  Syn

Generating Data...
Training Advantage Net...
Pushing new net to chief...
Generating Data...
Training Advantage Net...
Pushing new net to chief...
Synchronizing...
Generating Data:  30.020787954330444s.   ||  Trained ADV 21.653237104415894s.   ||  Synced ADV 2.489680290222168s. 

Trained AVRG 64.39473032951355s.   ||  Synced AVRG 7.406674385070801s. 

Iteration:  4
Generating Data...
Training Advantage Net...
Pushing new net to chief...
Generating Data...
Training Advantage Net...
Pushing new net to chief...
Synchronizing...
Generating Data:  32.731674671173096s.   ||  Trained ADV 19.693359375s.   ||  Synced ADV 2.2512307167053223s. 

Iteration:  5
Generating Data...
Training Advantage Net...
Pushing new net to chief...
Generating Data...
Training Advantage Net...
Pushing new net to chief...
Synchronizing...
Generating Data:  32.1916081905365s.   ||  Trained ADV 31.30985999107361s.   ||  Synced ADV 3.2721028327941895s. 

Iteration:  6
Training Average Nets...
Evaluating vs. BR
Generatin

Generating Data...
Training Advantage Net...
