## Train Deep-CFR Agent for Leduc

In [1]:
import os
import sys
import time
from os.path import dirname, abspath

# Directory added to the module search path so the project imports in the
# next cell can resolve (presumably the root of a Deep-CFR checkout -- confirm).
# Set the DEEP_CFR_PATH environment variable to point somewhere else; when it
# is unset, the original hard-coded location is used.
DEEP_CFR_PATH = os.environ.get("DEEP_CFR_PATH", "/home/leduc/Deep-CFR/")

# Guarded so that re-running this cell does not pile up duplicate entries.
if DEEP_CFR_PATH not in sys.path:
    sys.path.append(DEEP_CFR_PATH)

In [2]:
import numpy as np

from PokerRL.game.games import StandardLeduc  # or any other game

from DeepCFR.EvalAgentDeepCFR import EvalAgentDeepCFR
from DeepCFR.TrainingProfile import TrainingProfile
from DeepCFR.workers.driver.Driver import Driver

def generate_cfr_agent(n_iters=100):
    """Train an agent on StandardLeduc and export it.

    Builds a non-distributed ``TrainingProfile`` whose name is
    ``"SD-CFR_LEDUC_EXAMPLE_"`` followed by ``str(n_iters)``, hands it to a
    ``Driver`` together with the ``"br"`` eval method, runs the driver, then
    exports the agent at the driver's ``n_iterations`` step and prints a
    confirmation line.

    Args:
        n_iters: Forwarded to ``Driver`` as ``n_iterations`` and appended to
            the profile name. Defaults to 100.

    Returns:
        None.
    """
    profile_name = f"SD-CFR_LEDUC_EXAMPLE_{n_iters!s}"

    # One or both modes may be listed; listing both is useful to compare them.
    algo_eval_modes = (
        EvalAgentDeepCFR.EVAL_MODE_SINGLE,  # SD-CFR
        EvalAgentDeepCFR.EVAL_MODE_AVRG_NET,  # Deep CFR
    )

    training_profile = TrainingProfile(
        name=profile_name,
        game_cls=StandardLeduc,
        DISTRIBUTED=False,
        eval_modes_of_algo=algo_eval_modes,
        eval_agent_export_freq=20,  # export API to play against the agent

        # Network architecture.
        nn_type="feedforward",
        use_pre_layers_adv=False,
        use_pre_layers_avrg=False,
        n_merge_and_table_layer_units_adv=64,
        n_merge_and_table_layer_units_avrg=64,
        n_units_final_adv=64,
        n_units_final_avrg=64,

        # Data generation and training schedule.
        max_buffer_size_adv=3e6,
        n_traversals_per_iter=1500,
        n_batches_adv_training=750,
        n_batches_avrg_training=2000,
        mini_batch_size_adv=2048,
        mini_batch_size_avrg=2048,
        init_adv_model="last",
        init_avrg_model="last",
    )

    driver = Driver(
        t_prof=training_profile,
        eval_methods={"br": 3},
        n_iterations=n_iters,
    )
    driver.run()

    # Agent is saved under poker_ai_data.
    driver.chief_handle.export_agent(step=driver.n_iterations)
    print("Saved agent.")

In [None]:
# Train and export one agent per entry; add more values to sweep several
# iteration counts in a single run.
iteration_counts = (200,)
for n_iters in iteration_counts:
    generate_cfr_agent(n_iters=n_iters)

 ************************** Initing args for:  SD-CFR_LEDUC_EXAMPLE_200   **************************
Creating Chief...
Creating BR Evaluator...
Tree with stack size [13, 13] has 464 nodes out of which 190 are non-terminal.
Creating LAs...
Creating Parameter Servers...
Created and initialized Workers
Setting stuff up...
Starting Training...
Iteration:  0
Training Average Nets...
Evaluating vs. BR
Exporting agent
Generating Data...
Training Advantage Net...
Pushing new net to chief...
Generating Data...
Training Advantage Net...
Pushing new net to chief...
Synchronizing...
Generating Data:  29.50255560874939s.   ||  Trained ADV 51.92070007324219s.   ||  Synced ADV 2.6526036262512207s. 

Trained AVRG 0.0s.   ||  Synced AVRG 0.0s. 

Iteration:  1
Generating Data...
Training Advantage Net...
Pushing new net to chief...
Generating Data...
Training Advantage Net...
Pushing new net to chief...
Synchronizing...
Generating Data:  26.696146726608276s.   ||  Trained ADV 35.01076674461365s.   ||  S

Evaluating vs. BR
Generating Data...
Training Advantage Net...
Pushing new net to chief...
Generating Data...
Training Advantage Net...
Pushing new net to chief...
Synchronizing...
Generating Data:  31.34698176383972s.   ||  Trained ADV 38.00226616859436s.   ||  Synced ADV 2.9643194675445557s. 

Trained AVRG 79.99724054336548s.   ||  Synced AVRG 6.265047788619995s. 

Iteration:  25
Generating Data...
Training Advantage Net...
Pushing new net to chief...
Generating Data...
Training Advantage Net...
Pushing new net to chief...
Synchronizing...
Generating Data:  34.452099084854126s.   ||  Trained ADV 26.57880663871765s.   ||  Synced ADV 2.437235116958618s. 

Iteration:  26
Generating Data...
Training Advantage Net...
Pushing new net to chief...
Generating Data...
Training Advantage Net...
Pushing new net to chief...
Synchronizing...
Generating Data:  32.65480017662048s.   ||  Trained ADV 30.234855890274048s.   ||  Synced ADV 2.588279962539673s. 

Iteration:  27
Training Average Nets...
Ev

Generating Data...
Training Advantage Net...
Pushing new net to chief...
Generating Data...
Training Advantage Net...
Pushing new net to chief...
Synchronizing...
Generating Data:  32.709619760513306s.   ||  Trained ADV 28.88380241394043s.   ||  Synced ADV 2.915855646133423s. 

Iteration:  50
Generating Data...
Training Advantage Net...
Pushing new net to chief...
Generating Data...
Training Advantage Net...
Pushing new net to chief...
Synchronizing...
Generating Data:  32.80039620399475s.   ||  Trained ADV 24.114280939102173s.   ||  Synced ADV 2.1594808101654053s. 

Iteration:  51
Training Average Nets...
Evaluating vs. BR
Generating Data...
Training Advantage Net...
Pushing new net to chief...
Generating Data...
Training Advantage Net...
Pushing new net to chief...
Synchronizing...
Generating Data:  32.6098849773407s.   ||  Trained ADV 26.682167768478394s.   ||  Synced ADV 2.7858502864837646s. 

Trained AVRG 87.87719750404358s.   ||  Synced AVRG 5.680667161941528s. 

Iteration:  52
G

Generating Data...
Training Advantage Net...
Pushing new net to chief...
Generating Data...
Training Advantage Net...
Pushing new net to chief...
Synchronizing...
Generating Data:  32.79619002342224s.   ||  Trained ADV 36.43665909767151s.   ||  Synced ADV 2.571838140487671s. 

Iteration:  75
Training Average Nets...
Evaluating vs. BR
Generating Data...
Training Advantage Net...
Pushing new net to chief...
Generating Data...
Training Advantage Net...
Pushing new net to chief...
Synchronizing...
Generating Data:  29.296754121780396s.   ||  Trained ADV 33.80748987197876s.   ||  Synced ADV 2.246035099029541s. 

Trained AVRG 83.7460823059082s.   ||  Synced AVRG 6.40618109703064s. 

Iteration:  76
Generating Data...
Training Advantage Net...
Pushing new net to chief...
Generating Data...
Training Advantage Net...
Pushing new net to chief...
Synchronizing...
Generating Data:  29.441888570785522s.   ||  Trained ADV 27.89240550994873s.   ||  Synced ADV 2.4786338806152344s. 

Iteration:  77
Gene

Evaluating vs. BR
Generating Data...
Training Advantage Net...
Pushing new net to chief...
Generating Data...
Training Advantage Net...
Pushing new net to chief...
Synchronizing...
Generating Data:  33.486419916152954s.   ||  Trained ADV 41.184141397476196s.   ||  Synced ADV 2.4278199672698975s. 

Trained AVRG 69.93852162361145s.   ||  Synced AVRG 5.214478492736816s. 

Iteration:  100
Exporting agent
Generating Data...
Training Advantage Net...
Pushing new net to chief...
Generating Data...
Training Advantage Net...
Pushing new net to chief...
Synchronizing...
Generating Data:  34.302008867263794s.   ||  Trained ADV 48.91893005371094s.   ||  Synced ADV 3.810840368270874s. 

Iteration:  101
Generating Data...
Training Advantage Net...
Pushing new net to chief...
Generating Data...
Training Advantage Net...
Pushing new net to chief...
Synchronizing...
Generating Data:  34.53471803665161s.   ||  Trained ADV 27.854181051254272s.   ||  Synced ADV 2.8214428424835205s. 

Iteration:  102
Train

Generating Data...
Training Advantage Net...
Pushing new net to chief...
Generating Data...
Training Advantage Net...
Pushing new net to chief...
Synchronizing...
Generating Data:  34.36822152137756s.   ||  Trained ADV 40.9697060585022s.   ||  Synced ADV 2.4841079711914062s. 

Iteration:  125
Generating Data...
Training Advantage Net...
Pushing new net to chief...
Generating Data...
Training Advantage Net...
Pushing new net to chief...
Synchronizing...
Generating Data:  32.94235134124756s.   ||  Trained ADV 55.42709732055664s.   ||  Synced ADV 3.0127689838409424s. 

Iteration:  126
Training Average Nets...
Evaluating vs. BR
Generating Data...
Training Advantage Net...
Pushing new net to chief...
Generating Data...
Training Advantage Net...
Pushing new net to chief...
Synchronizing...
Generating Data:  30.251599073410034s.   ||  Trained ADV 42.7046844959259s.   ||  Synced ADV 2.88909649848938s. 

Trained AVRG 58.97218990325928s.   ||  Synced AVRG 5.086429834365845s. 

Iteration:  127
Ge