# **Train AlphaZero**

In [6]:
import logging
from Coach import Coach
from utils import dotdict
from dotsandboxes.keras.NNet import NNetWrapper
from dotsandboxes.DotsAndBoxesGame import DotsAndBoxesGame

In [None]:
# Configure logging for the notebook.
# coloredlogs is imported here because no earlier cell imports it; without this
# import the install() call below fails with NameError on a fresh kernel.
# If the package is not installed, fall back to the standard library so the
# notebook still runs (just without colored output).
try:
    import coloredlogs
except ImportError:
    coloredlogs = None

log = logging.getLogger(__name__)
if coloredlogs is not None:
    coloredlogs.install(level='INFO')  # Change this to DEBUG to see more info.
else:
    logging.basicConfig(level=logging.INFO)  # Change this to logging.DEBUG to see more info.

In [7]:
# Training configuration; this object is handed to Coach together with the game and the network.
args = dotdict(dict(
    numIters=1000,
    numEps=100,                          # Complete self-play games simulated in each new iteration.
    tempThreshold=15,                    # NOTE(review): undocumented in the original; presumably a move-count cutoff for the MCTS temperature -- confirm in Coach.
    updateThreshold=0.6,                 # Arena playoff: the new net is accepted when it wins this fraction of games or more.
    maxlenOfQueue=200000,                # Number of game examples used to train the neural networks.
    numMCTSSims=25,                      # Number of game moves for MCTS to simulate.
    arenaCompare=40,                     # Games played in the arena to decide whether the new net is accepted.
    cpuct=1,
    checkpoint='./temp/',
    load_model=False,
    numItersForTrainExamplesHistory=20,
))

In [9]:
# Resume from a pre-trained model when its checkpoint index file is present on disk;
# otherwise leave the configuration untouched.
import os

pretrained_index = os.path.join('pretrained_models', 'dotsandboxes', 'keras', '3x3', 'best.pth.tar.index')
if not os.path.exists(pretrained_index):
    print("Not using best pre-existing model")
else:
    print("Using best pre-existing model")
    args['load_model'] = True
    args['load_folder_file'] = ('pretrained_models/dotsandboxes/keras/3x3', 'best.pth.tar')

Not using best pre-existing model


In [10]:
# Shrink the run so that this notebook can execute from start to finish.
# Real training, even for a model as simple as the Dots and Boxes one,
# can take several hours or days.
for option, quick_value in (('numIters', 2), ('numEps', 2), ('arenaCompare', 2)):
    args[option] = quick_value

In [11]:
# Create the Dots and Boxes game with n=3.
# NOTE(review): n=3 presumably corresponds to the '3x3' pretrained-model folder checked earlier -- keep the two in sync.
game = DotsAndBoxesGame(n=3)

In [12]:
# Build the Keras network wrapper for this game.
nnet = NNetWrapper(game)

2022-12-25 10:32:28.780151: I tensorflow/compiler/jit/xla_cpu_device.cc:41] Not creating XLA devices, tf_xla_enable_xla_devices not set
2022-12-25 10:32:28.792314: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-12-25 10:32:28.874938: I tensorflow/core/common_runtime/process_util.cc:146] Creating new thread pool with default inter op setting: 2. Tune using inter_op_parallelism_threads for best performance.


In [13]:
# Restore network weights only when args.load_model is set.
if not args.load_model:
    print('Not loading a checkpoint.')
else:
    checkpoint_folder = args.load_folder_file[0]
    checkpoint_name = args.load_folder_file[1]
    print('Loading checkpoint "{}/{}"...'.format(checkpoint_folder, checkpoint_name))
    nnet.load_checkpoint(checkpoint_folder, checkpoint_name)

Not loading a checkpoint.


In [14]:
# Create the Coach from the game, the network wrapper and the training configuration.
coach = Coach(game, nnet, args)

In [15]:
# Start training via coach.learn(); %time reports the CPU and wall time of the call.
# With the reduced settings, the recorded run took about 16 minutes of wall time.
%time coach.learn()

Self Play:   0%|          | 0/2 [00:00<?, ?it/s]2022-12-25 10:33:10.714196: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:116] None of the MLIR optimization passes are enabled (registered 2)
2022-12-25 10:33:10.838231: I tensorflow/core/platform/profile_utils/cpu_utils.cc:112] CPU Frequency: 2494355000 Hz
Self Play: 100%|██████████| 2/2 [03:45<00:00, 113.00s/it]


Checkpoint Directory exists! 
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


Arena.playGames (1): 100%|██████████| 1/1 [02:13<00:00, 133.40s/it]
Arena.playGames (2): 100%|██████████| 1/1 [01:48<00:00, 108.93s/it]
Self Play:   0%|          | 0/2 [00:00<?, ?it/s]

Checkpoint Directory exists! 
Checkpoint Directory exists! 


Self Play: 100%|██████████| 2/2 [03:37<00:00, 108.58s/it]


Checkpoint Directory exists! 
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


Arena.playGames (1): 100%|██████████| 1/1 [01:56<00:00, 116.10s/it]
Arena.playGames (2): 100%|██████████| 1/1 [01:50<00:00, 110.96s/it]

CPU times: user 25min 51s, sys: 24min 15s, total: 50min 6s
Wall time: 15min 52s





In [None]:
# Checkpoints and best model (if found) will be saved in this folder
%ls /content/alpha-zero-general/temp

checkpoint                     temp.pth.tar.data-00000-of-00001
checkpoint_0.pth.tar.examples  temp.pth.tar.index
checkpoint_1.pth.tar.examples
