# **Setup, install dependencies**

In [None]:
# Clone the alpha-zero-general repository (requirements are installed in a later cell).

!git clone https://github.com/suragnair/alpha-zero-general.git

Cloning into 'alpha-zero-general'...
remote: Enumerating objects: 1045, done.[K
remote: Total 1045 (delta 0), reused 0 (delta 0), pack-reused 1045[K
Receiving objects: 100% (1045/1045), 423.63 MiB | 24.50 MiB/s, done.
Resolving deltas: 100% (575/575), done.
Checking out files: 100% (109/109), done.


In [None]:
# Work from the repository root so the project-local imports (Coach, utils, dotsandboxes)
# and the relative paths used below ('./temp/', 'pretrained_models/...') resolve.
# NOTE(review): '/content' looks like the Colab working directory — adjust when running elsewhere.
%cd '/content/alpha-zero-general'

/content/alpha-zero-general


In [None]:
!git checkout -t origin/master

Branch 'dotsandboxes' set up to track remote branch 'dotsandboxes' from 'origin'.
Switched to a new branch 'dotsandboxes'


In [None]:
# Install the pinned dependencies listed in the repository's docker/requirements.txt.
!pip install -r docker/requirements.txt

Collecting cffi==1.11.5
[?25l  Downloading https://files.pythonhosted.org/packages/6d/c0/47db8f624f3e4e2f3f27be03a93379d1ba16a1450a7b1aacfa0366e2c0dd/cffi-1.11.5-cp36-cp36m-manylinux1_x86_64.whl (421kB)
[K     |████████████████████████████████| 430kB 8.7MB/s 
[?25hCollecting coloredlogs==14.0
[?25l  Downloading https://files.pythonhosted.org/packages/5c/2f/12747be360d6dea432e7b5dfae3419132cb008535cfe614af73b9ce2643b/coloredlogs-14.0-py2.py3-none-any.whl (43kB)
[K     |████████████████████████████████| 51kB 8.4MB/s 
[?25hCollecting cython==0.28.3
[?25l  Downloading https://files.pythonhosted.org/packages/6f/79/d8e2cd00bea8156a995fb284ce7b6677c49eccd2d318f73e201a9ce560dc/Cython-0.28.3-cp36-cp36m-manylinux1_x86_64.whl (3.4MB)
[K     |████████████████████████████████| 3.4MB 19.0MB/s 
[?25hCollecting flask==1.0
[?25l  Downloading https://files.pythonhosted.org/packages/55/b1/4365193655df97227ace49311365cc296e74b60c7f5c63d23cd30175e2f6/Flask-1.0-py2.py3-none-any.whl (97kB)
[K     

# **Train AlphaZero**

In [None]:
import logging
import coloredlogs
from Coach import Coach
from utils import dotdict
from dotsandboxes.keras.NNet import NNetWrapper
from dotsandboxes.DotsAndBoxesGame import DotsAndBoxesGame

In [None]:
# Logger for this notebook's own messages.
log = logging.getLogger(__name__)
# Colourised console logging; the Coach progress messages shown during training are emitted at INFO level.
coloredlogs.install(level='INFO')  # Change this to DEBUG to see more info.

In [None]:
# Training configuration handed to Coach. Several values are overridden further down
# so that the notebook can run end to end in a short time.
args = dotdict({
    'numIters': 1000,           # Number of training iterations (each one: self-play, training, arena comparison).
    'numEps': 100,              # Number of complete self-play games to simulate during a new iteration.
    'tempThreshold': 15,        # NOTE(review): presumably the move count after which self-play stops sampling moves with temperature — confirm in Coach.
    'updateThreshold': 0.6,     # During arena playoff, new neural net will be accepted if threshold or more of games are won.
    'maxlenOfQueue': 200000,    # Number of game examples to train the neural networks.
    'numMCTSSims': 25,          # Number of game moves for MCTS to simulate.
    'arenaCompare': 40,         # Number of games to play during arena play to determine if new net will be accepted.
    'cpuct': 1,                 # NOTE(review): presumably the exploration constant used by MCTS — confirm in MCTS.
    'checkpoint': './temp/',    # Folder where checkpoints are written (listed at the end of the notebook).
    'load_model': False,        # Set to True below when a pre-trained model is found.
    'numItersForTrainExamplesHistory': 20,  # NOTE(review): presumably how many iterations of training examples are retained — confirm in Coach.
})

In [None]:
# If you have a pre-trained model, you can load it here.
import os

# Single source of truth for the checkpoint location, so that the existence
# check and the value handed to the loader can never drift apart.
PRETRAINED_FOLDER = 'pretrained_models/dotsandboxes/keras/3x3'
PRETRAINED_FILE = 'best.pth.tar'

# The checkpoint is detected through its '<file>.index' companion file.
if os.path.exists(os.path.join(PRETRAINED_FOLDER, PRETRAINED_FILE + '.index')):
    print("Using best pre-existing model")
    args['load_model'] = True
    args['load_folder_file'] = (PRETRAINED_FOLDER, PRETRAINED_FILE)
else:
    # args is left untouched, so 'load_model' keeps whatever value it already had.
    print("Not using best pre-existing model")

Using best pre-existing model


In [None]:
# Shrink the run so that the whole notebook can execute in a few minutes.
# A real training run, even for a game as simple as Dots and Boxes, can take several hours or days.
for quick_key in ('numIters', 'numEps', 'arenaCompare'):
    args[quick_key] = 2

In [None]:
# Dots and Boxes game with n=3.
# NOTE(review): presumably the board size matching the '3x3' pre-trained model folder — confirm.
game = DotsAndBoxesGame(n=3)

In [None]:
# Neural-network wrapper (from dotsandboxes.keras.NNet) built for this game instance.
nnet = NNetWrapper(game)

In [None]:
# Restore network weights when a pre-trained model was selected; otherwise keep the fresh network.
if not args.load_model:
    print('Not loading a checkpoint.')
else:
    ckpt_folder = args.load_folder_file[0]
    ckpt_file = args.load_folder_file[1]
    print('Loading checkpoint "{}/{}"...'.format(ckpt_folder, ckpt_file))
    nnet.load_checkpoint(ckpt_folder, ckpt_file)

Loading checkpoint "pretrained_models/dotsandboxes/keras/3x3/best.pth.tar"...


In [None]:
# Coach ties together the game, the network and the training configuration.
coach = Coach(game, nnet, args)

In [None]:
# Run the training loop; %time reports the CPU and wall-clock time once it finishes.
%time coach.learn()

2020-11-08 02:05:04 dece90a6a033 Coach[66] INFO Starting Iter #1 ...
Self Play: 100%|██████████| 2/2 [00:25<00:00, 12.62s/it]


Checkpoint Directory exists! 
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


2020-11-08 02:05:31 dece90a6a033 Coach[66] INFO PITTING AGAINST PREVIOUS VERSION
Arena.playGames (1): 100%|██████████| 1/1 [00:14<00:00, 14.93s/it]
Arena.playGames (2): 100%|██████████| 1/1 [00:12<00:00, 13.00s/it]
2020-11-08 02:05:59 dece90a6a033 Coach[66] INFO NEW/PREV WINS : 1 / 1 ; DRAWS : 0
2020-11-08 02:05:59 dece90a6a033 Coach[66] INFO REJECTING NEW MODEL
2020-11-08 02:05:59 dece90a6a033 Coach[66] INFO Starting Iter #2 ...
Self Play: 100%|██████████| 2/2 [00:21<00:00, 10.60s/it]


Checkpoint Directory exists! 
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


2020-11-08 02:06:21 dece90a6a033 Coach[66] INFO PITTING AGAINST PREVIOUS VERSION
Arena.playGames (1): 100%|██████████| 1/1 [00:13<00:00, 13.94s/it]
Arena.playGames (2): 100%|██████████| 1/1 [00:11<00:00, 11.99s/it]
2020-11-08 02:06:47 dece90a6a033 Coach[66] INFO NEW/PREV WINS : 1 / 1 ; DRAWS : 0
2020-11-08 02:06:47 dece90a6a033 Coach[66] INFO REJECTING NEW MODEL


CPU times: user 1min 39s, sys: 6.36 s, total: 1min 46s
Wall time: 1min 43s


In [None]:
# Checkpoints and best model (if found) will be saved in this folder
%ls /content/alpha-zero-general/temp

checkpoint                     temp.pth.tar.data-00000-of-00001
checkpoint_0.pth.tar.examples  temp.pth.tar.index
checkpoint_1.pth.tar.examples
