In [1]:
"""Play a series of game between resnet and QuantumNet AlphaGo models"""
import os

# Force OpenMP to use a single thread, which is needed to prevent
# contention between multiple processes.
# NOTE(review): these variables are set before numpy/torch are imported
# below, presumably so the libraries see them when they load — keep this order.
os.environ['OMP_NUM_THREADS'] = '1'
# Limit MKL (the math library numpy may be linked against) to one thread as well.
os.environ['MKL_NUM_THREADS'] = '1'


import multiprocessing as mp
import sys
from absl import flags

import numpy as np
import torch
from torch.optim.lr_scheduler import MultiStepLR

# Handle to absl's global flag registry; the DEFINE_* calls below register on it.
FLAGS = flags.FLAGS

# --- Game rules and observation encoding ---
flags.DEFINE_integer(
    'board_size',
    9,
    'Board size for Go.',
)
flags.DEFINE_float(
    'komi',
    7.5,
    'Komi rule for Go.',
)
flags.DEFINE_integer(
    'num_stack',
    8,
    'Stack N previous states, the state is an image of N x 2 + 1 binary planes.',
)

# --- Network architecture (ResNet baseline and quantum-search variant) ---
flags.DEFINE_integer(
    'num_res_blocks',
    10,
    'Number of residual blocks in the neural network.',
)
flags.DEFINE_integer(
    'num_filters_resnet',
    128,
    'Number of filters for the conv2d layers in the neural network.',
)
flags.DEFINE_integer(
    'num_filters_quantum',
    84,
    'Number of filters for the conv2d layers in the neural network.',
)
flags.DEFINE_integer(
    'max_depth',
    5,
    ' maximum depth for quantum search',
)
flags.DEFINE_integer(
    'branching_width',
    3,
    ' branching_width for quantum search',
)
flags.DEFINE_integer(
    'beam_width',
    1,
    ' beam_width for quantum search',
)
flags.DEFINE_integer(
    'num_fc_units',
    128,
    'Number of hidden units in the linear layer of the neural network.',
)
flags.DEFINE_integer(
    'num_search',
    2,
    ' number of search modules for quantum search',
)


# --- Self-play data collection and replay buffer ---
flags.DEFINE_integer('min_games', 20000, 'Collect number of self-play games before learning starts.')
flags.DEFINE_integer(
    'games_per_ckpt',
    100,
    'Collect minimum number of self-play games using the last checkpoint before creating the next checkpoint.',
)

flags.DEFINE_integer(
    'replay_capacity',
    250000 * 50,
    # Adjacent literals are concatenated verbatim, so the separating space
    # must be written explicitly at the end of the first literal.
    'Replay buffer capacity is number of games * average game length. '
    'Note, 250000 games may need ~30GB of RAM',
)
flags.DEFINE_integer(
    'batch_size',
    1024,
    'To avoid overfitting, we want to make sure the agent only sees ~10% of samples in the replay over one checkpoint. '
    'That is, batch_size * ckpt_interval <= replay_capacity * 0.1',
)

# NOTE(review): the flag name 'argument_data' is presumably a typo for
# 'augment_data'; it is kept unchanged so existing command lines keep working.
flags.DEFINE_bool(
    'argument_data',
    True,
    'Apply random rotation and mirroring to the training data, default on.',
)
flags.DEFINE_bool('compress_data', False, 'Compress state when saving in replay buffer, default off.')

# --- Optimizer and learning-rate schedule ---
flags.DEFINE_float('init_lr', 0.01, 'Initial learning rate.')
flags.DEFINE_float('lr_decay', 0.1, 'Learning rate decay rate.')
flags.DEFINE_multi_integer(
    'lr_milestones',
    [10000, 20000, 40000],
    'The number of training steps at which the learning rate will be decayed.',
)
flags.DEFINE_float('l2_regularization', 1e-4, 'The L2 regularization parameter applied to weights.')
flags.DEFINE_float('sgd_momentum', 0.9, '')

flags.DEFINE_integer(
    'max_training_steps',
    int(5e4),
    'Number of training steps (measured in network parameter update, one batch is one training step).',
)
# --- Self-play actors and MCTS search ---
flags.DEFINE_integer('num_actors', 32, 'Number of self-play actor processes.')
flags.DEFINE_integer(
    'num_simulations',
    200,
    'Number of simulations per MCTS search, this applies to both self-play and evaluation processes.',
)
flags.DEFINE_integer(
    'num_parallel',
    1,
    # Adjacent literals are concatenated verbatim, so the separating space
    # must be written explicitly at the end of the first literal.
    'Number of leaves to collect before using the neural network to evaluate the positions during MCTS search, '
    '1 means no parallel search.',
)
flags.DEFINE_float(
    'c_puct_base',
    19652,
    'Exploration constants balancing priors vs. search values. Original paper use 19652',
)
flags.DEFINE_float(
    'c_puct_init',
    1.25,
    'Exploration constants balancing priors vs. search values. Original paper use 1.25',
)

flags.DEFINE_integer(
    'warm_up_steps',
    16,
    'Number of steps at the beginning of a self-play game where the search temperature is set to 1.',
)

# --- Resignation handling during self-play ---
flags.DEFINE_float(
    'init_resign_threshold',
    -0.88,
    'The self-play game is resigned if MCTS search values are lesser than this threshold. '
    'This value is also dynamically adjusted (decreased) during training to keep the false positive below the target level. '
    '-1 means no resign and it disables all the features related to resignations during self-play.',
)
flags.DEFINE_integer(
    'check_resign_after_steps',
    40,
    'Number steps into the self-play game before checking for resign.',
)
flags.DEFINE_float(
    'target_fp_rate',
    0.05,
    'Target resignation false positives rate, the resignation threshold is dynamically adjusted to keep the false positives rate below this value.',
)
flags.DEFINE_float(
    'disable_resign_ratio',
    0.1,
    'Disable resign for proportion of self-play games so we can measure resignation false positives.',
)
flags.DEFINE_integer(
    'reset_fp_interval',
    100000,
    'The frequency (measured in number of self-play games) to reset resignation threshold, '
    'so statistics from old games do not influence current play.',
)
flags.DEFINE_integer(
    'no_resign_games',
    50000,
    'Initial games played with resignation disable. '
    'This makes sense as when starting out, the prediction from the neural network is not accurate.',
)

# --- Rating, checkpointing and logging cadence ---
flags.DEFINE_float(
    'default_rating',
    1500,
    'Default elo rating, change to the rating (for black) from last checkpoint when resume training.',
)
flags.DEFINE_integer('ckpt_interval', 500, 'The frequency (in training step) to create new checkpoint.')
flags.DEFINE_integer('log_interval', 20, 'The frequency (in training step) to log training statistics.')

# --- Filesystem locations ---
flags.DEFINE_string('ckpt_dir', './checkpoints/go/9x9/quantum', 'Path for checkpoint file.')
flags.DEFINE_string(
    'logs_dir',
    './logs/go/9x9/quantum',
    'Path to save statistics for self-play, training, and evaluation.',
)
flags.DEFINE_string(
    'dataset_dir',
    'go_dataset.pth',
    'Go dataset',
)
flags.DEFINE_string(
    'eval_games_dir',
    './games/pro_games/go/9x9',
    'Path contains evaluation games in sgf format.',
)
flags.DEFINE_string(
    'save_sgf_dir',
    './games/selfplay_games/go/9x9',
    'Path to selfplay and evaluation games in sgf format.',
)
flags.DEFINE_integer('save_sgf_interval', 500, 'How often to save self-play games.')

flags.DEFINE_integer(
    'save_replay_interval',
    0,
    # Adjacent literals are concatenated verbatim, so the separating space
    # must be written explicitly at the end of each literal.
    'The frequency (in number of self-play games) to save the replay buffer state. '
    'So we can resume training without starting from zero. 0 means do not save replay state. '
    'If you set this to a non-zero value, you should make sure the path specified by "FLAGS.ckpt_dir" have at least 100GB of free space.',
)
flags.DEFINE_string('load_ckpt', '', 'Resume training by starting from last checkpoint.')
flags.DEFINE_string('load_replay', '', 'Resume training by loading saved replay buffer state.')

# --- Runtime ---
# The accepted values are enforced by the 'log_level' validator registered in this file.
flags.DEFINE_string('log_level', 'INFO', 'Logging level, either INFO or DEBUG.')
flags.DEFINE_integer('seed', 1, 'Seed the runtime.')

# Sanity checks on flag values. Each validator carries an explicit message so
# a rejected value produces an actionable error instead of a generic or empty one.
flags.register_validator(
    'num_simulations',
    lambda x: x > 1,
    message='--num_simulations must be greater than 1.',
)
flags.register_validator(
    'log_level',
    lambda x: x in ['INFO', 'DEBUG'],
    message='--log_level must be either INFO or DEBUG.',
)
flags.register_multi_flags_validator(
    ['num_parallel', 'c_puct_base'],
    # The checker receives a dict mapping flag name -> value; the parameter is
    # named so that it does not shadow the imported `flags` module.
    lambda flag_values: flag_values['c_puct_base'] >= 19652 * (flag_values['num_parallel'] / 800),
    message='--c_puct_base must be at least 19652 * (num_parallel / 800).',
)


# Initialize flags. known_only=True leaves arguments that match no defined
# flag untouched instead of raising (e.g. extra arguments injected by the
# process that launched this code).
FLAGS(sys.argv, known_only=True)

# Export the board size through the environment.
# NOTE(review): presumably read by the alpha_zero Go environment when it is
# imported later — confirm before moving this line.
os.environ['BOARD_SIZE'] = str(FLAGS.board_size)

In [2]:
from alpha_zero.envs.go import GoEnv
from alpha_zero.core.pipeline import (
    set_seed,
    maybe_create_dir,
)
from alpha_zero.core.single_game import run_game_series
from alpha_zero.core.quantum_net import QuantumAlphaZeroNet
from alpha_zero.core.network import AlphaZeroNet
from alpha_zero.utils.util import extract_args_from_flags_dict, create_logger

Plink failed to import tkinter.


In [3]:
def env_builder():
    """Return a new GoEnv configured from the `komi` and `num_stack` flags."""
    komi = FLAGS.komi
    history_length = FLAGS.num_stack
    return GoEnv(komi=komi, num_stack=history_length)
# Environment instance used both to read the observation/action dimensions
# here and as the `env` argument of the game series further down.
eval_env = env_builder()

# Observation shape and number of actions, passed to the network constructors.
input_shape = eval_env.observation_space.shape
num_actions = eval_env.action_space.n
def network_builder(quantum=True):
    """Construct one of the two competing networks from the current flags.

    Args:
        quantum: When truthy (the default) build a QuantumAlphaZeroNet,
            otherwise build the AlphaZeroNet ResNet baseline.

    Returns:
        A freshly constructed network sized from the module-level
        `input_shape` and `num_actions`.
    """
    if not quantum:
        # ResNet baseline.
        return AlphaZeroNet(
            input_shape,
            num_actions,
            FLAGS.num_res_blocks,
            FLAGS.num_filters_resnet,
            FLAGS.num_fc_units,
        )

    # Quantum-search variant; arguments are positional, in the order the
    # constructor is called with throughout this file.
    return QuantumAlphaZeroNet(
        input_shape,
        num_actions,
        FLAGS.num_filters_quantum,
        FLAGS.max_depth,
        FLAGS.branching_width,
        FLAGS.beam_width,
        FLAGS.num_fc_units,
        FLAGS.num_search,
    )

# Instantiate one network per contender; the weights are restored from the
# checkpoints handed to run_game_series.
resnet_model = network_builder(quantum=False)
quantum_model = network_builder(quantum=True)




In [4]:

# Seed the run through the pipeline helper, using the --seed flag.
set_seed(FLAGS.seed)

# Logger configured with the level from --log_level.
logger = create_logger(FLAGS.log_level)

# Record the resolved flag values at the start of the run for reproducibility.
flag_snapshot = extract_args_from_flags_dict(FLAGS.flag_values_dict())
logger.info(flag_snapshot)

INFO 2024-11-28 22:13:26 98395850.py:6] {'board_size': 9, 'komi': 7.5, 'num_stack': 8, 'num_res_blocks': 10, 'num_filters_resnet': 128, 'num_filters_quantum': 84, 'max_depth': 5, 'branching_width': 3, 'beam_width': 1, 'num_fc_units': 128, 'num_search': 2, 'min_games': 20000, 'games_per_ckpt': 100, 'replay_capacity': 12500000, 'batch_size': 1024, 'argument_data': True, 'compress_data': False, 'init_lr': 0.01, 'lr_decay': 0.1, 'lr_milestones': [10000, 20000, 40000], 'l2_regularization': 0.0001, 'sgd_momentum': 0.9, 'max_training_steps': 50000, 'num_actors': 32, 'num_simulations': 200, 'num_parallel': 1, 'c_puct_base': 19652.0, 'c_puct_init': 1.25, 'warm_up_steps': 16, 'init_resign_threshold': -0.88, 'check_resign_after_steps': 40, 'target_fp_rate': 0.05, 'disable_resign_ratio': 0.1, 'reset_fp_interval': 100000, 'no_resign_games': 50000, 'default_rating': 1500.0, 'ckpt_interval': 500, 'log_interval': 20, 'ckpt_dir': './checkpoints/go/9x9/quantum', 'logs_dir': './logs/go/9x9/quantum', 'dat

In [7]:
# Use the GPU when one is available and fall back to the CPU otherwise, so
# `learner_device` is always defined (previously a CPU-only machine hit a
# NameError at the call below).
learner_device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Play the series between the two networks. network_1/name_1 is the ResNet
# baseline and network_2/name_2 the quantum variant; each is restored from
# the checkpoint path given by the matching load_ckpt_* argument.
run_game_series(
    seed=FLAGS.seed,
    network_1=resnet_model,
    network_2=quantum_model,
    name_1='resnet',
    name_2='quantum',
    device=learner_device,
    env=eval_env,

    num_simulations=FLAGS.num_simulations,
    num_parallel=FLAGS.num_parallel,

    c_puct_base=FLAGS.c_puct_base,
    c_puct_init=FLAGS.c_puct_init,
    default_rating=FLAGS.default_rating,
    logs_dir=FLAGS.logs_dir,
    load_ckpt_1='./checkpoints/go/9x9/resnets/network/training_steps_5500.ckpt',
    load_ckpt_2='./checkpoints/go/9x9/quantum/d_5s_2br_3f_84be_1/training_steps_6500.ckpt',

    log_level=FLAGS.log_level,
)

  loaded_state_1 = torch.load(load_ckpt_1, map_location=device)
INFO 2024-11-28 22:15:43 single_game.py:219] resnet model loaded from checkpoint: ./checkpoints/go/9x9/resnets/network/training_steps_5500.ckpt
  loaded_state_2 = torch.load(load_ckpt_2, map_location=device)
INFO 2024-11-28 22:15:43 single_game.py:225] quantum model loaded from checkpoint: ./checkpoints/go/9x9/quantum/d_5s_2br_3f_84be_1/training_steps_6500.ckpt
INFO 2024-11-28 22:15:43 single_game.py:254] Playing one game: resnet (Black) vs quantum (white)


Go (9x9)
Black: X, White: O

Game over: No, Result: W+7.5
Steps: 0, Current player: X

X captures: 0, O captures: 0 

     A  B  C  D  E  F  G  H  J  
   +---------------------------+
9  | .  .  .  .  .  .  .  .  . | 9 
8  | .  .  .  .  .  .  .  .  . | 8 
7  | .  .  .  .  .  .  .  .  . | 7 
6  | .  .  .  .  .  .  .  .  . | 6 
5  | .  .  .  .  .  .  .  .  . | 5 
4  | .  .  .  .  .  .  .  .  . | 4 
3  | .  .  .  .  .  .  .  .  . | 3 
2  | .  .  .  .  .  .  .  .  . | 2 
1  | .  .  .  .  .  .  .  .  . | 1 
   +---------------------------+
     A  B  C  D  E  F  G  H  J  

Go (9x9)
Black: X, White: O

Game over: No, Result: B+73.5
Steps: 1, Current player: O

X captures: 0, O captures: 0 

     A  B  C  D  E  F  G  H  J  
   +---------------------------+
9  | .  .  .  .  .  .  .  .  . | 9 
8  | .  .  .  .  .  .  .  .  . | 8 
7  | .  .  .  .  .  .  .  .  . | 7 
6  | .  . (X) .  .  .  .  .  . | 6 
5  | .  .  .  .  .  .  .  .  . | 5 
4  | .  .  .  .  .  .  .  .  . | 4 
3  | .  .  .  .  .  .  .

INFO 2024-11-28 22:19:19 single_game.py:269] Game result: quantum wins as White
INFO 2024-11-28 22:19:19 single_game.py:270] Elo rating : resnet: 1484.736306793522, quantum: 1516.0
INFO 2024-11-28 22:19:19 single_game.py:271] Game length: 98 moves


Go (9x9)
Black: X, White: O

Game over: No, Result: W+46.5
Steps: 97, Current player: O

X captures: 3, O captures: 27 

     A  B  C  D  E  F  G  H  J  
   +---------------------------+
9  | O  O  O  O  O  .  O  O  O | 9 
8  | .  O  O  O  O  O  O  O  . | 8 
7  | .  O  O  O  O  .  O  O  O | 7 
6  | O  O  .  O  O  O  .  O  . | 6 
5  | .  O  O  X  O  .  O  .  O | 5 
4  | O  O  O  X  X  O  .  O  . | 4 
3  | X  X  O  X  X  X  O  .  O | 3 
2  | X  X  O  X  X  .  X  O  . | 2 
1  | X  X  X  X  X  X  X  O  O | 1 
   +---------------------------+
     A  B  C  D  E  F  G  H  J  

Go (9x9)
Black: X, White: O

Game over: Yes, Result: W+46.5
Steps: 98, Current player: X

X captures: 3, O captures: 27 

     A  B  C  D  E  F  G  H  J  
   +---------------------------+
9  | O  O  O  O  O  .  O  O  O | 9 
8  | .  O  O  O  O  O  O  O  . | 8 
7  | .  O  O  O  O  .  O  O  O | 7 
6  | O  O  .  O  O  O  .  O  . | 6 
5  | .  O  O  X  O  .  O  .  O | 5 
4  | O  O  O  X  X  O  .  O  . | 4 
3  | X  X  O  X  X