In [6]:
import torch
import logging
import matplotlib.pyplot as plt

# Enable cuDNN's benchmark mode (auto-selects convolution algorithms); set once, before any model is built.
torch.backends.cudnn.benchmark = True

from core.evaluation.mcts_hypers import MCTSHypers
from core.resnet import TurboZeroResnet, TurboZeroArchParams
from core.training.training_hypers import TurboZeroHypers
from envs.othello.evaluator import OthelloMCTS
from envs.othello.trainer import OthelloTrainer, load_checkpoint
from core.utils.custom_activations import Tanh0to1

# Send INFO-and-above records, prefixed with a timestamp, to a log file that is
# opened in append mode so earlier runs are kept.
logging.basicConfig(
    filename='training_othello.log',
    filemode='a',
    level=logging.INFO,
    format='%(asctime)s %(message)s',
)
logging.info('Starting training')

In [7]:
# Environment count handed to the training evaluator and to the trainer.
NUM_PARALLEL_ENVS = 8
# Checkpoint to resume from; leave empty to build a fresh trainer instead.
CHECKPOINT_FILE = ''
# Prefer the GPU when one is available, otherwise fall back to the CPU.
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Forwarded as `debug=` below; per the original note, enabling it disables JIT compilation.
DEBUG = False

In [None]:
# Build `trainer`: resume from CHECKPOINT_FILE when one is configured, otherwise
# assemble a new network, optimizer and pair of MCTS evaluators from scratch.
if CHECKPOINT_FILE:
    trainer = load_checkpoint(NUM_PARALLEL_ENVS, CHECKPOINT_FILE, DEVICE, debug=DEBUG)
else:
    # --- Network architecture -------------------------------------------------
    # NOTE(review): 65 is presumably 8*8 board squares + 1 pass move — confirm.
    arch_params = TurboZeroArchParams(
        input_size=torch.Size((2, 8, 8)),
        policy_size=65,
        res_channels=128,
        res_blocks=8,
        value_head_res_channels=128,
        value_head_res_blocks=4,
        policy_head_res_channels=128,
        policy_head_res_blocks=4,
        kernel_size=3,
        value_output_activation=Tanh0to1(),
    )

    # --- Training hyperparameters ---------------------------------------------
    # The replay-memory minimum equals its capacity (both 30000).
    hypers = TurboZeroHypers(
        learning_rate=5e-5,
        replay_memory_size=30000,
        replay_memory_min_size=30000,
        minibatch_size=4096,
        minibatches_per_update=1,
        train_episodes_per_epoch=10000,
        test_episodes_per_epoch=1024,
        temperature_train=1.0,
        temperature_test=0.1,
    )

    # --- MCTS settings --------------------------------------------------------
    # The test search runs twice the iterations/depth of the training search and
    # uses smaller Dirichlet alpha/epsilon values.
    eval_hypers_train = MCTSHypers(
        num_iters=750,
        max_depth=750,
        puct_coeff=1.41,
        dirichlet_alpha=0.5,
        dirichlet_epsilon=0.25,
    )
    eval_hypers_test = MCTSHypers(
        num_iters=1500,
        max_depth=1500,
        puct_coeff=1.41,
        dirichlet_alpha=0.1,
        dirichlet_epsilon=0.1,
    )

    run_tag = 'test_othello_2'

    # --- Model and optimizer --------------------------------------------------
    model = TurboZeroResnet(arch_params).to(DEVICE)
    optimizer = torch.optim.AdamW(model.parameters(), lr=hypers.learning_rate)

    baselines = ['random']

    # --- Evaluators -----------------------------------------------------------
    # NOTE(review): the third positional argument (8) is presumably the board
    # size — confirm against the OthelloMCTS signature.
    # The test evaluator is sized by test_episodes_per_epoch rather than
    # NUM_PARALLEL_ENVS.
    train_evaluator = OthelloMCTS(NUM_PARALLEL_ENVS, DEVICE, 8, eval_hypers_train, debug=DEBUG)
    test_evaluator = OthelloMCTS(hypers.test_episodes_per_epoch, DEVICE, 8, eval_hypers_test, debug=DEBUG)

    # The fifth positional argument is a second device, fixed to the CPU here;
    # see OthelloTrainer for what it is used for.
    trainer = OthelloTrainer(
        train_evaluator,
        test_evaluator,
        NUM_PARALLEL_ENVS,
        DEVICE,
        torch.device('cpu'),
        model,
        optimizer,
        hypers,
        baselines,
        run_tag=run_tag,
    )

# Close every matplotlib figure that is currently open.
plt.close('all')

In [9]:
# Start training with the trainer built (or restored from a checkpoint) in the previous cell.
trainer.training_loop()