In [1]:
import random
import copy
import numpy as np
import pyspiel
from open_spiel.python.algorithms import mcts, random_agent, minimax
from othello_game import OthelloGame, OthelloState
from MCTS import MCTS
from Arena import Arena
from Coach import Coach
from BasicModel import BasicModel
from NNet import NNetWrapper
from QNet import QNetWrapper
from HybridQNet import HybridQNetWrapper
import matplotlib.pyplot as plt

In [2]:
def _stack_outcome_bars(ax, x, outcomes, opponent, title, bar_width):
    """Draw one stacked wins/draws/losses bar chart on ``ax``.

    Args:
        ax: matplotlib Axes to draw on.
        x: bar positions (the iteration numbers taken from the history).
        outcomes: one 3-sequence per entry of ``x``, read as
            ``[0]`` wins, ``[1]`` losses, ``[2]`` draws.
        opponent: key of the opponent ("random" or "mcts"); used as the prefix
            of the legend labels.
        title: title of the panel.
        bar_width: width of each bar, in x-axis units.
    """
    # NOTE: the triple is indexed wins=0, losses=1, draws=2, but the bars are
    # stacked in the order wins -> draws -> losses (bottom to top).
    wins = np.array([outcome[0] for outcome in outcomes])
    draws = np.array([outcome[2] for outcome in outcomes])
    losses = np.array([outcome[1] for outcome in outcomes])

    ax.bar(x, wins, color='g', width=bar_width, label=f'{opponent} wins')
    ax.bar(x, draws, color='b', width=bar_width, bottom=wins, label=f'{opponent} draws')
    ax.bar(x, losses, color='r', width=bar_width, bottom=wins + draws, label=f'{opponent} losses')

    ax.set_xticks(x)
    ax.set_xlabel('Iteration')
    ax.set_ylabel('Number of games')
    ax.set_title(title)

    # The labels above are only visible if a legend is actually drawn. Skip it
    # when there is nothing plotted so an empty history cannot trip the legend
    # handler on bar containers without patches.
    if len(x) > 0:
        ax.legend(fontsize='small')


def plot_performance_history(history, name, bar_width=2):
    """Plot evaluation results of ``name`` against the random and MCTS agents.

    Creates a 1x2 figure of stacked bar charts (left: versus the random agent,
    right: versus the MCTS agent) with one bar per recorded evaluation, split
    into wins (green), draws (blue) and losses (red).

    Args:
        history: iterable of ``(iteration, results)`` pairs. ``results`` must
            map the keys ``"random"`` and ``"mcts"`` to a 3-sequence read as
            ``[0]`` wins, ``[1]`` losses, ``[2]`` draws.
        name: display name of the evaluated agent, used in the panel titles.
        bar_width: width of each bar in iteration units. The default of 2
            matches the original hard-coded value.

    Returns:
        None. The figure is left as the current pyplot figure so the notebook
        renders it exactly once.
    """
    # Materialize once: the data is traversed several times below, which would
    # silently yield empty panels if a generator were passed in.
    history = list(history)
    x = [iteration for iteration, _ in history]

    fig, axs = plt.subplots(1, 2, figsize=(8, 3))

    panels = (
        ("random", f"{name} vs Random Agent"),
        ("mcts", f"{name} vs MCTS Agent"),
    )
    for ax, (opponent, title) in zip(axs, panels):
        outcomes = [results[opponent] for _, results in history]
        _stack_outcome_bars(ax, x, outcomes, opponent, title, bar_width)

    # Keep the right panel's y-label from colliding with the left panel.
    fig.tight_layout()

In [3]:
# Seed the RNGs this notebook imports so that repeated runs are comparable.
# NOTE(review): self-play, MCTS and the network backend may draw from other
# generators (e.g. the deep-learning / quantum-simulator libraries behind
# HybridQNetWrapper); seed those as well if full reproducibility is required.
SEED = 0
random.seed(SEED)
np.random.seed(SEED)

# Experiment configuration, named so it can be tuned in one place.
BOARD_SIZE = 6  # passed as "n" to the game loader; presumably the board side length
LEARNING_RATE = 0.0025

game = pyspiel.load_game("othello_nxn", {"n": BOARD_SIZE})
hybrid_qnet = HybridQNetWrapper(game, lr=LEARNING_RATE, use_gpu=False)
# Report the model size as exposed by the wrapper.
print(f"hybrid_qnet params: {hybrid_qnet.total_params}")

Using CPU with lightning.qubit
hybrid_qnet params: 799


In [4]:
# Build the trainer for the hybrid network on the game loaded above.
# NOTE(review): parameter meanings are inferred from their names and from the
# recorded log (e.g. each "Testing..." phase plays 25 + 25 games per opponent,
# which matches compare_games=50) -- confirm against Coach.
hybrid_qnet_coach = Coach(
    game,
    hybrid_qnet,
    numMCTSSims=25,
    max_history_len=8192,
    compare_games=50,
)

# Long-running: each iteration in the recorded output does self-play, 20
# training epochs, and periodically an evaluation against the random and MCTS
# agents. The recorded run was stopped manually (KeyboardInterrupt).
hybrid_qnet_coach.learn()

Iter 0
Testing...


Arena.playGames (1): 100%|██████████| 25/25 [01:46<00:00,  4.24s/it]
Arena.playGames (2): 100%|██████████| 25/25 [01:40<00:00,  4.01s/it]
Arena.playGames (1): 100%|██████████| 25/25 [02:15<00:00,  5.43s/it]
Arena.playGames (2): 100%|██████████| 25/25 [02:21<00:00,  5.67s/it]


{'random': (27, 17, 6), 'mcts': (4, 45, 1)}
Iter 1


Self Play: 100%|██████████| 5/5 [00:42<00:00,  8.56s/it]


Epoch 0


Training Net: 100%|██████████| 3/3 [00:00<00:00, 25.33it/s, Loss_v=6.67e-01]


Epoch 1


Training Net: 100%|██████████| 3/3 [00:00<00:00, 27.02it/s, Loss_v=6.78e-01]


Epoch 2


Training Net: 100%|██████████| 3/3 [00:00<00:00, 27.11it/s, Loss_v=6.62e-01]


Epoch 3


Training Net: 100%|██████████| 3/3 [00:00<00:00, 28.31it/s, Loss_v=6.66e-01]


Epoch 4


Training Net: 100%|██████████| 3/3 [00:00<00:00, 27.66it/s, Loss_v=6.60e-01]


Epoch 5


Training Net: 100%|██████████| 3/3 [00:00<00:00, 28.48it/s, Loss_v=6.20e-01]


Epoch 6


Training Net: 100%|██████████| 3/3 [00:00<00:00, 27.38it/s, Loss_v=6.44e-01]


Epoch 7


Training Net: 100%|██████████| 3/3 [00:00<00:00, 27.95it/s, Loss_v=6.10e-01]


Epoch 8


Training Net: 100%|██████████| 3/3 [00:00<00:00, 22.72it/s, Loss_v=6.23e-01]


Epoch 9


Training Net: 100%|██████████| 3/3 [00:00<00:00, 27.52it/s, Loss_v=5.92e-01]


Epoch 10


Training Net: 100%|██████████| 3/3 [00:00<00:00, 28.73it/s, Loss_v=5.74e-01]


Epoch 11


Training Net: 100%|██████████| 3/3 [00:00<00:00, 26.70it/s, Loss_v=5.77e-01]


Epoch 12


Training Net: 100%|██████████| 3/3 [00:00<00:00, 28.45it/s, Loss_v=5.42e-01]


Epoch 13


Training Net: 100%|██████████| 3/3 [00:00<00:00, 25.70it/s, Loss_v=5.62e-01]


Epoch 14


Training Net: 100%|██████████| 3/3 [00:00<00:00, 25.41it/s, Loss_v=5.79e-01]


Epoch 15


Training Net: 100%|██████████| 3/3 [00:00<00:00, 28.37it/s, Loss_v=5.14e-01]


Epoch 16


Training Net: 100%|██████████| 3/3 [00:00<00:00, 25.83it/s, Loss_v=5.46e-01]


Epoch 17


Training Net: 100%|██████████| 3/3 [00:00<00:00, 27.33it/s, Loss_v=5.19e-01]


Epoch 18


Training Net: 100%|██████████| 3/3 [00:00<00:00, 28.05it/s, Loss_v=5.28e-01]


Epoch 19


Training Net: 100%|██████████| 3/3 [00:00<00:00, 28.89it/s, Loss_v=4.44e-01]


Iter 2


Self Play: 100%|██████████| 5/5 [00:40<00:00,  8.18s/it]


Epoch 0


Training Net: 100%|██████████| 7/7 [00:00<00:00, 27.01it/s, Loss_v=7.50e-01]


Epoch 1


Training Net: 100%|██████████| 7/7 [00:00<00:00, 27.16it/s, Loss_v=7.07e-01]


Epoch 2


Training Net: 100%|██████████| 7/7 [00:00<00:00, 28.19it/s, Loss_v=6.42e-01]


Epoch 3


Training Net: 100%|██████████| 7/7 [00:00<00:00, 26.90it/s, Loss_v=6.09e-01]


Epoch 4


Training Net: 100%|██████████| 7/7 [00:00<00:00, 27.39it/s, Loss_v=5.94e-01]


Epoch 5


Training Net: 100%|██████████| 7/7 [00:00<00:00, 25.72it/s, Loss_v=5.13e-01]


Epoch 6


Training Net: 100%|██████████| 7/7 [00:00<00:00, 26.51it/s, Loss_v=4.89e-01]


Epoch 7


Training Net: 100%|██████████| 7/7 [00:00<00:00, 28.13it/s, Loss_v=4.55e-01]


Epoch 8


Training Net: 100%|██████████| 7/7 [00:00<00:00, 26.96it/s, Loss_v=4.39e-01]


Epoch 9


Training Net: 100%|██████████| 7/7 [00:00<00:00, 28.22it/s, Loss_v=4.06e-01]


Epoch 10


Training Net: 100%|██████████| 7/7 [00:00<00:00, 28.59it/s, Loss_v=3.93e-01]


Epoch 11


Training Net: 100%|██████████| 7/7 [00:00<00:00, 27.99it/s, Loss_v=3.68e-01]


Epoch 12


Training Net: 100%|██████████| 7/7 [00:00<00:00, 28.18it/s, Loss_v=3.42e-01]


Epoch 13


Training Net: 100%|██████████| 7/7 [00:00<00:00, 27.53it/s, Loss_v=3.22e-01]


Epoch 14


Training Net: 100%|██████████| 7/7 [00:00<00:00, 27.92it/s, Loss_v=3.21e-01]


Epoch 15


Training Net: 100%|██████████| 7/7 [00:00<00:00, 27.16it/s, Loss_v=3.10e-01]


Epoch 16


Training Net: 100%|██████████| 7/7 [00:00<00:00, 28.97it/s, Loss_v=3.22e-01]


Epoch 17


Training Net: 100%|██████████| 7/7 [00:00<00:00, 26.47it/s, Loss_v=3.01e-01]


Epoch 18


Training Net: 100%|██████████| 7/7 [00:00<00:00, 26.77it/s, Loss_v=2.97e-01]


Epoch 19


Training Net: 100%|██████████| 7/7 [00:00<00:00, 27.30it/s, Loss_v=2.72e-01]


Iter 3


Self Play: 100%|██████████| 5/5 [00:44<00:00,  8.93s/it]


Epoch 0


Training Net: 100%|██████████| 10/10 [00:00<00:00, 26.98it/s, Loss_v=5.06e-01]


Epoch 1


Training Net: 100%|██████████| 10/10 [00:00<00:00, 27.98it/s, Loss_v=4.52e-01]


Epoch 2


Training Net: 100%|██████████| 10/10 [00:00<00:00, 27.45it/s, Loss_v=4.20e-01]


Epoch 3


Training Net: 100%|██████████| 10/10 [00:00<00:00, 28.00it/s, Loss_v=3.89e-01]


Epoch 4


Training Net: 100%|██████████| 10/10 [00:00<00:00, 26.83it/s, Loss_v=3.90e-01]


Epoch 5


Training Net: 100%|██████████| 10/10 [00:00<00:00, 26.94it/s, Loss_v=3.76e-01]


Epoch 6


Training Net: 100%|██████████| 10/10 [00:00<00:00, 27.32it/s, Loss_v=3.60e-01]


Epoch 7


Training Net: 100%|██████████| 10/10 [00:00<00:00, 28.65it/s, Loss_v=3.71e-01]


Epoch 8


Training Net: 100%|██████████| 10/10 [00:00<00:00, 27.10it/s, Loss_v=3.51e-01]


Epoch 9


Training Net: 100%|██████████| 10/10 [00:00<00:00, 27.74it/s, Loss_v=3.40e-01]


Epoch 10


Training Net: 100%|██████████| 10/10 [00:00<00:00, 27.79it/s, Loss_v=3.22e-01]


Epoch 11


Training Net: 100%|██████████| 10/10 [00:00<00:00, 27.71it/s, Loss_v=3.17e-01]


Epoch 12


Training Net: 100%|██████████| 10/10 [00:00<00:00, 27.47it/s, Loss_v=3.31e-01]


Epoch 13


Training Net: 100%|██████████| 10/10 [00:00<00:00, 22.84it/s, Loss_v=3.30e-01]


Epoch 14


Training Net: 100%|██████████| 10/10 [00:00<00:00, 28.43it/s, Loss_v=3.32e-01]


Epoch 15


Training Net: 100%|██████████| 10/10 [00:00<00:00, 27.81it/s, Loss_v=3.33e-01]


Epoch 16


Training Net: 100%|██████████| 10/10 [00:00<00:00, 28.17it/s, Loss_v=3.09e-01]


Epoch 17


Training Net: 100%|██████████| 10/10 [00:00<00:00, 27.39it/s, Loss_v=3.09e-01]


Epoch 18


Training Net: 100%|██████████| 10/10 [00:00<00:00, 27.01it/s, Loss_v=3.07e-01]


Epoch 19


Training Net: 100%|██████████| 10/10 [00:00<00:00, 27.44it/s, Loss_v=3.09e-01]


Iter 4


Self Play: 100%|██████████| 5/5 [00:41<00:00,  8.28s/it]


Epoch 0


Training Net: 100%|██████████| 14/14 [00:00<00:00, 25.75it/s, Loss_v=4.82e-01]


Epoch 1


Training Net: 100%|██████████| 14/14 [00:00<00:00, 28.61it/s, Loss_v=4.51e-01]


Epoch 2


Training Net: 100%|██████████| 14/14 [00:00<00:00, 27.52it/s, Loss_v=4.33e-01]


Epoch 3


Training Net: 100%|██████████| 14/14 [00:00<00:00, 27.65it/s, Loss_v=3.94e-01]


Epoch 4


Training Net: 100%|██████████| 14/14 [00:00<00:00, 27.23it/s, Loss_v=3.96e-01]


Epoch 5


Training Net: 100%|██████████| 14/14 [00:00<00:00, 25.83it/s, Loss_v=3.80e-01]


Epoch 6


Training Net: 100%|██████████| 14/14 [00:00<00:00, 28.05it/s, Loss_v=3.91e-01]


Epoch 7


Training Net: 100%|██████████| 14/14 [00:00<00:00, 28.05it/s, Loss_v=3.65e-01]


Epoch 8


Training Net: 100%|██████████| 14/14 [00:00<00:00, 27.12it/s, Loss_v=3.91e-01]


Epoch 9


Training Net: 100%|██████████| 14/14 [00:00<00:00, 28.72it/s, Loss_v=3.72e-01]


Epoch 10


Training Net: 100%|██████████| 14/14 [00:00<00:00, 28.42it/s, Loss_v=3.55e-01]


Epoch 11


Training Net: 100%|██████████| 14/14 [00:00<00:00, 23.99it/s, Loss_v=3.57e-01]


Epoch 12


Training Net: 100%|██████████| 14/14 [00:00<00:00, 23.16it/s, Loss_v=3.60e-01]


Epoch 13


Training Net: 100%|██████████| 14/14 [00:00<00:00, 28.76it/s, Loss_v=3.69e-01]


Epoch 14


Training Net: 100%|██████████| 14/14 [00:00<00:00, 27.77it/s, Loss_v=3.49e-01]


Epoch 15


Training Net: 100%|██████████| 14/14 [00:00<00:00, 26.69it/s, Loss_v=3.30e-01]


Epoch 16


Training Net: 100%|██████████| 14/14 [00:00<00:00, 27.81it/s, Loss_v=3.44e-01]


Epoch 17


Training Net: 100%|██████████| 14/14 [00:00<00:00, 28.31it/s, Loss_v=3.41e-01]


Epoch 18


Training Net: 100%|██████████| 14/14 [00:00<00:00, 27.00it/s, Loss_v=3.45e-01]


Epoch 19


Training Net: 100%|██████████| 14/14 [00:00<00:00, 27.71it/s, Loss_v=3.38e-01]


Testing...


Arena.playGames (1): 100%|██████████| 25/25 [01:40<00:00,  4.01s/it]
Arena.playGames (2): 100%|██████████| 25/25 [01:31<00:00,  3.68s/it]
Arena.playGames (1): 100%|██████████| 25/25 [02:14<00:00,  5.39s/it]
Arena.playGames (2): 100%|██████████| 25/25 [02:22<00:00,  5.69s/it]


{'random': (38, 11, 1), 'mcts': (23, 26, 1)}
Iter 5


Self Play: 100%|██████████| 5/5 [00:42<00:00,  8.60s/it]


Epoch 0


Training Net: 100%|██████████| 17/17 [00:00<00:00, 26.45it/s, Loss_v=4.13e-01]


Epoch 1


Training Net: 100%|██████████| 17/17 [00:00<00:00, 27.77it/s, Loss_v=3.78e-01]


Epoch 2


Training Net: 100%|██████████| 17/17 [00:00<00:00, 27.82it/s, Loss_v=3.75e-01]


Epoch 3


Training Net: 100%|██████████| 17/17 [00:00<00:00, 26.28it/s, Loss_v=3.45e-01]


Epoch 4


Training Net: 100%|██████████| 17/17 [00:00<00:00, 27.21it/s, Loss_v=3.52e-01]


Epoch 5


Training Net: 100%|██████████| 17/17 [00:00<00:00, 27.79it/s, Loss_v=3.42e-01]


Epoch 6


Training Net: 100%|██████████| 17/17 [00:00<00:00, 27.27it/s, Loss_v=3.39e-01]


Epoch 7


Training Net: 100%|██████████| 17/17 [00:00<00:00, 28.08it/s, Loss_v=3.31e-01]


Epoch 8


Training Net: 100%|██████████| 17/17 [00:00<00:00, 27.60it/s, Loss_v=3.31e-01]


Epoch 9


Training Net: 100%|██████████| 17/17 [00:00<00:00, 27.36it/s, Loss_v=3.44e-01]


Epoch 10


Training Net: 100%|██████████| 17/17 [00:00<00:00, 24.62it/s, Loss_v=3.30e-01]


Epoch 11


Training Net: 100%|██████████| 17/17 [00:00<00:00, 26.82it/s, Loss_v=3.04e-01]


Epoch 12


Training Net: 100%|██████████| 17/17 [00:00<00:00, 27.00it/s, Loss_v=3.15e-01]


Epoch 13


Training Net: 100%|██████████| 17/17 [00:00<00:00, 27.86it/s, Loss_v=3.20e-01]


Epoch 14


Training Net: 100%|██████████| 17/17 [00:00<00:00, 28.29it/s, Loss_v=3.20e-01]


Epoch 15


Training Net: 100%|██████████| 17/17 [00:00<00:00, 25.86it/s, Loss_v=3.14e-01]


Epoch 16


Training Net: 100%|██████████| 17/17 [00:00<00:00, 23.84it/s, Loss_v=3.10e-01]


Epoch 17


Training Net: 100%|██████████| 17/17 [00:00<00:00, 27.17it/s, Loss_v=3.21e-01]


Epoch 18


Training Net: 100%|██████████| 17/17 [00:00<00:00, 26.41it/s, Loss_v=3.09e-01]


Epoch 19


Training Net: 100%|██████████| 17/17 [00:00<00:00, 27.22it/s, Loss_v=2.97e-01]


Iter 6


Self Play: 100%|██████████| 5/5 [00:41<00:00,  8.23s/it]


Epoch 0


Training Net: 100%|██████████| 21/21 [00:00<00:00, 27.48it/s, Loss_v=4.80e-01]


Epoch 1


Training Net: 100%|██████████| 21/21 [00:00<00:00, 28.28it/s, Loss_v=4.55e-01]


Epoch 2


Training Net: 100%|██████████| 21/21 [00:00<00:00, 28.21it/s, Loss_v=4.43e-01]


Epoch 3


Training Net: 100%|██████████| 21/21 [00:00<00:00, 26.82it/s, Loss_v=4.16e-01]


Epoch 4


Training Net: 100%|██████████| 21/21 [00:00<00:00, 25.52it/s, Loss_v=4.00e-01]


Epoch 5


Training Net: 100%|██████████| 21/21 [00:00<00:00, 27.85it/s, Loss_v=4.07e-01]


Epoch 6


Training Net: 100%|██████████| 21/21 [00:00<00:00, 27.61it/s, Loss_v=4.03e-01]


Epoch 7


Training Net: 100%|██████████| 21/21 [00:00<00:00, 26.87it/s, Loss_v=3.97e-01]


Epoch 8


Training Net: 100%|██████████| 21/21 [00:00<00:00, 28.06it/s, Loss_v=4.04e-01]


Epoch 9


Training Net: 100%|██████████| 21/21 [00:00<00:00, 27.83it/s, Loss_v=3.84e-01]


Epoch 10


Training Net: 100%|██████████| 21/21 [00:00<00:00, 27.59it/s, Loss_v=3.79e-01]


Epoch 11


Training Net: 100%|██████████| 21/21 [00:00<00:00, 27.89it/s, Loss_v=3.73e-01]


Epoch 12


Training Net: 100%|██████████| 21/21 [00:00<00:00, 28.26it/s, Loss_v=3.72e-01]


Epoch 13


Training Net: 100%|██████████| 21/21 [00:00<00:00, 28.22it/s, Loss_v=3.60e-01]


Epoch 14


Training Net: 100%|██████████| 21/21 [00:00<00:00, 27.15it/s, Loss_v=3.66e-01]


Epoch 15


Training Net: 100%|██████████| 21/21 [00:00<00:00, 28.09it/s, Loss_v=3.55e-01]


Epoch 16


Training Net: 100%|██████████| 21/21 [00:00<00:00, 28.11it/s, Loss_v=3.48e-01]


Epoch 17


Training Net: 100%|██████████| 21/21 [00:00<00:00, 27.93it/s, Loss_v=3.66e-01]


Epoch 18


Training Net: 100%|██████████| 21/21 [00:00<00:00, 27.03it/s, Loss_v=3.53e-01]


Epoch 19


Training Net: 100%|██████████| 21/21 [00:00<00:00, 26.77it/s, Loss_v=3.40e-01]


Iter 7


Self Play: 100%|██████████| 5/5 [00:35<00:00,  7.08s/it]


Epoch 0


Training Net: 100%|██████████| 24/24 [00:00<00:00, 27.47it/s, Loss_v=3.92e-01]


Epoch 1


Training Net: 100%|██████████| 24/24 [00:00<00:00, 27.77it/s, Loss_v=3.90e-01]


Epoch 2


Training Net: 100%|██████████| 24/24 [00:00<00:00, 27.73it/s, Loss_v=3.83e-01]


Epoch 3


Training Net: 100%|██████████| 24/24 [00:00<00:00, 27.65it/s, Loss_v=3.72e-01]


Epoch 4


Training Net: 100%|██████████| 24/24 [00:00<00:00, 27.59it/s, Loss_v=3.67e-01]


Epoch 5


Training Net: 100%|██████████| 24/24 [00:00<00:00, 26.94it/s, Loss_v=3.67e-01]


Epoch 6


Training Net: 100%|██████████| 24/24 [00:00<00:00, 25.93it/s, Loss_v=3.75e-01]


Epoch 7


Training Net: 100%|██████████| 24/24 [00:00<00:00, 27.53it/s, Loss_v=3.69e-01]


Epoch 8


Training Net: 100%|██████████| 24/24 [00:00<00:00, 27.92it/s, Loss_v=3.69e-01]


Epoch 9


Training Net: 100%|██████████| 24/24 [00:00<00:00, 27.50it/s, Loss_v=3.58e-01]


Epoch 10


Training Net: 100%|██████████| 24/24 [00:00<00:00, 26.08it/s, Loss_v=3.51e-01]


Epoch 11


Training Net: 100%|██████████| 24/24 [00:00<00:00, 26.87it/s, Loss_v=3.36e-01]


Epoch 12


Training Net: 100%|██████████| 24/24 [00:00<00:00, 27.50it/s, Loss_v=3.51e-01]


Epoch 13


Training Net: 100%|██████████| 24/24 [00:00<00:00, 27.81it/s, Loss_v=3.41e-01]


Epoch 14


Training Net: 100%|██████████| 24/24 [00:00<00:00, 27.89it/s, Loss_v=3.43e-01]


Epoch 15


Training Net: 100%|██████████| 24/24 [00:00<00:00, 27.58it/s, Loss_v=3.47e-01]


Epoch 16


Training Net: 100%|██████████| 24/24 [00:00<00:00, 27.28it/s, Loss_v=3.52e-01]


Epoch 17


Training Net: 100%|██████████| 24/24 [00:00<00:00, 27.25it/s, Loss_v=3.33e-01]


Epoch 18


Training Net: 100%|██████████| 24/24 [00:00<00:00, 27.46it/s, Loss_v=3.42e-01]


Epoch 19


Training Net: 100%|██████████| 24/24 [00:00<00:00, 25.78it/s, Loss_v=3.53e-01]


Iter 8


Self Play: 100%|██████████| 5/5 [00:39<00:00,  7.85s/it]


Epoch 0


Training Net: 100%|██████████| 27/27 [00:01<00:00, 26.60it/s, Loss_v=3.45e-01]


Epoch 1


Training Net: 100%|██████████| 27/27 [00:01<00:00, 26.37it/s, Loss_v=3.39e-01]


Epoch 2


Training Net: 100%|██████████| 27/27 [00:00<00:00, 27.73it/s, Loss_v=3.42e-01]


Epoch 3


Training Net: 100%|██████████| 27/27 [00:00<00:00, 27.70it/s, Loss_v=3.27e-01]


Epoch 4


Training Net: 100%|██████████| 27/27 [00:00<00:00, 27.56it/s, Loss_v=3.41e-01]


Epoch 5


Training Net: 100%|██████████| 27/27 [00:00<00:00, 27.31it/s, Loss_v=3.33e-01]


Epoch 6


Training Net: 100%|██████████| 27/27 [00:00<00:00, 27.82it/s, Loss_v=3.25e-01]


Epoch 7


Training Net: 100%|██████████| 27/27 [00:00<00:00, 27.74it/s, Loss_v=3.29e-01]


Epoch 8


Training Net: 100%|██████████| 27/27 [00:00<00:00, 27.60it/s, Loss_v=3.23e-01]


Epoch 9


Training Net: 100%|██████████| 27/27 [00:00<00:00, 27.63it/s, Loss_v=3.31e-01]


Epoch 10


Training Net: 100%|██████████| 27/27 [00:01<00:00, 26.74it/s, Loss_v=3.28e-01]


Epoch 11


Training Net: 100%|██████████| 27/27 [00:00<00:00, 28.08it/s, Loss_v=3.07e-01]


Epoch 12


Training Net: 100%|██████████| 27/27 [00:00<00:00, 27.13it/s, Loss_v=3.14e-01]


Epoch 13


Training Net: 100%|██████████| 27/27 [00:00<00:00, 27.42it/s, Loss_v=3.13e-01]


Epoch 14


Training Net: 100%|██████████| 27/27 [00:00<00:00, 27.17it/s, Loss_v=3.27e-01]


Epoch 15


Training Net: 100%|██████████| 27/27 [00:01<00:00, 26.70it/s, Loss_v=3.21e-01]


Epoch 16


Training Net: 100%|██████████| 27/27 [00:00<00:00, 27.54it/s, Loss_v=3.07e-01]


Epoch 17


Training Net: 100%|██████████| 27/27 [00:01<00:00, 26.91it/s, Loss_v=3.14e-01]


Epoch 18


Training Net: 100%|██████████| 27/27 [00:01<00:00, 26.09it/s, Loss_v=3.26e-01]


Epoch 19


Training Net: 100%|██████████| 27/27 [00:00<00:00, 27.82it/s, Loss_v=3.07e-01]


Testing...


Arena.playGames (1): 100%|██████████| 25/25 [01:34<00:00,  3.78s/it]
Arena.playGames (2): 100%|██████████| 25/25 [01:32<00:00,  3.68s/it]
Arena.playGames (1): 100%|██████████| 25/25 [02:21<00:00,  5.65s/it]
Arena.playGames (2): 100%|██████████| 25/25 [02:12<00:00,  5.31s/it]


{'random': (47, 3, 0), 'mcts': (20, 30, 0)}
Iter 9


Self Play: 100%|██████████| 5/5 [00:43<00:00,  8.72s/it]


Epoch 0


Training Net: 100%|██████████| 31/31 [00:01<00:00, 26.30it/s, Loss_v=3.68e-01]


Epoch 1


Training Net: 100%|██████████| 31/31 [00:01<00:00, 27.81it/s, Loss_v=3.49e-01]


Epoch 2


Training Net: 100%|██████████| 31/31 [00:01<00:00, 27.95it/s, Loss_v=3.46e-01]


Epoch 3


Training Net: 100%|██████████| 31/31 [00:01<00:00, 26.50it/s, Loss_v=3.40e-01]


Epoch 4


Training Net: 100%|██████████| 31/31 [00:01<00:00, 27.29it/s, Loss_v=3.32e-01]


Epoch 5


Training Net: 100%|██████████| 31/31 [00:01<00:00, 27.40it/s, Loss_v=3.42e-01]


Epoch 6


Training Net: 100%|██████████| 31/31 [00:01<00:00, 27.91it/s, Loss_v=3.27e-01]


Epoch 7


Training Net: 100%|██████████| 31/31 [00:01<00:00, 27.94it/s, Loss_v=3.36e-01]


Epoch 8


Training Net: 100%|██████████| 31/31 [00:01<00:00, 27.61it/s, Loss_v=3.40e-01]


Epoch 9


Training Net: 100%|██████████| 31/31 [00:01<00:00, 27.70it/s, Loss_v=3.31e-01]


Epoch 10


Training Net: 100%|██████████| 31/31 [00:01<00:00, 28.22it/s, Loss_v=3.35e-01]


Epoch 11


Training Net: 100%|██████████| 31/31 [00:01<00:00, 26.26it/s, Loss_v=3.31e-01]


Epoch 12


Training Net: 100%|██████████| 31/31 [00:01<00:00, 28.06it/s, Loss_v=3.13e-01]


Epoch 13


Training Net: 100%|██████████| 31/31 [00:01<00:00, 27.82it/s, Loss_v=3.24e-01]


Epoch 14


Training Net: 100%|██████████| 31/31 [00:01<00:00, 27.05it/s, Loss_v=3.23e-01]


Epoch 15


Training Net: 100%|██████████| 31/31 [00:01<00:00, 28.01it/s, Loss_v=3.11e-01]


Epoch 16


Training Net: 100%|██████████| 31/31 [00:01<00:00, 27.65it/s, Loss_v=3.15e-01]


Epoch 17


Training Net: 100%|██████████| 31/31 [00:01<00:00, 27.71it/s, Loss_v=3.07e-01]


Epoch 18


Training Net: 100%|██████████| 31/31 [00:01<00:00, 28.11it/s, Loss_v=3.10e-01]


Epoch 19


Training Net: 100%|██████████| 31/31 [00:01<00:00, 27.90it/s, Loss_v=3.18e-01]


Iter 10


Self Play: 100%|██████████| 5/5 [00:42<00:00,  8.51s/it]


Epoch 0


Training Net: 100%|██████████| 32/32 [00:01<00:00, 27.28it/s, Loss_v=3.46e-01]


Epoch 1


Training Net: 100%|██████████| 32/32 [00:01<00:00, 27.80it/s, Loss_v=3.22e-01]


Epoch 2


Training Net: 100%|██████████| 32/32 [00:01<00:00, 27.50it/s, Loss_v=3.18e-01]


Epoch 3


Training Net: 100%|██████████| 32/32 [00:01<00:00, 27.59it/s, Loss_v=3.17e-01]


Epoch 4


Training Net: 100%|██████████| 32/32 [00:01<00:00, 27.51it/s, Loss_v=3.29e-01]


Epoch 5


Training Net: 100%|██████████| 32/32 [00:01<00:00, 27.28it/s, Loss_v=3.15e-01]


Epoch 6


Training Net: 100%|██████████| 32/32 [00:01<00:00, 28.24it/s, Loss_v=3.16e-01]


Epoch 7


Training Net: 100%|██████████| 32/32 [00:01<00:00, 26.69it/s, Loss_v=3.23e-01]


Epoch 8


Training Net: 100%|██████████| 32/32 [00:01<00:00, 28.37it/s, Loss_v=3.11e-01]


Epoch 9


Training Net: 100%|██████████| 32/32 [00:01<00:00, 26.82it/s, Loss_v=3.05e-01]


Epoch 10


Training Net: 100%|██████████| 32/32 [00:01<00:00, 26.91it/s, Loss_v=3.11e-01]


Epoch 11


Training Net: 100%|██████████| 32/32 [00:01<00:00, 26.75it/s, Loss_v=3.01e-01]


Epoch 12


Training Net: 100%|██████████| 32/32 [00:01<00:00, 26.77it/s, Loss_v=3.25e-01]


Epoch 13


Training Net: 100%|██████████| 32/32 [00:01<00:00, 27.54it/s, Loss_v=3.07e-01]


Epoch 14


Training Net: 100%|██████████| 32/32 [00:01<00:00, 26.48it/s, Loss_v=3.07e-01]


Epoch 15


Training Net: 100%|██████████| 32/32 [00:01<00:00, 28.15it/s, Loss_v=3.15e-01]


Epoch 16


Training Net: 100%|██████████| 32/32 [00:01<00:00, 28.21it/s, Loss_v=2.99e-01]


Epoch 17


Training Net: 100%|██████████| 32/32 [00:01<00:00, 27.83it/s, Loss_v=3.01e-01]


Epoch 18


Training Net: 100%|██████████| 32/32 [00:01<00:00, 28.25it/s, Loss_v=2.98e-01]


Epoch 19


Training Net: 100%|██████████| 32/32 [00:01<00:00, 27.76it/s, Loss_v=2.92e-01]


Iter 11


Self Play: 100%|██████████| 5/5 [00:42<00:00,  8.59s/it]


Epoch 0


Training Net: 100%|██████████| 32/32 [00:01<00:00, 27.48it/s, Loss_v=3.53e-01]


Epoch 1


Training Net: 100%|██████████| 32/32 [00:01<00:00, 26.59it/s, Loss_v=3.48e-01]


Epoch 2


Training Net: 100%|██████████| 32/32 [00:01<00:00, 28.28it/s, Loss_v=3.63e-01]


Epoch 3


Training Net: 100%|██████████| 32/32 [00:01<00:00, 27.35it/s, Loss_v=3.48e-01]


Epoch 4


Training Net: 100%|██████████| 32/32 [00:01<00:00, 27.78it/s, Loss_v=3.41e-01]


Epoch 5


Training Net: 100%|██████████| 32/32 [00:01<00:00, 26.67it/s, Loss_v=3.46e-01]


Epoch 6


Training Net: 100%|██████████| 32/32 [00:01<00:00, 26.95it/s, Loss_v=3.32e-01]


Epoch 7


Training Net: 100%|██████████| 32/32 [00:01<00:00, 27.92it/s, Loss_v=3.31e-01]


Epoch 8


Training Net: 100%|██████████| 32/32 [00:01<00:00, 27.22it/s, Loss_v=3.36e-01]


Epoch 9


Training Net: 100%|██████████| 32/32 [00:01<00:00, 27.32it/s, Loss_v=3.34e-01]


Epoch 10


Training Net: 100%|██████████| 32/32 [00:01<00:00, 27.45it/s, Loss_v=3.34e-01]


Epoch 11


Training Net: 100%|██████████| 32/32 [00:01<00:00, 27.95it/s, Loss_v=3.31e-01]


Epoch 12


Training Net: 100%|██████████| 32/32 [00:01<00:00, 27.67it/s, Loss_v=3.34e-01]


Epoch 13


Training Net: 100%|██████████| 32/32 [00:01<00:00, 27.81it/s, Loss_v=3.24e-01]


Epoch 14


Training Net: 100%|██████████| 32/32 [00:01<00:00, 27.43it/s, Loss_v=3.46e-01]


Epoch 15


Training Net: 100%|██████████| 32/32 [00:01<00:00, 27.52it/s, Loss_v=3.23e-01]


Epoch 16


Training Net: 100%|██████████| 32/32 [00:01<00:00, 26.92it/s, Loss_v=3.28e-01]


Epoch 17


Training Net: 100%|██████████| 32/32 [00:01<00:00, 27.53it/s, Loss_v=3.19e-01]


Epoch 18


Training Net: 100%|██████████| 32/32 [00:01<00:00, 27.39it/s, Loss_v=3.23e-01]


Epoch 19


Training Net: 100%|██████████| 32/32 [00:01<00:00, 27.00it/s, Loss_v=3.17e-01]


Iter 12


Self Play: 100%|██████████| 5/5 [00:44<00:00,  8.84s/it]


Epoch 0


Training Net: 100%|██████████| 32/32 [00:01<00:00, 27.28it/s, Loss_v=4.18e-01]


Epoch 1


Training Net: 100%|██████████| 32/32 [00:01<00:00, 26.75it/s, Loss_v=3.98e-01]


Epoch 2


Training Net: 100%|██████████| 32/32 [00:01<00:00, 27.59it/s, Loss_v=3.80e-01]


Epoch 3


Training Net: 100%|██████████| 32/32 [00:01<00:00, 27.53it/s, Loss_v=3.79e-01]


Epoch 4


Training Net: 100%|██████████| 32/32 [00:01<00:00, 27.03it/s, Loss_v=3.67e-01]


Epoch 5


Training Net: 100%|██████████| 32/32 [00:01<00:00, 27.35it/s, Loss_v=3.63e-01]


Epoch 6


Training Net: 100%|██████████| 32/32 [00:01<00:00, 27.49it/s, Loss_v=3.49e-01]


Epoch 7


Training Net: 100%|██████████| 32/32 [00:01<00:00, 26.38it/s, Loss_v=3.79e-01]


Epoch 8


Training Net: 100%|██████████| 32/32 [00:01<00:00, 27.29it/s, Loss_v=3.66e-01]


Epoch 9


Training Net: 100%|██████████| 32/32 [00:01<00:00, 27.38it/s, Loss_v=3.68e-01]


Epoch 10


Training Net: 100%|██████████| 32/32 [00:01<00:00, 27.85it/s, Loss_v=3.59e-01]


Epoch 11


Training Net: 100%|██████████| 32/32 [00:01<00:00, 26.76it/s, Loss_v=3.60e-01]


Epoch 12


Training Net: 100%|██████████| 32/32 [00:01<00:00, 27.32it/s, Loss_v=3.51e-01]


Epoch 13


Training Net: 100%|██████████| 32/32 [00:01<00:00, 27.18it/s, Loss_v=3.60e-01]


Epoch 14


Training Net: 100%|██████████| 32/32 [00:01<00:00, 26.81it/s, Loss_v=3.38e-01]


Epoch 15


Training Net: 100%|██████████| 32/32 [00:01<00:00, 27.08it/s, Loss_v=3.38e-01]


Epoch 16


Training Net: 100%|██████████| 32/32 [00:01<00:00, 27.85it/s, Loss_v=3.35e-01]


Epoch 17


Training Net: 100%|██████████| 32/32 [00:01<00:00, 26.94it/s, Loss_v=3.39e-01]


Epoch 18


Training Net: 100%|██████████| 32/32 [00:01<00:00, 27.99it/s, Loss_v=3.47e-01]


Epoch 19


Training Net: 100%|██████████| 32/32 [00:01<00:00, 27.15it/s, Loss_v=3.28e-01]


Testing...


Arena.playGames (1): 100%|██████████| 25/25 [01:46<00:00,  4.27s/it]
Arena.playGames (2): 100%|██████████| 25/25 [01:43<00:00,  4.13s/it]
Arena.playGames (1): 100%|██████████| 25/25 [02:23<00:00,  5.76s/it]
Arena.playGames (2): 100%|██████████| 25/25 [02:22<00:00,  5.69s/it]


{'random': (42, 8, 0), 'mcts': (18, 31, 1)}
Iter 13


Self Play: 100%|██████████| 5/5 [00:42<00:00,  8.54s/it]


Epoch 0


Training Net: 100%|██████████| 32/32 [00:01<00:00, 26.45it/s, Loss_v=4.00e-01]


Epoch 1


Training Net: 100%|██████████| 32/32 [00:01<00:00, 27.44it/s, Loss_v=3.84e-01]


Epoch 2


Training Net: 100%|██████████| 32/32 [00:01<00:00, 26.18it/s, Loss_v=3.53e-01]


Epoch 3


Training Net: 100%|██████████| 32/32 [00:01<00:00, 26.16it/s, Loss_v=3.51e-01]


Epoch 4


Training Net: 100%|██████████| 32/32 [00:01<00:00, 26.33it/s, Loss_v=3.66e-01]


Epoch 5


Training Net: 100%|██████████| 32/32 [00:01<00:00, 26.39it/s, Loss_v=3.41e-01]


Epoch 6


Training Net: 100%|██████████| 32/32 [00:01<00:00, 27.87it/s, Loss_v=3.52e-01]


Epoch 7


Training Net: 100%|██████████| 32/32 [00:01<00:00, 27.38it/s, Loss_v=3.49e-01]


Epoch 8


Training Net: 100%|██████████| 32/32 [00:01<00:00, 27.59it/s, Loss_v=3.37e-01]


Epoch 9


Training Net: 100%|██████████| 32/32 [00:01<00:00, 26.95it/s, Loss_v=3.32e-01]


Epoch 10


Training Net: 100%|██████████| 32/32 [00:01<00:00, 27.77it/s, Loss_v=3.33e-01]


Epoch 11


Training Net: 100%|██████████| 32/32 [00:01<00:00, 27.82it/s, Loss_v=3.32e-01]


Epoch 12


Training Net: 100%|██████████| 32/32 [00:01<00:00, 27.23it/s, Loss_v=3.24e-01]


Epoch 13


Training Net: 100%|██████████| 32/32 [00:01<00:00, 27.91it/s, Loss_v=3.42e-01]


Epoch 14


Training Net: 100%|██████████| 32/32 [00:01<00:00, 27.34it/s, Loss_v=3.28e-01]


Epoch 15


Training Net: 100%|██████████| 32/32 [00:01<00:00, 27.88it/s, Loss_v=3.24e-01]


Epoch 16


Training Net: 100%|██████████| 32/32 [00:01<00:00, 27.24it/s, Loss_v=3.28e-01]


Epoch 17


Training Net: 100%|██████████| 32/32 [00:01<00:00, 27.09it/s, Loss_v=3.29e-01]


Epoch 18


Training Net: 100%|██████████| 32/32 [00:01<00:00, 26.30it/s, Loss_v=3.26e-01]


Epoch 19


Training Net: 100%|██████████| 32/32 [00:01<00:00, 26.58it/s, Loss_v=3.31e-01]


Iter 14


Self Play: 100%|██████████| 5/5 [00:42<00:00,  8.46s/it]


Epoch 0


Training Net: 100%|██████████| 32/32 [00:01<00:00, 26.91it/s, Loss_v=4.17e-01]


Epoch 1


Training Net: 100%|██████████| 32/32 [00:01<00:00, 27.20it/s, Loss_v=3.89e-01]


Epoch 2


Training Net: 100%|██████████| 32/32 [00:01<00:00, 27.84it/s, Loss_v=4.14e-01]


Epoch 3


Training Net: 100%|██████████| 32/32 [00:01<00:00, 27.29it/s, Loss_v=3.92e-01]


Epoch 4


Training Net: 100%|██████████| 32/32 [00:01<00:00, 27.31it/s, Loss_v=3.87e-01]


Epoch 5


Training Net: 100%|██████████| 32/32 [00:01<00:00, 27.22it/s, Loss_v=3.66e-01]


Epoch 6


Training Net: 100%|██████████| 32/32 [00:01<00:00, 27.12it/s, Loss_v=3.66e-01]


Epoch 7


Training Net: 100%|██████████| 32/32 [00:01<00:00, 26.20it/s, Loss_v=3.58e-01]


Epoch 8


Training Net: 100%|██████████| 32/32 [00:01<00:00, 26.62it/s, Loss_v=3.53e-01]


Epoch 9


Training Net: 100%|██████████| 32/32 [00:01<00:00, 26.62it/s, Loss_v=3.67e-01]


Epoch 10


Training Net: 100%|██████████| 32/32 [00:01<00:00, 27.01it/s, Loss_v=3.54e-01]


Epoch 11


Training Net: 100%|██████████| 32/32 [00:01<00:00, 28.02it/s, Loss_v=3.61e-01]


Epoch 12


Training Net: 100%|██████████| 32/32 [00:01<00:00, 26.68it/s, Loss_v=3.42e-01]


Epoch 13


Training Net: 100%|██████████| 32/32 [00:01<00:00, 27.55it/s, Loss_v=3.59e-01]


Epoch 14


Training Net: 100%|██████████| 32/32 [00:01<00:00, 27.28it/s, Loss_v=3.50e-01]


Epoch 15


Training Net: 100%|██████████| 32/32 [00:01<00:00, 27.37it/s, Loss_v=3.46e-01]


Epoch 16


Training Net: 100%|██████████| 32/32 [00:01<00:00, 28.04it/s, Loss_v=3.51e-01]


Epoch 17


Training Net: 100%|██████████| 32/32 [00:01<00:00, 27.93it/s, Loss_v=3.57e-01]


Epoch 18


Training Net: 100%|██████████| 32/32 [00:01<00:00, 27.44it/s, Loss_v=3.66e-01]


Epoch 19


Training Net: 100%|██████████| 32/32 [00:01<00:00, 27.56it/s, Loss_v=3.42e-01]


Iter 15


Self Play: 100%|██████████| 5/5 [00:41<00:00,  8.39s/it]


Epoch 0


Training Net: 100%|██████████| 32/32 [00:01<00:00, 27.62it/s, Loss_v=4.15e-01]


Epoch 1


Training Net: 100%|██████████| 32/32 [00:01<00:00, 27.18it/s, Loss_v=3.96e-01]


Epoch 2


Training Net: 100%|██████████| 32/32 [00:01<00:00, 27.30it/s, Loss_v=3.80e-01]


Epoch 3


Training Net: 100%|██████████| 32/32 [00:01<00:00, 27.88it/s, Loss_v=3.80e-01]


Epoch 4


Training Net: 100%|██████████| 32/32 [00:01<00:00, 28.07it/s, Loss_v=3.53e-01]


Epoch 5


Training Net: 100%|██████████| 32/32 [00:01<00:00, 27.38it/s, Loss_v=3.60e-01]


Epoch 6


Training Net: 100%|██████████| 32/32 [00:01<00:00, 27.90it/s, Loss_v=3.63e-01]


Epoch 7


Training Net: 100%|██████████| 32/32 [00:01<00:00, 25.73it/s, Loss_v=3.54e-01]


Epoch 8


Training Net: 100%|██████████| 32/32 [00:01<00:00, 27.43it/s, Loss_v=3.44e-01]


Epoch 9


Training Net: 100%|██████████| 32/32 [00:01<00:00, 27.62it/s, Loss_v=3.48e-01]


Epoch 10


Training Net: 100%|██████████| 32/32 [00:01<00:00, 27.32it/s, Loss_v=3.34e-01]


Epoch 11


Training Net: 100%|██████████| 32/32 [00:01<00:00, 25.05it/s, Loss_v=3.46e-01]


Epoch 12


Training Net: 100%|██████████| 32/32 [00:01<00:00, 27.50it/s, Loss_v=3.34e-01]


Epoch 13


Training Net: 100%|██████████| 32/32 [00:01<00:00, 25.51it/s, Loss_v=3.22e-01]


Epoch 14


Training Net: 100%|██████████| 32/32 [00:01<00:00, 25.85it/s, Loss_v=3.31e-01]


Epoch 15


Training Net: 100%|██████████| 32/32 [00:01<00:00, 26.94it/s, Loss_v=3.35e-01]


Epoch 16


Training Net: 100%|██████████| 32/32 [00:01<00:00, 27.29it/s, Loss_v=3.18e-01]


Epoch 17


Training Net: 100%|██████████| 32/32 [00:01<00:00, 27.17it/s, Loss_v=3.23e-01]


Epoch 18


Training Net: 100%|██████████| 32/32 [00:01<00:00, 26.37it/s, Loss_v=3.12e-01]


Epoch 19


Training Net: 100%|██████████| 32/32 [00:01<00:00, 27.21it/s, Loss_v=3.07e-01]


Iter 16


Self Play: 100%|██████████| 5/5 [00:43<00:00,  8.67s/it]


Epoch 0


Training Net: 100%|██████████| 32/32 [00:01<00:00, 27.45it/s, Loss_v=4.67e-01]


Epoch 1


Training Net: 100%|██████████| 32/32 [00:01<00:00, 27.02it/s, Loss_v=4.18e-01]


Epoch 2


Training Net: 100%|██████████| 32/32 [00:01<00:00, 27.91it/s, Loss_v=3.99e-01]


Epoch 3


Training Net: 100%|██████████| 32/32 [00:01<00:00, 26.53it/s, Loss_v=3.91e-01]


Epoch 4


Training Net: 100%|██████████| 32/32 [00:01<00:00, 27.96it/s, Loss_v=3.87e-01]


Epoch 5


Training Net: 100%|██████████| 32/32 [00:01<00:00, 26.41it/s, Loss_v=3.80e-01]


Epoch 6


Training Net: 100%|██████████| 32/32 [00:01<00:00, 27.87it/s, Loss_v=3.76e-01]


Epoch 7


Training Net: 100%|██████████| 32/32 [00:01<00:00, 27.14it/s, Loss_v=3.64e-01]


Epoch 8


Training Net: 100%|██████████| 32/32 [00:01<00:00, 28.09it/s, Loss_v=3.51e-01]


Epoch 9


Training Net: 100%|██████████| 32/32 [00:01<00:00, 26.79it/s, Loss_v=3.60e-01]


Epoch 10


Training Net: 100%|██████████| 32/32 [00:01<00:00, 26.61it/s, Loss_v=3.77e-01]


Epoch 11


Training Net: 100%|██████████| 32/32 [00:01<00:00, 27.52it/s, Loss_v=3.60e-01]


Epoch 12


Training Net: 100%|██████████| 32/32 [00:01<00:00, 27.77it/s, Loss_v=3.35e-01]


Epoch 13


Training Net: 100%|██████████| 32/32 [00:01<00:00, 27.60it/s, Loss_v=3.44e-01]


Epoch 14


Training Net:  12%|█▎        | 4/32 [00:00<00:01, 22.40it/s, Loss_v=3.47e-01]


KeyboardInterrupt: 