In [None]:
import time

from alphago.games import NoughtsAndCrosses
from alphago.estimator import NACNetEstimator
from alphago.alphago import train_alphago

from alphago.utilities import memoize_instance

In [None]:
# Game instance shared by the estimator factory and the training call below.
game = NoughtsAndCrosses()
# NOTE(review): presumably adds caching to the game's methods -- confirm in
# alphago.utilities.memoize_instance.
memoize_instance(game)

# Learning rate handed to every estimator built by create_estimator().
learning_rate = 1e-4

def create_estimator():
    """Build a new NACNetEstimator for the module-level game.

    The module-level ``learning_rate`` and ``game`` are looked up each time
    the function is called, not when it is defined.

    Returns:
        A NACNetEstimator constructed with ``learning_rate`` and the game's
        ``action_indices``.
    """
    estimator = NACNetEstimator(
        learning_rate=learning_rate,
        action_indices=game.action_indices,
    )
    return estimator

# --- Hyperparameters forwarded to train_alphago -------------------------
# Loop sizes. In the recorded run output each round shows a 10-step progress
# bar followed by a 1000-step one, matching self_play_iters/training_iters.
alphago_steps = 1000
self_play_iters = 10
training_iters = 1000

# Search settings -- presumably MCTS iterations per move and the PUCT
# exploration constant; confirm against train_alphago.
mcts_iters = 10
c_puct = 1.0

# NOTE(review): looks like the replay buffer capacity -- confirm units.
replay_length = 10000

# Evaluation settings. In the recorded output the self-play player was
# updated at rates of 0.575 and above but not at exactly 0.55, which
# suggests win_rate is a strict threshold -- confirm in train_alphago.
evaluate_every = 2
num_evaluate_games = 20
win_rate = 0.55

# --- Output locations ---------------------------------------------------
# Timestamped name so each run writes into its own experiment directory.
current_time_format = time.strftime('experiment-%Y-%m-%d_%H:%M:%S')
path = "experiments/{}/".format(current_time_format)
summary_path = path + 'logs/'
checkpoint_path = path + 'checkpoints/'

# No checkpoint step is supplied for this run.
restore_step = None

# Gather every keyword argument for the training run in one place so the
# call itself stays short. batch_size is the only value given inline
# rather than through a module-level variable.
train_options = dict(
    self_play_iters=self_play_iters,
    training_iters=training_iters,
    checkpoint_path=checkpoint_path,
    summary_path=summary_path,
    alphago_steps=alphago_steps,
    evaluate_every=evaluate_every,
    batch_size=32,
    mcts_iters=mcts_iters,
    c_puct=c_puct,
    replay_length=replay_length,
    num_evaluate_games=num_evaluate_games,
    win_rate=win_rate,
    restore_step=restore_step,
    verbose=True,
)

# Run training on the game, passing create_estimator itself (not an
# instance) to train_alphago.
# NOTE(review): the return value is presumably the recorded training
# losses -- confirm against train_alphago.
losses = train_alphago(game, create_estimator, **train_options)

100%|██████████| 10/10 [00:00<00:00, 16.97it/s]
100%|██████████| 1000/1000 [00:01<00:00, 617.35it/s]
Win1/Win2/Draw: 0/1/0:   5%|▌         | 1/20 [00:00<00:02,  7.20it/s]

Evaluating. Self-player vs training, then training vs self-player


Win1/Win2/Draw: 12/6/2: 100%|██████████| 20/20 [00:01<00:00, 14.02it/s]
Win1/Win2/Draw: 12/6/2: 100%|██████████| 20/20 [00:01<00:00, 14.58it/s]
Win1/Win2/Draw: 3/0/1:  20%|██        | 4/20 [00:00<00:00, 25.36it/s]

Self-play player wins: 18, Training player wins: 18, Draws: 4
Win + draw rate for training player: 0.55


Win1/Win2/Draw: 17/1/2: 100%|██████████| 20/20 [00:00<00:00, 25.24it/s]
Win1/Win2/Draw: 8/8/4: 100%|██████████| 20/20 [00:00<00:00, 27.65it/s]
 20%|██        | 2/10 [00:00<00:00, 17.64it/s]

Training player vs random. Wins: 25, Losses: 9, Draws: 6


100%|██████████| 10/10 [00:00<00:00, 17.78it/s]
100%|██████████| 1000/1000 [00:02<00:00, 336.29it/s]
100%|██████████| 10/10 [00:00<00:00, 17.91it/s]
100%|██████████| 1000/1000 [00:02<00:00, 345.52it/s]
Win1/Win2/Draw: 3/0/0:  15%|█▌        | 3/20 [00:00<00:01, 15.20it/s]

Evaluating. Self-player vs training, then training vs self-player


Win1/Win2/Draw: 16/4/0: 100%|██████████| 20/20 [00:01<00:00, 17.76it/s]
Win1/Win2/Draw: 14/4/2: 100%|██████████| 20/20 [00:01<00:00, 17.01it/s]
Win1/Win2/Draw: 5/0/0:  25%|██▌       | 5/20 [00:00<00:00, 25.96it/s]

Self-play player wins: 20, Training player wins: 18, Draws: 2
Win + draw rate for training player: 0.5


Win1/Win2/Draw: 19/0/1: 100%|██████████| 20/20 [00:00<00:00, 26.18it/s]
Win1/Win2/Draw: 8/6/6: 100%|██████████| 20/20 [00:00<00:00, 27.58it/s]
 30%|███       | 3/10 [00:00<00:00, 20.61it/s]

Training player vs random. Wins: 25, Losses: 8, Draws: 7


100%|██████████| 10/10 [00:00<00:00, 17.45it/s]
100%|██████████| 1000/1000 [00:02<00:00, 335.36it/s]
100%|██████████| 10/10 [00:00<00:00, 18.64it/s]
100%|██████████| 1000/1000 [00:02<00:00, 350.72it/s]
Win1/Win2/Draw: 2/0/0:  10%|█         | 2/20 [00:00<00:01, 15.34it/s]

Evaluating. Self-player vs training, then training vs self-player


Win1/Win2/Draw: 17/2/1: 100%|██████████| 20/20 [00:01<00:00, 18.24it/s]
Win1/Win2/Draw: 12/6/2: 100%|██████████| 20/20 [00:01<00:00, 16.87it/s]
Win1/Win2/Draw: 3/0/1:  20%|██        | 4/20 [00:00<00:00, 18.99it/s]

Self-play player wins: 23, Training player wins: 14, Draws: 3
Win + draw rate for training player: 0.425


Win1/Win2/Draw: 12/4/4: 100%|██████████| 20/20 [00:00<00:00, 23.80it/s]
Win1/Win2/Draw: 5/9/6: 100%|██████████| 20/20 [00:00<00:00, 28.77it/s]
  0%|          | 0/10 [00:00<?, ?it/s]

Training player vs random. Wins: 21, Losses: 9, Draws: 10


100%|██████████| 10/10 [00:00<00:00, 18.17it/s]
100%|██████████| 1000/1000 [00:02<00:00, 333.40it/s]
100%|██████████| 10/10 [00:00<00:00, 19.79it/s]
100%|██████████| 1000/1000 [00:03<00:00, 328.51it/s]
Win1/Win2/Draw: 2/1/0:  15%|█▌        | 3/20 [00:00<00:01, 15.46it/s]

Evaluating. Self-player vs training, then training vs self-player


Win1/Win2/Draw: 14/4/2: 100%|██████████| 20/20 [00:01<00:00, 16.87it/s]
Win1/Win2/Draw: 8/9/3: 100%|██████████| 20/20 [00:01<00:00, 15.44it/s]
Win1/Win2/Draw: 4/1/0:  25%|██▌       | 5/20 [00:00<00:00, 25.32it/s]

Self-play player wins: 23, Training player wins: 12, Draws: 5
Win + draw rate for training player: 0.425


Win1/Win2/Draw: 16/4/0: 100%|██████████| 20/20 [00:00<00:00, 26.64it/s]
Win1/Win2/Draw: 5/11/4: 100%|██████████| 20/20 [00:00<00:00, 26.25it/s]
  0%|          | 0/10 [00:00<?, ?it/s]

Training player vs random. Wins: 27, Losses: 9, Draws: 4


100%|██████████| 10/10 [00:00<00:00, 19.79it/s]
100%|██████████| 1000/1000 [00:03<00:00, 305.23it/s]
100%|██████████| 10/10 [00:00<00:00, 16.78it/s]
100%|██████████| 1000/1000 [00:02<00:00, 341.36it/s]
Win1/Win2/Draw: 1/0/0:   5%|▌         | 1/20 [00:00<00:02,  6.90it/s]

Evaluating. Self-player vs training, then training vs self-player


Win1/Win2/Draw: 16/2/2: 100%|██████████| 20/20 [00:01<00:00, 14.84it/s]
Win1/Win2/Draw: 10/7/3: 100%|██████████| 20/20 [00:01<00:00, 15.22it/s]
Win1/Win2/Draw: 3/0/0:  15%|█▌        | 3/20 [00:00<00:00, 20.68it/s]

Self-play player wins: 23, Training player wins: 12, Draws: 5
Win + draw rate for training player: 0.425


Win1/Win2/Draw: 18/0/2: 100%|██████████| 20/20 [00:00<00:00, 23.36it/s]
Win1/Win2/Draw: 8/10/2: 100%|██████████| 20/20 [00:00<00:00, 27.50it/s]
 20%|██        | 2/10 [00:00<00:00, 14.03it/s]

Training player vs random. Wins: 28, Losses: 8, Draws: 4


100%|██████████| 10/10 [00:00<00:00, 14.68it/s]
100%|██████████| 1000/1000 [00:03<00:00, 320.63it/s]
100%|██████████| 10/10 [00:00<00:00, 18.57it/s]
100%|██████████| 1000/1000 [00:03<00:00, 312.28it/s]
Win1/Win2/Draw: 1/0/1:  10%|█         | 2/20 [00:00<00:01, 11.11it/s]

Evaluating. Self-player vs training, then training vs self-player


Win1/Win2/Draw: 13/5/2: 100%|██████████| 20/20 [00:01<00:00, 13.36it/s]
Win1/Win2/Draw: 15/3/2: 100%|██████████| 20/20 [00:01<00:00, 13.42it/s]
Win1/Win2/Draw: 4/0/0:  20%|██        | 4/20 [00:00<00:00, 25.01it/s]

Self-play player wins: 16, Training player wins: 20, Draws: 4
Win + draw rate for training player: 0.6


Win1/Win2/Draw: 17/2/1: 100%|██████████| 20/20 [00:00<00:00, 24.70it/s]
Win1/Win2/Draw: 6/7/7: 100%|██████████| 20/20 [00:00<00:00, 25.82it/s]


Training player vs random. Wins: 24, Losses: 8, Draws: 8
Updating self-play player.
Restoring from step: 10
INFO:tensorflow:Restoring parameters from experiments/experiment-2018-05-16_22:17:36/checkpoints/10.checkpoint


100%|██████████| 10/10 [00:00<00:00, 18.78it/s]
100%|██████████| 1000/1000 [00:02<00:00, 344.29it/s]
100%|██████████| 10/10 [00:00<00:00, 14.28it/s]
100%|██████████| 1000/1000 [00:03<00:00, 293.20it/s]
Win1/Win2/Draw: 1/1/0:  10%|█         | 2/20 [00:00<00:01, 10.99it/s]

Evaluating. Self-player vs training, then training vs self-player


Win1/Win2/Draw: 9/6/5: 100%|██████████| 20/20 [00:01<00:00, 12.28it/s]
Win1/Win2/Draw: 13/7/0: 100%|██████████| 20/20 [00:01<00:00, 13.82it/s]
Win1/Win2/Draw: 1/1/1:  15%|█▌        | 3/20 [00:00<00:00, 20.19it/s]

Self-play player wins: 16, Training player wins: 19, Draws: 5
Win + draw rate for training player: 0.6


Win1/Win2/Draw: 17/2/1: 100%|██████████| 20/20 [00:00<00:00, 22.95it/s]
Win1/Win2/Draw: 10/6/4: 100%|██████████| 20/20 [00:00<00:00, 24.00it/s]


Training player vs random. Wins: 23, Losses: 12, Draws: 5
Updating self-play player.
Restoring from step: 12
INFO:tensorflow:Restoring parameters from experiments/experiment-2018-05-16_22:17:36/checkpoints/12.checkpoint


100%|██████████| 10/10 [00:00<00:00, 18.26it/s]
100%|██████████| 1000/1000 [00:02<00:00, 353.52it/s]
100%|██████████| 10/10 [00:00<00:00, 22.25it/s]
100%|██████████| 1000/1000 [00:02<00:00, 354.29it/s]
Win1/Win2/Draw: 3/0/1:  20%|██        | 4/20 [00:00<00:00, 22.07it/s]

Evaluating. Self-player vs training, then training vs self-player


Win1/Win2/Draw: 10/8/2: 100%|██████████| 20/20 [00:00<00:00, 20.17it/s]
Win1/Win2/Draw: 13/2/5: 100%|██████████| 20/20 [00:01<00:00, 15.65it/s]
Win1/Win2/Draw: 4/0/0:  20%|██        | 4/20 [00:00<00:00, 23.93it/s]

Self-play player wins: 12, Training player wins: 21, Draws: 7
Win + draw rate for training player: 0.7


Win1/Win2/Draw: 16/0/4: 100%|██████████| 20/20 [00:00<00:00, 23.89it/s]
Win1/Win2/Draw: 6/10/4: 100%|██████████| 20/20 [00:00<00:00, 26.24it/s]


Training player vs random. Wins: 26, Losses: 6, Draws: 8
Updating self-play player.
Restoring from step: 14
INFO:tensorflow:Restoring parameters from experiments/experiment-2018-05-16_22:17:36/checkpoints/14.checkpoint


100%|██████████| 10/10 [00:00<00:00, 16.16it/s]
100%|██████████| 1000/1000 [00:03<00:00, 302.64it/s]
100%|██████████| 10/10 [00:00<00:00, 15.13it/s]
100%|██████████| 1000/1000 [00:03<00:00, 270.57it/s]
Win1/Win2/Draw: 0/3/0:  15%|█▌        | 3/20 [00:00<00:00, 18.14it/s]

Evaluating. Self-player vs training, then training vs self-player


Win1/Win2/Draw: 7/7/6: 100%|██████████| 20/20 [00:01<00:00, 17.36it/s]
Win1/Win2/Draw: 10/8/2: 100%|██████████| 20/20 [00:01<00:00, 18.54it/s]
Win1/Win2/Draw: 5/0/1:  30%|███       | 6/20 [00:00<00:00, 38.09it/s]

Self-play player wins: 15, Training player wins: 17, Draws: 8
Win + draw rate for training player: 0.625


Win1/Win2/Draw: 17/2/1: 100%|██████████| 20/20 [00:00<00:00, 28.47it/s]
Win1/Win2/Draw: 7/8/5: 100%|██████████| 20/20 [00:00<00:00, 26.36it/s]


Training player vs random. Wins: 25, Losses: 9, Draws: 6
Updating self-play player.
Restoring from step: 16
INFO:tensorflow:Restoring parameters from experiments/experiment-2018-05-16_22:17:36/checkpoints/16.checkpoint


100%|██████████| 10/10 [00:00<00:00, 18.15it/s]
100%|██████████| 1000/1000 [00:02<00:00, 348.79it/s]
100%|██████████| 10/10 [00:00<00:00, 18.86it/s]
100%|██████████| 1000/1000 [00:05<00:00, 196.73it/s]
Win1/Win2/Draw: 1/0/0:   5%|▌         | 1/20 [00:00<00:02,  6.74it/s]

Evaluating. Self-player vs training, then training vs self-player


Win1/Win2/Draw: 9/8/3: 100%|██████████| 20/20 [00:02<00:00,  9.57it/s]
Win1/Win2/Draw: 12/6/2: 100%|██████████| 20/20 [00:01<00:00, 16.98it/s]
Win1/Win2/Draw: 3/2/0:  25%|██▌       | 5/20 [00:00<00:00, 34.73it/s]

Self-play player wins: 15, Training player wins: 20, Draws: 5
Win + draw rate for training player: 0.625


Win1/Win2/Draw: 14/2/4: 100%|██████████| 20/20 [00:00<00:00, 24.57it/s]
Win1/Win2/Draw: 4/10/6: 100%|██████████| 20/20 [00:00<00:00, 25.19it/s]


Training player vs random. Wins: 24, Losses: 6, Draws: 10
Updating self-play player.
Restoring from step: 18
INFO:tensorflow:Restoring parameters from experiments/experiment-2018-05-16_22:17:36/checkpoints/18.checkpoint


100%|██████████| 10/10 [00:00<00:00, 21.89it/s]
100%|██████████| 1000/1000 [00:02<00:00, 375.35it/s]
100%|██████████| 10/10 [00:00<00:00, 22.81it/s]
100%|██████████| 1000/1000 [00:02<00:00, 360.32it/s]
Win1/Win2/Draw: 0/1/1:  10%|█         | 2/20 [00:00<00:01, 12.26it/s]

Evaluating. Self-player vs training, then training vs self-player


Win1/Win2/Draw: 12/7/1: 100%|██████████| 20/20 [00:01<00:00, 14.98it/s]
Win1/Win2/Draw: 10/7/3: 100%|██████████| 20/20 [00:01<00:00, 15.48it/s]
Win1/Win2/Draw: 5/0/1:  30%|███       | 6/20 [00:00<00:00, 35.28it/s]

Self-play player wins: 19, Training player wins: 17, Draws: 4
Win + draw rate for training player: 0.525


Win1/Win2/Draw: 15/1/4: 100%|██████████| 20/20 [00:00<00:00, 29.77it/s]
Win1/Win2/Draw: 7/11/2: 100%|██████████| 20/20 [00:00<00:00, 38.16it/s]
 30%|███       | 3/10 [00:00<00:00, 19.99it/s]

Training player vs random. Wins: 26, Losses: 8, Draws: 6


100%|██████████| 10/10 [00:00<00:00, 21.05it/s]
100%|██████████| 1000/1000 [00:02<00:00, 381.34it/s]
100%|██████████| 10/10 [00:00<00:00, 18.25it/s]
100%|██████████| 1000/1000 [00:03<00:00, 276.51it/s]
Win1/Win2/Draw: 1/1/1:  15%|█▌        | 3/20 [00:00<00:00, 19.40it/s]

Evaluating. Self-player vs training, then training vs self-player


Win1/Win2/Draw: 10/7/3: 100%|██████████| 20/20 [00:01<00:00, 19.96it/s]
Win1/Win2/Draw: 8/10/2: 100%|██████████| 20/20 [00:01<00:00, 16.81it/s]
Win1/Win2/Draw: 3/0/2:  25%|██▌       | 5/20 [00:00<00:00, 28.50it/s]

Self-play player wins: 20, Training player wins: 15, Draws: 5
Win + draw rate for training player: 0.5


Win1/Win2/Draw: 14/3/3: 100%|██████████| 20/20 [00:00<00:00, 29.47it/s]
Win1/Win2/Draw: 6/9/5: 100%|██████████| 20/20 [00:00<00:00, 35.57it/s]
 30%|███       | 3/10 [00:00<00:00, 25.32it/s]

Training player vs random. Wins: 23, Losses: 9, Draws: 8


100%|██████████| 10/10 [00:00<00:00, 20.53it/s]
100%|██████████| 1000/1000 [00:02<00:00, 345.87it/s]
100%|██████████| 10/10 [00:00<00:00, 23.10it/s]
100%|██████████| 1000/1000 [00:04<00:00, 220.37it/s]
Win1/Win2/Draw: 1/0/0:   5%|▌         | 1/20 [00:00<00:01, 11.44it/s]

Evaluating. Self-player vs training, then training vs self-player


Win1/Win2/Draw: 19/0/1: 100%|██████████| 20/20 [00:01<00:00, 11.24it/s]
Win1/Win2/Draw: 11/5/4: 100%|██████████| 20/20 [00:01<00:00, 16.48it/s]
Win1/Win2/Draw: 4/0/0:  20%|██        | 4/20 [00:00<00:00, 25.86it/s]

Self-play player wins: 24, Training player wins: 11, Draws: 5
Win + draw rate for training player: 0.4


Win1/Win2/Draw: 15/4/1: 100%|██████████| 20/20 [00:01<00:00, 16.66it/s]
Win1/Win2/Draw: 8/10/2: 100%|██████████| 20/20 [00:01<00:00, 18.04it/s]
  0%|          | 0/10 [00:00<?, ?it/s]

Training player vs random. Wins: 25, Losses: 12, Draws: 3


100%|██████████| 10/10 [00:00<00:00, 16.05it/s]
100%|██████████| 1000/1000 [00:03<00:00, 301.29it/s]
100%|██████████| 10/10 [00:00<00:00, 18.06it/s]
100%|██████████| 1000/1000 [00:03<00:00, 260.91it/s]
Win1/Win2/Draw: 1/2/0:  15%|█▌        | 3/20 [00:00<00:01, 16.64it/s]

Evaluating. Self-player vs training, then training vs self-player


Win1/Win2/Draw: 9/11/0: 100%|██████████| 20/20 [00:01<00:00, 15.71it/s]
Win1/Win2/Draw: 11/3/6: 100%|██████████| 20/20 [00:01<00:00, 10.39it/s]
Win1/Win2/Draw: 4/1/1:  30%|███       | 6/20 [00:00<00:00, 32.35it/s]

Self-play player wins: 12, Training player wins: 22, Draws: 6
Win + draw rate for training player: 0.7


Win1/Win2/Draw: 18/1/1: 100%|██████████| 20/20 [00:00<00:00, 23.50it/s]
Win1/Win2/Draw: 7/10/3: 100%|██████████| 20/20 [00:00<00:00, 24.20it/s]


Training player vs random. Wins: 28, Losses: 8, Draws: 4
Updating self-play player.
Restoring from step: 26
INFO:tensorflow:Restoring parameters from experiments/experiment-2018-05-16_22:17:36/checkpoints/26.checkpoint


100%|██████████| 10/10 [00:00<00:00, 13.56it/s]
100%|██████████| 1000/1000 [00:03<00:00, 285.60it/s]
100%|██████████| 10/10 [00:00<00:00, 14.63it/s]
100%|██████████| 1000/1000 [00:03<00:00, 269.71it/s]
Win1/Win2/Draw: 2/1/0:  15%|█▌        | 3/20 [00:00<00:00, 17.81it/s]

Evaluating. Self-player vs training, then training vs self-player


Win1/Win2/Draw: 12/7/1: 100%|██████████| 20/20 [00:01<00:00, 15.73it/s]
Win1/Win2/Draw: 9/11/0: 100%|██████████| 20/20 [00:01<00:00, 12.97it/s]
Win1/Win2/Draw: 3/0/0:  15%|█▌        | 3/20 [00:00<00:00, 20.65it/s]

Self-play player wins: 23, Training player wins: 16, Draws: 1
Win + draw rate for training player: 0.425


Win1/Win2/Draw: 17/2/1: 100%|██████████| 20/20 [00:00<00:00, 24.82it/s]
Win1/Win2/Draw: 5/11/4: 100%|██████████| 20/20 [00:00<00:00, 27.46it/s]
 20%|██        | 2/10 [00:00<00:00, 14.91it/s]

Training player vs random. Wins: 28, Losses: 7, Draws: 5


100%|██████████| 10/10 [00:00<00:00, 15.13it/s]
100%|██████████| 1000/1000 [00:03<00:00, 265.03it/s]
100%|██████████| 10/10 [00:00<00:00, 14.54it/s]
100%|██████████| 1000/1000 [00:03<00:00, 292.75it/s]
Win1/Win2/Draw: 3/0/0:  15%|█▌        | 3/20 [00:00<00:00, 21.28it/s]

Evaluating. Self-player vs training, then training vs self-player


Win1/Win2/Draw: 17/3/0: 100%|██████████| 20/20 [00:00<00:00, 20.32it/s]
Win1/Win2/Draw: 9/9/2: 100%|██████████| 20/20 [00:01<00:00, 19.20it/s]
Win1/Win2/Draw: 3/0/1:  20%|██        | 4/20 [00:00<00:00, 35.74it/s]

Self-play player wins: 26, Training player wins: 12, Draws: 2
Win + draw rate for training player: 0.35


Win1/Win2/Draw: 16/3/1: 100%|██████████| 20/20 [00:01<00:00, 15.34it/s]
Win1/Win2/Draw: 2/14/4: 100%|██████████| 20/20 [00:01<00:00, 13.45it/s]
  0%|          | 0/10 [00:00<?, ?it/s]

Training player vs random. Wins: 30, Losses: 5, Draws: 5


100%|██████████| 10/10 [00:01<00:00,  8.37it/s]
100%|██████████| 1000/1000 [00:03<00:00, 307.74it/s]
100%|██████████| 10/10 [00:00<00:00, 20.90it/s]
100%|██████████| 1000/1000 [00:03<00:00, 328.77it/s]
Win1/Win2/Draw: 3/0/0:  15%|█▌        | 3/20 [00:00<00:00, 18.86it/s]

Evaluating. Self-player vs training, then training vs self-player


Win1/Win2/Draw: 15/4/1: 100%|██████████| 20/20 [00:01<00:00, 17.77it/s]
Win1/Win2/Draw: 9/8/3: 100%|██████████| 20/20 [00:01<00:00, 17.28it/s]
Win1/Win2/Draw: 3/0/1:  20%|██        | 4/20 [00:00<00:00, 22.16it/s]

Self-play player wins: 23, Training player wins: 13, Draws: 4
Win + draw rate for training player: 0.425


Win1/Win2/Draw: 18/1/1: 100%|██████████| 20/20 [00:00<00:00, 22.64it/s]
Win1/Win2/Draw: 8/7/5: 100%|██████████| 20/20 [00:00<00:00, 27.89it/s]
 20%|██        | 2/10 [00:00<00:00, 18.98it/s]

Training player vs random. Wins: 25, Losses: 9, Draws: 6


100%|██████████| 10/10 [00:00<00:00, 18.55it/s]
100%|██████████| 1000/1000 [00:03<00:00, 287.47it/s]
100%|██████████| 10/10 [00:00<00:00, 16.98it/s]
100%|██████████| 1000/1000 [00:03<00:00, 317.43it/s]
Win1/Win2/Draw: 1/0/1:  10%|█         | 2/20 [00:00<00:01, 12.86it/s]

Evaluating. Self-player vs training, then training vs self-player


Win1/Win2/Draw: 16/1/3: 100%|██████████| 20/20 [00:01<00:00, 13.84it/s]
Win1/Win2/Draw: 6/12/2: 100%|██████████| 20/20 [00:01<00:00, 15.84it/s]
Win1/Win2/Draw: 4/0/0:  20%|██        | 4/20 [00:00<00:00, 22.08it/s]

Self-play player wins: 28, Training player wins: 7, Draws: 5
Win + draw rate for training player: 0.3


Win1/Win2/Draw: 17/3/0: 100%|██████████| 20/20 [00:00<00:00, 22.56it/s]
Win1/Win2/Draw: 3/13/4: 100%|██████████| 20/20 [00:00<00:00, 27.58it/s]
 30%|███       | 3/10 [00:00<00:00, 24.82it/s]

Training player vs random. Wins: 30, Losses: 6, Draws: 4


100%|██████████| 10/10 [00:00<00:00, 13.90it/s]
100%|██████████| 1000/1000 [00:03<00:00, 272.05it/s]
100%|██████████| 10/10 [00:00<00:00, 16.79it/s]
100%|██████████| 1000/1000 [00:03<00:00, 277.17it/s]
Win1/Win2/Draw: 3/0/0:  15%|█▌        | 3/20 [00:00<00:00, 17.70it/s]

Evaluating. Self-player vs training, then training vs self-player


Win1/Win2/Draw: 13/5/2: 100%|██████████| 20/20 [00:01<00:00, 18.90it/s]
Win1/Win2/Draw: 10/9/1: 100%|██████████| 20/20 [00:00<00:00, 20.66it/s]
Win1/Win2/Draw: 7/0/0:  35%|███▌      | 7/20 [00:00<00:00, 38.15it/s]

Self-play player wins: 22, Training player wins: 15, Draws: 3
Win + draw rate for training player: 0.45


Win1/Win2/Draw: 19/0/1: 100%|██████████| 20/20 [00:00<00:00, 38.11it/s]
Win1/Win2/Draw: 6/5/9: 100%|██████████| 20/20 [00:00<00:00, 34.74it/s]
 30%|███       | 3/10 [00:00<00:00, 19.67it/s]

Training player vs random. Wins: 24, Losses: 6, Draws: 10


100%|██████████| 10/10 [00:00<00:00, 20.86it/s]
100%|██████████| 1000/1000 [00:02<00:00, 347.74it/s]
100%|██████████| 10/10 [00:00<00:00, 22.04it/s]
100%|██████████| 1000/1000 [00:02<00:00, 347.81it/s]
Win1/Win2/Draw: 1/1/1:  15%|█▌        | 3/20 [00:00<00:00, 18.16it/s]

Evaluating. Self-player vs training, then training vs self-player


Win1/Win2/Draw: 15/3/2: 100%|██████████| 20/20 [00:01<00:00, 19.90it/s]
Win1/Win2/Draw: 10/9/1: 100%|██████████| 20/20 [00:01<00:00, 19.99it/s]
Win1/Win2/Draw: 5/1/0:  30%|███       | 6/20 [00:00<00:00, 32.18it/s]

Self-play player wins: 24, Training player wins: 13, Draws: 3
Win + draw rate for training player: 0.4


Win1/Win2/Draw: 15/3/2: 100%|██████████| 20/20 [00:00<00:00, 34.64it/s]
Win1/Win2/Draw: 8/9/3: 100%|██████████| 20/20 [00:00<00:00, 37.38it/s]
 30%|███       | 3/10 [00:00<00:00, 21.21it/s]

Training player vs random. Wins: 24, Losses: 11, Draws: 5


100%|██████████| 10/10 [00:00<00:00, 21.14it/s]
100%|██████████| 1000/1000 [00:02<00:00, 371.62it/s]
100%|██████████| 10/10 [00:00<00:00, 23.50it/s]
100%|██████████| 1000/1000 [00:02<00:00, 349.73it/s]
Win1/Win2/Draw: 4/0/0:  20%|██        | 4/20 [00:00<00:00, 21.70it/s]

Evaluating. Self-player vs training, then training vs self-player


Win1/Win2/Draw: 15/3/2: 100%|██████████| 20/20 [00:01<00:00, 19.41it/s]
Win1/Win2/Draw: 7/10/3: 100%|██████████| 20/20 [00:01<00:00, 17.89it/s]
Win1/Win2/Draw: 5/0/1:  30%|███       | 6/20 [00:00<00:00, 29.19it/s]

Self-play player wins: 25, Training player wins: 10, Draws: 5
Win + draw rate for training player: 0.375


Win1/Win2/Draw: 15/2/3: 100%|██████████| 20/20 [00:00<00:00, 29.99it/s]
Win1/Win2/Draw: 5/10/5: 100%|██████████| 20/20 [00:00<00:00, 32.36it/s]
 20%|██        | 2/10 [00:00<00:00, 19.51it/s]

Training player vs random. Wins: 25, Losses: 7, Draws: 8


100%|██████████| 10/10 [00:00<00:00, 20.35it/s]
100%|██████████| 1000/1000 [00:03<00:00, 323.13it/s]
100%|██████████| 10/10 [00:00<00:00, 21.49it/s]
100%|██████████| 1000/1000 [00:02<00:00, 348.43it/s]
Win1/Win2/Draw: 2/0/1:  15%|█▌        | 3/20 [00:00<00:01, 16.58it/s]

Evaluating. Self-player vs training, then training vs self-player


Win1/Win2/Draw: 11/6/3: 100%|██████████| 20/20 [00:01<00:00, 15.46it/s]
Win1/Win2/Draw: 8/8/4: 100%|██████████| 20/20 [00:01<00:00, 15.92it/s]
Win1/Win2/Draw: 4/0/0:  20%|██        | 4/20 [00:00<00:00, 24.87it/s]

Self-play player wins: 19, Training player wins: 14, Draws: 7
Win + draw rate for training player: 0.525


Win1/Win2/Draw: 19/0/1: 100%|██████████| 20/20 [00:00<00:00, 25.60it/s]
Win1/Win2/Draw: 4/12/4: 100%|██████████| 20/20 [00:00<00:00, 22.89it/s]
  0%|          | 0/10 [00:00<?, ?it/s]

Training player vs random. Wins: 31, Losses: 4, Draws: 5


100%|██████████| 10/10 [00:00<00:00, 17.96it/s]
100%|██████████| 1000/1000 [00:03<00:00, 297.47it/s]
100%|██████████| 10/10 [00:00<00:00, 20.18it/s]
100%|██████████| 1000/1000 [00:02<00:00, 338.98it/s]
Win1/Win2/Draw: 3/0/0:  15%|█▌        | 3/20 [00:00<00:01, 16.61it/s]

Evaluating. Self-player vs training, then training vs self-player


Win1/Win2/Draw: 14/3/3: 100%|██████████| 20/20 [00:01<00:00, 16.36it/s]
Win1/Win2/Draw: 10/10/0: 100%|██████████| 20/20 [00:01<00:00, 16.59it/s]
Win1/Win2/Draw: 4/0/1:  25%|██▌       | 5/20 [00:00<00:00, 24.33it/s]

Self-play player wins: 24, Training player wins: 13, Draws: 3
Win + draw rate for training player: 0.4


Win1/Win2/Draw: 18/1/1: 100%|██████████| 20/20 [00:00<00:00, 25.56it/s]
Win1/Win2/Draw: 7/9/4: 100%|██████████| 20/20 [00:00<00:00, 26.43it/s]
 20%|██        | 2/10 [00:00<00:00, 16.41it/s]

Training player vs random. Wins: 27, Losses: 8, Draws: 5


100%|██████████| 10/10 [00:00<00:00, 19.24it/s]
100%|██████████| 1000/1000 [00:03<00:00, 318.77it/s]
100%|██████████| 10/10 [00:00<00:00, 21.20it/s]
100%|██████████| 1000/1000 [00:03<00:00, 319.56it/s]
Win1/Win2/Draw: 1/2/0:  15%|█▌        | 3/20 [00:00<00:01, 16.15it/s]

Evaluating. Self-player vs training, then training vs self-player


Win1/Win2/Draw: 14/4/2: 100%|██████████| 20/20 [00:01<00:00, 12.64it/s]
Win1/Win2/Draw: 10/8/2: 100%|██████████| 20/20 [00:01<00:00, 18.33it/s]
Win1/Win2/Draw: 5/0/0:  25%|██▌       | 5/20 [00:00<00:00, 27.74it/s]

Self-play player wins: 22, Training player wins: 14, Draws: 4
Win + draw rate for training player: 0.45


Win1/Win2/Draw: 18/2/0: 100%|██████████| 20/20 [00:00<00:00, 27.68it/s]
Win1/Win2/Draw: 4/11/5: 100%|██████████| 20/20 [00:00<00:00, 28.56it/s]
 20%|██        | 2/10 [00:00<00:00, 19.97it/s]

Training player vs random. Wins: 29, Losses: 6, Draws: 5


100%|██████████| 10/10 [00:00<00:00, 19.99it/s]
100%|██████████| 1000/1000 [00:03<00:00, 314.77it/s]
100%|██████████| 10/10 [00:00<00:00, 19.64it/s]
100%|██████████| 1000/1000 [00:03<00:00, 309.28it/s]
Win1/Win2/Draw: 0/1/1:  10%|█         | 2/20 [00:00<00:01, 14.65it/s]

Evaluating. Self-player vs training, then training vs self-player


Win1/Win2/Draw: 12/4/4: 100%|██████████| 20/20 [00:01<00:00, 14.71it/s]
Win1/Win2/Draw: 4/9/7: 100%|██████████| 20/20 [00:01<00:00, 13.83it/s]
Win1/Win2/Draw: 4/0/0:  20%|██        | 4/20 [00:00<00:00, 17.27it/s]

Self-play player wins: 21, Training player wins: 8, Draws: 11
Win + draw rate for training player: 0.475


Win1/Win2/Draw: 16/1/3: 100%|██████████| 20/20 [00:00<00:00, 20.66it/s]
Win1/Win2/Draw: 7/10/3: 100%|██████████| 20/20 [00:00<00:00, 20.30it/s]
 30%|███       | 3/10 [00:00<00:00, 23.23it/s]

Training player vs random. Wins: 26, Losses: 8, Draws: 6


100%|██████████| 10/10 [00:00<00:00, 18.64it/s]
100%|██████████| 1000/1000 [00:02<00:00, 338.54it/s]
100%|██████████| 10/10 [00:00<00:00, 17.08it/s]
100%|██████████| 1000/1000 [00:04<00:00, 247.40it/s]
Win1/Win2/Draw: 2/1/0:  15%|█▌        | 3/20 [00:00<00:00, 19.53it/s]

Evaluating. Self-player vs training, then training vs self-player


Win1/Win2/Draw: 12/7/1: 100%|██████████| 20/20 [00:01<00:00, 13.93it/s]
Win1/Win2/Draw: 9/7/4: 100%|██████████| 20/20 [00:01<00:00, 14.49it/s]
Win1/Win2/Draw: 6/0/0:  30%|███       | 6/20 [00:00<00:00, 34.06it/s]

Self-play player wins: 19, Training player wins: 16, Draws: 5
Win + draw rate for training player: 0.525


Win1/Win2/Draw: 18/1/1: 100%|██████████| 20/20 [00:00<00:00, 27.97it/s]
Win1/Win2/Draw: 5/9/6: 100%|██████████| 20/20 [00:00<00:00, 34.02it/s]
 30%|███       | 3/10 [00:00<00:00, 20.58it/s]

Training player vs random. Wins: 27, Losses: 6, Draws: 7


100%|██████████| 10/10 [00:00<00:00, 20.62it/s]
100%|██████████| 1000/1000 [00:03<00:00, 320.88it/s]
100%|██████████| 10/10 [00:00<00:00, 14.47it/s]
100%|██████████| 1000/1000 [00:03<00:00, 270.68it/s]
Win1/Win2/Draw: 1/2/0:  15%|█▌        | 3/20 [00:00<00:01, 15.32it/s]

Evaluating. Self-player vs training, then training vs self-player


Win1/Win2/Draw: 9/9/2: 100%|██████████| 20/20 [00:01<00:00, 14.38it/s]
Win1/Win2/Draw: 8/8/4: 100%|██████████| 20/20 [00:01<00:00, 17.53it/s]
Win1/Win2/Draw: 8/0/0:  40%|████      | 8/20 [00:00<00:00, 41.42it/s]

Self-play player wins: 17, Training player wins: 17, Draws: 6
Win + draw rate for training player: 0.575


Win1/Win2/Draw: 18/1/1: 100%|██████████| 20/20 [00:00<00:00, 38.08it/s]
Win1/Win2/Draw: 11/6/3: 100%|██████████| 20/20 [00:00<00:00, 38.32it/s]


Training player vs random. Wins: 24, Losses: 12, Draws: 4
Updating self-play player.
Restoring from step: 52
INFO:tensorflow:Restoring parameters from experiments/experiment-2018-05-16_22:17:36/checkpoints/52.checkpoint


100%|██████████| 10/10 [00:00<00:00, 20.48it/s]
100%|██████████| 1000/1000 [00:02<00:00, 345.19it/s]
100%|██████████| 10/10 [00:00<00:00, 15.76it/s]
100%|██████████| 1000/1000 [00:02<00:00, 335.45it/s]
Win1/Win2/Draw: 3/0/0:  15%|█▌        | 3/20 [00:00<00:01, 14.30it/s]

Evaluating. Self-player vs training, then training vs self-player


Win1/Win2/Draw: 13/5/2: 100%|██████████| 20/20 [00:01<00:00, 19.67it/s]
Win1/Win2/Draw: 10/8/2: 100%|██████████| 20/20 [00:01<00:00, 19.98it/s]
Win1/Win2/Draw: 4/0/2:  30%|███       | 6/20 [00:00<00:00, 29.72it/s]

Self-play player wins: 21, Training player wins: 15, Draws: 4
Win + draw rate for training player: 0.475


Win1/Win2/Draw: 16/1/3: 100%|██████████| 20/20 [00:00<00:00, 35.66it/s]
Win1/Win2/Draw: 7/9/4: 100%|██████████| 20/20 [00:00<00:00, 37.37it/s]
 20%|██        | 2/10 [00:00<00:00, 18.89it/s]

Training player vs random. Wins: 25, Losses: 8, Draws: 7


100%|██████████| 10/10 [00:00<00:00, 18.25it/s]
100%|██████████| 1000/1000 [00:03<00:00, 319.54it/s]
100%|██████████| 10/10 [00:00<00:00, 19.83it/s]
100%|██████████| 1000/1000 [00:02<00:00, 351.49it/s]
Win1/Win2/Draw: 5/0/0:  25%|██▌       | 5/20 [00:00<00:00, 26.05it/s]

Evaluating. Self-player vs training, then training vs self-player


Win1/Win2/Draw: 19/1/0: 100%|██████████| 20/20 [00:00<00:00, 24.72it/s]
Win1/Win2/Draw: 10/9/1: 100%|██████████| 20/20 [00:00<00:00, 20.49it/s]
Win1/Win2/Draw: 6/0/1:  35%|███▌      | 7/20 [00:00<00:00, 39.41it/s]

Self-play player wins: 28, Training player wins: 11, Draws: 1
Win + draw rate for training player: 0.3


Win1/Win2/Draw: 16/1/3: 100%|██████████| 20/20 [00:00<00:00, 36.04it/s]
Win1/Win2/Draw: 5/14/1: 100%|██████████| 20/20 [00:00<00:00, 40.46it/s]
 30%|███       | 3/10 [00:00<00:00, 24.74it/s]

Training player vs random. Wins: 30, Losses: 6, Draws: 4


100%|██████████| 10/10 [00:00<00:00, 21.10it/s]
100%|██████████| 1000/1000 [00:03<00:00, 331.69it/s]
100%|██████████| 10/10 [00:00<00:00, 20.31it/s]
100%|██████████| 1000/1000 [00:03<00:00, 302.42it/s]
Win1/Win2/Draw: 3/0/0:  15%|█▌        | 3/20 [00:00<00:01, 16.99it/s]

Evaluating. Self-player vs training, then training vs self-player


Win1/Win2/Draw: 9/5/6: 100%|██████████| 20/20 [00:01<00:00, 13.58it/s]
Win1/Win2/Draw: 13/4/3: 100%|██████████| 20/20 [00:01<00:00, 15.98it/s]
Win1/Win2/Draw: 6/0/0:  30%|███       | 6/20 [00:00<00:00, 30.58it/s]

Self-play player wins: 13, Training player wins: 18, Draws: 9
Win + draw rate for training player: 0.675


Win1/Win2/Draw: 17/1/2: 100%|██████████| 20/20 [00:00<00:00, 31.53it/s]
Win1/Win2/Draw: 6/12/2: 100%|██████████| 20/20 [00:00<00:00, 20.20it/s]


Training player vs random. Wins: 29, Losses: 7, Draws: 4
Updating self-play player.
Restoring from step: 58
INFO:tensorflow:Restoring parameters from experiments/experiment-2018-05-16_22:17:36/checkpoints/58.checkpoint


100%|██████████| 10/10 [00:00<00:00, 12.04it/s]
100%|██████████| 1000/1000 [00:05<00:00, 198.83it/s]
100%|██████████| 10/10 [00:00<00:00, 11.74it/s]
100%|██████████| 1000/1000 [00:02<00:00, 336.91it/s]
Win1/Win2/Draw: 1/1/1:  15%|█▌        | 3/20 [00:00<00:00, 18.82it/s]

Evaluating. Self-player vs training, then training vs self-player


Win1/Win2/Draw: 9/3/8: 100%|██████████| 20/20 [00:01<00:00, 17.49it/s]
Win1/Win2/Draw: 12/4/4: 100%|██████████| 20/20 [00:01<00:00, 19.62it/s]
Win1/Win2/Draw: 4/1/1:  30%|███       | 6/20 [00:00<00:00, 33.09it/s]

Self-play player wins: 13, Training player wins: 15, Draws: 12
Win + draw rate for training player: 0.675


Win1/Win2/Draw: 17/2/1: 100%|██████████| 20/20 [00:00<00:00, 35.70it/s]
Win1/Win2/Draw: 5/9/6: 100%|██████████| 20/20 [00:00<00:00, 33.91it/s]


Training player vs random. Wins: 26, Losses: 7, Draws: 7
Updating self-play player.
Restoring from step: 60
INFO:tensorflow:Restoring parameters from experiments/experiment-2018-05-16_22:17:36/checkpoints/60.checkpoint


100%|██████████| 10/10 [00:01<00:00,  9.00it/s]
100%|██████████| 1000/1000 [00:04<00:00, 229.76it/s]
100%|██████████| 10/10 [00:00<00:00, 17.65it/s]
100%|██████████| 1000/1000 [00:03<00:00, 290.52it/s]
Win1/Win2/Draw: 2/0/0:  10%|█         | 2/20 [00:00<00:02,  7.32it/s]

Evaluating. Self-player vs training, then training vs self-player


Win1/Win2/Draw: 17/2/1: 100%|██████████| 20/20 [00:01<00:00, 14.36it/s]
Win1/Win2/Draw: 11/6/3: 100%|██████████| 20/20 [00:01<00:00, 15.40it/s]
Win1/Win2/Draw: 3/1/0:  20%|██        | 4/20 [00:00<00:00, 19.26it/s]

Self-play player wins: 23, Training player wins: 13, Draws: 4
Win + draw rate for training player: 0.425


Win1/Win2/Draw: 15/5/0: 100%|██████████| 20/20 [00:00<00:00, 22.64it/s]
Win1/Win2/Draw: 7/11/2: 100%|██████████| 20/20 [00:00<00:00, 20.68it/s]
  0%|          | 0/10 [00:00<?, ?it/s]

Training player vs random. Wins: 26, Losses: 12, Draws: 2


100%|██████████| 10/10 [00:00<00:00, 16.40it/s]
100%|██████████| 1000/1000 [00:03<00:00, 268.23it/s]
100%|██████████| 10/10 [00:00<00:00, 15.11it/s]
100%|██████████| 1000/1000 [00:03<00:00, 320.31it/s]
Win1/Win2/Draw: 2/0/0:  10%|█         | 2/20 [00:00<00:01, 15.02it/s]

Evaluating. Self-player vs training, then training vs self-player


Win1/Win2/Draw: 12/7/1: 100%|██████████| 20/20 [00:01<00:00, 14.82it/s]
Win1/Win2/Draw: 13/5/2: 100%|██████████| 20/20 [00:01<00:00, 15.28it/s]
Win1/Win2/Draw: 4/0/0:  20%|██        | 4/20 [00:00<00:00, 24.09it/s]

Self-play player wins: 17, Training player wins: 20, Draws: 3
Win + draw rate for training player: 0.575


Win1/Win2/Draw: 19/0/1: 100%|██████████| 20/20 [00:00<00:00, 23.47it/s]
Win1/Win2/Draw: 6/10/4: 100%|██████████| 20/20 [00:00<00:00, 24.42it/s]


Training player vs random. Wins: 29, Losses: 6, Draws: 5
Updating self-play player.
Restoring from step: 64
INFO:tensorflow:Restoring parameters from experiments/experiment-2018-05-16_22:17:36/checkpoints/64.checkpoint


100%|██████████| 10/10 [00:00<00:00, 17.22it/s]
100%|██████████| 1000/1000 [00:03<00:00, 271.84it/s]
100%|██████████| 10/10 [00:00<00:00, 16.94it/s]
100%|██████████| 1000/1000 [00:03<00:00, 276.49it/s]
Win1/Win2/Draw: 2/1/0:  15%|█▌        | 3/20 [00:00<00:00, 19.63it/s]

Evaluating. Self-player vs training, then training vs self-player


Win1/Win2/Draw: 12/7/1: 100%|██████████| 20/20 [00:01<00:00, 17.11it/s]
Win1/Win2/Draw: 11/4/5: 100%|██████████| 20/20 [00:01<00:00, 16.36it/s]
Win1/Win2/Draw: 4/1/0:  25%|██▌       | 5/20 [00:00<00:00, 28.85it/s]

Self-play player wins: 16, Training player wins: 18, Draws: 6
Win + draw rate for training player: 0.6


Win1/Win2/Draw: 16/3/1: 100%|██████████| 20/20 [00:00<00:00, 27.16it/s]
Win1/Win2/Draw: 7/13/0: 100%|██████████| 20/20 [00:00<00:00, 28.40it/s]


Training player vs random. Wins: 29, Losses: 10, Draws: 1
Updating self-play player.
Restoring from step: 66
INFO:tensorflow:Restoring parameters from experiments/experiment-2018-05-16_22:17:36/checkpoints/66.checkpoint


100%|██████████| 10/10 [00:00<00:00, 21.69it/s]
100%|██████████| 1000/1000 [00:03<00:00, 263.97it/s]
100%|██████████| 10/10 [00:00<00:00, 16.40it/s]
100%|██████████| 1000/1000 [00:03<00:00, 307.55it/s]
Win1/Win2/Draw: 0/0/2:  10%|█         | 2/20 [00:00<00:01, 13.47it/s]

Evaluating. Self-player vs training, then training vs self-player


Win1/Win2/Draw: 6/6/8: 100%|██████████| 20/20 [00:01<00:00, 16.11it/s]
Win1/Win2/Draw: 9/8/3: 100%|██████████| 20/20 [00:01<00:00, 16.36it/s]
Win1/Win2/Draw: 5/0/0:  25%|██▌       | 5/20 [00:00<00:00, 28.84it/s]

Self-play player wins: 14, Training player wins: 15, Draws: 11
Win + draw rate for training player: 0.65


Win1/Win2/Draw: 17/1/2: 100%|██████████| 20/20 [00:00<00:00, 29.29it/s]
Win1/Win2/Draw: 6/13/1: 100%|██████████| 20/20 [00:00<00:00, 30.18it/s]


Training player vs random. Wins: 30, Losses: 7, Draws: 3
Updating self-play player.
Restoring from step: 68
INFO:tensorflow:Restoring parameters from experiments/experiment-2018-05-16_22:17:36/checkpoints/68.checkpoint


100%|██████████| 10/10 [00:00<00:00, 20.14it/s]
100%|██████████| 1000/1000 [00:02<00:00, 349.07it/s]
100%|██████████| 10/10 [00:00<00:00, 21.52it/s]
100%|██████████| 1000/1000 [00:02<00:00, 353.89it/s]
Win1/Win2/Draw: 0/2/1:  15%|█▌        | 3/20 [00:00<00:00, 18.31it/s]

Evaluating. Self-player vs training, then training vs self-player


Win1/Win2/Draw: 11/6/3: 100%|██████████| 20/20 [00:01<00:00, 17.09it/s]
Win1/Win2/Draw: 10/7/3: 100%|██████████| 20/20 [00:01<00:00, 18.07it/s]
Win1/Win2/Draw: 5/0/0:  25%|██▌       | 5/20 [00:00<00:00, 29.09it/s]

Self-play player wins: 18, Training player wins: 16, Draws: 6
Win + draw rate for training player: 0.55


Win1/Win2/Draw: 19/0/1: 100%|██████████| 20/20 [00:00<00:00, 28.40it/s]
Win1/Win2/Draw: 6/14/0: 100%|██████████| 20/20 [00:00<00:00, 32.38it/s]
 30%|███       | 3/10 [00:00<00:00, 24.29it/s]

Training player vs random. Wins: 33, Losses: 6, Draws: 1


100%|██████████| 10/10 [00:00<00:00, 21.57it/s]
100%|██████████| 1000/1000 [00:02<00:00, 350.31it/s]
100%|██████████| 10/10 [00:00<00:00, 23.48it/s]
100%|██████████| 1000/1000 [00:02<00:00, 350.18it/s]
Win1/Win2/Draw: 2/1/0:  15%|█▌        | 3/20 [00:00<00:00, 17.22it/s]

Evaluating. Self-player vs training, then training vs self-player


Win1/Win2/Draw: 9/9/2: 100%|██████████| 20/20 [00:01<00:00, 17.44it/s]
Win1/Win2/Draw: 17/2/1: 100%|██████████| 20/20 [00:01<00:00, 18.60it/s]
Win1/Win2/Draw: 5/0/1:  30%|███       | 6/20 [00:00<00:00, 31.51it/s]

Self-play player wins: 11, Training player wins: 26, Draws: 3
Win + draw rate for training player: 0.725


Win1/Win2/Draw: 17/1/2: 100%|██████████| 20/20 [00:00<00:00, 29.16it/s]
Win1/Win2/Draw: 9/9/2: 100%|██████████| 20/20 [00:00<00:00, 31.45it/s]


Training player vs random. Wins: 26, Losses: 10, Draws: 4
Updating self-play player.
Restoring from step: 72
INFO:tensorflow:Restoring parameters from experiments/experiment-2018-05-16_22:17:36/checkpoints/72.checkpoint


100%|██████████| 10/10 [00:00<00:00, 21.72it/s]
100%|██████████| 1000/1000 [00:02<00:00, 335.98it/s]
100%|██████████| 10/10 [00:00<00:00, 23.45it/s]
100%|██████████| 1000/1000 [00:03<00:00, 320.73it/s]
Win1/Win2/Draw: 3/0/0:  15%|█▌        | 3/20 [00:00<00:00, 20.64it/s]

Evaluating. Self-player vs training, then training vs self-player


Win1/Win2/Draw: 18/2/0: 100%|██████████| 20/20 [00:01<00:00, 19.38it/s]
Win1/Win2/Draw: 5/10/5: 100%|██████████| 20/20 [00:01<00:00, 16.71it/s]
Win1/Win2/Draw: 2/2/0:  20%|██        | 4/20 [00:00<00:00, 21.50it/s]

Self-play player wins: 28, Training player wins: 7, Draws: 5
Win + draw rate for training player: 0.3


Win1/Win2/Draw: 17/3/0: 100%|██████████| 20/20 [00:00<00:00, 26.40it/s]
Win1/Win2/Draw: 6/9/5: 100%|██████████| 20/20 [00:00<00:00, 31.27it/s]
 30%|███       | 3/10 [00:00<00:00, 22.81it/s]

Training player vs random. Wins: 26, Losses: 9, Draws: 5


100%|██████████| 10/10 [00:00<00:00, 21.85it/s]
100%|██████████| 1000/1000 [00:02<00:00, 354.41it/s]
100%|██████████| 10/10 [00:00<00:00, 24.14it/s]
100%|██████████| 1000/1000 [00:02<00:00, 353.80it/s]
Win1/Win2/Draw: 1/2/0:  15%|█▌        | 3/20 [00:00<00:00, 19.20it/s]

Evaluating. Self-player vs training, then training vs self-player


Win1/Win2/Draw: 13/7/0: 100%|██████████| 20/20 [00:01<00:00, 18.39it/s]
Win1/Win2/Draw: 14/5/1: 100%|██████████| 20/20 [00:01<00:00, 19.24it/s]
Win1/Win2/Draw: 6/0/0:  30%|███       | 6/20 [00:00<00:00, 32.11it/s]

Self-play player wins: 18, Training player wins: 21, Draws: 1
Win + draw rate for training player: 0.55


Win1/Win2/Draw: 13/6/1: 100%|██████████| 20/20 [00:00<00:00, 29.68it/s]
Win1/Win2/Draw: 1/15/4: 100%|██████████| 20/20 [00:00<00:00, 31.29it/s]
 30%|███       | 3/10 [00:00<00:00, 23.91it/s]

Training player vs random. Wins: 28, Losses: 7, Draws: 5


100%|██████████| 10/10 [00:00<00:00, 22.40it/s]
100%|██████████| 1000/1000 [00:02<00:00, 339.84it/s]
100%|██████████| 10/10 [00:00<00:00, 22.15it/s]
100%|██████████| 1000/1000 [00:03<00:00, 324.68it/s]
Win1/Win2/Draw: 2/1/0:  15%|█▌        | 3/20 [00:00<00:01, 16.34it/s]

Evaluating. Self-player vs training, then training vs self-player


Win1/Win2/Draw: 17/3/0: 100%|██████████| 20/20 [00:01<00:00, 17.43it/s]
Win1/Win2/Draw: 6/11/3: 100%|██████████| 20/20 [00:01<00:00, 16.81it/s]
Win1/Win2/Draw: 5/0/0:  25%|██▌       | 5/20 [00:00<00:00, 27.53it/s]

Self-play player wins: 28, Training player wins: 9, Draws: 3
Win + draw rate for training player: 0.3


Win1/Win2/Draw: 19/1/0: 100%|██████████| 20/20 [00:00<00:00, 28.38it/s]
Win1/Win2/Draw: 8/8/4: 100%|██████████| 20/20 [00:00<00:00, 27.96it/s]
 30%|███       | 3/10 [00:00<00:00, 20.91it/s]

Training player vs random. Wins: 27, Losses: 9, Draws: 4


100%|██████████| 10/10 [00:00<00:00, 20.79it/s]
100%|██████████| 1000/1000 [00:03<00:00, 309.72it/s]
100%|██████████| 10/10 [00:00<00:00, 18.19it/s]
100%|██████████| 1000/1000 [00:03<00:00, 305.35it/s]
Win1/Win2/Draw: 2/1/0:  15%|█▌        | 3/20 [00:00<00:00, 18.40it/s]

Evaluating. Self-player vs training, then training vs self-player


Win1/Win2/Draw: 12/7/1: 100%|██████████| 20/20 [00:01<00:00, 18.51it/s]
Win1/Win2/Draw: 7/12/1: 100%|██████████| 20/20 [00:01<00:00, 17.49it/s]
Win1/Win2/Draw: 4/0/1:  25%|██▌       | 5/20 [00:00<00:00, 25.86it/s]

Self-play player wins: 24, Training player wins: 14, Draws: 2
Win + draw rate for training player: 0.4


Win1/Win2/Draw: 18/1/1: 100%|██████████| 20/20 [00:00<00:00, 28.39it/s]
Win1/Win2/Draw: 5/10/5: 100%|██████████| 20/20 [00:00<00:00, 27.91it/s]
 30%|███       | 3/10 [00:00<00:00, 22.12it/s]

Training player vs random. Wins: 28, Losses: 6, Draws: 6


100%|██████████| 10/10 [00:00<00:00, 22.28it/s]
100%|██████████| 1000/1000 [00:02<00:00, 337.15it/s]
100%|██████████| 10/10 [00:00<00:00, 22.85it/s]
100%|██████████| 1000/1000 [00:02<00:00, 338.46it/s]
Win1/Win2/Draw: 3/0/0:  15%|█▌        | 3/20 [00:00<00:00, 19.41it/s]

Evaluating. Self-player vs training, then training vs self-player


Win1/Win2/Draw: 17/2/1: 100%|██████████| 20/20 [00:01<00:00, 19.50it/s]
Win1/Win2/Draw: 11/4/5: 100%|██████████| 20/20 [00:01<00:00, 16.74it/s]
Win1/Win2/Draw: 5/0/0:  25%|██▌       | 5/20 [00:00<00:00, 25.37it/s]

Self-play player wins: 21, Training player wins: 13, Draws: 6
Win + draw rate for training player: 0.475


Win1/Win2/Draw: 17/2/1: 100%|██████████| 20/20 [00:00<00:00, 27.98it/s]
Win1/Win2/Draw: 4/11/5: 100%|██████████| 20/20 [00:00<00:00, 29.65it/s]
 30%|███       | 3/10 [00:00<00:00, 24.39it/s]

Training player vs random. Wins: 28, Losses: 6, Draws: 6


100%|██████████| 10/10 [00:00<00:00, 24.08it/s]
100%|██████████| 1000/1000 [00:02<00:00, 346.71it/s]
100%|██████████| 10/10 [00:00<00:00, 22.97it/s]
100%|██████████| 1000/1000 [00:02<00:00, 338.10it/s]
Win1/Win2/Draw: 3/0/0:  15%|█▌        | 3/20 [00:00<00:01, 15.74it/s]

Evaluating. Self-player vs training, then training vs self-player


Win1/Win2/Draw: 14/4/2: 100%|██████████| 20/20 [00:01<00:00, 16.79it/s]
Win1/Win2/Draw: 11/3/6: 100%|██████████| 20/20 [00:01<00:00, 16.76it/s]
Win1/Win2/Draw: 6/0/0:  30%|███       | 6/20 [00:00<00:00, 32.40it/s]

Self-play player wins: 17, Training player wins: 15, Draws: 8
Win + draw rate for training player: 0.575


Win1/Win2/Draw: 19/0/1: 100%|██████████| 20/20 [00:00<00:00, 26.90it/s]
Win1/Win2/Draw: 6/11/3: 100%|██████████| 20/20 [00:00<00:00, 30.17it/s]


Training player vs random. Wins: 30, Losses: 6, Draws: 4
Updating self-play player.
Restoring from step: 84
INFO:tensorflow:Restoring parameters from experiments/experiment-2018-05-16_22:17:36/checkpoints/84.checkpoint


100%|██████████| 10/10 [00:00<00:00, 18.56it/s]
100%|██████████| 1000/1000 [00:03<00:00, 326.52it/s]
100%|██████████| 10/10 [00:00<00:00, 20.50it/s]
100%|██████████| 1000/1000 [00:02<00:00, 338.53it/s]
Win1/Win2/Draw: 1/2/0:  15%|█▌        | 3/20 [00:00<00:01, 15.86it/s]

Evaluating. Self-player vs training, then training vs self-player


Win1/Win2/Draw: 12/7/1: 100%|██████████| 20/20 [00:01<00:00, 18.83it/s]
Win1/Win2/Draw: 20/0/0: 100%|██████████| 20/20 [00:01<00:00, 17.89it/s]
Win1/Win2/Draw: 4/0/1:  25%|██▌       | 5/20 [00:00<00:00, 27.09it/s]

Self-play player wins: 12, Training player wins: 27, Draws: 1
Win + draw rate for training player: 0.7


Win1/Win2/Draw: 18/0/2: 100%|██████████| 20/20 [00:00<00:00, 27.40it/s]
Win1/Win2/Draw: 5/13/2: 100%|██████████| 20/20 [00:00<00:00, 30.72it/s]


Training player vs random. Wins: 31, Losses: 5, Draws: 4
Updating self-play player.
Restoring from step: 86
INFO:tensorflow:Restoring parameters from experiments/experiment-2018-05-16_22:17:36/checkpoints/86.checkpoint


100%|██████████| 10/10 [00:00<00:00, 19.90it/s]
100%|██████████| 1000/1000 [00:03<00:00, 328.13it/s]
100%|██████████| 10/10 [00:00<00:00, 20.79it/s]
100%|██████████| 1000/1000 [00:03<00:00, 326.99it/s]
Win1/Win2/Draw: 3/0/0:  15%|█▌        | 3/20 [00:00<00:00, 17.65it/s]

Evaluating. Self-player vs training, then training vs self-player


Win1/Win2/Draw: 17/2/1: 100%|██████████| 20/20 [00:01<00:00, 17.84it/s]
Win1/Win2/Draw: 9/8/3: 100%|██████████| 20/20 [00:01<00:00, 16.62it/s]
Win1/Win2/Draw: 6/0/0:  30%|███       | 6/20 [00:00<00:00, 31.96it/s]

Self-play player wins: 25, Training player wins: 11, Draws: 4
Win + draw rate for training player: 0.375


Win1/Win2/Draw: 16/2/2: 100%|██████████| 20/20 [00:00<00:00, 29.25it/s]
Win1/Win2/Draw: 9/10/1: 100%|██████████| 20/20 [00:00<00:00, 29.77it/s]
 30%|███       | 3/10 [00:00<00:00, 21.16it/s]

Training player vs random. Wins: 26, Losses: 11, Draws: 3


100%|██████████| 10/10 [00:00<00:00, 19.96it/s]
100%|██████████| 1000/1000 [00:03<00:00, 330.70it/s]
100%|██████████| 10/10 [00:00<00:00, 20.31it/s]
100%|██████████| 1000/1000 [00:03<00:00, 328.97it/s]
Win1/Win2/Draw: 2/0/0:  10%|█         | 2/20 [00:00<00:01, 15.34it/s]

Evaluating. Self-player vs training, then training vs self-player


Win1/Win2/Draw: 9/11/0: 100%|██████████| 20/20 [00:01<00:00, 16.56it/s]
Win1/Win2/Draw: 10/9/1: 100%|██████████| 20/20 [00:01<00:00, 17.77it/s]
Win1/Win2/Draw: 5/0/0:  25%|██▌       | 5/20 [00:00<00:00, 28.38it/s]

Self-play player wins: 18, Training player wins: 21, Draws: 1
Win + draw rate for training player: 0.55


Win1/Win2/Draw: 19/0/1: 100%|██████████| 20/20 [00:00<00:00, 27.60it/s]
Win1/Win2/Draw: 6/9/5: 100%|██████████| 20/20 [00:00<00:00, 30.61it/s]
  0%|          | 0/10 [00:00<?, ?it/s]

Training player vs random. Wins: 28, Losses: 6, Draws: 6


100%|██████████| 10/10 [00:00<00:00, 20.02it/s]
100%|██████████| 1000/1000 [00:02<00:00, 333.79it/s]
100%|██████████| 10/10 [00:00<00:00, 19.83it/s]
100%|██████████| 1000/1000 [00:03<00:00, 329.22it/s]
Win1/Win2/Draw: 2/0/1:  15%|█▌        | 3/20 [00:00<00:01, 16.63it/s]

Evaluating. Self-player vs training, then training vs self-player


Win1/Win2/Draw: 12/5/3: 100%|██████████| 20/20 [00:01<00:00, 16.49it/s]
Win1/Win2/Draw: 13/5/2: 100%|██████████| 20/20 [00:01<00:00, 18.07it/s]
Win1/Win2/Draw: 4/0/0:  20%|██        | 4/20 [00:00<00:00, 27.76it/s]

Self-play player wins: 17, Training player wins: 18, Draws: 5
Win + draw rate for training player: 0.575


Win1/Win2/Draw: 18/2/0: 100%|██████████| 20/20 [00:00<00:00, 27.06it/s]
Win1/Win2/Draw: 7/11/2: 100%|██████████| 20/20 [00:00<00:00, 31.59it/s]


Training player vs random. Wins: 29, Losses: 9, Draws: 2
Updating self-play player.
Restoring from step: 92
INFO:tensorflow:Restoring parameters from experiments/experiment-2018-05-16_22:17:36/checkpoints/92.checkpoint


100%|██████████| 10/10 [00:00<00:00, 20.45it/s]
100%|██████████| 1000/1000 [00:03<00:00, 333.15it/s]
100%|██████████| 10/10 [00:00<00:00, 21.06it/s]
100%|██████████| 1000/1000 [00:02<00:00, 340.86it/s]
Win1/Win2/Draw: 0/2/1:  15%|█▌        | 3/20 [00:00<00:01, 15.35it/s]

Evaluating. Self-player vs training, then training vs self-player


Win1/Win2/Draw: 11/7/2: 100%|██████████| 20/20 [00:01<00:00, 16.53it/s]
Win1/Win2/Draw: 12/7/1: 100%|██████████| 20/20 [00:01<00:00, 17.94it/s]
Win1/Win2/Draw: 6/0/0:  30%|███       | 6/20 [00:00<00:00, 31.33it/s]

Self-play player wins: 18, Training player wins: 19, Draws: 3
Win + draw rate for training player: 0.55


Win1/Win2/Draw: 20/0/0: 100%|██████████| 20/20 [00:00<00:00, 30.91it/s]
Win1/Win2/Draw: 9/10/1: 100%|██████████| 20/20 [00:00<00:00, 30.17it/s]
 30%|███       | 3/10 [00:00<00:00, 22.86it/s]

Training player vs random. Wins: 30, Losses: 9, Draws: 1


100%|██████████| 10/10 [00:00<00:00, 22.52it/s]
100%|██████████| 1000/1000 [00:02<00:00, 334.04it/s]
100%|██████████| 10/10 [00:00<00:00, 20.75it/s]
100%|██████████| 1000/1000 [00:03<00:00, 324.09it/s]
Win1/Win2/Draw: 1/2/0:  15%|█▌        | 3/20 [00:00<00:01, 14.92it/s]

Evaluating. Self-player vs training, then training vs self-player


Win1/Win2/Draw: 10/10/0: 100%|██████████| 20/20 [00:01<00:00, 16.75it/s]
Win1/Win2/Draw: 11/8/1: 100%|██████████| 20/20 [00:01<00:00, 17.00it/s]
Win1/Win2/Draw: 5/1/0:  30%|███       | 6/20 [00:00<00:00, 30.04it/s]

Self-play player wins: 18, Training player wins: 21, Draws: 1
Win + draw rate for training player: 0.55


Win1/Win2/Draw: 15/4/1: 100%|██████████| 20/20 [00:00<00:00, 28.83it/s]
Win1/Win2/Draw: 5/14/1: 100%|██████████| 20/20 [00:00<00:00, 30.20it/s]
  0%|          | 0/10 [00:00<?, ?it/s]

Training player vs random. Wins: 29, Losses: 9, Draws: 2


100%|██████████| 10/10 [00:00<00:00, 20.66it/s]
100%|██████████| 1000/1000 [00:03<00:00, 333.07it/s]
100%|██████████| 10/10 [00:00<00:00, 20.68it/s]
100%|██████████| 1000/1000 [00:02<00:00, 333.97it/s]
Win1/Win2/Draw: 2/1/0:  15%|█▌        | 3/20 [00:00<00:00, 18.14it/s]

Evaluating. Self-player vs training, then training vs self-player


Win1/Win2/Draw: 16/4/0: 100%|██████████| 20/20 [00:01<00:00, 17.05it/s]
Win1/Win2/Draw: 12/5/3: 100%|██████████| 20/20 [00:01<00:00, 16.93it/s]
Win1/Win2/Draw: 4/0/1:  25%|██▌       | 5/20 [00:00<00:00, 27.82it/s]

Self-play player wins: 21, Training player wins: 16, Draws: 3
Win + draw rate for training player: 0.475


Win1/Win2/Draw: 15/3/2: 100%|██████████| 20/20 [00:00<00:00, 27.70it/s]
Win1/Win2/Draw: 7/8/5: 100%|██████████| 20/20 [00:00<00:00, 27.39it/s]
 30%|███       | 3/10 [00:00<00:00, 22.20it/s]

Training player vs random. Wins: 23, Losses: 10, Draws: 7


100%|██████████| 10/10 [00:00<00:00, 21.63it/s]
100%|██████████| 1000/1000 [00:02<00:00, 336.26it/s]
100%|██████████| 10/10 [00:00<00:00, 21.62it/s]
100%|██████████| 1000/1000 [00:02<00:00, 342.71it/s]
Win1/Win2/Draw: 0/2/1:  15%|█▌        | 3/20 [00:00<00:01, 14.96it/s]

Evaluating. Self-player vs training, then training vs self-player


Win1/Win2/Draw: 16/3/1: 100%|██████████| 20/20 [00:01<00:00, 18.63it/s]
Win1/Win2/Draw: 11/6/3: 100%|██████████| 20/20 [00:01<00:00, 17.46it/s]
Win1/Win2/Draw: 3/2/0:  25%|██▌       | 5/20 [00:00<00:00, 24.68it/s]

Self-play player wins: 22, Training player wins: 14, Draws: 4
Win + draw rate for training player: 0.45


Win1/Win2/Draw: 15/4/1: 100%|██████████| 20/20 [00:00<00:00, 27.97it/s]
Win1/Win2/Draw: 5/8/7: 100%|██████████| 20/20 [00:00<00:00, 28.41it/s]
 20%|██        | 2/10 [00:00<00:00, 18.05it/s]

Training player vs random. Wins: 23, Losses: 9, Draws: 8


100%|██████████| 10/10 [00:00<00:00, 20.33it/s]
100%|██████████| 1000/1000 [00:02<00:00, 341.10it/s]
100%|██████████| 10/10 [00:00<00:00, 19.76it/s]
100%|██████████| 1000/1000 [00:02<00:00, 345.66it/s]
Win1/Win2/Draw: 3/0/0:  15%|█▌        | 3/20 [00:00<00:00, 17.78it/s]

Evaluating. Self-player vs training, then training vs self-player


Win1/Win2/Draw: 13/4/3: 100%|██████████| 20/20 [00:01<00:00, 16.09it/s]
Win1/Win2/Draw: 12/7/1: 100%|██████████| 20/20 [00:01<00:00, 17.18it/s]
Win1/Win2/Draw: 5/0/0:  25%|██▌       | 5/20 [00:00<00:00, 26.08it/s]

Self-play player wins: 20, Training player wins: 16, Draws: 4
Win + draw rate for training player: 0.5


Win1/Win2/Draw: 17/2/1: 100%|██████████| 20/20 [00:00<00:00, 28.64it/s]
Win1/Win2/Draw: 8/9/3: 100%|██████████| 20/20 [00:00<00:00, 30.69it/s]
 30%|███       | 3/10 [00:00<00:00, 23.54it/s]

Training player vs random. Wins: 26, Losses: 10, Draws: 4


100%|██████████| 10/10 [00:00<00:00, 20.33it/s]
100%|██████████| 1000/1000 [00:02<00:00, 344.48it/s]
100%|██████████| 10/10 [00:00<00:00, 19.94it/s]
100%|██████████| 1000/1000 [00:02<00:00, 342.37it/s]
Win1/Win2/Draw: 3/0/0:  15%|█▌        | 3/20 [00:00<00:01, 16.29it/s]

Evaluating. Self-player vs training, then training vs self-player


Win1/Win2/Draw: 16/2/2: 100%|██████████| 20/20 [00:01<00:00, 17.32it/s]
Win1/Win2/Draw: 17/1/2: 100%|██████████| 20/20 [00:01<00:00, 17.85it/s]
Win1/Win2/Draw: 4/0/1:  25%|██▌       | 5/20 [00:00<00:00, 30.13it/s]

Self-play player wins: 17, Training player wins: 19, Draws: 4
Win + draw rate for training player: 0.575


Win1/Win2/Draw: 17/1/2: 100%|██████████| 20/20 [00:00<00:00, 29.50it/s]
Win1/Win2/Draw: 8/10/2: 100%|██████████| 20/20 [00:00<00:00, 28.35it/s]


Training player vs random. Wins: 27, Losses: 9, Draws: 4
Updating self-play player.
Restoring from step: 104
INFO:tensorflow:Restoring parameters from experiments/experiment-2018-05-16_22:17:36/checkpoints/104.checkpoint


100%|██████████| 10/10 [00:00<00:00, 19.61it/s]
100%|██████████| 1000/1000 [00:03<00:00, 331.27it/s]
100%|██████████| 10/10 [00:00<00:00, 20.07it/s]
100%|██████████| 1000/1000 [00:03<00:00, 333.12it/s]
Win1/Win2/Draw: 3/0/0:  15%|█▌        | 3/20 [00:00<00:01, 16.47it/s]

Evaluating. Self-player vs training, then training vs self-player


Win1/Win2/Draw: 8/10/2: 100%|██████████| 20/20 [00:01<00:00, 16.58it/s]
Win1/Win2/Draw: 20/0/0: 100%|██████████| 20/20 [00:01<00:00, 19.48it/s]
Win1/Win2/Draw: 5/0/1:  30%|███       | 6/20 [00:00<00:00, 32.17it/s]

Self-play player wins: 8, Training player wins: 30, Draws: 2
Win + draw rate for training player: 0.8


Win1/Win2/Draw: 19/0/1: 100%|██████████| 20/20 [00:00<00:00, 31.37it/s]
Win1/Win2/Draw: 6/9/5: 100%|██████████| 20/20 [00:00<00:00, 30.75it/s]


Training player vs random. Wins: 28, Losses: 6, Draws: 6
Updating self-play player.
Restoring from step: 106
INFO:tensorflow:Restoring parameters from experiments/experiment-2018-05-16_22:17:36/checkpoints/106.checkpoint


100%|██████████| 10/10 [00:00<00:00, 18.97it/s]
100%|██████████| 1000/1000 [00:02<00:00, 343.00it/s]
100%|██████████| 10/10 [00:00<00:00, 20.84it/s]
100%|██████████| 1000/1000 [00:02<00:00, 349.36it/s]
Win1/Win2/Draw: 2/0/1:  15%|█▌        | 3/20 [00:00<00:00, 18.73it/s]

Evaluating. Self-player vs training, then training vs self-player


Win1/Win2/Draw: 15/0/5: 100%|██████████| 20/20 [00:01<00:00, 18.31it/s]
Win1/Win2/Draw: 17/3/0: 100%|██████████| 20/20 [00:01<00:00, 17.03it/s]
Win1/Win2/Draw: 4/0/1:  25%|██▌       | 5/20 [00:00<00:00, 28.94it/s]

Self-play player wins: 18, Training player wins: 17, Draws: 5
Win + draw rate for training player: 0.55


Win1/Win2/Draw: 18/1/1: 100%|██████████| 20/20 [00:00<00:00, 29.60it/s]
Win1/Win2/Draw: 6/12/2: 100%|██████████| 20/20 [00:00<00:00, 29.33it/s]
 20%|██        | 2/10 [00:00<00:00, 17.91it/s]

Training player vs random. Wins: 30, Losses: 7, Draws: 3


100%|██████████| 10/10 [00:00<00:00, 19.59it/s]
100%|██████████| 1000/1000 [00:02<00:00, 349.93it/s]
100%|██████████| 10/10 [00:00<00:00, 19.04it/s]
100%|██████████| 1000/1000 [00:02<00:00, 342.84it/s]
Win1/Win2/Draw: 1/2/0:  15%|█▌        | 3/20 [00:00<00:00, 17.80it/s]

Evaluating. Self-player vs training, then training vs self-player


Win1/Win2/Draw: 7/10/3: 100%|██████████| 20/20 [00:01<00:00, 16.49it/s]
Win1/Win2/Draw: 9/6/5: 100%|██████████| 20/20 [00:01<00:00, 16.44it/s]
Win1/Win2/Draw: 3/1/1:  25%|██▌       | 5/20 [00:00<00:00, 23.29it/s]

Self-play player wins: 13, Training player wins: 19, Draws: 8
Win + draw rate for training player: 0.675


Win1/Win2/Draw: 17/1/2: 100%|██████████| 20/20 [00:00<00:00, 29.30it/s]
Win1/Win2/Draw: 3/13/4: 100%|██████████| 20/20 [00:00<00:00, 30.48it/s]


Training player vs random. Wins: 30, Losses: 4, Draws: 6
Updating self-play player.
Restoring from step: 110
INFO:tensorflow:Restoring parameters from experiments/experiment-2018-05-16_22:17:36/checkpoints/110.checkpoint


100%|██████████| 10/10 [00:00<00:00, 19.27it/s]
100%|██████████| 1000/1000 [00:02<00:00, 341.98it/s]
100%|██████████| 10/10 [00:00<00:00, 21.31it/s]
100%|██████████| 1000/1000 [00:02<00:00, 334.97it/s]
Win1/Win2/Draw: 0/0/2:  10%|█         | 2/20 [00:00<00:01, 12.73it/s]

Evaluating. Self-player vs training, then training vs self-player


Win1/Win2/Draw: 10/1/9: 100%|██████████| 20/20 [00:01<00:00, 14.88it/s]
Win1/Win2/Draw: 13/7/0: 100%|██████████| 20/20 [00:01<00:00, 17.66it/s]
Win1/Win2/Draw: 5/0/0:  25%|██▌       | 5/20 [00:00<00:00, 30.59it/s]

Self-play player wins: 17, Training player wins: 14, Draws: 9
Win + draw rate for training player: 0.575


Win1/Win2/Draw: 17/3/0: 100%|██████████| 20/20 [00:00<00:00, 29.44it/s]
Win1/Win2/Draw: 6/11/3: 100%|██████████| 20/20 [00:00<00:00, 29.04it/s]


Training player vs random. Wins: 28, Losses: 9, Draws: 3
Updating self-play player.
Restoring from step: 112
INFO:tensorflow:Restoring parameters from experiments/experiment-2018-05-16_22:17:36/checkpoints/112.checkpoint


100%|██████████| 10/10 [00:00<00:00, 20.87it/s]
100%|██████████| 1000/1000 [00:02<00:00, 343.34it/s]
100%|██████████| 10/10 [00:00<00:00, 21.64it/s]
100%|██████████| 1000/1000 [00:02<00:00, 340.62it/s]
Win1/Win2/Draw: 0/3/0:  15%|█▌        | 3/20 [00:00<00:00, 18.84it/s]

Evaluating. Self-player vs training, then training vs self-player


Win1/Win2/Draw: 11/9/0: 100%|██████████| 20/20 [00:01<00:00, 18.28it/s]
Win1/Win2/Draw: 6/6/8: 100%|██████████| 20/20 [00:01<00:00, 15.53it/s]
Win1/Win2/Draw: 5/0/0:  25%|██▌       | 5/20 [00:00<00:00, 27.36it/s]

Self-play player wins: 17, Training player wins: 15, Draws: 8
Win + draw rate for training player: 0.575


Win1/Win2/Draw: 17/2/1: 100%|██████████| 20/20 [00:00<00:00, 28.51it/s]
Win1/Win2/Draw: 8/11/1: 100%|██████████| 20/20 [00:00<00:00, 30.55it/s]


Training player vs random. Wins: 28, Losses: 10, Draws: 2
Updating self-play player.
Restoring from step: 114
INFO:tensorflow:Restoring parameters from experiments/experiment-2018-05-16_22:17:36/checkpoints/114.checkpoint


100%|██████████| 10/10 [00:00<00:00, 21.65it/s]
100%|██████████| 1000/1000 [00:02<00:00, 349.59it/s]
100%|██████████| 10/10 [00:00<00:00, 21.18it/s]
100%|██████████| 1000/1000 [00:02<00:00, 340.93it/s]
Win1/Win2/Draw: 1/2/0:  15%|█▌        | 3/20 [00:00<00:00, 18.82it/s]

Evaluating. Self-player vs training, then training vs self-player


Win1/Win2/Draw: 9/10/1: 100%|██████████| 20/20 [00:01<00:00, 17.43it/s]
Win1/Win2/Draw: 8/8/4: 100%|██████████| 20/20 [00:01<00:00, 19.15it/s]
Win1/Win2/Draw: 3/0/1:  20%|██        | 4/20 [00:00<00:00, 23.39it/s]

Self-play player wins: 17, Training player wins: 18, Draws: 5
Win + draw rate for training player: 0.575


Win1/Win2/Draw: 17/1/2: 100%|██████████| 20/20 [00:00<00:00, 27.18it/s]
Win1/Win2/Draw: 5/9/6: 100%|██████████| 20/20 [00:00<00:00, 30.17it/s]


Training player vs random. Wins: 26, Losses: 6, Draws: 8
Updating self-play player.
Restoring from step: 116
INFO:tensorflow:Restoring parameters from experiments/experiment-2018-05-16_22:17:36/checkpoints/116.checkpoint


100%|██████████| 10/10 [00:00<00:00, 20.40it/s]
100%|██████████| 1000/1000 [00:03<00:00, 326.82it/s]
100%|██████████| 10/10 [00:00<00:00, 20.12it/s]
100%|██████████| 1000/1000 [00:02<00:00, 344.53it/s]
Win1/Win2/Draw: 2/1/0:  15%|█▌        | 3/20 [00:00<00:00, 19.79it/s]

Evaluating. Self-player vs training, then training vs self-player


Win1/Win2/Draw: 10/5/5: 100%|██████████| 20/20 [00:01<00:00, 16.74it/s]
Win1/Win2/Draw: 6/11/3: 100%|██████████| 20/20 [00:01<00:00, 17.12it/s]
Win1/Win2/Draw: 5/0/0:  25%|██▌       | 5/20 [00:00<00:00, 28.40it/s]

Self-play player wins: 21, Training player wins: 11, Draws: 8
Win + draw rate for training player: 0.475


Win1/Win2/Draw: 20/0/0: 100%|██████████| 20/20 [00:00<00:00, 30.36it/s]
Win1/Win2/Draw: 3/13/4: 100%|██████████| 20/20 [00:00<00:00, 31.46it/s]
 20%|██        | 2/10 [00:00<00:00, 19.54it/s]

Training player vs random. Wins: 33, Losses: 3, Draws: 4


100%|██████████| 10/10 [00:00<00:00, 19.29it/s]
100%|██████████| 1000/1000 [00:03<00:00, 329.44it/s]
100%|██████████| 10/10 [00:00<00:00, 19.64it/s]
100%|██████████| 1000/1000 [00:03<00:00, 332.78it/s]
Win1/Win2/Draw: 1/1/0:  10%|█         | 2/20 [00:00<00:01, 16.16it/s]

Evaluating. Self-player vs training, then training vs self-player


Win1/Win2/Draw: 11/6/3: 100%|██████████| 20/20 [00:01<00:00, 15.12it/s]
Win1/Win2/Draw: 14/5/1: 100%|██████████| 20/20 [00:01<00:00, 19.30it/s]
Win1/Win2/Draw: 6/0/0:  30%|███       | 6/20 [00:00<00:00, 29.90it/s]

Self-play player wins: 16, Training player wins: 20, Draws: 4
Win + draw rate for training player: 0.6


Win1/Win2/Draw: 17/2/1: 100%|██████████| 20/20 [00:00<00:00, 28.01it/s]
Win1/Win2/Draw: 5/8/7: 100%|██████████| 20/20 [00:00<00:00, 29.38it/s]


Training player vs random. Wins: 25, Losses: 7, Draws: 8
Updating self-play player.
Restoring from step: 120
INFO:tensorflow:Restoring parameters from experiments/experiment-2018-05-16_22:17:36/checkpoints/120.checkpoint


100%|██████████| 10/10 [00:00<00:00, 17.79it/s]
100%|██████████| 1000/1000 [00:03<00:00, 326.47it/s]
100%|██████████| 10/10 [00:00<00:00, 19.13it/s]
100%|██████████| 1000/1000 [00:03<00:00, 329.97it/s]
Win1/Win2/Draw: 1/2/0:  15%|█▌        | 3/20 [00:00<00:01, 15.46it/s]

Evaluating. Self-player vs training, then training vs self-player


Win1/Win2/Draw: 12/6/2: 100%|██████████| 20/20 [00:01<00:00, 18.40it/s]
Win1/Win2/Draw: 11/9/0: 100%|██████████| 20/20 [00:01<00:00, 17.71it/s]
Win1/Win2/Draw: 4/1/0:  25%|██▌       | 5/20 [00:00<00:00, 26.08it/s]

Self-play player wins: 21, Training player wins: 17, Draws: 2
Win + draw rate for training player: 0.475


Win1/Win2/Draw: 18/2/0: 100%|██████████| 20/20 [00:00<00:00, 32.16it/s]
Win1/Win2/Draw: 3/17/0: 100%|██████████| 20/20 [00:00<00:00, 32.14it/s]
 20%|██        | 2/10 [00:00<00:00, 18.52it/s]

Training player vs random. Wins: 35, Losses: 5, Draws: 0


100%|██████████| 10/10 [00:00<00:00, 18.21it/s]
100%|██████████| 1000/1000 [00:03<00:00, 330.11it/s]
100%|██████████| 10/10 [00:00<00:00, 17.90it/s]
100%|██████████| 1000/1000 [00:03<00:00, 327.49it/s]
Win1/Win2/Draw: 1/1/1:  15%|█▌        | 3/20 [00:00<00:00, 17.13it/s]

Evaluating. Self-player vs training, then training vs self-player


Win1/Win2/Draw: 10/8/2: 100%|██████████| 20/20 [00:01<00:00, 17.48it/s]
Win1/Win2/Draw: 10/7/3: 100%|██████████| 20/20 [00:01<00:00, 16.60it/s]
Win1/Win2/Draw: 6/0/0:  30%|███       | 6/20 [00:00<00:00, 33.59it/s]

Self-play player wins: 17, Training player wins: 18, Draws: 5
Win + draw rate for training player: 0.575


Win1/Win2/Draw: 19/0/1: 100%|██████████| 20/20 [00:00<00:00, 31.97it/s]
Win1/Win2/Draw: 4/14/2: 100%|██████████| 20/20 [00:00<00:00, 30.96it/s]


Training player vs random. Wins: 33, Losses: 4, Draws: 3
Updating self-play player.
Restoring from step: 124
INFO:tensorflow:Restoring parameters from experiments/experiment-2018-05-16_22:17:36/checkpoints/124.checkpoint


100%|██████████| 10/10 [00:00<00:00, 20.04it/s]
100%|██████████| 1000/1000 [00:03<00:00, 319.68it/s]
100%|██████████| 10/10 [00:00<00:00, 18.05it/s]
100%|██████████| 1000/1000 [00:03<00:00, 319.36it/s]
Win1/Win2/Draw: 1/2/0:  15%|█▌        | 3/20 [00:00<00:00, 17.02it/s]

Evaluating. Self-player vs training, then training vs self-player


Win1/Win2/Draw: 8/12/0: 100%|██████████| 20/20 [00:01<00:00, 16.70it/s]
Win1/Win2/Draw: 13/7/0: 100%|██████████| 20/20 [00:01<00:00, 17.93it/s]
Win1/Win2/Draw: 6/0/0:  30%|███       | 6/20 [00:00<00:00, 34.03it/s]

Self-play player wins: 15, Training player wins: 25, Draws: 0
Win + draw rate for training player: 0.625


Win1/Win2/Draw: 20/0/0: 100%|██████████| 20/20 [00:00<00:00, 33.02it/s]
Win1/Win2/Draw: 8/7/5: 100%|██████████| 20/20 [00:00<00:00, 28.47it/s]


Training player vs random. Wins: 27, Losses: 8, Draws: 5
Updating self-play player.
Restoring from step: 126
INFO:tensorflow:Restoring parameters from experiments/experiment-2018-05-16_22:17:36/checkpoints/126.checkpoint


100%|██████████| 10/10 [00:00<00:00, 18.44it/s]
100%|██████████| 1000/1000 [00:03<00:00, 326.92it/s]
100%|██████████| 10/10 [00:00<00:00, 19.80it/s]
100%|██████████| 1000/1000 [00:03<00:00, 327.22it/s]
Win1/Win2/Draw: 1/0/1:  10%|█         | 2/20 [00:00<00:01, 13.78it/s]

Evaluating. Self-player vs training, then training vs self-player


Win1/Win2/Draw: 9/0/11: 100%|██████████| 20/20 [00:01<00:00, 13.60it/s]
Win1/Win2/Draw: 17/2/1: 100%|██████████| 20/20 [00:01<00:00, 15.88it/s]
Win1/Win2/Draw: 6/0/0:  30%|███       | 6/20 [00:00<00:00, 36.13it/s]

Self-play player wins: 11, Training player wins: 17, Draws: 12
Win + draw rate for training player: 0.725


Win1/Win2/Draw: 19/0/1: 100%|██████████| 20/20 [00:00<00:00, 26.90it/s]
Win1/Win2/Draw: 3/9/8: 100%|██████████| 20/20 [00:00<00:00, 28.63it/s]


Training player vs random. Wins: 28, Losses: 3, Draws: 9
Updating self-play player.
Restoring from step: 128
INFO:tensorflow:Restoring parameters from experiments/experiment-2018-05-16_22:17:36/checkpoints/128.checkpoint


100%|██████████| 10/10 [00:00<00:00, 13.46it/s]
100%|██████████| 1000/1000 [00:03<00:00, 291.45it/s]
100%|██████████| 10/10 [00:00<00:00, 20.07it/s]
100%|██████████| 1000/1000 [00:02<00:00, 353.11it/s]
Win1/Win2/Draw: 0/3/0:  15%|█▌        | 3/20 [00:00<00:00, 18.74it/s]

Evaluating. Self-player vs training, then training vs self-player


Win1/Win2/Draw: 12/8/0: 100%|██████████| 20/20 [00:01<00:00, 19.63it/s]
Win1/Win2/Draw: 5/7/8: 100%|██████████| 20/20 [00:01<00:00, 15.10it/s]
Win1/Win2/Draw: 5/0/1:  30%|███       | 6/20 [00:00<00:00, 31.14it/s]

Self-play player wins: 19, Training player wins: 13, Draws: 8
Win + draw rate for training player: 0.525


Win1/Win2/Draw: 15/3/2: 100%|██████████| 20/20 [00:00<00:00, 29.55it/s]
Win1/Win2/Draw: 2/14/4: 100%|██████████| 20/20 [00:00<00:00, 35.33it/s]
 30%|███       | 3/10 [00:00<00:00, 24.06it/s]

Training player vs random. Wins: 29, Losses: 5, Draws: 6


100%|██████████| 10/10 [00:00<00:00, 24.21it/s]
100%|██████████| 1000/1000 [00:02<00:00, 349.74it/s]
100%|██████████| 10/10 [00:00<00:00, 17.00it/s]
100%|██████████| 1000/1000 [00:03<00:00, 282.87it/s]
Win1/Win2/Draw: 3/1/0:  20%|██        | 4/20 [00:00<00:00, 21.60it/s]

Evaluating. Self-player vs training, then training vs self-player


Win1/Win2/Draw: 12/8/0: 100%|██████████| 20/20 [00:01<00:00, 18.15it/s]
Win1/Win2/Draw: 5/11/4: 100%|██████████| 20/20 [00:01<00:00, 17.19it/s]
Win1/Win2/Draw: 6/0/1:  35%|███▌      | 7/20 [00:00<00:00, 37.32it/s]

Self-play player wins: 23, Training player wins: 13, Draws: 4
Win + draw rate for training player: 0.425


Win1/Win2/Draw: 18/1/1: 100%|██████████| 20/20 [00:00<00:00, 35.90it/s]
Win1/Win2/Draw: 5/13/2: 100%|██████████| 20/20 [00:00<00:00, 40.05it/s]
 30%|███       | 3/10 [00:00<00:00, 20.43it/s]

Training player vs random. Wins: 31, Losses: 6, Draws: 3


100%|██████████| 10/10 [00:00<00:00, 21.59it/s]
100%|██████████| 1000/1000 [00:03<00:00, 283.54it/s]
100%|██████████| 10/10 [00:00<00:00, 21.46it/s]
100%|██████████| 1000/1000 [00:04<00:00, 243.31it/s]
Win1/Win2/Draw: 2/2/0:  20%|██        | 4/20 [00:00<00:00, 21.61it/s]

Evaluating. Self-player vs training, then training vs self-player


Win1/Win2/Draw: 8/12/0: 100%|██████████| 20/20 [00:01<00:00, 18.74it/s]
Win1/Win2/Draw: 1/6/13: 100%|██████████| 20/20 [00:01<00:00, 12.75it/s]
Win1/Win2/Draw: 4/0/0:  20%|██        | 4/20 [00:00<00:00, 21.42it/s]

Self-play player wins: 14, Training player wins: 13, Draws: 13
Win + draw rate for training player: 0.65


Win1/Win2/Draw: 17/1/2: 100%|██████████| 20/20 [00:00<00:00, 20.97it/s]
Win1/Win2/Draw: 3/15/2: 100%|██████████| 20/20 [00:00<00:00, 26.81it/s]


Training player vs random. Wins: 32, Losses: 4, Draws: 4
Updating self-play player.
Restoring from step: 134
INFO:tensorflow:Restoring parameters from experiments/experiment-2018-05-16_22:17:36/checkpoints/134.checkpoint


100%|██████████| 10/10 [00:00<00:00, 16.33it/s]
100%|██████████| 1000/1000 [00:03<00:00, 316.20it/s]
100%|██████████| 10/10 [00:00<00:00, 14.96it/s]
100%|██████████| 1000/1000 [00:03<00:00, 266.19it/s]
Win1/Win2/Draw: 2/0/0:  10%|█         | 2/20 [00:00<00:01, 14.19it/s]

Evaluating. Self-player vs training, then training vs self-player


Win1/Win2/Draw: 10/7/3: 100%|██████████| 20/20 [00:01<00:00, 14.86it/s]
Win1/Win2/Draw: 7/6/7: 100%|██████████| 20/20 [00:01<00:00, 13.78it/s]
Win1/Win2/Draw: 4/0/1:  25%|██▌       | 5/20 [00:00<00:00, 24.79it/s]

Self-play player wins: 16, Training player wins: 14, Draws: 10
Win + draw rate for training player: 0.6


Win1/Win2/Draw: 18/0/2: 100%|██████████| 20/20 [00:00<00:00, 25.31it/s]
Win1/Win2/Draw: 7/12/1: 100%|██████████| 20/20 [00:00<00:00, 29.27it/s]


Training player vs random. Wins: 30, Losses: 7, Draws: 3
Updating self-play player.
Restoring from step: 136
INFO:tensorflow:Restoring parameters from experiments/experiment-2018-05-16_22:17:36/checkpoints/136.checkpoint


100%|██████████| 10/10 [00:00<00:00, 20.75it/s]
100%|██████████| 1000/1000 [00:03<00:00, 294.74it/s]
100%|██████████| 10/10 [00:00<00:00, 21.70it/s]
100%|██████████| 1000/1000 [00:03<00:00, 282.99it/s]
Win1/Win2/Draw: 1/1/1:  15%|█▌        | 3/20 [00:00<00:01, 16.77it/s]

Evaluating. Self-player vs training, then training vs self-player


Win1/Win2/Draw: 12/6/2: 100%|██████████| 20/20 [00:01<00:00, 11.83it/s]
Win1/Win2/Draw: 1/18/1: 100%|██████████| 20/20 [00:01<00:00, 14.83it/s]
Win1/Win2/Draw: 4/0/0:  20%|██        | 4/20 [00:00<00:00, 25.16it/s]

Self-play player wins: 30, Training player wins: 7, Draws: 3
Win + draw rate for training player: 0.25


Win1/Win2/Draw: 18/2/0: 100%|██████████| 20/20 [00:00<00:00, 29.58it/s]
Win1/Win2/Draw: 2/13/5: 100%|██████████| 20/20 [00:00<00:00, 38.60it/s]
 30%|███       | 3/10 [00:00<00:00, 23.86it/s]

Training player vs random. Wins: 31, Losses: 4, Draws: 5


100%|██████████| 10/10 [00:00<00:00, 25.08it/s]
100%|██████████| 1000/1000 [00:02<00:00, 351.81it/s]
100%|██████████| 10/10 [00:00<00:00, 23.25it/s]
100%|██████████| 1000/1000 [00:02<00:00, 343.28it/s]
Win1/Win2/Draw: 0/1/2:  15%|█▌        | 3/20 [00:00<00:00, 18.23it/s]

Evaluating. Self-player vs training, then training vs self-player


Win1/Win2/Draw: 3/1/16: 100%|██████████| 20/20 [00:01<00:00, 16.19it/s]
Win1/Win2/Draw: 6/14/0: 100%|██████████| 20/20 [00:01<00:00, 19.30it/s]
Win1/Win2/Draw: 6/0/0:  30%|███       | 6/20 [00:00<00:00, 34.89it/s]

Self-play player wins: 17, Training player wins: 7, Draws: 16
Win + draw rate for training player: 0.575


Win1/Win2/Draw: 18/0/2: 100%|██████████| 20/20 [00:00<00:00, 33.69it/s]
Win1/Win2/Draw: 3/13/4: 100%|██████████| 20/20 [00:00<00:00, 36.71it/s]


Training player vs random. Wins: 31, Losses: 3, Draws: 6
Updating self-play player.
Restoring from step: 140
INFO:tensorflow:Restoring parameters from experiments/experiment-2018-05-16_22:17:36/checkpoints/140.checkpoint


100%|██████████| 10/10 [00:00<00:00, 20.87it/s]
100%|██████████| 1000/1000 [00:02<00:00, 347.08it/s]
100%|██████████| 10/10 [00:00<00:00, 23.96it/s]
100%|██████████| 1000/1000 [00:03<00:00, 319.44it/s]
Win1/Win2/Draw: 2/1/0:  15%|█▌        | 3/20 [00:00<00:01, 15.08it/s]

Evaluating. Self-player vs training, then training vs self-player


Win1/Win2/Draw: 10/5/5: 100%|██████████| 20/20 [00:01<00:00, 16.58it/s]
Win1/Win2/Draw: 11/7/2: 100%|██████████| 20/20 [00:01<00:00, 18.37it/s]
Win1/Win2/Draw: 6/0/1:  35%|███▌      | 7/20 [00:00<00:00, 39.01it/s]

Self-play player wins: 17, Training player wins: 16, Draws: 7
Win + draw rate for training player: 0.575


Win1/Win2/Draw: 16/2/2: 100%|██████████| 20/20 [00:00<00:00, 35.86it/s]
Win1/Win2/Draw: 5/13/2: 100%|██████████| 20/20 [00:00<00:00, 31.75it/s]


Training player vs random. Wins: 29, Losses: 7, Draws: 4
Updating self-play player.
Restoring from step: 142
INFO:tensorflow:Restoring parameters from experiments/experiment-2018-05-16_22:17:36/checkpoints/142.checkpoint


100%|██████████| 10/10 [00:00<00:00, 21.72it/s]
100%|██████████| 1000/1000 [00:02<00:00, 341.87it/s]
100%|██████████| 10/10 [00:00<00:00, 17.51it/s]
100%|██████████| 1000/1000 [00:03<00:00, 283.31it/s]
Win1/Win2/Draw: 0/3/0:  15%|█▌        | 3/20 [00:00<00:01, 16.18it/s]

Evaluating. Self-player vs training, then training vs self-player


Win1/Win2/Draw: 13/6/1: 100%|██████████| 20/20 [00:01<00:00, 12.85it/s]
Win1/Win2/Draw: 14/5/1: 100%|██████████| 20/20 [00:01<00:00, 10.55it/s]
Win1/Win2/Draw: 3/1/0:  20%|██        | 4/20 [00:00<00:00, 23.21it/s]

Self-play player wins: 18, Training player wins: 20, Draws: 2
Win + draw rate for training player: 0.55


Win1/Win2/Draw: 17/2/1: 100%|██████████| 20/20 [00:01<00:00, 18.50it/s]
Win1/Win2/Draw: 7/10/3: 100%|██████████| 20/20 [00:00<00:00, 20.63it/s]
 10%|█         | 1/10 [00:00<00:00,  9.90it/s]

Training player vs random. Wins: 27, Losses: 9, Draws: 4


100%|██████████| 10/10 [00:00<00:00, 13.08it/s]
100%|██████████| 1000/1000 [00:03<00:00, 292.11it/s]
100%|██████████| 10/10 [00:00<00:00, 17.04it/s]
100%|██████████| 1000/1000 [00:03<00:00, 292.64it/s]
Win1/Win2/Draw: 0/3/0:  15%|█▌        | 3/20 [00:00<00:00, 18.08it/s]

Evaluating. Self-player vs training, then training vs self-player


Win1/Win2/Draw: 12/8/0: 100%|██████████| 20/20 [00:01<00:00, 16.21it/s]
Win1/Win2/Draw: 8/2/10: 100%|██████████| 20/20 [00:01<00:00, 13.10it/s]
Win1/Win2/Draw: 5/0/0:  25%|██▌       | 5/20 [00:00<00:00, 29.33it/s]

Self-play player wins: 14, Training player wins: 16, Draws: 10
Win + draw rate for training player: 0.65


Win1/Win2/Draw: 19/0/1: 100%|██████████| 20/20 [00:00<00:00, 23.96it/s]
Win1/Win2/Draw: 9/10/1: 100%|██████████| 20/20 [00:00<00:00, 24.91it/s]


Training player vs random. Wins: 29, Losses: 9, Draws: 2
Updating self-play player.
Restoring from step: 146
INFO:tensorflow:Restoring parameters from experiments/experiment-2018-05-16_22:17:36/checkpoints/146.checkpoint


100%|██████████| 10/10 [00:00<00:00, 13.66it/s]
100%|██████████| 1000/1000 [00:03<00:00, 288.27it/s]
100%|██████████| 10/10 [00:00<00:00, 20.37it/s]
100%|██████████| 1000/1000 [00:03<00:00, 325.89it/s]
Win1/Win2/Draw: 2/0/0:  10%|█         | 2/20 [00:00<00:01, 15.61it/s]

Evaluating. Self-player vs training, then training vs self-player


Win1/Win2/Draw: 18/1/1: 100%|██████████| 20/20 [00:01<00:00, 18.36it/s]
Win1/Win2/Draw: 16/2/2: 100%|██████████| 20/20 [00:01<00:00, 14.77it/s]
Win1/Win2/Draw: 6/0/0:  30%|███       | 6/20 [00:00<00:00, 34.19it/s]

Self-play player wins: 20, Training player wins: 17, Draws: 3
Win + draw rate for training player: 0.5


Win1/Win2/Draw: 19/1/0: 100%|██████████| 20/20 [00:00<00:00, 33.11it/s]
Win1/Win2/Draw: 6/12/2: 100%|██████████| 20/20 [00:00<00:00, 32.05it/s]
 30%|███       | 3/10 [00:00<00:00, 22.81it/s]

Training player vs random. Wins: 31, Losses: 7, Draws: 2


100%|██████████| 10/10 [00:00<00:00, 22.84it/s]
100%|██████████| 1000/1000 [00:03<00:00, 321.92it/s]
100%|██████████| 10/10 [00:00<00:00, 21.92it/s]
100%|██████████| 1000/1000 [00:03<00:00, 315.08it/s]
Win1/Win2/Draw: 4/0/0:  20%|██        | 4/20 [00:00<00:00, 21.44it/s]

Evaluating. Self-player vs training, then training vs self-player


Win1/Win2/Draw: 19/1/0: 100%|██████████| 20/20 [00:01<00:00, 18.99it/s]
Win1/Win2/Draw: 9/1/10: 100%|██████████| 20/20 [00:02<00:00, 10.75it/s]
Win1/Win2/Draw: 8/0/0:  40%|████      | 8/20 [00:00<00:00, 45.29it/s]

Self-play player wins: 20, Training player wins: 10, Draws: 10
Win + draw rate for training player: 0.5


Win1/Win2/Draw: 20/0/0: 100%|██████████| 20/20 [00:00<00:00, 35.30it/s]
Win1/Win2/Draw: 6/11/3: 100%|██████████| 20/20 [00:00<00:00, 33.41it/s]
 30%|███       | 3/10 [00:00<00:00, 20.58it/s]

Training player vs random. Wins: 31, Losses: 6, Draws: 3


100%|██████████| 10/10 [00:00<00:00, 19.55it/s]
100%|██████████| 1000/1000 [00:03<00:00, 316.13it/s]
100%|██████████| 10/10 [00:00<00:00, 20.02it/s]
100%|██████████| 1000/1000 [00:03<00:00, 303.52it/s]
Win1/Win2/Draw: 2/1/0:  15%|█▌        | 3/20 [00:00<00:00, 19.30it/s]

Evaluating. Self-player vs training, then training vs self-player


Win1/Win2/Draw: 16/4/0: 100%|██████████| 20/20 [00:01<00:00, 18.45it/s]
Win1/Win2/Draw: 17/1/2: 100%|██████████| 20/20 [00:01<00:00, 14.99it/s]
Win1/Win2/Draw: 3/0/2:  25%|██▌       | 5/20 [00:00<00:00, 29.28it/s]

Self-play player wins: 17, Training player wins: 21, Draws: 2
Win + draw rate for training player: 0.575


Win1/Win2/Draw: 16/2/2: 100%|██████████| 20/20 [00:00<00:00, 27.05it/s]
Win1/Win2/Draw: 7/10/3: 100%|██████████| 20/20 [00:00<00:00, 34.01it/s]


Training player vs random. Wins: 26, Losses: 9, Draws: 5
Updating self-play player.
Restoring from step: 152
INFO:tensorflow:Restoring parameters from experiments/experiment-2018-05-16_22:17:36/checkpoints/152.checkpoint


100%|██████████| 10/10 [00:01<00:00,  8.57it/s]
100%|██████████| 1000/1000 [00:03<00:00, 260.73it/s]
100%|██████████| 10/10 [00:00<00:00, 10.28it/s]
100%|██████████| 1000/1000 [00:03<00:00, 295.55it/s]
Win1/Win2/Draw: 2/1/0:  15%|█▌        | 3/20 [00:00<00:00, 19.09it/s]

Evaluating. Self-player vs training, then training vs self-player


Win1/Win2/Draw: 12/2/6: 100%|██████████| 20/20 [00:01<00:00, 17.82it/s]
Win1/Win2/Draw: 19/1/0: 100%|██████████| 20/20 [00:00<00:00, 20.45it/s]
Win1/Win2/Draw: 6/0/0:  30%|███       | 6/20 [00:00<00:00, 33.72it/s]

Self-play player wins: 13, Training player wins: 21, Draws: 6
Win + draw rate for training player: 0.675


Win1/Win2/Draw: 19/1/0: 100%|██████████| 20/20 [00:00<00:00, 35.78it/s]
Win1/Win2/Draw: 7/10/3: 100%|██████████| 20/20 [00:00<00:00, 38.69it/s]


Training player vs random. Wins: 29, Losses: 8, Draws: 3
Updating self-play player.
Restoring from step: 154
INFO:tensorflow:Restoring parameters from experiments/experiment-2018-05-16_22:17:36/checkpoints/154.checkpoint


100%|██████████| 10/10 [00:00<00:00, 14.60it/s]
100%|██████████| 1000/1000 [00:02<00:00, 351.87it/s]
100%|██████████| 10/10 [00:00<00:00, 22.86it/s]
100%|██████████| 1000/1000 [00:02<00:00, 359.40it/s]
Win1/Win2/Draw: 3/0/0:  15%|█▌        | 3/20 [00:00<00:00, 17.58it/s]

Evaluating. Self-player vs training, then training vs self-player


Win1/Win2/Draw: 15/5/0: 100%|██████████| 20/20 [00:01<00:00, 19.26it/s]
Win1/Win2/Draw: 11/6/3: 100%|██████████| 20/20 [00:01<00:00, 18.26it/s]
Win1/Win2/Draw: 4/0/0:  20%|██        | 4/20 [00:00<00:00, 19.72it/s]

Self-play player wins: 21, Training player wins: 16, Draws: 3
Win + draw rate for training player: 0.475


Win1/Win2/Draw: 16/1/3: 100%|██████████| 20/20 [00:01<00:00, 18.65it/s]
Win1/Win2/Draw: 5/13/2: 100%|██████████| 20/20 [00:00<00:00, 29.46it/s]
 20%|██        | 2/10 [00:00<00:00, 16.81it/s]

Training player vs random. Wins: 29, Losses: 6, Draws: 5


100%|██████████| 10/10 [00:00<00:00, 15.30it/s]
100%|██████████| 1000/1000 [00:03<00:00, 329.59it/s]
100%|██████████| 10/10 [00:00<00:00, 18.04it/s]
100%|██████████| 1000/1000 [00:03<00:00, 275.54it/s]
Win1/Win2/Draw: 0/0/2:  10%|█         | 2/20 [00:00<00:01,  9.33it/s]

Evaluating. Self-player vs training, then training vs self-player


Win1/Win2/Draw: 7/1/12: 100%|██████████| 20/20 [00:01<00:00, 10.49it/s]
Win1/Win2/Draw: 14/5/1: 100%|██████████| 20/20 [00:01<00:00, 12.11it/s]
Win1/Win2/Draw: 0/1/0:   5%|▌         | 1/20 [00:00<00:02,  6.68it/s]

Self-play player wins: 12, Training player wins: 15, Draws: 13
Win + draw rate for training player: 0.7


Win1/Win2/Draw: 17/3/0: 100%|██████████| 20/20 [00:01<00:00, 11.04it/s]
Win1/Win2/Draw: 7/11/2: 100%|██████████| 20/20 [00:00<00:00, 26.10it/s]


Training player vs random. Wins: 28, Losses: 10, Draws: 2
Updating self-play player.
Restoring from step: 158
INFO:tensorflow:Restoring parameters from experiments/experiment-2018-05-16_22:17:36/checkpoints/158.checkpoint


100%|██████████| 10/10 [00:00<00:00, 18.15it/s]
  5%|▌         | 54/1000 [00:00<00:03, 268.42it/s]


KeyboardInterrupt: 