# Step-by-step run of AlphaZero self-play & training.


In [1]:
import os
import time
from pathlib import Path
from collections import defaultdict, Counter
import asyncio
from typing import Callable

import numpy as np
import torch
import torch.nn.functional as F

# Game and players
from rgi.rgizero.games.connect4 import Connect4Game
from rgi.rgizero.players.alphazero import AlphazeroPlayer
from rgi.rgizero.players.alphazero import play_game

import notebook_utils
from notebook_utils import reload_local_modules

# Pick the torch device through the notebook helper.
# NOTE(review): the recorded cell output shows "Detected device: mps", presumably printed by this helper.
device = notebook_utils.detect_device()

# Allow asyncio to work with jupyter notebook: the kernel already runs an event
# loop, and nest_asyncio.apply() patches asyncio so that loop can be re-entered.
import nest_asyncio
nest_asyncio.apply()

# Increase numpy print width so wide arrays print on a single line
np.set_printoptions(linewidth=300)

# Load the line_profiler IPython extension (it provides the %lprun magic).
%load_ext line_profiler

Detected device: mps


In [None]:
# Model preset to benchmark. The value must be a key of `model_config_dict`
# (Step 2 looks it up as model_config_dict[MODEL_SIZE]); the commented-out line
# is the smaller alternative preset.
# MODEL_SIZE = "8l.2h.128e"
MODEL_SIZE = "32l.4h.512e"
# Run label: used as the final path component of DATA_DIR and MODEL_DIR in Step 1.
CONFIG_ALIAS = f'play-games_size-{MODEL_SIZE}'

## Step 1: Set up history-wrapped game


In [3]:
from rgi.rgizero.games.history_wrapper import HistoryTrackingGame
from rgi.rgizero.data.trajectory_dataset import Vocab
from rgi.rgizero.common import TOKENS

# Base game plus an upper bound on the number of moves in one game.
# NOTE(review): 7*6 is presumably columns * rows of the Connect4 board — confirm against Connect4Game.
base_game = Connect4Game(connect_length=4)
max_game_length = 7 * 6

# Wrap the base game so that states carry their action history.
game = HistoryTrackingGame(base_game)
state_0 = game.initial_state()

# The model context and the evaluator block share one size: the game-length bound plus 2.
block_size = n_max_context = max_game_length + 2

# Token vocabulary: the start-of-game marker followed by every game action.
all_actions = game.all_actions()
action_vocab = Vocab(itos=[TOKENS.START_OF_GAME, *all_actions])
game_name = type(base_game).__name__

print("✅ Using HistoryTrackingGame from module")
print(f"Game: {game_name}, Players: {game.num_players(state_0)}, Actions: {list(game.all_actions())}")

# Output directories live next to the notebook's working directory,
# namespaced by game name and run alias.
DATA_DIR = Path.cwd().parent / "data" / "rgizero-e2e" / game_name / CONFIG_ALIAS
print("Creating data dir: ", DATA_DIR)
DATA_DIR.mkdir(parents=True, exist_ok=True)

MODEL_DIR = Path.cwd().parent / "models" / "rgizero-e2e" / game_name / CONFIG_ALIAS
print("Creating model dir: ", MODEL_DIR)
MODEL_DIR.mkdir(parents=True, exist_ok=True)


✅ Using HistoryTrackingGame from module
Game: Connect4Game, Players: 2, Actions: [1, 2, 3, 4, 5, 6, 7]
Creating data dir:  /Users/rodo/src/rgi3-sync/data/rgizero-e2e/Connect4Game/play-games_size-8l.2h.128e
Creating model dir:  /Users/rodo/src/rgi3-sync/models/rgizero-e2e/Connect4Game/play-games_size-8l.2h.128e


## Step 2: Create random generation_0 model


In [4]:
# Refresh local project modules before the imports below.
# NOTE(review): presumably re-imports edited project code without a kernel restart — confirm in notebook_utils.
reload_local_modules(verbose=False)

from rgi.rgizero.models.action_history_transformer import ActionHistoryTransformer
from rgi.rgizero.models.transformer import TransformerConfig

# Transformer presets keyed by "<n_layer>l.<n_head>h.<n_embd>e".
# Every preset shares the same context length (n_max_context).
model_config_dict = {
    size_key: TransformerConfig(n_max_context=n_max_context, n_layer=n_layer, n_head=n_head, n_embd=n_embd)
    for size_key, (n_layer, n_head, n_embd) in {
        "2l.1h.8e": (2, 1, 8),
        "4l.1h.32e": (4, 1, 32),
        "8l.2h.128e": (8, 2, 128),
        "16l.2h.256e": (16, 2, 256),
        "32l.4h.512e": (32, 4, 512),
    }.items()
}

from rgi.rgizero.models.tuner import create_random_model

# Select the preset named by MODEL_SIZE and build the untrained generation-0 model.
# The fixed seed makes model initialization deterministic.
model_config = model_config_dict[MODEL_SIZE]
model_0 = create_random_model(
    model_config,
    action_vocab_size=action_vocab.vocab_size,
    num_players=game.num_players(state_0),
    seed=42,
    device=device,
)

transform_config_fields: {'bias', 'n_embd', 'n_max_context', 'n_head', 'dropout', 'n_layer'}
train_config_fields: {'eval_iters', 'beta2', 'log_interval', 'beta1', 'always_save_checkpoint', 'lr_decay_iters', 'weight_decay', 'eval_interval', 'eval_only', 'warmup_iters', 'max_iters', 'model_name', 'batch_size', 'device', 'learning_rate', 'dtype', 'grad_clip', 'model_version', 'wandb_log', 'min_lr', 'compile', 'max_epochs', 'decay_lr', 'gradient_accumulation_steps'}


## Step 3: Define play & generation code


In [None]:
from rgi.rgizero.models.action_history_transformer import AsyncNetworkEvaluator, ActionHistoryTransformerEvaluator
from rgi.rgizero.players.alphazero import play_game_async
from tqdm.asyncio import tqdm

async def play_games_async(num_games: int, player_factory: Callable[[], AlphazeroPlayer], max_concurrent_games: int = 1000):
    """Play `num_games` self-play games concurrently and return their results.

    Each game gets a fresh player from `player_factory`; that single player
    instance is used for both seats of the notebook-level `game`.

    Args:
        num_games: Number of games to schedule.
        player_factory: Zero-argument callable returning a new player per game.
        max_concurrent_games: Maximum number of games in flight at once
            (enforced with an asyncio.Semaphore).

    Returns:
        The per-game results as returned by `tqdm.gather`. Each result gains a
        `'time'` entry: seconds from acquiring a concurrency slot until the
        game finished (time spent waiting for a slot is not included).
    """
    sem = asyncio.Semaphore(max_concurrent_games)

    async def create_player_and_create_game():
        async with sem:
            # perf_counter is monotonic, so the duration cannot be skewed by
            # wall-clock adjustments the way time.time() differences can.
            t0 = time.perf_counter()
            player = player_factory()
            game_result = await play_game_async(game, [player, player])
            game_result['time'] = time.perf_counter() - t0
            return game_result

    tasks = [create_player_and_create_game() for _ in range(num_games)]
    results = await tqdm.gather(*tasks)   # same as asyncio.gather, but with a progress bar
    return results

async def go(model, num_games, simulations, max_concurrent_games, verbose=False):
    """Run one timed self-play benchmark and print a one-line summary.

    Wraps `model` in an ActionHistoryTransformerEvaluator, fronts it with an
    AsyncNetworkEvaluator whose max batch size is `max_concurrent_games`, plays
    `num_games` games via `play_games_async`, and prints the elapsed time. The
    timed span covers evaluator construction, start-up, all games, and shutdown.

    Args:
        model: Model handed to the serial evaluator.
        num_games: Number of self-play games to run.
        simulations: Passed to each AlphazeroPlayer as `simulations`.
        max_concurrent_games: Concurrency cap for games and max evaluator batch size.
        verbose: Forwarded to the serial evaluator only; the async wrapper is
            always created with verbose=False.

    Returns:
        None. The game results are discarded.
    """
    t0 = time.perf_counter()
    serial_evaluator = ActionHistoryTransformerEvaluator(model, device=device, block_size=block_size, vocab=action_vocab, verbose=verbose)
    async_evaluator = AsyncNetworkEvaluator(base_evaluator=serial_evaluator, max_batch_size=max_concurrent_games, verbose=False)

    # One master generator seeded with a constant; each player gets its own
    # generator seeded from it, so the sequence of player seeds is reproducible.
    master_rng = np.random.default_rng(42)

    def async_player_factory():
        return AlphazeroPlayer(game, async_evaluator, rng=np.random.default_rng(master_rng.integers(0, 2**31)), add_noise=False, simulations=simulations)

    await async_evaluator.start()
    try:
        await play_games_async(num_games=num_games, player_factory=async_player_factory, max_concurrent_games=max_concurrent_games)
    finally:
        # Always shut the evaluator down, even if a game raised or the cell was
        # interrupted; otherwise it would be left running in the kernel.
        await async_evaluator.stop()
    elapsed = time.perf_counter() - t0
    # Guard the per-game average so num_games=0 prints nan instead of raising.
    elapsed_per_game = elapsed / num_games if num_games else float("nan")
    print(f"model_size: {MODEL_SIZE}, elapsed: {elapsed:.2f} seconds, num_games={num_games}, num_simulations={simulations}, max_concurrent_games={max_concurrent_games}, elapsed_per_game={elapsed_per_game:.2f} seconds")
    print()
    # NOTE(review): `return results` was commented out in the original; the
    # function still returns None so existing callers behave the same.

await go(model=model_0, num_games=1, simulations=100, max_concurrent_games=100000, verbose=True)
await go(model=model_0, num_games=2, simulations=100, max_concurrent_games=100000, verbose=True)
await go(model=model_0, num_games=5, simulations=100, max_concurrent_games=100000, verbose=True)
await go(model=model_0, num_games=10, simulations=100, max_concurrent_games=100000, verbose=True)
await go(model=model_0, num_games=20, simulations=100, max_concurrent_games=100000, verbose=True)
await go(model=model_0, num_games=50, simulations=100, max_concurrent_games=100000, verbose=True)
await go(model=model_0, num_games=100, simulations=100, max_concurrent_games=100000, verbose=True)
await go(model=model_0, num_games=200, simulations=100, max_concurrent_games=100000, verbose=True)
await go(model=model_0, num_games=500, simulations=100, max_concurrent_games=100000, verbose=True)
await go(model=model_0, num_games=1000, simulations=100, max_concurrent_games=100000, verbose=True)
await go(model=model_0, num_games=2000, simulations=100, max_concurrent_games=100000, verbose=True)

# num_games=200, simulations=100, model_size=tiny, 37.6 seconds, 100% CPU,  1% GPU, 5.32it/s
# num_games=200, simulations=100, model_size=xl,   67.6 seconds, 100% CPU, 15% GPU, 2.95it/s
# num_games=200, simulations=100, model_size=xxl,  77.1 seconds, 100% CPU, 65% GPU, 2.57it/s

# num_games=2000, simulations=100, model_size=xxl,  77.1 seconds, 100% CPU, 65% GPU, 3.20it/s

# xxl
# Evaluation time: 0.26 seconds, size=4000, average_time=0.00 seconds. -> 56% GPU. Single CPU can't keep it fed.

# model=tiny, m4
# Evaluation time: 0.122 seconds, size=4000, eval-per-second=32736.93, mean-eval-per-second=9887.77, mean-time-per-batch=0.405
# Evaluation time: 0.127 seconds, size=10, eval-per-second=78.70, mean-eval-per-second=39.18, mean-time-per-batch=0.255           # 10 games, 10 sims, first game finished after 20s
# Evaluation time: 0.018 seconds, size=1, eval-per-second=55.68, mean-eval-per-second=3.87, mean-time-per-batch=0.258             # 1 game, 10 sims, 2.9s


## m4 output. 100 simulations. tiny model -> 2l.1h.8e.
# Evaluation time: 0.001 seconds, size=1, eval-per-second=1079.89, total-batches=1000, mean-eval-per-second=686.36, mean-time-per-batch=0.001, mean-batch-size=1.00
# model_size: tiny, elapsed: 3.50 seconds, num_games=1, num_simulations=100, max_concurrent_games=100000, elapsed_per_game=3.50 seconds

# Evaluation time: 0.001 seconds, size=1, eval-per-second=836.85, total-batches=2000, mean-eval-per-second=1136.79, mean-time-per-batch=0.001, mean-batch-size=1.59
# model_size: tiny, elapsed: 3.77 seconds, num_games=2, num_simulations=100, max_concurrent_games=100000, elapsed_per_game=1.89 seconds

# Evaluation time: 0.001 seconds, size=1, eval-per-second=1235.80, total-batches=2000, mean-eval-per-second=2673.50, mean-time-per-batch=0.001, mean-batch-size=3.06
# model_size: tiny, elapsed: 3.11 seconds, num_games=5, num_simulations=100, max_concurrent_games=100000, elapsed_per_game=0.62 seconds

# Evaluation time: 0.001 seconds, size=1, eval-per-second=1162.50, total-batches=2000, mean-eval-per-second=4498.09, mean-time-per-batch=0.001, mean-batch-size=5.57
# model_size: tiny, elapsed: 3.42 seconds, num_games=10, num_simulations=100, max_concurrent_games=100000, elapsed_per_game=0.34 seconds

# Evaluation time: 0.001 seconds, size=1, eval-per-second=894.50, total-batches=2000, mean-eval-per-second=8286.58, mean-time-per-batch=0.001, mean-batch-size=11.89
# model_size: tiny, elapsed: 4.12 seconds, num_games=20, num_simulations=100, max_concurrent_games=100000, elapsed_per_game=0.21 seconds

# Evaluation time: 0.001 seconds, size=4, eval-per-second=4439.59, total-batches=2000, mean-eval-per-second=17650.14, mean-time-per-batch=0.002, mean-batch-size=31.26
# model_size: tiny, elapsed: 5.62 seconds, num_games=50, num_simulations=100, max_concurrent_games=100000, elapsed_per_game=0.11 seconds

# Evaluation time: 0.002 seconds, size=6, eval-per-second=3918.69, total-batches=2000, mean-eval-per-second=24290.54, mean-time-per-batch=0.002, mean-batch-size=58.26
# model_size: tiny, elapsed: 8.64 seconds, num_games=100, num_simulations=100, max_concurrent_games=100000, elapsed_per_game=0.09 seconds

# Evaluation time: 0.001 seconds, size=12, eval-per-second=8621.39, total-batches=2000, mean-eval-per-second=32890.85, mean-time-per-batch=0.004, mean-batch-size=118.87
# model_size: tiny, elapsed: 14.48 seconds, num_games=200, num_simulations=100, max_concurrent_games=100000, elapsed_per_game=0.07 seconds

# Evaluation time: 0.001 seconds, size=26, eval-per-second=21701.87, total-batches=2000, mean-eval-per-second=48257.22, mean-time-per-batch=0.006, mean-batch-size=287.84
# model_size: tiny, elapsed: 27.65 seconds, num_games=500, num_simulations=100, max_concurrent_games=100000, elapsed_per_game=0.06 seconds

# Evaluation time: 0.001 seconds, size=42, eval-per-second=29165.69, total-batches=2000, mean-eval-per-second=46082.36, mean-time-per-batch=0.012, mean-batch-size=553.45
# model_size: tiny, elapsed: 58.85 seconds, num_games=1000, num_simulations=100, max_concurrent_games=100000, elapsed_per_game=0.06 seconds

# Evaluation time: 0.002 seconds, size=85, eval-per-second=37694.63, total-batches=2000, mean-eval-per-second=49695.32, mean-time-per-batch=0.022, mean-batch-size=1111.89
# model_size: tiny, elapsed: 130.92 seconds, num_games=2000, num_simulations=100, max_concurrent_games=100000, elapsed_per_game=0.07 seconds



# ## m4 8l.2h.128e model
# 0%|          | 0/1 [00:00<?, ?it/s]Evaluation time: 0.002 seconds, size=1, eval-per-second=585.80, total-batches=1000, mean-eval-per-second=239.83, mean-time-per-batch=0.004, mean-batch-size=1.00
# 100%|██████████| 1/1 [00:06<00:00,  6.56s/it]
# model_size: 8l.2h.128e, elapsed: 6.57 seconds, num_games=1, num_simulations=100, max_concurrent_games=100000, elapsed_per_game=6.57 seconds

#   0%|          | 0/2 [00:00<?, ?it/s]Evaluation time: 0.003 seconds, size=2, eval-per-second=639.18, total-batches=1000, mean-eval-per-second=574.83, mean-time-per-batch=0.003, mean-batch-size=2.00
# 100%|██████████| 2/2 [00:05<00:00,  2.61s/it]
# model_size: 8l.2h.128e, elapsed: 5.23 seconds, num_games=2, num_simulations=100, max_concurrent_games=100000, elapsed_per_game=2.62 seconds

#  40%|████      | 2/5 [00:01<00:02,  1.38it/s]Evaluation time: 0.004 seconds, size=3, eval-per-second=763.34, total-batches=1000, mean-eval-per-second=1468.55, mean-time-per-batch=0.003, mean-batch-size=4.05
# 100%|██████████| 5/5 [00:04<00:00,  1.11it/s]
# model_size: 8l.2h.128e, elapsed: 4.50 seconds, num_games=5, num_simulations=100, max_concurrent_games=100000, elapsed_per_game=0.90 seconds

#  40%|████      | 4/10 [00:02<00:03,  1.63it/s]Evaluation time: 0.003 seconds, size=5, eval-per-second=1730.18, total-batches=1000, mean-eval-per-second=2802.86, mean-time-per-batch=0.003, mean-batch-size=8.67
# 100%|██████████| 10/10 [00:05<00:00,  1.67it/s]
# model_size: 8l.2h.128e, elapsed: 5.99 seconds, num_games=10, num_simulations=100, max_concurrent_games=100000, elapsed_per_game=0.60 seconds

#  50%|█████     | 10/20 [00:03<00:02,  4.01it/s]Evaluation time: 0.003 seconds, size=11, eval-per-second=3891.48, total-batches=1000, mean-eval-per-second=5334.04, mean-time-per-batch=0.003, mean-batch-size=17.52
#  95%|█████████▌| 19/20 [00:06<00:00,  3.46it/s]Evaluation time: 0.002 seconds, size=1, eval-per-second=609.37, total-batches=2000, mean-eval-per-second=3686.38, mean-time-per-batch=0.003, mean-batch-size=11.29
# 100%|██████████| 20/20 [00:07<00:00,  2.58it/s]
# model_size: 8l.2h.128e, elapsed: 7.76 seconds, num_games=20, num_simulations=100, max_concurrent_games=100000, elapsed_per_game=0.39 seconds

#  46%|████▌     | 23/50 [00:05<00:03,  7.35it/s]Evaluation time: 0.004 seconds, size=28, eval-per-second=6795.93, total-batches=1000, mean-eval-per-second=10745.07, mean-time-per-batch=0.004, mean-batch-size=43.95
#  96%|█████████▌| 48/50 [00:09<00:00,  7.83it/s]Evaluation time: 0.003 seconds, size=1, eval-per-second=327.09, total-batches=2000, mean-eval-per-second=7408.76, mean-time-per-batch=0.004, mean-batch-size=29.20
# 100%|██████████| 50/50 [00:10<00:00,  4.79it/s]
# model_size: 8l.2h.128e, elapsed: 10.44 seconds, num_games=50, num_simulations=100, max_concurrent_games=100000, elapsed_per_game=0.21 seconds

#  43%|████▎     | 43/100 [00:07<00:07,  7.32it/s]Evaluation time: 0.008 seconds, size=58, eval-per-second=7454.94, total-batches=1000, mean-eval-per-second=15532.31, mean-time-per-batch=0.006, mean-batch-size=87.86
#  96%|█████████▌| 96/100 [00:14<00:00, 11.98it/s]Evaluation time: 0.003 seconds, size=3, eval-per-second=1085.30, total-batches=2000, mean-eval-per-second=10064.37, mean-time-per-batch=0.006, mean-batch-size=58.42
# 100%|██████████| 100/100 [00:15<00:00,  6.38it/s]
# model_size: 8l.2h.128e, elapsed: 15.68 seconds, num_games=100, num_simulations=100, max_concurrent_games=100000, elapsed_per_game=0.16 seconds

#  42%|████▏     | 84/200 [00:10<00:08, 13.16it/s]Evaluation time: 0.004 seconds, size=117, eval-per-second=30178.56, total-batches=1000, mean-eval-per-second=25773.10, mean-time-per-batch=0.007, mean-batch-size=176.62
#  96%|█████████▋| 193/200 [00:18<00:00, 12.69it/s]Evaluation time: 0.004 seconds, size=7, eval-per-second=1915.71, total-batches=2000, mean-eval-per-second=17621.53, mean-time-per-batch=0.007, mean-batch-size=115.93
# 100%|██████████| 200/200 [00:20<00:00,  9.93it/s]
# model_size: 8l.2h.128e, elapsed: 20.14 seconds, num_games=200, num_simulations=100, max_concurrent_games=100000, elapsed_per_game=0.10 seconds

#  50%|████▉     | 249/500 [00:25<00:08, 28.81it/s]Evaluation time: 0.024 seconds, size=257, eval-per-second=10713.69, total-batches=1000, mean-eval-per-second=31961.52, mean-time-per-batch=0.014, mean-batch-size=436.23
#  98%|█████████▊| 489/500 [00:39<00:00, 18.03it/s]Evaluation time: 0.004 seconds, size=11, eval-per-second=3029.57, total-batches=2000, mean-eval-per-second=22318.12, mean-time-per-batch=0.012, mean-batch-size=273.35
# 100%|██████████| 500/500 [00:41<00:00, 12.13it/s]
# model_size: 8l.2h.128e, elapsed: 41.23 seconds, num_games=500, num_simulations=100, max_concurrent_games=100000, elapsed_per_game=0.08 seconds

#  48%|████▊     | 476/1000 [00:51<00:20, 25.66it/s]Evaluation time: 0.048 seconds, size=528, eval-per-second=10958.93, total-batches=1000, mean-eval-per-second=37194.78, mean-time-per-batch=0.024, mean-batch-size=874.89
#  97%|█████████▋| 971/1000 [01:18<00:02, 14.32it/s]Evaluation time: 0.006 seconds, size=30, eval-per-second=4732.55, total-batches=2000, mean-eval-per-second=25181.74, mean-time-per-batch=0.022, mean-batch-size=551.68
# 100%|██████████| 1000/1000 [01:21<00:00, 12.22it/s]
# model_size: 8l.2h.128e, elapsed: 81.82 seconds, num_games=1000, num_simulations=100, max_concurrent_games=100000, elapsed_per_game=0.08 seconds

#  46%|████▌     | 915/2000 [01:50<00:40, 26.70it/s] Evaluation time: 0.058 seconds, size=1092, eval-per-second=18673.04, total-batches=1000, mean-eval-per-second=39731.10, mean-time-per-batch=0.044, mean-batch-size=1758.91
#  96%|█████████▋| 1926/2000 [02:53<00:06, 11.23it/s]Evaluation time: 0.004 seconds, size=75, eval-per-second=18868.33, total-batches=2000, mean-eval-per-second=24882.65, mean-time-per-batch=0.045, mean-batch-size=1115.59
# 100%|██████████| 2000/2000 [03:05<00:00, 10.78it/s]
# model_size: 8l.2h.128e, elapsed: 185.52 seconds, num_games=2000, num_simulations=100, max_concurrent_games=100000, elapsed_per_game=0.09 seconds

#  44%|████▍     | 2214/5000 [06:35<02:35, 17.92it/s]  Evaluation time: 0.140 seconds, size=2787, eval-per-second=19839.12, total-batches=1000, mean-eval-per-second=28919.27, mean-time-per-batch=0.152, mean-batch-size=4392.81
#  96%|█████████▌| 4812/5000 [09:06<00:15, 12.40it/s]Evaluation time: 0.009 seconds, size=191, eval-per-second=20241.35, total-batches=2000, mean-eval-per-second=22692.19, mean-time-per-batch=0.123, mean-batch-size=2793.36
# 100%|██████████| 5000/5000 [09:29<00:00,  8.79it/s]
# model_size: 8l.2h.128e, elapsed: 569.11 seconds, num_games=5000, num_simulations=100, max_concurrent_games=100000, elapsed_per_game=0.11 seconds


  0%|          | 0/1 [00:00<?, ?it/s]

Evaluation time: 0.002 seconds, size=1, eval-per-second=585.80, total-batches=1000, mean-eval-per-second=239.83, mean-time-per-batch=0.004, mean-batch-size=1.00


100%|██████████| 1/1 [00:06<00:00,  6.56s/it]


model_size: 8l.2h.128e, elapsed: 6.57 seconds, num_games=1, num_simulations=100, max_concurrent_games=100000, elapsed_per_game=6.57 seconds



  0%|          | 0/2 [00:00<?, ?it/s]

Evaluation time: 0.003 seconds, size=2, eval-per-second=639.18, total-batches=1000, mean-eval-per-second=574.83, mean-time-per-batch=0.003, mean-batch-size=2.00


100%|██████████| 2/2 [00:05<00:00,  2.61s/it]


model_size: 8l.2h.128e, elapsed: 5.23 seconds, num_games=2, num_simulations=100, max_concurrent_games=100000, elapsed_per_game=2.62 seconds



 40%|████      | 2/5 [00:01<00:02,  1.38it/s]

Evaluation time: 0.004 seconds, size=3, eval-per-second=763.34, total-batches=1000, mean-eval-per-second=1468.55, mean-time-per-batch=0.003, mean-batch-size=4.05


100%|██████████| 5/5 [00:04<00:00,  1.11it/s]


model_size: 8l.2h.128e, elapsed: 4.50 seconds, num_games=5, num_simulations=100, max_concurrent_games=100000, elapsed_per_game=0.90 seconds



 40%|████      | 4/10 [00:02<00:03,  1.63it/s]

Evaluation time: 0.003 seconds, size=5, eval-per-second=1730.18, total-batches=1000, mean-eval-per-second=2802.86, mean-time-per-batch=0.003, mean-batch-size=8.67


100%|██████████| 10/10 [00:05<00:00,  1.67it/s]


model_size: 8l.2h.128e, elapsed: 5.99 seconds, num_games=10, num_simulations=100, max_concurrent_games=100000, elapsed_per_game=0.60 seconds



 50%|█████     | 10/20 [00:03<00:02,  4.01it/s]

Evaluation time: 0.003 seconds, size=11, eval-per-second=3891.48, total-batches=1000, mean-eval-per-second=5334.04, mean-time-per-batch=0.003, mean-batch-size=17.52


 95%|█████████▌| 19/20 [00:06<00:00,  3.46it/s]

Evaluation time: 0.002 seconds, size=1, eval-per-second=609.37, total-batches=2000, mean-eval-per-second=3686.38, mean-time-per-batch=0.003, mean-batch-size=11.29


100%|██████████| 20/20 [00:07<00:00,  2.58it/s]


model_size: 8l.2h.128e, elapsed: 7.76 seconds, num_games=20, num_simulations=100, max_concurrent_games=100000, elapsed_per_game=0.39 seconds



 46%|████▌     | 23/50 [00:05<00:03,  7.35it/s]

Evaluation time: 0.004 seconds, size=28, eval-per-second=6795.93, total-batches=1000, mean-eval-per-second=10745.07, mean-time-per-batch=0.004, mean-batch-size=43.95


 96%|█████████▌| 48/50 [00:09<00:00,  7.83it/s]

Evaluation time: 0.003 seconds, size=1, eval-per-second=327.09, total-batches=2000, mean-eval-per-second=7408.76, mean-time-per-batch=0.004, mean-batch-size=29.20


100%|██████████| 50/50 [00:10<00:00,  4.79it/s]


model_size: 8l.2h.128e, elapsed: 10.44 seconds, num_games=50, num_simulations=100, max_concurrent_games=100000, elapsed_per_game=0.21 seconds



 43%|████▎     | 43/100 [00:07<00:07,  7.32it/s]

Evaluation time: 0.008 seconds, size=58, eval-per-second=7454.94, total-batches=1000, mean-eval-per-second=15532.31, mean-time-per-batch=0.006, mean-batch-size=87.86


 96%|█████████▌| 96/100 [00:14<00:00, 11.98it/s]

Evaluation time: 0.003 seconds, size=3, eval-per-second=1085.30, total-batches=2000, mean-eval-per-second=10064.37, mean-time-per-batch=0.006, mean-batch-size=58.42


100%|██████████| 100/100 [00:15<00:00,  6.38it/s]


model_size: 8l.2h.128e, elapsed: 15.68 seconds, num_games=100, num_simulations=100, max_concurrent_games=100000, elapsed_per_game=0.16 seconds



 42%|████▏     | 84/200 [00:10<00:08, 13.16it/s]

Evaluation time: 0.004 seconds, size=117, eval-per-second=30178.56, total-batches=1000, mean-eval-per-second=25773.10, mean-time-per-batch=0.007, mean-batch-size=176.62


 96%|█████████▋| 193/200 [00:18<00:00, 12.69it/s]

Evaluation time: 0.004 seconds, size=7, eval-per-second=1915.71, total-batches=2000, mean-eval-per-second=17621.53, mean-time-per-batch=0.007, mean-batch-size=115.93


100%|██████████| 200/200 [00:20<00:00,  9.93it/s]


model_size: 8l.2h.128e, elapsed: 20.14 seconds, num_games=200, num_simulations=100, max_concurrent_games=100000, elapsed_per_game=0.10 seconds



 50%|████▉     | 249/500 [00:25<00:08, 28.81it/s]

Evaluation time: 0.024 seconds, size=257, eval-per-second=10713.69, total-batches=1000, mean-eval-per-second=31961.52, mean-time-per-batch=0.014, mean-batch-size=436.23


 98%|█████████▊| 489/500 [00:39<00:00, 18.03it/s]

Evaluation time: 0.004 seconds, size=11, eval-per-second=3029.57, total-batches=2000, mean-eval-per-second=22318.12, mean-time-per-batch=0.012, mean-batch-size=273.35


100%|██████████| 500/500 [00:41<00:00, 12.13it/s]


model_size: 8l.2h.128e, elapsed: 41.23 seconds, num_games=500, num_simulations=100, max_concurrent_games=100000, elapsed_per_game=0.08 seconds



 48%|████▊     | 476/1000 [00:51<00:20, 25.66it/s]

Evaluation time: 0.048 seconds, size=528, eval-per-second=10958.93, total-batches=1000, mean-eval-per-second=37194.78, mean-time-per-batch=0.024, mean-batch-size=874.89


 97%|█████████▋| 971/1000 [01:18<00:02, 14.32it/s]

Evaluation time: 0.006 seconds, size=30, eval-per-second=4732.55, total-batches=2000, mean-eval-per-second=25181.74, mean-time-per-batch=0.022, mean-batch-size=551.68


100%|██████████| 1000/1000 [01:21<00:00, 12.22it/s]


model_size: 8l.2h.128e, elapsed: 81.82 seconds, num_games=1000, num_simulations=100, max_concurrent_games=100000, elapsed_per_game=0.08 seconds



 46%|████▌     | 915/2000 [01:50<00:40, 26.70it/s] 

Evaluation time: 0.058 seconds, size=1092, eval-per-second=18673.04, total-batches=1000, mean-eval-per-second=39731.10, mean-time-per-batch=0.044, mean-batch-size=1758.91


 96%|█████████▋| 1926/2000 [02:53<00:06, 11.23it/s]

Evaluation time: 0.004 seconds, size=75, eval-per-second=18868.33, total-batches=2000, mean-eval-per-second=24882.65, mean-time-per-batch=0.045, mean-batch-size=1115.59


100%|██████████| 2000/2000 [03:05<00:00, 10.78it/s]


model_size: 8l.2h.128e, elapsed: 185.52 seconds, num_games=2000, num_simulations=100, max_concurrent_games=100000, elapsed_per_game=0.09 seconds



 44%|████▍     | 2214/5000 [06:35<02:35, 17.92it/s]  

Evaluation time: 0.140 seconds, size=2787, eval-per-second=19839.12, total-batches=1000, mean-eval-per-second=28919.27, mean-time-per-batch=0.152, mean-batch-size=4392.81


 96%|█████████▌| 4812/5000 [09:06<00:15, 12.40it/s]

Evaluation time: 0.009 seconds, size=191, eval-per-second=20241.35, total-batches=2000, mean-eval-per-second=22692.19, mean-time-per-batch=0.123, mean-batch-size=2793.36


100%|██████████| 5000/5000 [09:29<00:00,  8.79it/s]


model_size: 8l.2h.128e, elapsed: 569.11 seconds, num_games=5000, num_simulations=100, max_concurrent_games=100000, elapsed_per_game=0.11 seconds

