# Step-by-step run of AlphaZero self-play & training.


In [None]:
import os
import time
from pathlib import Path
from collections import defaultdict, Counter
import asyncio
from typing import Callable

import numpy as np
import torch
import torch.nn.functional as F

# Game and players
from rgi.rgizero.games.connect4 import Connect4Game
from rgi.rgizero.players.alphazero import AlphazeroPlayer
from rgi.rgizero.players.alphazero import play_game

import notebook_utils
from notebook_utils import reload_local_modules

# Device handle chosen by the local notebook helper; it is passed as `device=`
# to model creation and to the network evaluator in later cells.
device = notebook_utils.detect_device()

# Allow asyncio to work with jupyter notebook
# (later cells `await` coroutines at cell top level; nest_asyncio patches
# asyncio so an already-running event loop can be re-entered).
import nest_asyncio
nest_asyncio.apply()

# Increase numpy print width so wide arrays are not wrapped when printed.
np.set_printoptions(linewidth=300)

%load_ext line_profiler

In [None]:
# Transformer size preset used for this run; it is looked up as a key of
# `model_config_dict` when the generation-0 model is created.
# Alternative preset: MODEL_SIZE = "8l.2h.128e"
MODEL_SIZE = "32l.4h.512e"

# Alias naming this configuration; used as the leaf directory for the
# data and model output paths.
CONFIG_ALIAS = f"play-games_size-{MODEL_SIZE}"

## Step 1: Set up history-wrapped game


In [None]:
from rgi.rgizero.games.history_wrapper import HistoryTrackingGame
from rgi.rgizero.data.trajectory_dataset import Vocab
from rgi.rgizero.common import TOKENS

# Connect4 on a 7x6 board: a game can last at most 7*6 moves.
base_game, max_game_length = Connect4Game(connect_length=4), 7*6

# Wrap the base game so that the action history is tracked alongside the state.
game = HistoryTrackingGame(base_game)
state_0 = game.initial_state()

# Sequence length handed to the evaluator and the transformer config.
# NOTE(review): the "+ 2" presumably leaves room for the START_OF_GAME token
# plus one extra position -- confirm against the model/evaluator code.
block_size = max_game_length + 2
all_actions = game.all_actions()
# Token vocabulary: the start-of-game marker followed by every game action.
action_vocab = Vocab(itos=[TOKENS.START_OF_GAME] + list(all_actions))
# Kept identical to block_size by construction so the two cannot drift apart.
n_max_context = block_size
game_name = base_game.__class__.__name__

print("✅ Using HistoryTrackingGame from module")
print(f"Game: {game_name}, Players: {game.num_players(state_0)}, Actions: {list(game.all_actions())}")

# Output locations live next to the notebook's parent directory, keyed by
# game name and configuration alias.
DATA_DIR = Path.cwd().parent / "data" / "rgizero-e2e" / game_name / CONFIG_ALIAS
print("Creating data dir: ", DATA_DIR)
DATA_DIR.mkdir(parents=True, exist_ok=True)

MODEL_DIR = Path.cwd().parent / "models" / "rgizero-e2e" / game_name / CONFIG_ALIAS
print("Creating model dir: ", MODEL_DIR)
MODEL_DIR.mkdir(parents=True, exist_ok=True)


## Step 2: Create random generation_0 model


In [None]:
# Pick up any edits to local modules before importing from them below.
reload_local_modules(verbose=False)

from rgi.rgizero.models.action_history_transformer import ActionHistoryTransformer
from rgi.rgizero.models.transformer import TransformerConfig
from rgi.rgizero.models.tuner import create_random_model

# Size presets, keyed "<layers>l.<heads>h.<embedding>e". Every preset shares
# the same context length (n_max_context).
model_config_dict = {
    size_name: TransformerConfig(n_max_context=n_max_context, n_layer=n_layer, n_head=n_head, n_embd=n_embd)
    for size_name, n_layer, n_head, n_embd in (
        ("2l.1h.8e", 2, 1, 8),
        ("4l.1h.32e", 4, 1, 32),
        ("8l.2h.128e", 8, 2, 128),
        ("16l.2h.256e", 16, 2, 256),
        ("32l.4h.512e", 32, 4, 512),
    )
}

# Configuration selected by MODEL_SIZE.
model_config = model_config_dict[MODEL_SIZE]

# Generation-0 model with random weights; the fixed seed is there to make
# model initialization deterministic.
model_0 = create_random_model(
    model_config,
    action_vocab_size=action_vocab.vocab_size,
    num_players=game.num_players(state_0),
    seed=42,
    device=device,
)

## Step 3: Define play & generation code


In [None]:
from rgi.rgizero.models.action_history_transformer import AsyncNetworkEvaluator, ActionHistoryTransformerEvaluator
from rgi.rgizero.players.alphazero import play_game_async
from tqdm.asyncio import tqdm

async def play_games_async(num_games: int, player_factory: Callable[[], AlphazeroPlayer], max_concurrent_games: int = 1000):
    """Play `num_games` self-play games concurrently and collect the results.

    Args:
        num_games: Number of games to play.
        player_factory: Zero-argument callable returning a fresh player. It is
            called once per game, after that game has acquired a concurrency slot.
        max_concurrent_games: Upper bound on the number of games in flight at once.

    Returns:
        The per-game results gathered by `tqdm.gather`. Each result is whatever
        `play_game_async` returned, with an extra ``'time'`` entry holding the
        game's duration in seconds (measured from acquiring the slot to the
        game finishing).
    """
    sem = asyncio.Semaphore(max_concurrent_games)

    async def create_player_and_create_game():
        # The semaphore caps how many games run at the same time.
        async with sem:
            # perf_counter() is monotonic, so the measured duration cannot be
            # distorted by system clock adjustments.
            t0 = time.perf_counter()
            player = player_factory()
            # Self-play: the same player object is passed for both entries.
            game_result = await play_game_async(game, [player, player])
            game_result['time'] = time.perf_counter() - t0
            return game_result

    tasks = [create_player_and_create_game() for _ in range(num_games)]
    # Same as asyncio.gather, but with a progress bar.
    return await tqdm.gather(*tasks)

async def go(model, num_games, simulations, max_concurrent_games, verbose=False):
    """Run one self-play benchmark and print a one-line timing summary.

    Builds a serial evaluator around `model`, wraps it in an async evaluator,
    plays `num_games` games through `play_games_async`, and prints the total
    and per-game elapsed time.

    Args:
        model: Model passed to `ActionHistoryTransformerEvaluator`.
        num_games: Number of self-play games to run.
        simulations: Value passed as `simulations=` to each `AlphazeroPlayer`.
        max_concurrent_games: Concurrency cap for game play; also used as the
            async evaluator's `max_batch_size`.
        verbose: Forwarded to the serial evaluator only; the async evaluator
            is always created with `verbose=False`.

    Returns:
        None. Game results are currently discarded; only timing is reported.
    """
    # Monotonic clock: the interval is unaffected by system clock changes.
    t0 = time.perf_counter()
    serial_evaluator = ActionHistoryTransformerEvaluator(model, device=device, block_size=block_size, vocab=action_vocab, verbose=verbose)
    async_evaluator = AsyncNetworkEvaluator(base_evaluator=serial_evaluator, max_batch_size=max_concurrent_games, verbose=False)

    # A single seeded generator hands out one seed per player, so the sequence
    # of per-player RNG seeds is the same on every call.
    master_rng = np.random.default_rng(42)

    def async_player_factory():
        player_seed = master_rng.integers(0, 2**31)
        return AlphazeroPlayer(game, async_evaluator, rng=np.random.default_rng(player_seed), add_noise=False, simulations=simulations)

    await async_evaluator.start()
    try:
        results = await play_games_async(num_games=num_games, player_factory=async_player_factory, max_concurrent_games=max_concurrent_games)
    finally:
        # Always stop the evaluator, even if a game raised, so a failed run
        # does not leave it running.
        await async_evaluator.stop()
    elapsed = time.perf_counter() - t0
    # Avoid ZeroDivisionError when no games were requested.
    elapsed_per_game = elapsed / num_games if num_games else float("nan")
    print(f"model_size: {MODEL_SIZE}, elapsed: {elapsed:.2f} seconds, num_games={num_games}, num_simulations={simulations}, max_concurrent_games={max_concurrent_games}, elapsed_per_game={elapsed_per_game:.2f} seconds")
    print()
    # Game results are currently discarded; uncomment to hand them to the caller.
    # return results

await go(model=model_0, num_games=1, simulations=100, max_concurrent_games=100000, verbose=True)
await go(model=model_0, num_games=2, simulations=100, max_concurrent_games=100000, verbose=True)
await go(model=model_0, num_games=5, simulations=100, max_concurrent_games=100000, verbose=True)
await go(model=model_0, num_games=10, simulations=100, max_concurrent_games=100000, verbose=True)
await go(model=model_0, num_games=20, simulations=100, max_concurrent_games=100000, verbose=True)
await go(model=model_0, num_games=50, simulations=100, max_concurrent_games=100000, verbose=True)
await go(model=model_0, num_games=100, simulations=100, max_concurrent_games=100000, verbose=True)
await go(model=model_0, num_games=200, simulations=100, max_concurrent_games=100000, verbose=True)
await go(model=model_0, num_games=500, simulations=100, max_concurrent_games=100000, verbose=True)
await go(model=model_0, num_games=1000, simulations=100, max_concurrent_games=100000, verbose=True)
await go(model=model_0, num_games=2000, simulations=100, max_concurrent_games=100000, verbose=True)

# num_games=200, simulations=100, model_size=tiny, 37.6 seconds, 100% CPU,  1% GPU, 5.32it/s
# num_games=200, simulations=100, model_size=xl,   67.6 seconds, 100% CPU, 15% GPU, 2.95it/s
# num_games=200, simulations=100, model_size=xxl,  77.1 seconds, 100% CPU, 65% GPU, 2.57it/s

# num_games=2000, simulations=100, model_size=xxl,  77.1 seconds, 100% CPU, 65% GPU, 3.20it/s

# xxl
# Evaluation time: 0.26 seconds, size=4000, average_time=0.00 seconds. -> 56% GPU. Single CPU can't keep it fed.

# model=tiny, m4
# Evaluation time: 0.122 seconds, size=4000, eval-per-second=32736.93, mean-eval-per-second=9887.77, mean-time-per-batch=0.405
# Evaluation time: 0.127 seconds, size=10, eval-per-second=78.70, mean-eval-per-second=39.18, mean-time-per-batch=0.255           # 10 games, 10 sims, first game finished after 20s
# Evaluation time: 0.018 seconds, size=1, eval-per-second=55.68, mean-eval-per-second=3.87, mean-time-per-batch=0.258             # 1 game, 10 sims, 2.9s


## m4 output. 100 simulations (per the num_simulations=100 recorded in the log lines below). tiny model -> 2l.1h.8e.
# model_size: tiny, elapsed: 3.50 seconds, num_games=1, num_simulations=100, max_concurrent_games=100000, elapsed_per_game=3.50 seconds
# model_size: tiny, elapsed: 3.77 seconds, num_games=2, num_simulations=100, max_concurrent_games=100000, elapsed_per_game=1.89 seconds
# model_size: tiny, elapsed: 3.11 seconds, num_games=5, num_simulations=100, max_concurrent_games=100000, elapsed_per_game=0.62 seconds
# model_size: tiny, elapsed: 3.42 seconds, num_games=10, num_simulations=100, max_concurrent_games=100000, elapsed_per_game=0.34 seconds
# model_size: tiny, elapsed: 4.12 seconds, num_games=20, num_simulations=100, max_concurrent_games=100000, elapsed_per_game=0.21 seconds
# model_size: tiny, elapsed: 5.62 seconds, num_games=50, num_simulations=100, max_concurrent_games=100000, elapsed_per_game=0.11 seconds
# model_size: tiny, elapsed: 8.64 seconds, num_games=100, num_simulations=100, max_concurrent_games=100000, elapsed_per_game=0.09 seconds
# model_size: tiny, elapsed: 14.48 seconds, num_games=200, num_simulations=100, max_concurrent_games=100000, elapsed_per_game=0.07 seconds
# model_size: tiny, elapsed: 27.65 seconds, num_games=500, num_simulations=100, max_concurrent_games=100000, elapsed_per_game=0.06 seconds
# model_size: tiny, elapsed: 58.85 seconds, num_games=1000, num_simulations=100, max_concurrent_games=100000, elapsed_per_game=0.06 seconds
# model_size: tiny, elapsed: 130.92 seconds, num_games=2000, num_simulations=100, max_concurrent_games=100000, elapsed_per_game=0.07 seconds

## m4 8l.2h.128e model
# model_size: 8l.2h.128e, elapsed: 6.57 seconds, num_games=1, num_simulations=100, max_concurrent_games=100000, elapsed_per_game=6.57 seconds
# model_size: 8l.2h.128e, elapsed: 5.23 seconds, num_games=2, num_simulations=100, max_concurrent_games=100000, elapsed_per_game=2.62 seconds
# model_size: 8l.2h.128e, elapsed: 4.50 seconds, num_games=5, num_simulations=100, max_concurrent_games=100000, elapsed_per_game=0.90 seconds
# model_size: 8l.2h.128e, elapsed: 5.99 seconds, num_games=10, num_simulations=100, max_concurrent_games=100000, elapsed_per_game=0.60 seconds
# model_size: 8l.2h.128e, elapsed: 7.76 seconds, num_games=20, num_simulations=100, max_concurrent_games=100000, elapsed_per_game=0.39 seconds
# model_size: 8l.2h.128e, elapsed: 10.44 seconds, num_games=50, num_simulations=100, max_concurrent_games=100000, elapsed_per_game=0.21 seconds
# model_size: 8l.2h.128e, elapsed: 15.68 seconds, num_games=100, num_simulations=100, max_concurrent_games=100000, elapsed_per_game=0.16 seconds
# model_size: 8l.2h.128e, elapsed: 20.14 seconds, num_games=200, num_simulations=100, max_concurrent_games=100000, elapsed_per_game=0.10 seconds
# model_size: 8l.2h.128e, elapsed: 41.23 seconds, num_games=500, num_simulations=100, max_concurrent_games=100000, elapsed_per_game=0.08 seconds
# model_size: 8l.2h.128e, elapsed: 81.82 seconds, num_games=1000, num_simulations=100, max_concurrent_games=100000, elapsed_per_game=0.08 seconds
# model_size: 8l.2h.128e, elapsed: 185.52 seconds, num_games=2000, num_simulations=100, max_concurrent_games=100000, elapsed_per_game=0.09 seconds
# model_size: 8l.2h.128e, elapsed: 569.11 seconds, num_games=5000, num_simulations=100, max_concurrent_games=100000, elapsed_per_game=0.11 seconds

## m4 32l.4h.512e
# model_size: 32l.4h.512e, elapsed: 12.02 seconds, num_games=1, num_simulations=100, max_concurrent_games=100000, elapsed_per_game=12.02 seconds
# model_size: 32l.4h.512e, elapsed: 9.00 seconds, num_games=2, num_simulations=100, max_concurrent_games=100000, elapsed_per_game=4.50 seconds
# model_size: 32l.4h.512e, elapsed: 13.42 seconds, num_games=5, num_simulations=100, max_concurrent_games=100000, elapsed_per_game=2.68 seconds
# model_size: 32l.4h.512e, elapsed: 14.85 seconds, num_games=10, num_simulations=100, max_concurrent_games=100000, elapsed_per_game=1.48 seconds
# model_size: 32l.4h.512e, elapsed: 18.80 seconds, num_games=20, num_simulations=100, max_concurrent_games=100000, elapsed_per_game=0.94 seconds
# model_size: 32l.4h.512e, elapsed: 38.02 seconds, num_games=50, num_simulations=100, max_concurrent_games=100000, elapsed_per_game=0.76 seconds
# model_size: 32l.4h.512e, elapsed: 64.89 seconds, num_games=100, num_simulations=100, max_concurrent_games=100000, elapsed_per_game=0.65 seconds
# model_size: 32l.4h.512e, elapsed: 140.28 seconds, num_games=200, num_simulations=100, max_concurrent_games=100000, elapsed_per_game=0.70 seconds
# model_size: 32l.4h.512e, elapsed: 335.16 seconds, num_games=500, num_simulations=100, max_concurrent_games=100000, elapsed_per_game=0.67 seconds
# model_size: 32l.4h.512e, elapsed: 675.47 seconds, num_games=1000, num_simulations=100, max_concurrent_games=100000, elapsed_per_game=0.68 seconds
# model_size: 32l.4h.512e, elapsed: 1461.05 seconds, num_games=2000, num_simulations=100, max_concurrent_games=100000, elapsed_per_game=0.73 seconds
