# Scratchpad for profiling rgizero code.


In [None]:

import os
from pathlib import Path

import numpy as np
import torch
import torch.nn.functional as F

# Game and players
from rgi.rgizero.games.connect4 import Connect4Game
from rgi.rgizero.players import alphazero
from rgi.rgizero.players.alphazero import AlphazeroPlayer


from rgi.rgizero.common import TOKENS

from notebook_utils import reload_local_modules

print("✅ Imports successful")

assert torch.cuda.is_available()

print("✅ cuda available")

# Increase numpy print width
np.set_printoptions(linewidth=300)

DATA_DIR = Path.cwd().parent / "data" / "rgizero-e2e"
os.makedirs(DATA_DIR, exist_ok=True)

%load_ext line_profiler

## Step 1: Set up history-wrapped game

In [None]:
from rgi.rgizero.games.history_wrapper import HistoryTrackingGame

# Require five in a row instead of four so that connecting is harder; this
# helps test variable policy and longer games.
base_game = Connect4Game(connect_length=5)
# NOTE(review): 7*6 is presumably one move per cell of a 7x6 board — confirm
# against Connect4Game's default board size.
max_game_length = 7 * 6

# Wrap the base game so the rest of the notebook works with the
# history-tracking variant.
game = HistoryTrackingGame(base_game)
block_size = max_game_length + 2
state_0 = game.initial_state()

print("✅ Using HistoryTrackingGame from module")
print(f"Game: {base_game.__class__.__name__}, Players: {game.num_players(state_0)}, Actions: {list(game.all_actions())}")

## Step 2: Confirm we can self-play a game with a Random Evaluator.

In [None]:
# Presumably refreshes already-imported project modules so the imports below
# pick up local edits — confirm against notebook_utils.reload_local_modules.
reload_local_modules(verbose=False)

from rgi.rgizero.players.alphazero import AlphazeroPlayer, play_game, NetworkEvaluatorResult, NetworkEvaluator
from typing import override, Any

class RandomEvaluator(NetworkEvaluator):
    """Evaluator that answers with random numbers instead of network outputs.

    All draws come from a NumPy generator owned by the instance and seeded at
    construction time.
    """

    def __init__(self, seed: int = 42):
        """Create the evaluator with its own generator seeded by ``seed``."""
        self.rng = np.random.default_rng(seed)

    @override
    def evaluate(self, game, state, legal_actions: list[Any]):
        """Return one random score per legal action and one per player.

        Both arrays hold uniform samples from [0, 1) and are not normalised.
        The policy is drawn before the values, so the generator is consumed
        in that order on every call.
        """
        action_count = len(legal_actions)
        random_policy = self.rng.random(action_count)
        player_count = game.num_players(state)
        random_values = self.rng.random(player_count)
        return NetworkEvaluatorResult(random_policy, random_values)

def play_deterministic_game(seed, evaluator=None, player=None, verbose=False):
    """Play one self-play game of the notebook-level ``game`` and return its result.

    The same player object is used for both seats. When no player is given, an
    ``AlphazeroPlayer`` is built whose RNG is seeded with ``seed``.

    Args:
        seed: Seed for the default evaluator and for the default player's RNG.
        evaluator: Evaluator handed to the default player. When ``None``, a
            ``RandomEvaluator(seed=seed)`` is used. Ignored if ``player`` is
            supplied.
        player: Player to use for both seats. When ``None``, an
            ``AlphazeroPlayer`` is created from ``game``, the evaluator and a
            generator seeded with ``seed``.
        verbose: When true, print the game length, ``player.simulations`` and
            the action history.

    Returns:
        The object returned by ``play_game``; it is indexed with
        ``"action_history"`` when ``verbose`` is set.
    """
    # Compare against None explicitly: `x or default` would also discard a
    # caller-supplied object that merely happens to be falsy.
    if player is None:
        if evaluator is None:
            evaluator = RandomEvaluator(seed=seed)
        player = AlphazeroPlayer(game, evaluator, rng=np.random.default_rng(seed))

    game_result = play_game(game, [player, player])
    if verbose:
        print(f'game length: {len(game_result["action_history"])}, simulations={player.simulations}')
        print(game_result['action_history'])
    return game_result

# Smoke test: play one game with seed 42; verbose=True prints the game length,
# the player's simulation count and the action history.
game_result = play_deterministic_game(42, verbose=True)

In [None]:
%%timeit
# Benchmark one full seeded game. verbose=True means the two print calls are
# part of the timed work.

game_result = play_deterministic_game(42, verbose=True)

# Original %%timeit - 26.3 seconds.
# 3.28 s ± 60.1 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
# Vectorized calculation of select_action_index
# 2.4 s ± 38.3 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
# Vectorized & numba of select_action_index
# 1.7 s ± 32.9 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)

In [None]:
# 3.3s to play single game. simulations=800

# Function-level profile of one seeded game:
#   -s cumulative  sort the report by cumulative time
#   -l 30          limit the printed report to 30 lines
#   -r             return the pstats.Stats object produced by the run
%prun -r -l 30 -s cumulative game_result = play_deterministic_game(42, verbose=True)


In [None]:
# Line-by-line profile of alphazero.MCTSNode.select_action_index (selected
# with -f) while one seeded game is played. Uses the line_profiler extension
# loaded in the setup cell.
%lprun \
    -f alphazero.MCTSNode.select_action_index \
    game_result =play_deterministic_game(42, verbose=True)

