# Scratchpad & example notebook for the rgizero project.


In [None]:

import os
from pathlib import Path

import numpy as np
import torch
import torch.nn.functional as F

# Game and players
from rgi.rgizero.games.count21 import Count21Game
from rgi.rgizero.games.connect4 import Connect4Game
from rgi.rgizero.players.alphazero import AlphazeroPlayer
from rgi.rgizero.players.random_player import RandomPlayer


from rgi.rgizero.common import TOKENS

from notebook_utils import reload_local_modules

# Sanity banner: reaching this line means every import above resolved.
print("✅ Imports successful")

# The notebook insists on a CUDA-enabled torch; stop here if there is none.
assert torch.cuda.is_available()

print("✅ cuda available")

# Widen numpy's printed line width so long rows are not wrapped.
np.set_printoptions(linewidth=300)

# Artifacts live in <parent of cwd>/data/rgizero-e2e; create it when missing.
DATA_DIR = Path.cwd().parent.joinpath("data", "rgizero-e2e")
DATA_DIR.mkdir(parents=True, exist_ok=True)

## Step 1: Set up history-wrapped game

In [None]:
from rgi.rgizero.games.history_wrapper import HistoryTrackingGame

# Pick the underlying game along with an upper bound on its length.
# Alternative configuration (Count21):
# base_game, max_game_length = Count21Game(), 21
base_game = Connect4Game()
max_game_length = 7 * 6  # presumably one move per cell of a 7x6 board — TODO confirm

# Two slots beyond the longest game.
# NOTE(review): the extra 2 is presumably room for special tokens — confirm.
block_size = max_game_length + 2

# Wrap the base game with history tracking and grab its starting state.
game = HistoryTrackingGame(base_game)
state_0 = game.initial_state()

print("✅ Using HistoryTrackingGame from module")
print(f"Game: {base_game.__class__.__name__}, Players: {game.num_players(state_0)}, Actions: {list(game.all_actions())}")

## Step 2: Confirm we can read from & write to trajectory_dataset

In [None]:
from rgi.rgizero.data.trajectory_dataset import Vocab

# Collect the game-level facts the dataset cells below rely on.
num_players = game.num_players(state_0)
all_actions = game.all_actions()

# Token vocabulary: the start-of-game marker followed by every game action.
vocab = Vocab(itos=(TOKENS.START_OF_GAME, *all_actions))
print(f"Vocab: {vocab}")

In [None]:
from rgi.rgizero.data.trajectory_dataset import TrajectoryDatasetBuilder, TrajectoryDataset, build_trajectory_loader

def add_random_trajectory(rng, game, td_builder):
    """Play one random game and append its trajectory to ``td_builder``.

    Moves are picked with ``rng.choice`` from the legal actions until the game
    reports a terminal state. The policy and value targets stored alongside
    the actions are random placeholders, not real search output.

    Every random draw comes from ``rng``, so a seeded generator yields a
    reproducible dataset. Array widths are derived from ``game`` instead of
    notebook-level globals.

    Args:
        rng: A ``numpy.random.Generator`` (e.g. ``np.random.default_rng(42)``).
        game: Game whose states expose ``action_history`` (the notebook passes
            a ``HistoryTrackingGame``).
        td_builder: Builder that receives the data through ``add_trajectory``.

    Note:
        Actions are encoded with the notebook-level ``vocab``.
    """
    state = game.initial_state()

    # Size the placeholder targets from the game that was passed in. The
    # player count is read from the initial state, as the notebook does.
    num_actions = len(game.all_actions())
    num_players = game.num_players(state)

    while not game.is_terminal(state):
        action = rng.choice(game.legal_actions(state))
        state = game.next_state(state, action)

    history = state.action_history
    num_moves = len(history)

    td_builder.add_trajectory(
        actions=np.array(vocab.encode(history)),
        # One extra column beyond the actions; presumably it matches the vocab,
        # which prepends a start-of-game token — TODO confirm.
        policies=rng.random((num_moves, num_actions + 1)),
        values=rng.random((num_moves, num_players)),
    )

# Build a small throwaway dataset: 1000 random playthroughs collected in a
# single builder, then saved under DATA_DIR with the name 'train_1000'.
td_builder = TrajectoryDatasetBuilder(vocab)
rng = np.random.default_rng(42)  # fixed seed for the generator passed to each playthrough
for _ in range(1000):
    add_random_trajectory(rng, game, td_builder)

# NOTE(review): the second argument is presumably a split/dataset name — confirm.
td_builder.save(DATA_DIR, 'train_1000')

In [None]:
# Re-open the dataset saved above and show its first item as the cell output.
# NOTE(review): the positional 5 is presumably the block size (the loader cell
# below passes block_size=5) — confirm against TrajectoryDataset's signature.
td = TrajectoryDataset(DATA_DIR, 'train_1000', 5)
td[0]


In [None]:
# Read the same saved dataset back through the batched loader.
# device_is_cuda=False is passed here even though the setup cell asserts that
# CUDA is available.
loader = build_trajectory_loader(
    DATA_DIR, 'train_1000', block_size=5, batch_size=1,
    device_is_cuda=False, workers=4)

# Print every batch the loader yields; with batch_size=1 this is presumably
# one printout per item, so expect long output.
for batch in loader:
    print(batch)