# Play Against AlphaZero

This notebook lets you play interactively against a trained AlphaZero model, or pit two trained model generations against each other.

In [None]:
import sys
import os
from pathlib import Path
import json

import nest_asyncio
nest_asyncio.apply()

import torch
from rgi.rgizero.games import game_registry
from rgi.rgizero.experiment import ExperimentRunner, ExperimentConfig
from rgi.rgizero.players.alphazero import AlphazeroPlayer
from rgi.rgizero.players.human_player import HumanPlayer
from rgi.rgizero.evaluators import ActionHistoryTransformerEvaluator

import notebook_utils
from notebook_utils import reload_local_modules


In [None]:
# Configuration
# Experiment directory name; load_model() resolves it under ../experiments.
EXPERIMENT_NAME: str = "smoketest-e2e-v7"
# Game identifier passed to game_registry.create_game().
GAME_NAME: str = "connect4"

# Device used for the models and evaluators below.
# NOTE(review): the exact return type of detect_device() is not visible here — confirm.
DEVICE = notebook_utils.detect_device()

In [None]:
def load_model(experiment_name, generation):
    """Load the model of one generation of an experiment.

    Reads ``config.json`` from ``<cwd>/../experiments/<experiment_name>``,
    builds an ``ExperimentRunner`` from that configuration, and asks the
    runner for the model of the requested generation. The model is then moved
    to ``DEVICE`` and put into eval mode.

    Args:
        experiment_name: Name of the experiment directory under ``experiments``.
        generation: Generation identifier forwarded to ``runner.load_model``.

    Returns:
        A ``(model, runner)`` tuple; the runner is returned as well because
        callers read further settings from it.
    """
    base_dir = Path.cwd().parent / 'experiments'
    exp_dir = base_dir / experiment_name

    # Use a context manager so the config file handle is closed promptly
    # instead of being left for the garbage collector.
    with open(exp_dir / "config.json", encoding="utf-8") as config_file:
        config = ExperimentConfig.from_json(json.load(config_file))
    runner = ExperimentRunner(config, base_dir=base_dir)

    # Load Model
    model = runner.load_model(generation)
    model.to(DEVICE)
    model.eval()
    print(f"Loaded model for experiment {experiment_name} from generation {generation}")
    return model, runner

# Load two generations of the same experiment so they can be compared.
model_1, runner_1 = load_model(experiment_name=EXPERIMENT_NAME, generation=10)
model_2, runner_2 = load_model(experiment_name=EXPERIMENT_NAME, generation=1)

In [None]:
# Setup Players: one game instance, and one evaluator/player pair per model.
game = game_registry.create_game(GAME_NAME)

evaluator_1 = ActionHistoryTransformerEvaluator(
    model_1,
    device=DEVICE,
    block_size=runner_1.n_max_context,
    vocab=runner_1.action_vocab,
)
evaluator_2 = ActionHistoryTransformerEvaluator(
    model_2,
    device=DEVICE,
    block_size=runner_2.n_max_context,
    vocab=runner_2.action_vocab,
)

# Both AI players share the same settings; adjust simulations or temperature
# here as needed.
ai_player_kwargs = {"simulations": 200, "temperature": 0.0, "print_thinking": True}
ai_player_1 = AlphazeroPlayer(game, evaluator_1, **ai_player_kwargs)
ai_player_2 = AlphazeroPlayer(game, evaluator_2, **ai_player_kwargs)
human_player = HumanPlayer(game)

# Choose sides: the list order is (Player 1, Player 2).
# For Human vs AI use:
# players = [human_player, ai_player_1]
players = [ai_player_1, ai_player_2]

print("Players set up. Run the next cell to play!")

In [None]:
# Play a single game from the initial state until it is terminal, printing
# the board after every move.
state = game.initial_state()

while not game.is_terminal(state):
    # The game's player id is shifted down by one to index into `players`.
    current_p_idx = game.current_player_id(state) - 1
    player = players[current_p_idx]

    # Identity (not equality) is what matters here: we want to know whether
    # this exact object is the human player, without invoking any __eq__.
    is_ai_turn = player is not human_player

    # Human player handles printing board inside _select_action_from_user,
    # so the extra status lines are only emitted for AI turns.
    if is_ai_turn:
        print(f"\nAI (Player {current_p_idx+1}) thinking...")

    result = player.select_action(state)
    action = result.action

    if is_ai_turn:
        print(f"AI chose: {action}")

    state = game.next_state(state, action)
    print(game.pretty_str(state))

# The final board is printed once more so it appears directly under the banner.
print("\nGame Over!")
print(game.pretty_str(state))
print(f"Rewards: {game.reward_array(state)}")

In [None]:

# print(f"Winner: {game.get_rewards(state)}")
# Bare expression as the last statement of the cell, so the notebook displays
# the reward array for the final `state` left behind by the game loop.
game.reward_array(state)