In [1]:
import glob

checkpoint_path = "/Users/pveron/Code/alphazero-implementation/lightning_logs/alphazero/run_168_iter200_episodes100_sims100/checkpoints"
checkpoints = glob.glob(f"{checkpoint_path}/*.ckpt")

# Sort checkpoints by epoch number
checkpoints = sorted(checkpoints, key=lambda x: int(x.split("epoch=")[1].split("-")[0]))


for checkpoint in checkpoints:
    print(checkpoint)


/Users/pveron/Code/alphazero-implementation/lightning_logs/alphazero/run_168_iter200_episodes100_sims100/checkpoints/epoch=99-step=28780.ckpt
/Users/pveron/Code/alphazero-implementation/lightning_logs/alphazero/run_168_iter200_episodes100_sims100/checkpoints/epoch=199-step=87860.ckpt
/Users/pveron/Code/alphazero-implementation/lightning_logs/alphazero/run_168_iter200_episodes100_sims100/checkpoints/epoch=299-step=155200.ckpt
/Users/pveron/Code/alphazero-implementation/lightning_logs/alphazero/run_168_iter200_episodes100_sims100/checkpoints/epoch=399-step=226810.ckpt
/Users/pveron/Code/alphazero-implementation/lightning_logs/alphazero/run_168_iter200_episodes100_sims100/checkpoints/epoch=499-step=299530.ckpt
/Users/pveron/Code/alphazero-implementation/lightning_logs/alphazero/run_168_iter200_episodes100_sims100/checkpoints/epoch=599-step=374190.ckpt
/Users/pveron/Code/alphazero-implementation/lightning_logs/alphazero/run_168_iter200_episodes100_sims100/checkpoints/epoch=699-step=449980.

In [2]:
from collections import defaultdict

# Initialize Elo ratings for each checkpoint
elo_ratings: dict[str, int] = defaultdict(lambda: 1500)  # Start each model at 1500
K = 32  # K-factor for Elo calculation


def calculate_expected_score(rating1: int, rating2: int) -> float:
    """Calculate expected score for player 1 against player 2"""
    return 1 / (1 + 10 ** ((rating2 - rating1) / 400))


def update_elo(
    rating1: int, rating2: int, score: float, K: int = 32
) -> tuple[int, int]:
    """Update Elo ratings based on game outcome
    score: 1 for win, 0.5 for draw, 0 for loss (from rating1's perspective)
    """
    expected = calculate_expected_score(rating1, rating2)
    new_rating1 = rating1 + K * (score - expected)
    new_rating2 = rating2 + K * ((1 - score) - (1 - expected))
    return int(new_rating1), int(new_rating2)

In [3]:
from simulator.game.connect import Config

from alphazero_implementation.models.games.connect4 import CNNModel
from alphazero_implementation.models.model import Model

config = Config(6, 7, 4)


def load_model(checkpoint: str) -> Model:
    return CNNModel.load_from_checkpoint(  # type: ignore[arg-type]
        checkpoint,
        height=config.height,
        width=config.width,
        max_actions=config.width,
        num_players=config.num_players,
    )


In [4]:
from alphazero_implementation.models.model import Model
from alphazero_implementation.textual.agent import AlphaZeroAgent


def play_game(model1: Model, model2: Model) -> int:
    agent1 = AlphaZeroAgent(model1)
    agent2 = AlphaZeroAgent(model2)

    state = config.sample_initial_state()

    while not state.has_ended:
        current_agent = agent1 if state.player == 0 else agent2
        action = current_agent.predict_best_action(state)
        state = action.sample_next_state()
    winner = state.reward.argmax()  # type: ignore[attr-defined]
    return winner


In [5]:
from itertools import combinations

# Here you would add code to make models play against each other
# For each pair of checkpoints:
for checkpoint1, checkpoint2 in combinations(checkpoints, 2):
    # Load models and make them play against each other
    model1 = load_model(checkpoint1)
    model2 = load_model(checkpoint2)
    # Update Elo ratings based on game results
    # Example (you'll need to implement actual game logic):
    result = play_game(
        model1, model2
    )  # Returns 1 for model1 win, 0.5 for draw, 0 for model2 win
    print(f"{checkpoint1} vs {checkpoint2}: {result}")
    elo_ratings[checkpoint1], elo_ratings[checkpoint2] = update_elo(
        elo_ratings[checkpoint1], elo_ratings[checkpoint2], result
    )

# Print final Elo ratings
for checkpoint in checkpoints:
    print(f"Checkpoint {checkpoint}: Elo {elo_ratings[checkpoint]:.1f}")


/Users/pveron/Code/alphazero-implementation/lightning_logs/alphazero/run_168_iter200_episodes100_sims100/checkpoints/epoch=99-step=28780.ckpt vs /Users/pveron/Code/alphazero-implementation/lightning_logs/alphazero/run_168_iter200_episodes100_sims100/checkpoints/epoch=199-step=87860.ckpt: 1
/Users/pveron/Code/alphazero-implementation/lightning_logs/alphazero/run_168_iter200_episodes100_sims100/checkpoints/epoch=99-step=28780.ckpt vs /Users/pveron/Code/alphazero-implementation/lightning_logs/alphazero/run_168_iter200_episodes100_sims100/checkpoints/epoch=299-step=155200.ckpt: 0
/Users/pveron/Code/alphazero-implementation/lightning_logs/alphazero/run_168_iter200_episodes100_sims100/checkpoints/epoch=99-step=28780.ckpt vs /Users/pveron/Code/alphazero-implementation/lightning_logs/alphazero/run_168_iter200_episodes100_sims100/checkpoints/epoch=399-step=226810.ckpt: 0
/Users/pveron/Code/alphazero-implementation/lightning_logs/alphazero/run_168_iter200_episodes100_sims100/checkpoints/epoch=99

In [1]:
# Get checkpoint with highest Elo rating
best_checkpoint = max(elo_ratings.items(), key=lambda x: x[1])
print(
    f"\nBest checkpoint: {best_checkpoint[0]} with Elo rating: {best_checkpoint[1]:.1f}"
)


NameError: name 'elo_ratings' is not defined