In [None]:
# Move the working directory two levels up (presumably to the repository root so that
# `alpha_one` is importable — confirm for your checkout).
# The path is relative: re-running this cell in the same kernel moves up two more levels.
%cd ../..
# Enable autoreload in mode 2 so that edited modules are re-imported before each cell runs.
%reload_ext autoreload
%autoreload 2

In [None]:
from collections import defaultdict
from itertools import combinations
from math import sqrt

import pyspiel
import numpy as np
import matplotlib.pyplot as plt
import matplotlib

from alpha_one.model.model_manager import OpenSpielModelManager, OpenSpielModelConfig, PolicyGradientModelManager
from alpha_one.model.agent import RandomAgent, MCTSAgent, PolicyGradientAgent
from alpha_one.model.evaluation.agent import AgentEvaluator
from alpha_one.utils.mcts import initialize_bot, initialize_rollout_mcts_bot
from alpha_one.metrics import EloRatingSystem, TrueSkillRatingSystem
from alpha_one.plots import PlotManager

Let several (trained) agents play games against each other and report tournament metrics.

In [None]:
# Name of the game every agent in this notebook plays; also passed to the model and plot managers.
game_name = 'connect_four'
# Game object loaded through pyspiel from that name.
game = pyspiel.load_game(game_name)

In [None]:
# Manager for the stored AlphaZero runs of this game.
# NOTE(review): 'C4' is presumably the run-name prefix (the run selected later is 'C4-13') — confirm.
model_manager_az = OpenSpielModelManager(game_name, 'C4')
# Show the available runs so one can be picked in the next cell.
print(model_manager_az.list_runs())

In [None]:
# Run whose checkpoints are evaluated; also used in the agent's display name and by the plot manager.
model_az_run_name = 'C4-13'
checkpoint_manager_az = model_manager_az.get_checkpoint_manager(model_az_run_name)
# Show the checkpoints available for this run.
print(checkpoint_manager_az.list_checkpoints())

In [None]:
# Checkpoint iteration handed to `load_checkpoint` below.
# NOTE(review): -1 presumably selects the most recent checkpoint — confirm against the checkpoint manager.
model_az_iteration = -1

# 2. Load models

## 2.1. Load trained AlphaZero

In [None]:
# Load the AlphaZero model stored for the selected checkpoint iteration.
model_az = checkpoint_manager_az.load_checkpoint(model_az_iteration)

# Build the search bot around the loaded model (uct_c = sqrt(2), max_simulations = 100).
az_bot = initialize_bot(game, model_az, uct_c=sqrt(2), max_simulations=100)
# NOTE(review): the positional arguments 0 and 20 are not named here. The rollout MCTS agent
# passes `temperature=0` by keyword, so 0 is possibly the temperature — confirm both values
# against the MCTSAgent signature.
agent_az = MCTSAgent(game, az_bot, 0, 20)

## 2.2. Build random agent

In [None]:
# Baseline agent built only from the game (no model, no search bot).
agent_random = RandomAgent(game)

## 2.3. Build MCTS agent

In [None]:
# MCTS bot using rollouts instead of a trained model, with the same uct_c and max_simulations
# as the AlphaZero bots.
# NOTE(review): the positional argument 1 is presumably the number of rollouts per evaluation — confirm.
rollout_bot = initialize_rollout_mcts_bot(game, 1, uct_c=sqrt(2), max_simulations=100)
agent_rollout_mcts = MCTSAgent(game, rollout_bot, temperature=0)

## 2.4. Build untrained AlphaZero

In [None]:
# Build a fresh model from a config instead of loading a checkpoint, as an untrained reference.
# NOTE(review): the meaning of the positional config values ('mlp', 1, 1, 0, 0) is not visible
# in this notebook — confirm against OpenSpielModelConfig.
model_az_untrained = checkpoint_manager_az.build_model(OpenSpielModelConfig(game, 'mlp', 1, 1, 0, 0))
# Same bot and agent settings as the trained AlphaZero agent.
az_bot_untrained = initialize_bot(game, model_az_untrained, uct_c=sqrt(2), max_simulations=100)
agent_az_untrained = MCTSAgent(game, az_bot_untrained, 0, 20)

## 2.5. Load Policy Gradient Baseline

In [None]:
# Manager for the stored policy-gradient runs of this game.
model_manager_pg = PolicyGradientModelManager(game_name)
# Last expression of the cell: the list of runs is shown as the cell output.
model_manager_pg.list_runs()

In [None]:
# Policy-gradient run used as a baseline.
run_name_pg = 'PG-4'
checkpoint_manager_pg = model_manager_pg.get_checkpoint_manager(run_name_pg)
# NOTE(review): checkpoint 0 is hard-coded here, whereas the AlphaZero model is loaded with
# iteration -1 — confirm that 0 is the intended policy-gradient checkpoint.
model_pg = checkpoint_manager_pg.load_checkpoint(0)
agent_pg = PolicyGradientAgent(model_pg)

# 3. Tournament

In [None]:
# Tournament line-up. Each display name sits next to its agent so the two parallel
# lists derived below stay in the same order; an agent's list index is its player id.
_tournament_line_up = [
    (f"Alpha Zero ({model_az_run_name})", agent_az),
    ("Random", agent_random),
    ("MCTS Rollout", agent_rollout_mcts),
    ("Alpha Zero untrained", agent_az_untrained),
    ("Policy Gradient", agent_pg),
]
agents = [line_up_agent for _line_up_name, line_up_agent in _tournament_line_up]
agent_names = [line_up_name for line_up_name, _line_up_agent in _tournament_line_up]

In [None]:
# Evaluator used to play single matches between two agents of `game`.
evaluator = AgentEvaluator(game)

# Both rating systems are updated side by side from the same match outcomes.
# NOTE(review): 40 is presumably the Elo K-factor — confirm against EloRatingSystem.
elo_rating_system, true_skill_rating_system = rating_systems = [EloRatingSystem(40), TrueSkillRatingSystem()]
rating_system_names = ['Elo Rating', 'TrueSkill Rating']

# One (initially empty) ratings history per rating system, in the same order as `rating_systems`.
elo_ratings_history, true_skill_ratings_history = ratings_histories = [[], []]

In [1]:
def play_match(agent_id_player_1, agent_id_player_2):
    """Play one match between two tournament agents and return its outcome.

    Args:
        agent_id_player_1: Index into ``agents`` of the agent passed first to the evaluator.
        agent_id_player_2: Index into ``agents`` of the agent passed second to the evaluator.

    Returns:
        The match outcome, with players 0 and 1 relabelled to the two agent indices so
        that outcomes of different pairings can be fed to the rating systems together.
    """
    # The evaluator also returns the game trajectory, which the tournament does not need.
    match_outcome, _trajectory = evaluator.evaluate(agents[agent_id_player_1], agents[agent_id_player_2])
    renamed_outcome = match_outcome.with_renamed_players({0: agent_id_player_1, 1: agent_id_player_2})
    # NOTE(review): this notebook does not show whether `with_renamed_players` relabels in
    # place or returns a relabelled copy. Prefer the returned object when there is one, and
    # fall back to the (then in-place updated) outcome otherwise, so both cases are handled.
    return match_outcome if renamed_outcome is None else renamed_outcome

In [None]:
# Number of match days; on each one every pair of agents meets twice.
n_match_days = 5

# All unordered pairs of agent ids (`combinations` comes from itertools, imported at the top).
pairings = list(combinations(range(len(agents)), 2))

# Note: the histories are appended to, so re-running this cell continues the tournament
# instead of restarting it. Re-run the rating-system setup cell first for a fresh start.
for _ in range(n_match_days):
    match_outcomes = []
    for agent_id_1, agent_id_2 in pairings:
        # Play each pairing in both seat orders.
        match_outcomes.append(play_match(agent_id_1, agent_id_2))
        match_outcomes.append(play_match(agent_id_2, agent_id_1))

    # Feed the whole match day to every rating system and record the ratings afterwards.
    for rating_system, ratings_history in zip(rating_systems, ratings_histories):
        rating_system.update_ratings(match_outcomes)
        # Store a copy so the recorded entry is a separate container from `players`.
        ratings_history.append(rating_system.players.copy())

# 4. Plot Results

In [None]:
# Plot manager for this game and AlphaZero run; used below to save the tournament plots.
plot_manager = PlotManager(game_name, model_az_run_name)

In [None]:
# One line chart per rating system: every agent's rating after each match day.
for rating_system_name, ratings_history in zip(rating_system_names, ratings_histories):
    match_days = range(1, len(ratings_history) + 1)
    for player_id, agent_name in enumerate(agent_names):
        player_ratings = [ratings[player_id] for ratings in ratings_history]
        plt.plot(match_days, player_ratings, label=agent_name)
    plt.title(f"Tournament ({rating_system_name})")
    plt.xlabel("Matchday")
    plt.ylabel(rating_system_name)
    plt.legend()
    # Save before showing, while the figure is still the current one.
    plot_manager.save_current_plot(f"tournament_{rating_system_name}.pdf")
    plt.show()

In [None]:
# Report each agent's Elo rating after the last match day, rounded to whole points.
print("Final Elo ratings:")
final_elo_ratings = elo_rating_system.players
for player_id, agent_name in enumerate(agent_names):
    print(f" - {agent_name}: {final_elo_ratings[player_id]:.0f}")

In [None]:
# Entry [i, j] holds the Elo system's win probability computed from the ratings of
# agent i (first argument) and agent j (second argument).
n_agents = len(agents)
win_probability_matrix = np.zeros((n_agents, n_agents))
for agent_1, agent_2 in np.ndindex(n_agents, n_agents):
    rating_1 = elo_rating_system.get_rating(agent_1)
    rating_2 = elo_rating_system.get_rating(agent_2)
    win_probability_matrix[agent_1, agent_2] = elo_rating_system.calculate_win_probability(rating_1, rating_2)

In [None]:
# Heat map of the Elo win-probability matrix: rows are the first agent, columns the second.
plt.figure(figsize=(12, 8))
ax = plt.gca()
plt.title("Probabilities of winning against other models", pad=20)
ax.matshow(win_probability_matrix, cmap=matplotlib.colors.LinearSegmentedColormap.from_list("", ["red", "white", "green"]))
# Put exactly one tick on every row/column before labelling. Prepending '' to the labels
# relied on the default tick locator and misplaces the names when its ticks differ.
tick_positions = list(range(len(agent_names)))
ax.set_xticks(tick_positions)
ax.set_xticklabels(agent_names)
ax.set_yticks(tick_positions)
ax.set_yticklabels(agent_names)
plt.tight_layout()
plt.show()