In [1]:
from kaggle_environments import evaluate, make
import time
import numpy as np
import torch
import gymnasium as gym

from agents.mcts_agent import mcts_agent
from agents.minimax_agent import minimax_agent
from agents.rainbow_agent import rainbow_agent
from agents.ppo_agent import ppo_agent
from agents.alphazero_agent import alphazero_agent

# Create a ConnectX environment with debug=True and print the text (ANSI)
# rendering of the board before any move has been played.
env = make("connectx", debug=True)
print(env.render(mode="ansi"))

[kaggle_environments.envs.open_spiel_env.open_spiel_env] INFO: Successfully loaded OpenSpiel environments: 8.
[kaggle_environments.envs.open_spiel_env.open_spiel_env] INFO:    open_spiel_chess
[kaggle_environments.envs.open_spiel_env.open_spiel_env] INFO:    open_spiel_connect_four
[kaggle_environments.envs.open_spiel_env.open_spiel_env] INFO:    open_spiel_gin_rummy
[kaggle_environments.envs.open_spiel_env.open_spiel_env] INFO:    open_spiel_go
[kaggle_environments.envs.open_spiel_env.open_spiel_env] INFO:    open_spiel_tic_tac_toe
[kaggle_environments.envs.open_spiel_env.open_spiel_env] INFO:    open_spiel_universal_poker
[kaggle_environments.envs.open_spiel_env.open_spiel_env] INFO:    open_spiel_repeated_poker
[kaggle_environments.envs.open_spiel_env.open_spiel_env] INFO:    open_spiel_python_repeated_pokerkit
[kaggle_environments.envs.open_spiel_env.open_spiel_env] INFO: OpenSpiel games skipped: 0.
+---+---+---+---+---+---+---+
| 0 | 0 | 0 | 0 | 0 | 0 | 0 |
+---+---+---+---+---+--

In [2]:
# Random agent
def random_agent(observation, configuration):
    """Return a randomly chosen column among those that can still be played.

    A column index ``c`` counts as playable when ``observation.board[c] == 0``
    (presumably the top cell of that column in a flattened, row-major board --
    TODO confirm against the environment spec).

    Args:
        observation: Object exposing a ``board`` sequence.
        configuration: Object exposing the number of ``columns``.

    Returns:
        The selected column index.
    """
    import random

    playable = []
    for column in range(configuration.columns):
        if observation.board[column] == 0:
            playable.append(column)
    return random.choice(playable)

In [None]:
# Evaluating agent against random and negamax
def mean_reward(rewards, role):
    """Average the reward obtained from one seat over a batch of games.

    Args:
        rewards: Sequence of per-game reward pairs, indexed as
            ``[first_player_reward, second_player_reward]``.
        role: ``"P1"`` to average the first entry of every pair, ``"P2"`` to
            average the second entry.

    Returns:
        The mean of the selected rewards, as computed by ``np.mean``.

    Raises:
        ValueError: If ``role`` is neither ``"P1"`` nor ``"P2"``.
    """
    seat_by_role = {"P1": 0, "P2": 1}
    # Reject unknown roles instead of silently scoring them as P2, which would
    # turn a typo such as "p1" into a wrong (but plausible-looking) number.
    if role not in seat_by_role:
        raise ValueError(f"role must be 'P1' or 'P2', got {role!r}")
    seat = seat_by_role[role]
    return np.mean([game[seat] for game in rewards])

def evaluate_agent(agent, num_episodes=10):
    """Print how ``agent`` scores against the built-in random and negamax bots.

    Four batches of ``num_episodes`` ConnectX games are run through
    ``evaluate``: the agent as first and as second player, against
    ``"random"`` and against ``"negamax"``. For every batch the mean reward
    from the agent's seat is printed, followed by the raw reward list.

    Args:
        agent: Agent to evaluate, in any form ``evaluate`` accepts.
        num_episodes: Number of games played in each of the four batches.
    """
    # Announce the run before starting it: with slow agents the four batches
    # take a while, and the banner would otherwise only appear once all the
    # work is already finished.
    print(f"Evaluating agent against random and negamax with {num_episodes} games as P1 and P2...\n")

    rewards_random_p1 = evaluate("connectx", [agent, "random"], num_episodes=num_episodes)
    rewards_random_p2 = evaluate("connectx", ["random", agent], num_episodes=num_episodes)
    rewards_negamax_p1 = evaluate("connectx", [agent, "negamax"], num_episodes=num_episodes)
    rewards_negamax_p2 = evaluate("connectx", ["negamax", agent], num_episodes=num_episodes)

    # Agent playing first.
    print(f"P1 vs Random: {mean_reward(rewards_random_p1, 'P1')}")
    print(rewards_random_p1)
    print(f"P1 vs Negamax: {mean_reward(rewards_negamax_p1, 'P1')}")
    print(rewards_negamax_p1)

    # Agent playing second.
    print(f"\nP2 vs Random: {mean_reward(rewards_random_p2, 'P2')}")
    print(rewards_random_p2)
    print(f"P2 vs Negamax: {mean_reward(rewards_negamax_p2, 'P2')}")
    print(rewards_negamax_p2)

In [4]:
# Debugging agent against a chosen opponent (random by default)
def debug_agent(agent, agent_opponent="random"):
    """Play one verbose ConnectX game with ``agent`` moving first.

    The agent is called by hand on every turn so that its chosen action and
    the wall-clock time of the call can be printed. When the game ends, the
    outcome from the agent's point of view and the final board are printed.

    Args:
        agent: Callable ``agent(observation, configuration)`` returning a
            0-based column index.
        agent_opponent: Opponent passed to ``env.train`` for the second seat;
            defaults to the built-in ``"random"`` agent.
    """
    env = make("connectx", debug=True)
    # The `None` slot is the seat stepped manually through `trainer`.
    trainer = env.train([None, agent_opponent])
    observation = trainer.reset()

    while not env.done:
        # perf_counter is monotonic, so the measured duration cannot be skewed
        # by system clock adjustments.
        started = time.perf_counter()
        my_action = agent(observation, env.configuration)
        elapsed = time.perf_counter() - started
        # Turn and column are shown 1-based for readability.
        print(f"Turn {observation.step + 1}: Action {my_action + 1} (in {elapsed:.2f} seconds)")
        observation, _, _, _ = trainer.step(my_action)

    final_reward = env.state[0].reward
    # A reward of 0 is a draw, not a loss; a missing reward (None) is reported
    # as a loss instead of crashing on the comparison.
    if final_reward is not None and final_reward > 0:
        print("\nYou won :)\n")
    elif final_reward == 0:
        print("\nIt's a draw :|\n")
    else:
        print("\nYou lost :(\n")
    print(env.render(mode="ansi"))

In [5]:
# Facing agents
def facing_agents(agent_a, agent_b, num_episodes=10):
    """Play ``agent_a`` against ``agent_b`` and print a win/draw tally.

    The first ``num_episodes // 2`` games are run with ``agent_a`` in the
    first seat; the remaining games are run with ``agent_b`` in the first
    seat. After each game the final reward of both seats is recorded.

    Args:
        agent_a: First agent ("Agent A" in the printed summary).
        agent_b: Second agent ("Agent B" in the printed summary).
        num_episodes: Total number of games to play.

    Returns:
        Tuple ``(a_rewards, b_rewards)`` with one reward per game for each
        agent, in the order the games were played.
    """
    a_rewards, b_rewards = [], []
    env = make("connectx", debug=True)
    games_with_a_first = num_episodes // 2

    for episode in range(num_episodes):
        # Pick the seating for this game and which list receives the reward
        # of each seat.
        if episode < games_with_a_first:
            lineup, first_seat_log, second_seat_log = [agent_a, agent_b], a_rewards, b_rewards
        else:
            lineup, first_seat_log, second_seat_log = [agent_b, agent_a], b_rewards, a_rewards

        env.run(lineup)
        first_seat_log.append(env.state[0].reward)
        second_seat_log.append(env.state[1].reward)

    print(f"Agent A won {a_rewards.count(1)} times")
    print(f"Agent B won {b_rewards.count(1)} times")
    print(f"Draw {a_rewards.count(0)} times")
    return a_rewards, b_rewards

In [21]:
# Debugging and evaluating Minimax:
# one verbose game (debug_agent's opponent defaults to "random"), then mean
# rewards against "random" and "negamax" from both seats.
debug_agent(minimax_agent)
evaluate_agent(minimax_agent)

Turn 1: Action 2 (in 0.34 seconds)
Turn 3: Action 3 (in 0.42 seconds)
Turn 5: Action 2 (in 0.66 seconds)
Turn 7: Action 2 (in 0.60 seconds)
Turn 9: Action 2 (in 0.11 seconds)

You won :)

+---+---+---+---+---+---+---+
| 0 | 0 | 0 | 0 | 0 | 0 | 0 |
+---+---+---+---+---+---+---+
| 0 | 0 | 0 | 0 | 0 | 0 | 0 |
+---+---+---+---+---+---+---+
| 0 | 0 | 1 | 0 | 0 | 0 | 0 |
+---+---+---+---+---+---+---+
| 0 | 0 | 1 | 0 | 0 | 0 | 0 |
+---+---+---+---+---+---+---+
| 0 | 0 | 1 | 1 | 2 | 0 | 0 |
+---+---+---+---+---+---+---+
| 2 | 0 | 1 | 2 | 2 | 0 | 0 |
+---+---+---+---+---+---+---+

Evaluating agent against random and negamax with 10 games as P1 and P2...

P1 vs Random: 1.0
P1 vs Negamax: 1.0

P2 vs Random: 1.0
P2 vs Negamax: 1.0


In [22]:
# Debugging and evaluating MCTS:
# one verbose game with per-move timing, then the random/negamax evaluation
# with the default number of episodes.
debug_agent(mcts_agent)
evaluate_agent(mcts_agent)

Turn 1: Action 3 (in 1.90 seconds)
Turn 3: Action 4 (in 1.90 seconds)
Turn 5: Action 2 (in 1.90 seconds)
Turn 7: Action 1 (in 1.90 seconds)

You won :)

+---+---+---+---+---+---+---+
| 0 | 0 | 0 | 0 | 0 | 0 | 0 |
+---+---+---+---+---+---+---+
| 0 | 0 | 0 | 0 | 0 | 0 | 0 |
+---+---+---+---+---+---+---+
| 0 | 0 | 0 | 0 | 0 | 0 | 0 |
+---+---+---+---+---+---+---+
| 0 | 0 | 0 | 0 | 0 | 0 | 0 |
+---+---+---+---+---+---+---+
| 2 | 0 | 0 | 2 | 0 | 0 | 0 |
+---+---+---+---+---+---+---+
| 2 | 1 | 1 | 1 | 1 | 0 | 0 |
+---+---+---+---+---+---+---+

Evaluating agent against random and negamax with 10 games as P1 and P2...

P1 vs Random: 1.0
P1 vs Negamax: 1.0

P2 vs Random: 1.0
P2 vs Negamax: 1.0


In [39]:
# Debugging and evaluating Rainbow:
# one verbose game with per-move timing, then the random/negamax evaluation
# with the default number of episodes.
debug_agent(rainbow_agent)
evaluate_agent(rainbow_agent)

Turn 1: Action 4 (in 0.03 seconds)
Turn 3: Action 5 (in 0.00 seconds)
Turn 5: Action 5 (in 0.00 seconds)
Turn 7: Action 5 (in 0.00 seconds)
Turn 9: Action 5 (in 0.00 seconds)

You won :)

+---+---+---+---+---+---+---+
| 0 | 0 | 0 | 0 | 0 | 0 | 0 |
+---+---+---+---+---+---+---+
| 0 | 0 | 0 | 0 | 1 | 0 | 0 |
+---+---+---+---+---+---+---+
| 0 | 0 | 0 | 0 | 1 | 0 | 0 |
+---+---+---+---+---+---+---+
| 0 | 0 | 0 | 0 | 1 | 0 | 0 |
+---+---+---+---+---+---+---+
| 0 | 0 | 0 | 0 | 1 | 2 | 0 |
+---+---+---+---+---+---+---+
| 0 | 0 | 0 | 1 | 2 | 2 | 2 |
+---+---+---+---+---+---+---+

Evaluating agent against random and negamax with 10 games as P1 and P2...

P1 vs Random: 1.0
P1 vs Negamax: 0.4

P2 vs Random: 1.0
P2 vs Negamax: 0.5


In [None]:
# Debugging and evaluating PPO:
# one verbose game with per-move timing, then the random/negamax evaluation
# with the default number of episodes.
debug_agent(ppo_agent)
evaluate_agent(ppo_agent)

Turn 1: Action 4 (in 0.00 seconds)
Turn 3: Action 4 (in 0.00 seconds)
Turn 5: Action 4 (in 0.00 seconds)
Turn 7: Action 4 (in 0.00 seconds)

You won :)

+---+---+---+---+---+---+---+
| 0 | 0 | 0 | 0 | 0 | 0 | 0 |
+---+---+---+---+---+---+---+
| 0 | 0 | 0 | 0 | 0 | 0 | 0 |
+---+---+---+---+---+---+---+
| 0 | 0 | 0 | 1 | 0 | 0 | 0 |
+---+---+---+---+---+---+---+
| 0 | 0 | 0 | 1 | 0 | 0 | 0 |
+---+---+---+---+---+---+---+
| 2 | 0 | 0 | 1 | 0 | 0 | 0 |
+---+---+---+---+---+---+---+
| 2 | 0 | 0 | 1 | 2 | 0 | 0 |
+---+---+---+---+---+---+---+

Evaluating agent against random and negamax with 10 games as P1 and P2...

P1 vs Random: 1.0
P1 vs Negamax: 0.6

P2 vs Random: 1.0
P2 vs Negamax: 0.6


In [8]:
# Debugging and evaluating Alphazero:
# one verbose game with per-move timing, then the random/negamax evaluation
# with the default number of episodes.
debug_agent(alphazero_agent)
evaluate_agent(alphazero_agent)

MCTS: 984 simulations, best action: 5
Turn 1: Action 6 (in 1.80 seconds)
MCTS: 790 simulations, best action: 4
Turn 3: Action 5 (in 1.80 seconds)
MCTS: 7512 simulations, best action: 3
Turn 5: Action 4 (in 1.80 seconds)
Winning move at column 2
Turn 7: Action 3 (in 0.00 seconds)

You won :)

+---+---+---+---+---+---+---+
| 0 | 0 | 0 | 0 | 0 | 0 | 0 |
+---+---+---+---+---+---+---+
| 0 | 0 | 0 | 0 | 0 | 0 | 0 |
+---+---+---+---+---+---+---+
| 0 | 0 | 0 | 0 | 0 | 0 | 0 |
+---+---+---+---+---+---+---+
| 0 | 0 | 0 | 0 | 0 | 0 | 0 |
+---+---+---+---+---+---+---+
| 2 | 0 | 0 | 0 | 2 | 0 | 0 |
+---+---+---+---+---+---+---+
| 2 | 0 | 1 | 1 | 1 | 1 | 0 |
+---+---+---+---+---+---+---+

Evaluating agent against random and negamax with 10 games as P1 and P2...

P1 vs Random: 1.0
[[1, -1], [1, -1], [1, -1], [1, -1], [1, -1], [1, -1], [1, -1], [1, -1], [1, -1], [1, -1]]
P1 vs Negamax: 0.7

[[0, 0], [0, 0], [1, -1], [1, -1], [1, -1], [1, -1], [0, 0], [1, -1], [1, -1], [1, -1]]
P2 vs Random: 1.0
[[-1, 

In [None]:
# Facing different agents: MCTS against each of the other agents.
# Each entry pairs the banner to print with the opponent to face.
mcts_matchups = [
    ("Facing MCTS vs Minimax", minimax_agent),
    ("\nFacing MCTS vs Rainbow", rainbow_agent),
    ("\nFacing MCTS vs PPO", ppo_agent),
    ("\nFacing MCTS vs Alphazero", alphazero_agent),
]

for banner, rival in mcts_matchups:
    print(banner)
    # `res` keeps the (a_rewards, b_rewards) pair of the latest matchup.
    res = facing_agents(mcts_agent, rival)

Facing MCTS vs Minimax
Agent A won 7 times
Agent B won 1 times
Draw 2 times

Facing MCTS vs Rainbow
Agent A won 9 times
Agent B won 0 times
Draw 1 times

Facing MCTS vs PPO
Agent A won 10 times
Agent B won 0 times
Draw 0 times

Facing Minimax vs Rainbow
Agent A won 10 times
Agent B won 0 times
Draw 0 times

Facing Minimax vs PPO
Agent A won 10 times
Agent B won 0 times
Draw 0 times

Facing Rainbow vs PPO
Agent A won 2 times
Agent B won 7 times
Draw 1 times


In [None]:
# Minimax against the remaining agents; each entry pairs the banner to print
# with the opponent to face.
minimax_matchups = [
    ("\nFacing Minimax vs Rainbow", rainbow_agent),
    ("\nFacing Minimax vs PPO", ppo_agent),
    ("\nFacing Minimax vs Alphazero", alphazero_agent),
]

for banner, rival in minimax_matchups:
    print(banner)
    # `res` keeps the (a_rewards, b_rewards) pair of the latest matchup.
    res = facing_agents(minimax_agent, rival)

In [None]:
# Rainbow against the remaining agents; each entry pairs the banner to print
# with the opponent to face.
rainbow_matchups = [
    ("\nFacing Rainbow vs PPO", ppo_agent),
    ("\nFacing Rainbow vs Alphazero", alphazero_agent),
]

for banner, rival in rainbow_matchups:
    print(banner)
    # `res` keeps the (a_rewards, b_rewards) pair of the latest matchup.
    res = facing_agents(rainbow_agent, rival)

In [None]:
# Last remaining pairing: PPO as "Agent A" against Alphazero as "Agent B",
# played with facing_agents' default number of episodes.
print("\nFacing PPO vs Alphazero")
res = facing_agents(ppo_agent, alphazero_agent)