In [None]:
from kaggle_environments import evaluate, make, utils
import os
import sys
import inspect
import time
import numpy as np
import torch
import gymnasium as gym
from tianshou.algorithm.modelfree.dqn import DiscreteQLearningPolicy

from Submissions.mcts_agent import mcts_agent
from Submissions.minimax_agent import minimax_agent
from Submissions.rainbow.rainbow_agent import rainbow_agent
from Submissions.ppo.ppo_agent import ppo_agent

# Sanity check: build a fresh ConnectX environment (debug=True) and print its
# board rendered as ANSI text.
env = make("connectx", debug=True)
print(env.render(mode="ansi"))

[kaggle_environments.envs.open_spiel_env.open_spiel_env] INFO: Successfully loaded OpenSpiel environments: 8.
[kaggle_environments.envs.open_spiel_env.open_spiel_env] INFO:    open_spiel_chess
[kaggle_environments.envs.open_spiel_env.open_spiel_env] INFO:    open_spiel_connect_four
[kaggle_environments.envs.open_spiel_env.open_spiel_env] INFO:    open_spiel_gin_rummy
[kaggle_environments.envs.open_spiel_env.open_spiel_env] INFO:    open_spiel_go
[kaggle_environments.envs.open_spiel_env.open_spiel_env] INFO:    open_spiel_tic_tac_toe
[kaggle_environments.envs.open_spiel_env.open_spiel_env] INFO:    open_spiel_universal_poker
[kaggle_environments.envs.open_spiel_env.open_spiel_env] INFO:    open_spiel_repeated_poker
[kaggle_environments.envs.open_spiel_env.open_spiel_env] INFO:    open_spiel_python_repeated_pokerkit
[kaggle_environments.envs.open_spiel_env.open_spiel_env] INFO: OpenSpiel games skipped: 0.
+---+---+---+---+---+---+---+
| 0 | 0 | 0 | 0 | 0 | 0 | 0 |
+---+---+---+---+---+--

In [2]:
# Random agent
def random_agent(observation, configuration):
    """Return a randomly chosen column index whose leading board cell is empty.

    Args:
        observation: object exposing ``board``, an indexable sequence of cell
            values in which ``0`` marks an empty cell.
        configuration: object exposing ``columns``, the number of columns.

    Returns:
        An int in ``range(configuration.columns)`` for which
        ``observation.board[column] == 0``.

    Raises:
        IndexError: if none of the first ``configuration.columns`` cells is 0.
    """
    # Function-local import, as in the original (presumably so the agent stays
    # self-contained when exported on its own -- TODO confirm).
    import random

    # Only the first `columns` cells are inspected -- presumably the top row
    # of a row-major board; verify against the environment's board layout.
    open_columns = []
    for column in range(configuration.columns):
        if observation.board[column] == 0:
            open_columns.append(column)
    return random.choice(open_columns)

In [4]:
# Evaluating agent against random and negamax
def mean_reward(rewards, role):
    """Average one player's reward over a list of per-game reward pairs.

    Args:
        rewards: sequence of per-game pairs, indexable as ``pair[0]``
            (first player) and ``pair[1]`` (second player).
        role: ``"P1"`` selects the first entry of each pair; any other value
            selects the second entry.

    Returns:
        ``np.mean`` of the selected entries.
    """
    # Anything that is not exactly "P1" falls through to the second player.
    player_index = 0 if role == "P1" else 1
    selected = [game[player_index] for game in rewards]
    return np.mean(selected)

def evaluate_agent(agent, num_episodes=10):
    """Benchmark ``agent`` against the built-in "random" and "negamax" opponents.

    Four batches of ``num_episodes`` ConnectX games are run through
    ``evaluate``: the agent as first player and as second player against each
    opponent. The agent's mean reward for every batch is printed.

    The banner is printed *before* any game is played and each result is
    printed as soon as its batch finishes, so slow agents show progress
    instead of a silent cell. The final text is the same as when everything is
    printed at the end.

    Args:
        agent: agent accepted by ``evaluate`` (e.g. a callable taking
            ``(observation, configuration)``).
        num_episodes: number of games per batch.

    Returns:
        None. Results are only printed.
    """
    print(f"Evaluating agent against random and negamax with {num_episodes} games as P1 and P2...\n")

    # (role of `agent`, opponent label, line-up passed to evaluate, text
    # appended to the printed line). The "\n" after the last P1 line keeps the
    # blank line separating the P1 and P2 sections.
    matchups = (
        ("P1", "Random", [agent, "random"], ""),
        ("P1", "Negamax", [agent, "negamax"], "\n"),
        ("P2", "Random", ["random", agent], ""),
        ("P2", "Negamax", ["negamax", agent], ""),
    )

    for role, opponent_label, lineup, suffix in matchups:
        rewards = evaluate("connectx", lineup, num_episodes=num_episodes)
        print(f"{role} vs {opponent_label}: {mean_reward(rewards, role)}{suffix}")

In [5]:
# Debugging agent against random
def debug_agent(agent):
    """Play one verbose ConnectX game with ``agent`` as first player vs "random".

    For every move of ``agent`` the turn number, chosen action and decision
    time are printed. After the game the outcome and the final board (ANSI
    rendering) are printed.

    Args:
        agent: callable ``agent(observation, configuration)`` returning the
            action passed to ``trainer.step``.

    Returns:
        None. Everything is printed.
    """
    env = make("connectx", debug=True)
    trainer = env.train([None, "random"])
    observation = trainer.reset()

    while not env.done:
        # perf_counter is monotonic, so the measured interval cannot be
        # distorted by wall-clock adjustments.
        started = time.perf_counter()
        my_action = agent(observation, env.configuration)
        elapsed = time.perf_counter() - started
        print(f"Turn {observation.step + 1}: Action {my_action} (in {elapsed:.2f} seconds)")
        # Only the next observation is needed; the loop is driven by env.done.
        observation, _reward, _done, _info = trainer.step(my_action)

    my_state, opponent_state = env.state[0], env.state[1]
    if my_state.reward is None:
        # A missing reward means the agent's game did not finish normally
        # (e.g. an invalid move); comparing None with 0 would raise TypeError.
        print(f"\nGame aborted: agent status is {my_state.status!r} (no reward)\n")
    elif my_state.reward == opponent_state.reward:
        # Equal rewards for both players: nobody won.
        print("\nDraw :|\n")
    elif my_state.reward > 0:
        print("\nYou won :)\n")
    else:
        print("\nYou lost :(\n")
    print(env.render(mode="ansi"))

In [6]:
# Facing agents
def facing_agents(agent_a, agent_b, num_episodes=10):
    """Pit two agents against each other and report the outcome counts.

    The first ``num_episodes // 2`` games are played with ``agent_a`` moving
    first; in the remaining games ``agent_b`` moves first. Prints how often
    each agent received reward 1 and how often ``agent_a`` received reward 0
    (reported as draws).

    Args:
        agent_a: first agent, as accepted by ``env.run``.
        agent_b: second agent, as accepted by ``env.run``.
        num_episodes: total number of games.

    Returns:
        Tuple ``(a_rewards, b_rewards)``: per-game reward lists in play order.
    """
    arena = make("connectx", debug=True)
    games_with_a_first = num_episodes // 2
    a_rewards, b_rewards = [], []

    for game_index in range(num_episodes):
        a_moves_first = game_index < games_with_a_first
        lineup = [agent_a, agent_b] if a_moves_first else [agent_b, agent_a]
        arena.run(lineup)

        first_reward = arena.state[0].reward
        second_reward = arena.state[1].reward
        # Map seat rewards back to the agent that occupied each seat.
        if a_moves_first:
            a_rewards.append(first_reward)
            b_rewards.append(second_reward)
        else:
            b_rewards.append(first_reward)
            a_rewards.append(second_reward)

    a_wins = a_rewards.count(1)
    b_wins = b_rewards.count(1)
    draws = a_rewards.count(0)
    print(f"Agent A won {a_wins} times")
    print(f"Agent B won {b_wins} times")
    print(f"Draw {draws} times")
    return a_rewards, b_rewards

In [7]:
# Debugging and evaluating Minimax
# debug_agent plays one verbose game against "random"; evaluate_agent then
# prints the agent's mean rewards against "random" and "negamax".
debug_agent(minimax_agent)
evaluate_agent(minimax_agent)

Turn 1: Action 2 (in 0.36 seconds)
Turn 3: Action 3 (in 0.45 seconds)
Turn 5: Action 4 (in 0.30 seconds)
Turn 7: Action 3 (in 0.47 seconds)
Turn 9: Action 3 (in 0.61 seconds)
Turn 11: Action 3 (in 0.29 seconds)

You won :)

+---+---+---+---+---+---+---+
| 0 | 0 | 0 | 0 | 0 | 0 | 0 |
+---+---+---+---+---+---+---+
| 0 | 0 | 0 | 1 | 0 | 0 | 0 |
+---+---+---+---+---+---+---+
| 0 | 0 | 0 | 1 | 0 | 0 | 0 |
+---+---+---+---+---+---+---+
| 0 | 0 | 0 | 1 | 0 | 0 | 0 |
+---+---+---+---+---+---+---+
| 0 | 0 | 2 | 1 | 2 | 0 | 0 |
+---+---+---+---+---+---+---+
| 2 | 0 | 1 | 2 | 1 | 0 | 2 |
+---+---+---+---+---+---+---+

Evaluating agent against random and negamax with 10 games as P1 and P2...

P1 vs Random: 1.0
P1 vs Negamax: 1.0

P2 vs Random: 1.0
P2 vs Negamax: 0.9


In [None]:
# Debugging and evaluating MCTS
# debug_agent plays one verbose game against "random"; evaluate_agent then
# prints the agent's mean rewards against "random" and "negamax".
# NOTE(review): the saved output shows ~1.9 s per MCTS move and ends in a
# KeyboardInterrupt, i.e. the default evaluation (4 batches of 10 games) was
# not completed -- consider passing a smaller num_episodes here.
debug_agent(mcts_agent)
evaluate_agent(mcts_agent)

Turn 1: Action 3 (in 1.90 seconds)
Turn 3: Action 3 (in 1.90 seconds)
Turn 5: Action 3 (in 1.90 seconds)
Turn 7: Action 3 (in 1.90 seconds)

You won :)

+---+---+---+---+---+---+---+
| 0 | 0 | 0 | 0 | 0 | 0 | 0 |
+---+---+---+---+---+---+---+
| 0 | 0 | 0 | 0 | 0 | 0 | 0 |
+---+---+---+---+---+---+---+
| 0 | 0 | 0 | 1 | 0 | 0 | 0 |
+---+---+---+---+---+---+---+
| 0 | 0 | 0 | 1 | 0 | 0 | 0 |
+---+---+---+---+---+---+---+
| 0 | 2 | 0 | 1 | 0 | 0 | 0 |
+---+---+---+---+---+---+---+
| 0 | 2 | 2 | 1 | 0 | 0 | 0 |
+---+---+---+---+---+---+---+



KeyboardInterrupt: 

In [None]:
# Debugging and evaluating Rainbow
# debug_agent plays one verbose game against "random"; evaluate_agent then
# prints the agent's mean rewards against "random" and "negamax".
debug_agent(rainbow_agent)
evaluate_agent(rainbow_agent)

In [None]:
# Debugging and evaluating PPO
# debug_agent plays one verbose game against "random"; evaluate_agent then
# prints the agent's mean rewards against "random" and "negamax".
debug_agent(ppo_agent)
evaluate_agent(ppo_agent)

In [None]:
# Facing different agents
# Every pairing of the four agents is played once via facing_agents, which
# prints the win/draw counts. `res` is overwritten each time, so after the
# cell it holds only the (a_rewards, b_rewards) of the last pairing.
print("Facing MCTS vs Minimax")
res = facing_agents(mcts_agent, minimax_agent)

print("\nFacing MCTS vs Rainbow")
res = facing_agents(mcts_agent, rainbow_agent)

print("\nFacing MCTS vs PPO")
# Fixed: this pairing previously ran minimax_agent vs ppo_agent, so MCTS vs
# PPO was never played and Minimax vs PPO was played twice.
res = facing_agents(mcts_agent, ppo_agent)

print("\nFacing Minimax vs Rainbow")
res = facing_agents(minimax_agent, rainbow_agent)

print("\nFacing Minimax vs PPO")
res = facing_agents(minimax_agent, ppo_agent)

print("\nFacing Rainbow vs PPO")
res = facing_agents(rainbow_agent, ppo_agent)


Facing Minimax vs Rainbow
Agent A won 10 times
Agent B won 0 times
Draw 0 times
([1, 1, 1, 1, 1, 1, 1, 1, 1, 1], [-1, -1, -1, -1, -1, -1, -1, -1, -1, -1])

Facing Minimax vs Rainbow
Agent A won 10 times
Agent B won 0 times
Draw 0 times
([1, 1, 1, 1, 1, 1, 1, 1, 1, 1], [-1, -1, -1, -1, -1, -1, -1, -1, -1, -1])
