In [1]:
from kaggle_environments import evaluate, make, utils
import os
import sys
import inspect
import time
from pyplAI_algorithms import minimax_agent, mcts_agent

# Build a ConnectX environment (debug mode on) and print its current board as ANSI text.
env = make("connectx", debug=True)
print(env.render(mode="ansi"))

[kaggle_environments.envs.open_spiel.open_spiel] INFO: Successfully loaded OpenSpiel environments: 6.
[kaggle_environments.envs.open_spiel.open_spiel] INFO:    open_spiel_chess
[kaggle_environments.envs.open_spiel.open_spiel] INFO:    open_spiel_connect_four
[kaggle_environments.envs.open_spiel.open_spiel] INFO:    open_spiel_gin_rummy
[kaggle_environments.envs.open_spiel.open_spiel] INFO:    open_spiel_go
[kaggle_environments.envs.open_spiel.open_spiel] INFO:    open_spiel_tic_tac_toe
[kaggle_environments.envs.open_spiel.open_spiel] INFO:    open_spiel_universal_poker
[kaggle_environments.envs.open_spiel.open_spiel] INFO: OpenSpiel games skipped: 0.
+---+---+---+---+---+---+---+
| 0 | 0 | 0 | 0 | 0 | 0 | 0 |
+---+---+---+---+---+---+---+
| 0 | 0 | 0 | 0 | 0 | 0 | 0 |
+---+---+---+---+---+---+---+
| 0 | 0 | 0 | 0 | 0 | 0 | 0 |
+---+---+---+---+---+---+---+
| 0 | 0 | 0 | 0 | 0 | 0 | 0 |
+---+---+---+---+---+---+---+
| 0 | 0 | 0 | 0 | 0 | 0 | 0 |
+---+---+---+---+---+---+---+
| 0 | 0 | 0

In [2]:
# Random agent
def random_agent(observation, configuration):
    """Return a randomly chosen column that can still accept a piece.

    A column `c` is considered playable when `observation.board[c] == 0`, i.e. when
    the entry at index `c` among the first `configuration.columns` cells of the flat
    board is empty.

    Raises:
        IndexError: if no column is playable (random.choice on an empty list).
    """
    # Imported locally so the function stays self-contained when its source is
    # extracted on its own (e.g. with inspect.getsource).
    import random

    playable = []
    for column in range(configuration.columns):
        if observation.board[column] == 0:
            playable.append(column)
    return random.choice(playable)

In [3]:
# Evaluating agent against random and negamax
def mean_reward(rewards):
    """Average the first entry of every reward record.

    Args:
        rewards: sequence of per-episode records; only element 0 of each record
            (the first listed agent's reward) is used.

    Returns:
        The sum of those first entries divided by the number of records.

    Raises:
        ZeroDivisionError: if `rewards` is empty.
    """
    total = 0
    for episode in rewards:
        total += episode[0]
    return total / len(rewards)

def evaluate_agent(agent, num_episodes=10):
    """Print the agent's mean reward against the built-in "random" and "negamax" agents.

    Both match-ups are run first (the agent is always the first entry of the agent
    list), then the two averages are printed.

    Args:
        agent: the agent to evaluate, passed as-is to `evaluate`.
        num_episodes: number of episodes per opponent.
    """
    matchups = (
        ("random", "My Agent vs Random Agent:"),
        ("negamax", "My Agent vs Negamax Agent:"),
    )

    # Play every match-up before reporting anything, so the summary lines stay together.
    collected = [
        (label, evaluate("connectx", [agent, opponent], num_episodes=num_episodes))
        for opponent, label in matchups
    ]

    for label, rewards in collected:
        print(label, mean_reward(rewards))

In [2]:
# Debugging agent against random
def debug_agent(agent):
    """Play one verbose ConnectX game of `agent` against the built-in "random" agent.

    `agent` takes the first seat (`env.train([None, "random"])`). Every move is
    printed with the time the agent took to choose it, followed by the outcome and
    the final board.

    The outcome is read from the first player's final reward, using the same
    convention as `facing_agents` in this notebook: positive = win, negative = loss,
    zero = draw. A reward of `None` is reported as an aborted game instead of
    crashing on the comparison (kaggle_environments is expected to report `None`
    for an agent that errored, timed out or played an invalid move).

    Args:
        agent: callable `agent(observation, configuration)` returning a column.
    """
    env = make("connectx", debug=True)
    trainer = env.train([None, "random"])
    observation = trainer.reset()

    while not env.done:
        # perf_counter is the monotonic, high-resolution clock meant for intervals.
        started = time.perf_counter()
        my_action = agent(observation, env.configuration)
        elapsed = time.perf_counter() - started
        print(f"Turn {observation.step + 1}: Action {my_action} (in {elapsed:.2f} seconds)")
        # Only the next observation is needed; env.done drives the loop.
        observation, _, _, _ = trainer.step(my_action)

    final_reward = env.state[0].reward
    if final_reward is None:
        outcome = "\nGame aborted (agent error, timeout or invalid move)\n"
    elif final_reward > 0:
        outcome = "\nYou won :)\n"
    elif final_reward < 0:
        outcome = "\nYou lost :(\n"
    else:
        outcome = "\nDraw :|\n"
    print(outcome)
    print(env.render(mode="ansi"))

In [5]:
# Facing agents
def facing_agents(agent_a, agent_b, num_episodes=10):
    """Play `num_episodes` ConnectX games of `agent_a` vs `agent_b` and print a tally.

    `agent_a` is always listed first in `env.run`, so it holds the first seat in
    every game. A game is classified from Agent A's final reward: 1 = Agent A won,
    -1 = Agent B won, 0 = draw.

    Games in which either agent's final reward is `None` are counted separately as
    aborted (kaggle_environments is expected to report `None` for an agent that
    errored, timed out or played an invalid move). Otherwise such games would
    vanish from the tally, or be miscounted as draws when only Agent B failed and
    Agent A's reward is 0. The extra summary line is printed only when at least
    one game was aborted.

    Args:
        agent_a: first agent (any form accepted by `env.run`).
        agent_b: second agent.
        num_episodes: number of games to play.
    """
    env = make("connectx", debug=True)

    decided = []  # Agent A's reward for every game both agents finished normally
    aborted = 0
    for _ in range(num_episodes):
        env.reset()
        env.run([agent_a, agent_b])
        reward_a, reward_b = env.state[0].reward, env.state[1].reward
        if reward_a is None or reward_b is None:
            aborted += 1
        else:
            decided.append(reward_a)

    print(f"Agent A won {decided.count(1)} times")
    print(f"Agent B won {decided.count(-1)} times")
    print(f"Draw {decided.count(0)} times")
    if aborted:
        print(f"Aborted (agent error, timeout or invalid move) {aborted} times")

In [6]:
# Writing agent to file and validating submission format
def write_agent_to_file(agent):
    """Write the source code of `agent` to "<agent.__name__>.py" in the working directory.

    If the file already exists, the source is appended to it; otherwise the file is
    created. Calling this twice for the same agent therefore leaves two copies of
    the source in the file.

    Args:
        agent: a function whose source `inspect.getsource` can retrieve.
    """
    target = agent.__name__ + ".py"

    if os.path.exists(target):
        mode = "a"
    else:
        mode = "w"

    with open(target, mode) as handle:
        handle.write(inspect.getsource(agent))

def validate_submission(agent):
    """Check that the saved file for `agent` can play a full ConnectX game against itself.

    Reads "<agent.__name__>.py" with `utils.read_file`, runs one game in which both
    seats use that file's contents, and prints "Success :)" when both players end
    with status "DONE", otherwise "Failed :(".

    Args:
        agent: the function whose submission file should be validated; only its
            `__name__` is used to locate the file.
    """
    # Remember stdout and put it back after reading the file.
    # NOTE(review): presumably guards against kaggle_environments redirecting
    # stdout — confirm whether this is still needed.
    saved_stdout = sys.stdout
    submission_source = utils.read_file(agent.__name__ + ".py")
    sys.stdout = saved_stdout

    env = make("connectx", debug=True)
    env.run([submission_source, submission_source])

    both_done = all(env.state[seat].status == "DONE" for seat in (0, 1))
    if both_done:
        print("Success :)")
    else:
        print("Failed :(")

In [3]:
# Debugging Minimax: play one game of minimax_agent against the built-in "random"
# agent via debug_agent, printing each move with its decision time and the final board.
debug_agent(minimax_agent)

Turn 1: Action 2 (in 0.48 seconds)
Turn 3: Action 2 (in 0.63 seconds)
Turn 5: Action 1 (in 0.40 seconds)
Turn 7: Action 4 (in 0.47 seconds)
Turn 9: Action 2 (in 0.41 seconds)
Turn 11: Action 2 (in 0.13 seconds)

You won :)

+---+---+---+---+---+---+---+
| 0 | 0 | 0 | 0 | 0 | 0 | 0 |
+---+---+---+---+---+---+---+
| 0 | 0 | 0 | 0 | 0 | 0 | 0 |
+---+---+---+---+---+---+---+
| 0 | 0 | 1 | 0 | 0 | 0 | 0 |
+---+---+---+---+---+---+---+
| 0 | 0 | 1 | 0 | 2 | 0 | 0 |
+---+---+---+---+---+---+---+
| 0 | 1 | 1 | 0 | 1 | 0 | 0 |
+---+---+---+---+---+---+---+
| 2 | 2 | 1 | 0 | 2 | 0 | 2 |
+---+---+---+---+---+---+---+



In [None]:
# Evaluating Minimax: mean reward of minimax_agent against the built-in "random" and
# "negamax" agents, using evaluate_agent's default of 10 episodes per opponent.
evaluate_agent(minimax_agent)

My Agent vs Random Agent: 1.0
My Agent vs Negamax Agent: 1.0


In [8]:
# Debugging MCTS: play one game of mcts_agent against the built-in "random" agent
# via debug_agent, printing each move with its decision time and the final board.
debug_agent(mcts_agent)

Turn 1: Action 3 (in 1.90 seconds)
Turn 3: Action 3 (in 1.90 seconds)
Turn 5: Action 3 (in 1.90 seconds)
Turn 7: Action 3 (in 1.90 seconds)

You won :)

+---+---+---+---+---+---+---+
| 0 | 0 | 0 | 0 | 0 | 0 | 0 |
+---+---+---+---+---+---+---+
| 0 | 0 | 0 | 0 | 0 | 0 | 0 |
+---+---+---+---+---+---+---+
| 0 | 0 | 0 | 1 | 0 | 0 | 0 |
+---+---+---+---+---+---+---+
| 0 | 0 | 0 | 1 | 0 | 0 | 0 |
+---+---+---+---+---+---+---+
| 0 | 0 | 2 | 1 | 0 | 0 | 0 |
+---+---+---+---+---+---+---+
| 0 | 0 | 2 | 1 | 2 | 0 | 0 |
+---+---+---+---+---+---+---+



In [11]:
# Evaluating MCTS: mean reward of mcts_agent against the built-in "random" and
# "negamax" agents, using evaluate_agent's default of 10 episodes per opponent.
evaluate_agent(mcts_agent)

My Agent vs Random Agent: 1.0
My Agent vs Negamax Agent: 0.9


In [12]:
# Facing MCTS vs Minimax: mcts_agent is "Agent A" (listed first in every game) and
# minimax_agent is "Agent B"; facing_agents plays 10 games by default.
facing_agents(mcts_agent, minimax_agent)

Agent A won 7 times
Agent B won 3 times
Draw 0 times


In [None]:
# Creating agents submissions and validating them.
# validate_submission reads "<agent name>.py" from the working directory, so each file
# must already exist. NOTE: write_agent_to_file appends when the file exists, so
# uncommenting the write calls and re-running this cell duplicates the agent's source.
# write_agent_to_file(minimax_agent)
validate_submission(minimax_agent)

# write_agent_to_file(mcts_agent)
validate_submission(mcts_agent)

Success :)
Success :)
