In [6]:
from kaggle_environments import evaluate, make, utils
from model import CNN
from copy import deepcopy
import torch
import numpy as np
import random


# Shared ConnectX environment used by every cell below.
env = make("connectx", debug=True)
# Game parameters are read once from the environment configuration.
inarow = env.configuration["inarow"]
# (rows, columns) of the board, in that order.
board_shape = tuple(env.configuration[dim] for dim in ("rows", "columns"))
# The network is constructed for exactly this board geometry.
model = CNN(board_shape=board_shape)

def mutate_model(model: torch.nn.Module, mutation_power):
    """Return a perturbed copy of ``model``; the input model is left untouched.

    Every parameter of the copy receives additive standard-normal noise
    scaled by ``mutation_power``.

    Args:
        model: Network to clone and perturb.
        mutation_power: Multiplier applied to the noise before it is added.

    Returns:
        A deep copy of ``model`` whose parameters have been perturbed.
    """
    clone = deepcopy(model)
    for weights in clone.parameters():
        noise = torch.randn_like(weights)
        # Write through ``.data`` so the update is not tracked by autograd.
        weights.data.add_(mutation_power * noise)
    return clone

class Agent:
    """Callable ConnectX agent that plays the best-scoring playable column.

    The wrapped ``model`` is called with the board as a
    ``(1, rows, columns)`` float tensor and must return one score per
    column (any leading singleton dimensions are fine).
    """

    def __init__(self, model) -> None:
        self.model = model

    def __call__(self, observation, configuration) -> int:
        """Choose a column for the current board.

        Args:
            observation: Mapping with a flat ``"board"`` list of length
                ``rows * columns`` where ``0`` marks an empty cell.
            configuration: Mapping providing ``"rows"`` and ``"columns"``.

        Returns:
            Index of the column to play.

        Raises:
            IndexError: If every column is already full.
        """
        rows, columns = configuration["rows"], configuration["columns"]
        board = torch.tensor(
            observation["board"], dtype=torch.float32
        ).reshape(rows, columns)

        # A column is unplayable once every cell in it is occupied.
        full_columns = torch.all(board != 0, dim=0)
        playable = (~full_columns).nonzero().flatten().tolist()

        # Pure inference: no autograd graph is needed to pick a move.
        with torch.no_grad():
            scores = self.model(board.unsqueeze(0))

        # Mask full columns with -inf instead of multiplying by the mask:
        # multiplication would zero out the scores of the playable columns
        # and flip the sign of the full ones.
        scores = scores.masked_fill(full_columns, float("-inf"))
        action = int(torch.argmax(scores).item())

        # Safety net for degenerate outputs (e.g. NaN scores).
        if action not in playable:
            action = random.choice(playable)
        return action

def train_agents(base_model, ngames=1000, render=True):
    """Hill-climb from ``base_model`` with champion-vs-mutant games.

    Each generation a mutated copy of the current champion's model plays
    one game against the champion on the shared ``env``. The mutant
    replaces the champion only when it finishes with a strictly higher
    reward. The champion always takes the first seat.

    Args:
        base_model: Model wrapped by the initial champion agent.
        ngames: Number of generations (games) to play.
        render: Whether to call ``env.render()`` after every game.

    Returns:
        A ``(n_times_model_changed, winning_agent)`` tuple: how often the
        champion was replaced, and the final champion agent.
    """
    winning_agent = Agent(base_model)
    n_times_model_changed = 0

    for i in range(ngames):
        # Noise scale decays linearly from 1 towards 0 over the run.
        mutation_power = 1 - i / ngames
        mutated_agent = Agent(mutate_model(winning_agent.model, mutation_power))

        env.reset()
        final_state = env.run([winning_agent, mutated_agent])[-1]
        if render:
            env.render()

        # A seat's reward may be None (e.g. after an invalid move or an
        # agent error); rank that below any real reward.
        champion_reward, challenger_reward = (
            float("-inf") if seat["reward"] is None else seat["reward"]
            for seat in final_state
        )
        if challenger_reward > champion_reward:
            winning_agent = mutated_agent
            n_times_model_changed += 1

    return n_times_model_changed, winning_agent

# Start the self-play loop from the freshly constructed CNN.
train_agents(model)

# def my_agent(observation, configuration):
#     from random import choice
#     return choice([c for c in range(configuration.columns) if observation.board[c] == 0])

KeyboardInterrupt: 

In [4]:
# Smoke test: ask the agent for a single move on a freshly reset board.
# NOTE(review): presumably the None slot marks the seat we control and
# "random" is the built-in opponent — confirm against kaggle-environments.
trainer = env.train([None,"random"])
agent = Agent(model)
# Initial observation for the controlled seat.
obs = trainer.reset()
# Call the agent directly, the same way the environment would.
action = agent(obs, env.configuration)

In [12]:
# Play one game between the base model and a mutated copy of it, then
# render the result inline in the notebook.
winning_agent = Agent(model)
# mutation_power=1: noise is added to the copied parameters unscaled.
mutated_agent = Agent(mutate_model(winning_agent.model, 1))

env.reset()
env.run([winning_agent, mutated_agent])
env.render(mode="ipython")