In [1]:
from kaggle_environments import evaluate, make, utils
from rule_based_mcts import mcts_agent
from multiprocessing import Pool
import random
from collections import defaultdict
from pprint import pprint
from operator import itemgetter

In [2]:
# Board settings handed to the environment for every game:
# a 7-column by 6-row grid where 4 in a row wins.
config = dict(columns=7, rows=6, inarow=4)

In [3]:
# Fix the state of the global `random` generator so that the parameter
# draws made through it after this cell are repeatable.
random.seed(2_304_982_340)

def random_mcts_config():
    """Draw one random set of keyword arguments for ``mcts_agent``.

    Consumes three values from the global ``random`` generator, in this
    order: ``score_exp``, ``c_1``, ``c_2``.

    Returns:
        dict: with keys
            ``'score_exp'`` - float drawn uniformly between 1 and 4,
            ``'c_1'``       - float drawn uniformly between 0.5 and 2,
            ``'c_2'``       - int, ``10**e`` truncated, with ``e`` drawn
                              uniformly between 3 and 6 (i.e. log-uniform
                              over roughly 1e3 .. 1e6).
    """
    score_exp = random.uniform(1, 4)
    c_1 = random.uniform(0.5, 2)
    # Sample the exponent rather than the value so that every order of
    # magnitude between 10^3 and 10^6 is equally likely.
    c_2 = int(10 ** random.uniform(3, 6))
    return {'score_exp': score_exp, 'c_1': c_1, 'c_2': c_2}

In [4]:
def create_random_agent():
    """Create an agent bound to a freshly sampled parameter set.

    Returns:
        tuple: ``(agent, params)``. ``params`` is the dict returned by
        ``random_mcts_config()``; ``agent`` is a function of
        ``(observation, configuration)`` that forwards both arguments to
        ``mcts_agent`` with ``params`` expanded as keyword arguments.
    """
    params = random_mcts_config()

    def _agent(observation, configuration):
        # `params` is captured by the closure, so each agent keeps the
        # parameter set that was drawn when it was created.
        return mcts_agent(observation, configuration, **params)

    return (_agent, params)

In [5]:
# Size of the tournament field.
num_agents = 30

# agent id (0 .. num_agents-1) -> (agent callable, parameter dict),
# created in id order.
agents = dict(enumerate(create_random_agent() for _ in range(num_agents)))

In [6]:
# Show the sampled parameter dict (second tuple element) of every agent,
# one per line, in agent-id order.
for agent_params in map(itemgetter(1), agents.values()):
    print(agent_params)

{'score_exp': 3.9583926933365094, 'c_1': 1.2069487463790587, 'c_2': 1596}
{'score_exp': 3.233811922622582, 'c_1': 0.8705138537616335, 'c_2': 17261}
{'score_exp': 2.676635779041269, 'c_1': 0.5951919820943068, 'c_2': 10184}
{'score_exp': 2.2075751176884735, 'c_1': 1.2974073107483504, 'c_2': 84727}
{'score_exp': 2.870655546745196, 'c_1': 0.8055586089420715, 'c_2': 9624}
{'score_exp': 3.9856745435150778, 'c_1': 0.8155848842296198, 'c_2': 6317}
{'score_exp': 1.4056169508130143, 'c_1': 0.7738371282370062, 'c_2': 12409}
{'score_exp': 1.0743233616558903, 'c_1': 1.7688289031244118, 'c_2': 89283}
{'score_exp': 3.481856182231436, 'c_1': 1.0252471630795175, 'c_2': 40338}
{'score_exp': 1.5419119969740782, 'c_1': 1.8458439934532447, 'c_2': 2921}
{'score_exp': 1.4886013569048901, 'c_1': 1.140703454289804, 'c_2': 3594}
{'score_exp': 2.5674476202366554, 'c_1': 1.253406471776142, 'c_2': 3348}
{'score_exp': 3.6112531000103187, 'c_1': 1.721245563755075, 'c_2': 1410}
{'score_exp': 1.568594191730147, 'c_1':

In [7]:
def play_game(agent_id_1, agent_id_2):
    """Run one ConnectX game between two agents from ``agents``.

    Args:
        agent_id_1: key into ``agents`` of the agent passed first to
            ``env.run``.
        agent_id_2: key into ``agents`` of the agent passed second.

    Returns:
        tuple: ``(reward_1, reward_2)`` read from ``env.state[0]`` and
        ``env.state[1]`` once the run has finished.
        NOTE(review): the tally cell treats ``None`` rewards specially, so
        a reward may apparently be ``None`` - confirm against the
        kaggle_environments version in use.
    """
    env = make("connectx", configuration=config, debug=False)
    env.reset()

    # Element 0 of each `agents` entry is the callable; element 1 is its
    # parameter dict, which is not needed to play.
    players = [agents[agent_id][0] for agent_id in (agent_id_1, agent_id_2)]
    env.run(players)

    first_state, second_state = env.state[0], env.state[1]
    return first_state['reward'], second_state['reward']

In [8]:
# Every ordered pair of distinct agent ids. Both (a, b) and (b, a) are
# included, so each pairing is played twice with the order swapped.
games = [
    (first_id, second_id)
    for first_id in range(num_agents)
    for second_id in range(num_agents)
    if first_id != second_id
]

In [None]:
def f(pair):
    """Single-argument adapter around ``play_game`` for ``Pool.map``.

    Args:
        pair: indexable whose elements 0 and 1 are the two agent ids.

    Returns:
        Whatever ``play_game`` returns for those two ids.
    """
    first_id, second_id = pair[0], pair[1]
    return play_game(first_id, second_id)

# Play all scheduled games on 11 worker processes. `Pool.map` returns the
# results in the same order as `games`, which the tally step relies on.
with Pool(processes=11) as pool:
    results = pool.map(f, games)

In [None]:
# Total score per agent id, starting from 0 for every agent.
points = defaultdict(int)

# `results[k]` is the reward pair for `games[k]`.
for (first_id, second_id), (first_reward, second_reward) in zip(games, results):
    # A reward of None is scored as 0.5.
    points[first_id] += 0.5 if first_reward is None else first_reward
    points[second_id] += 0.5 if second_reward is None else second_reward

In [None]:
# Agent id with the highest accumulated score; on a tie, the id that was
# inserted into `points` first wins.
best_agent = max(points, key=points.get)
best_score = points[best_agent]
best_score

In [None]:
# Parameter dict (second tuple element) of the top-scoring agent.
best_config = agents[best_agent][1]
best_config