Since only a limited number of matches are played per day, we might want to evaluate our agents faster locally. There were notebooks where every agent played against every other one, so the number of games was $O(N^2)$. But as the number of agents grows, the wait becomes quite long. 

So here I decided to implement something similar to the official matchmaking and scoring. I don't know exactly how the scores are calculated, but I used *TrueSkill* ratings (there is a really good introduction to it at http://www.moserware.com/2010/03/computing-your-skill.html, and a Python package); based on that description I believe it might be quite similar to what is used in the official competition. 

For comparison I added a simple **win_rate** progress plot. You can see that, while the ratings are quite noisy even after 1000 games, **win_rate** converges much better. I didn't implement all the nuances of the current matchmaking and scoring, so in reality it might be different, but from what we have observed so far, it is indeed noisy. 

If you have ideas on how to make the scoring closer to the official implementation or how to enhance it, please let me know :)

**UPD:**
I have found that setting the TrueSkill _tau_ parameter to 1/1000 of _sigma_ instead of the recommended 1/100 produces much smoother results. So after enough games the scores are quite well separated, and win_rate and the more sophisticated rating calculations look pretty similar. The problem is that our agents play only about 5-6 episodes a day, or 150-200 a month, and that might not be enough, especially for agents submitted at a later time.

![](https://i.imgur.com/guoZA8x.png)

In [None]:
import os
import random

from glob import glob
from collections import defaultdict
from multiprocessing import Pool, cpu_count, set_start_method
from tqdm.auto import tqdm
from trueskill import Rating, TrueSkill, rate_1vs1
from tensorboardX import SummaryWriter
from scipy.special import comb
from dataclasses import dataclass

# Some constants

In [None]:
ROUNDS = 1000                     # rounds played in a single match
MATCHES_PER_BOT = 10              # average number of matches per bot; increase it
OUTPUT_DIR = 'runs'               # tensorboard logs folder
BOTS_DIR = 'bots'                 # store all your bots in this folder
PROCESSES = cpu_count()           # size of the worker pool that plays the matches
COPIES = defaultdict(lambda: 1)   # how many copies of each bot file to register (default: 1)
COPIES['random_.py'] = 3          # create 3 copies of random bot for more fun

RATING_MU = 600                   # initial rating mean
RATING_SIGMA = 200                # initial rating standard deviation
RATING_BETA = RATING_SIGMA / 2    # if distance between 2 ratings is beta points
                                  # then probability of winning is 76%
                                  # recommended value for TrueSkill rating 
                                  # is sigma / 2

RATING_TAU = RATING_SIGMA / 1000  # how much impact the most recent results have
                                  # on rating
                                  # recommended value for TrueSkill rating
                                  # is 1/100 of sigma, but I have found that 1/1000
                                  # of sigma produces much smoother results
                
# Probability of exactly ROUNDS // 2 successes in ROUNDS fair coin flips;
# it is handed to TrueSkill as the draw probability.
DRAW_PROB = comb(ROUNDS, ROUNDS // 2) / 2 ** ROUNDS    

In [None]:
# Use the 'fork' start method so that pool workers inherit the notebook's
# globals (the worker function reads the module-level `bots` list).
# force=True keeps this cell re-runnable: without it a second call raises
# RuntimeError because the start method has already been set.
set_start_method('fork', force=True)

# set TrueSkill defaults (the trailing ';' suppresses the notebook echo of the
# value returned by make_as_global)
TrueSkill(RATING_MU, RATING_SIGMA, RATING_BETA, RATING_TAU, DRAW_PROB).make_as_global(); 

# Bots definition

In [None]:
!mkdir bots

In [None]:
%%writefile bots/rock.py
def rock(observation, configuration):
    """Always play action 0 (rock); both arguments are ignored."""
    rock_action = 0
    return rock_action

In [None]:
%%writefile bots/paper.py
def paper(observation, configuration):
    """Always play action 1 (paper); both arguments are ignored."""
    paper_action = 1
    return paper_action

In [None]:
%%writefile bots/scissors.py
def scissors(observation, configuration):
    """Always play action 2 (scissors); both arguments are ignored."""
    scissors_action = 2
    return scissors_action

In [None]:
%%writefile bots/counter_reactionary.py
import random

def copy_opponent(observation, configuration):
    if observation.step > 0:
        return observation.lastOpponentAction
    else:
        return random.randrange(3)

In [None]:
%%writefile bots/copy_opponent.py
import random
from kaggle_environments.envs.rps.utils import get_score

last_counter_action = None

def counter_reactionary(observation, configuration):
    global last_counter_action
    if observation.step == 0:
        last_counter_action = random.randrange(0, 3)
    elif get_score(last_counter_action, observation.lastOpponentAction) == 1:
        last_counter_action = (last_counter_action + 2) % 3
    else:
        last_counter_action = (observation.lastOpponentAction + 1) % 3

    return last_counter_action

In [None]:
%%writefile bots/random_.py
import random

def bot(o, c):
    """Return a random action in {0, 1, 2}; both arguments are ignored."""
    choice = random.randrange(3)
    return choice

In [None]:
%%writefile bots/reactionary.py
import random
from kaggle_environments.envs.rps.utils import get_score

# Action played by this agent in the previous step (None before the first step).
last_react_action = None


def reactionary(observation, configuration):
    """Play randomly on the first step, afterwards react to the opponent.

    On later steps the stored action is replaced by the action that follows
    the opponent's last action in the 0 -> 1 -> 2 -> 0 cycle whenever
    ``get_score(own_last, opponent_last) <= 1``; otherwise the previous action
    is repeated.
    """
    global last_react_action
    if observation.step == 0:
        last_react_action = random.randrange(3)
        return last_react_action

    opponent_move = observation.lastOpponentAction
    if get_score(last_react_action, opponent_move) <= 1:
        last_react_action = (opponent_move + 1) % 3
    return last_react_action

In [None]:
%%writefile bots/statistical.py
# Maps each opponent action seen so far to the number of times it was played.
action_histogram = {}


def statistical(observation, configuration):
    """Counter the opponent's most frequent action so far.

    The histogram is reset on step 0, where action 0 is returned.  Ties
    between equally frequent actions go to the one that was seen first.
    """
    global action_histogram
    if observation.step == 0:
        action_histogram = {}
        return 0

    seen = observation.lastOpponentAction
    action_histogram[seen] = action_histogram.get(seen, 0) + 1

    # max() returns the first key holding the highest count, i.e. the earliest
    # inserted one, which is the tie-break described above.
    most_common = max(action_histogram, key=action_histogram.get)
    return (most_common + 1) % 3

In [None]:
%%writefile bots/markov_agent.py
import random
import numpy as np
from collections import defaultdict


def markov_agent(observation, _):
    """Predict the opponent's next action from a table keyed by recent history.

    ``action_seq`` is a sliding window of recent actions and ``table`` maps the
    string form of that window (without its last element) to per-action counts
    of what the opponent played next.  Both live in module globals and are
    re-created whenever ``observation.step`` is a multiple of 250.

    The second parameter is unused.  Returns the chosen action.
    """
    k = 2
    global table, action_seq
    if observation.step % 250 == 0:  # refresh table every 250 steps
        # every key starts with a count of one for each of the three actions
        action_seq, table = [], defaultdict(lambda: np.ones((3,), 'int32'))
    # warm-up: play randomly until the window holds more than 2 * k + 1 entries
    if len(action_seq) <= 2 * k + 1:
        action = random.randrange(3)
        if observation.step > 0:
            action_seq.extend([observation.lastOpponentAction, action])
        else:
            # on step 0 only our own action is recorded
            action_seq.append(action)
        return action
    # update table
    key = ''.join([str(a) for a in action_seq[:-1]])
    table[key][observation.lastOpponentAction] += 1
    # update action seq: shift the window left by two entries (its length is
    # unchanged), then store the opponent's latest action in the slot before last
    action_seq[:-2] = action_seq[2:]
    action_seq[-2] = observation.lastOpponentAction
    # predict opponent next move
    key = ''.join([str(a) for a in action_seq[:-1]])
    if observation.step < 500:
        next_opponent_action_pred = table[key].argmax()
    else:
        # add stochasticity for second part of the game: sample the prediction
        # in proportion to the counts instead of taking the most frequent one
        next_opponent_action_pred = np.random.choice(3, p=table[key] / table[key].sum())
    # make an action: the one that follows the prediction in the 0 -> 1 -> 2 -> 0 cycle
    action = (next_opponent_action_pred + 1) % 3
    # after step 900 play the predicted opponent action itself instead, as a
    # sudden change of strategy meant to surprise the opponent
    if observation.step > 900:
        action = next_opponent_action_pred
    # the last window slot always holds our most recent action
    action_seq[-1] = action
    return action

In [None]:
%%writefile bots/memory_patters.py

import random

# how many steps in a row are in the pattern (multiplied by two)
memory_length = 6
# current memory of the agent
current_memory = []
# list of memory patterns
memory_patterns = []


def find_pattern(memory):
    """Return the stored pattern whose first `memory_length` actions equal `memory`.

    Patterns are checked in insertion order and the first match wins;
    None is returned when nothing matches.
    """
    for candidate in memory_patterns:
        stored = candidate["actions"]
        # all() stops at the first differing position, like an explicit break
        if all(stored[i] == memory[i] for i in range(memory_length)):
            return candidate
    return None


def memory_patterns_(obs, conf):
    """Agent that remembers what the opponent played after each recent history.

    The agent keeps a rolling memory of interleaved own/opponent actions.  For
    every memory window it has seen, it counts which action the opponent
    played next, and answers with the response stored for the most frequent
    one; when the current window is unknown a random action is played.
    """
    global memory_length, memory_patterns, current_memory
    if obs.step <= 0:
        # first step of a match: start from a clean state
        memory_length, current_memory, memory_patterns = 6, [], []
    else:
        current_memory.append(obs.lastOpponentAction)

    # enough history to learn from the window that preceded the opponent's last action
    if len(current_memory) > memory_length:
        window = current_memory[:memory_length]
        known = find_pattern(window)
        if known is None:
            known = {
                "actions": list(window),
                "opp_next_actions": [
                    {"action": move, "amount": 0, "response": (move + 1) % 3}
                    for move in range(3)
                ],
            }
            memory_patterns.append(known)
        for entry in known["opp_next_actions"]:
            if entry["action"] == obs.lastOpponentAction:
                entry["amount"] += 1
        # forget the oldest step (one own action and one opponent action)
        del current_memory[:2]

    # default to a random action, overridden below when the current window is known
    my_action = random.randint(0, 2)
    matched = find_pattern(current_memory)
    if matched is not None:
        best_amount = 0
        for entry in matched["opp_next_actions"]:
            amount = entry["amount"]
            # prefer the more frequent opponent action; break ties with a coin flip
            if amount > best_amount or (amount == best_amount and random.random() > 0.5):
                best_amount = amount
                my_action = entry["response"]
    current_memory.append(my_action)
    return my_action

In [None]:
%%writefile bots/iocaine.py
import random

def recall(age, hist):
    """Looking at the last 'age' points in 'hist', finds the
    last point with the longest similarity to the current point,
    returning 0 if none found.

    The returned value is ``len(hist) - past`` for the best offset ``past``,
    i.e. the index of the element that followed the earlier matching run.
    """
    # end: result for the best candidate so far (0 means "nothing found");
    # length: how many trailing elements are known to match for that candidate.
    end, length = 0, 0
    # past is how many positions back the candidate run lies
    for past in range(1, min(age + 1, len(hist) - 1)):
        # too few elements before this offset to beat the current best length
        if length >= len(hist) - past: break
        # to improve on the best, the candidate must agree on the last length + 1 elements
        for i in range(-1 - length, 0):
            if hist[i - past] != hist[i]: break
        else:
            # it does: extend the match further back for as long as elements agree
            for length in range(length + 1, len(hist) - past):
                if hist[-past - length - 1] != hist[-length - 1]: break
            else:
                # the extension loop finished without finding a mismatch
                length += 1
            end = len(hist) - past
    return end


def beat(i):
    """Return the move after `i` in the 0 -> 1 -> 2 -> 0 cycle (the move that beats it)."""
    following = i + 1
    return following % 3


def loseto(i):
    """Return the move before `i` in the 0 -> 1 -> 2 -> 0 cycle (the move that loses to it)."""
    preceding = i - 1
    return preceding % 3


class Stats:
    """Maintains three running counts and returns the highest count based
         on any given time horizon and threshold.

    ``self.sum`` is a list of snapshots of the three counters; the last entry
    is the live one, and ``advance()`` freezes it by pushing a copy.
    """

    def __init__(self):
        self.sum = [[0, 0, 0]]

    def add(self, move, score):
        """Add `score` to the live counter of `move`."""
        self.sum[-1][move] += score

    def advance(self):
        """Start a new snapshot that continues from the current totals.

        The snapshot must be a copy.  Appending the same list object again
        would alias every entry of ``self.sum``, so the windowed differences
        computed in ``max`` would always be zero.
        """
        self.sum.append(list(self.sum[-1]))

    def max(self, age, default, score):
        """Return ``(move, count)`` for the best counter over the last `age` snapshots.

        When `age` reaches past the recorded history the full totals are used.
        If the best count does not exceed `score`, ``(default, score)`` is
        returned unchanged.
        """
        if age >= len(self.sum):
            diff = self.sum[-1]
        else:
            diff = [self.sum[-1][i] - self.sum[-1 - age][i] for i in range(3)]
        m = max(diff)
        if m > score: return diff.index(m), m
        return default, score


class Predictor:
    """Scores one strategy together with its second- and triple-guesses.

    For every round the predictor records how the opponent's actual move
    related to the stored prediction, and can report which rotation of the
    prediction has paid off best over a given number of recent rounds.
    """

    def __init__(self):
        self.stats = Stats()
        self.lastguess = -1

    def addguess(self, lastmove, guess):
        """Score the previous prediction against `lastmove`, then store `guess`.

        `lastmove` is -1 when there is no previous move to score.
        """
        if lastmove != -1:
            offset = (lastmove - self.prediction) % 3
            for shifted, credit in ((beat(offset), 1), (loseto(offset), -1)):
                self.stats.add(shifted, credit)
            self.stats.advance()
        self.prediction = guess

    def bestguess(self, age, best):
        """Return ``(move, score)``, improving on `best` if the stats allow it.

        `best` is the ``(move, score)`` pair to beat; it is returned unchanged
        (modulo 3) when no rotation scored higher over the last `age` rounds.
        """
        incumbent_move, incumbent_score = best[0], best[1]
        fallback = (incumbent_move - self.prediction) % 3
        offset, score = self.stats.max(age, fallback, incumbent_score)
        return (offset + self.prediction) % 3, score


# Time horizons (in rounds) used for recalling history and for scoring predictors.
ages = [1000, 100, 10, 5, 2, 1]


class Iocaine:
    """State and move selection for one match of the iocaine strategy."""

    def __init__(self):
        """Build second-guessers for 50 strategies: 36 history-based strategies,
             12 simple frequency-based strategies, the constant-move strategy, and
             the basic random-number-generator strategy.    Also build 6 meta second
             guessers to evaluate 6 different time horizons on which to score
             the 50 strategies' second-guesses."""
        # flat list of every strategy predictor, filled in by self.predictor()
        self.predictors = []
        self.predict_history = self.predictor((len(ages), 2, 3))
        self.predict_frequency = self.predictor((len(ages), 2))
        self.predict_fixed = self.predictor()
        self.predict_random = self.predictor()
        # the meta predictors are created directly, so they are not in self.predictors
        self.predict_meta = [Predictor() for a in range(len(ages))]
        self.stats = [Stats() for i in range(2)]
        self.histories = [[], [], []]

    def predictor(self, dims=None):
        """Returns a nested array of predictor objects, of the given dimensions.

        Without `dims` a single new Predictor is created, registered in
        ``self.predictors`` and returned.
        """
        if dims: return [self.predictor(dims[1:]) for i in range(dims[0])]
        self.predictors.append(Predictor())
        return self.predictors[-1]

    def move(self, them):
        """The main iocaine "move" function.

        `them` is the opponent's previous move, or -1 when there is none yet.
        Returns the move chosen for this round.
        """

        # histories[0] stores our moves (last one already previously decided);
        # histories[1] stores their moves (last one just now being supplied to us);
        # histories[2] stores pairs of our and their last moves.
        # stats[0] and stats[1] are running counters of our recent moves and theirs.
        if them != -1:
            self.histories[1].append(them)
            self.histories[2].append((self.histories[0][-1], them))
            for watch in range(2):
                self.stats[watch].add(self.histories[watch][-1], 1)

        # Execute the basic RNG strategy and the fixed-move strategy.
        rand = random.randrange(3)
        self.predict_random.addguess(them, rand)
        self.predict_fixed.addguess(them, 0)

        # Execute the history and frequency strategies.
        for a, age in enumerate(ages):
            # For each time window, there are three ways to recall a similar time:
            # (0) by history of my moves; (1) their moves; or (2) pairs of moves.
            # Set "best" to these three timeframes (zero if no matching time).
            best = [recall(age, hist) for hist in self.histories]
            for mimic in range(2):
                # For each similar historical moment, there are two ways to anticipate
                # the future: by mimicking what their move was; or mimicking what my
                # move was.    If there were no similar moments, just move randomly.
                for watch, when in enumerate(best):
                    if not when:
                        move = rand
                    else:
                        move = self.histories[mimic][when]
                    self.predict_history[a][mimic][watch].addguess(them, move)
                # Also we can anticipate the future by expecting it to be the same
                # as the most frequent past (either counting their moves or my moves).
                mostfreq, score = self.stats[mimic].max(age, rand, -1)
                self.predict_frequency[a][mimic].addguess(them, mostfreq)

        # All the predictors have been updated, but we have not yet scored them
        # and chosen a winner for this round.    There are several timeframes
        # on which we can score second-guessing, and we don't know which timeframe
        # will do best.    So score all 50 predictors on all 6 timeframes, and record
        # the best 6 predictions in meta predictors, one for each timeframe.
        for meta, age in enumerate(ages):
            best = (-1, -1)
            for predictor in self.predictors:
                best = predictor.bestguess(age, best)
            self.predict_meta[meta].addguess(them, best[0])

        # Finally choose the best meta prediction from the final six, scoring
        # these against each other on the whole-game timeframe. 
        best = (-1, -1)
        for meta in range(len(ages)):
            best = self.predict_meta[meta].bestguess(len(self.histories[0]), best)

        # We've picked a next move.    Record our move in histories[0] for next time.
        self.histories[0].append(best[0])

        # And return it.
        return best[0]


# Iocaine instance for the match in progress; created on step 0.  It is kept
# under its own name so that the agent function below is never rebound to it.
_iocaine_player = None


def iocaine(observation, configuration):
    """Agent entry point: delegate every step to a per-match Iocaine instance.

    A fresh instance is created on step 0 and asked for an opening move
    (``move(-1)``); on later steps it receives the opponent's last action.
    """
    global _iocaine_player
    if observation.step == 0:
        _iocaine_player = Iocaine()
        act = _iocaine_player.move(-1)
    else:
        act = _iocaine_player.move(observation.lastOpponentAction)

    return act


In [None]:
@dataclass
class Observation:
    """Per-round observation passed to a bot's agent function."""
    # Opponent's action in the previous round; run_match passes None for the first round.
    lastOpponentAction: int
    # Zero-based index of the current round, as counted by Bot.move.
    step: int


@dataclass
class Configuration:
    """Match settings passed to a bot's agent function alongside the observation."""
    # Number of rounds in one match.
    episode_steps: int = ROUNDS
    # Presumably the number of distinct actions available to a bot -- confirm.
    signs: int = 3

# REWARDS[a][b] is the first bot's reward for one round in which it plays `a`
# and the second bot plays `b`: 1 for a win, 0 for a tie, -1 for a loss.
REWARDS = [[0, -1, 1], [1, 0, -1], [-1, 1, 0]]

class Bot:
    """A bot loaded from a Python source file, plus its per-match step counter.

    Attributes:
        path: location of the bot's source file.
        name: display name of the bot.
        step: number of moves requested from the bot so far.
        bot_method: the agent function, called as ``bot_method(observation, configuration)``.
    """

    def __init__(self, path, name):
        self.path = path
        self.name = name
        self.step = 0
        self._load(path)

    def _load(self, path):
        """Execute the file at `path` and keep its last callable as the agent.

        Raises:
            ValueError: if executing the file leaves no callable object.
        """
        with open(path, encoding='utf-8') as f:
            source = f.read()
        env = {}
        # NOTE: this executes the bot file with full privileges; only load
        # bots you trust.  Compiling under the real path (rather than
        # "<string>") makes tracebacks point at the bot's own file.
        exec(compile(source, path, "exec"), env)
        callables = [v for v in env.values() if callable(v)]
        if not callables:
            raise ValueError(f'no callable agent found in {path!r}')
        # Convention: the agent is the last callable the file defines.
        self.bot_method = callables[-1]

    def move(self, prev):
        """Ask the bot for its next action.

        Args:
            prev: the opponent's previous action (None for the first round).
        """
        c = Configuration()
        o = Observation(lastOpponentAction=prev, step=self.step)
        self.step += 1
        return self.bot_method(o, c)


class Stats:
    """Tournament bookkeeping: per-bot counters, ratings and tensorboard writers.

    All per-bot lists are indexed by the bot's position in `bots`.
    """

    def __init__(self, bots):
        n = len(bots)
        self.bots = bots
        self.n_bots = n
        self.matches = [0] * n
        self.wins = [0] * n
        self.ties = [0] * n
        self.loses = [0] * n
        self.win_rate = [0] * n
        self.ratings = [Rating() for _ in range(n)]
        # one tensorboard run (sub-folder of OUTPUT_DIR) per bot
        self.writers = [
            SummaryWriter(os.path.join(OUTPUT_DIR, b['name']), flush_secs=10)
            for b in bots
        ]

    def update(self, bot1, bot2, reward):
        """Record one finished match and log both bots' updated statistics.

        Args:
            bot1, bot2: indexes of the two bots.
            reward: match result from bot1's point of view; positive means
                bot1 won, negative means bot2 won, anything else is a tie.
        """
        for bot in (bot1, bot2):
            self.matches[bot] += 1

        # Order the pair as (winner, loser); a tie keeps the (bot1, bot2) order.
        if reward > 0:
            first, second, drawn = bot1, bot2, False
        elif reward < 0:
            first, second, drawn = bot2, bot1, False
        else:
            first, second, drawn = bot1, bot2, True

        if drawn:
            self.ties[bot1] += 1
            self.ties[bot2] += 1
        else:
            self.wins[first] += 1
            self.loses[second] += 1
        self.ratings[first], self.ratings[second] = rate_1vs1(
            self.ratings[first], self.ratings[second], drawn=drawn)

        for bot in (bot1, bot2):
            self.win_rate[bot] = self.wins[bot] / self.matches[bot]

        def scalars(bot):
            # (tag, value) pairs logged for one bot, in logging order
            rating = self.ratings[bot]
            return (
                ('win_rate', self.win_rate[bot]),
                ('rating_mu', rating.mu),
                ('rating_sigma', rating.sigma),
                ('matches_played', self.matches[bot]),
                ('wins', self.wins[bot]),
                ('ties', self.ties[bot]),
                ('loses', self.loses[bot]),
            )

        # every scalar is plotted against the number of matches the bot has played
        for (tag, value1), (_, value2) in zip(scalars(bot1), scalars(bot2)):
            self.writers[bot1].add_scalar(tag, value1, self.matches[bot1])
            self.writers[bot2].add_scalar(tag, value2, self.matches[bot2])
        
def run_match(bot1, bot2):
    """Play ROUNDS rounds between two bots and return bot1's total reward.

    Each bot is told the opponent's action from the previous round (None in
    the first round).  bot1 is always asked for its move before bot2.
    """
    total = 0
    last1 = last2 = None
    for _ in range(ROUNDS):
        # both calls see the previous round's actions; the pair is rebound afterwards
        last1, last2 = bot1.move(last2), bot2.move(last1)
        total += REWARDS[last1][last2]
    return total


def matchmaking(n_bots, n_matches=None):
    """Yield random pairs ``(i1, i2)`` of distinct bot indexes.

    Args:
        n_bots: number of registered bots; indexes are drawn from ``range(n_bots)``.
        n_matches: number of pairs to yield; None means an endless stream,
            zero or a negative number yields nothing.

    Raises:
        ValueError: if fewer than two bots are available, since no pair of
            distinct bots exists.  As this is a generator, the error surfaces
            when the first pair is requested.
    """
    if n_bots < 2:
        raise ValueError(f'matchmaking needs at least 2 bots, got {n_bots}')
    produced = 0
    while n_matches is None or produced < n_matches:
        # two distinct indexes; every ordered pair is equally likely
        i1, i2 = random.sample(range(n_bots), 2)
        yield i1, i2
        produced += 1


def worker(bot_indexes):
    """Play one match between the bots with the given indexes.

    Fresh Bot objects are loaded for every match from the module-level `bots`
    list.  Returns ``(bot_indexes, reward)`` with the reward as computed by
    run_match.
    """
    contestants = []
    for index in bot_indexes:
        entry = bots[index]
        contestants.append(Bot(path=entry['path'], name=entry['name']))
    outcome = run_match(*contestants)
    return bot_indexes, outcome

In [None]:
# Unfortunately the Kaggle tensorboard extension is not working,
# see https://www.kaggle.com/product-feedback/89671
# %load_ext tensorboard
# %tensorboard --logdir=runs

# That is why we have to use ngrok to proxy requests:
# download the ngrok archive and unpack it into the working directory.
!wget -q https://bin.equinox.io/c/4VmDzA7iaHb/ngrok-stable-linux-amd64.zip
!unzip -q ngrok-stable-linux-amd64.zip

In [None]:
# Start tensorboard in the background, reading the logs written to OUTPUT_DIR.
os.system(f'tensorboard --logdir {OUTPUT_DIR} &')
# Start ngrok in the background, tunnelling local port 6006
# (presumably the port tensorboard listens on -- confirm).
os.system('./ngrok http 6006 &')
# Give ngrok a moment to start, then ask its local API (port 4040) for the
# tunnel list and print the public URL of the first tunnel.
!sleep 2 && curl -s http://localhost:4040/api/tunnels | python3 -c "import sys, json; print('Tensorboard link', json.load(sys.stdin)['tunnels'][0]['public_url'])"
!echo Note that it might take a few minutes for initial tensorboard load because it loads a few MB script bundle, so be patient

In [None]:
# Clear logs folder for repeated runs
!rm -rf runs

In [None]:
# Register every bot file found in BOTS_DIR, once per requested copy.
# The file list is sorted so that bot indexes are the same on every run.
bots = []
for b in sorted(glob(os.path.join(BOTS_DIR, '*.py'))):
    name = os.path.basename(b)
    for i in range(COPIES[name]):
        bots.append(dict(path=b, name=f'{name}_{i:02d}'))

print('Number of bots', len(bots))
print('Processes', PROCESSES)
print('Rounds per match', ROUNDS)
print('Matches per bot', MATCHES_PER_BOT)

stats = Stats(bots)

# every match involves two bots, hence the division by two
total_matches = len(bots) * MATCHES_PER_BOT // 2

try:
    with Pool(PROCESSES) as pool:
        # matches are played in the worker processes; results are folded into
        # the statistics here, in whatever order they finish
        for res in tqdm(pool.imap_unordered(worker, matchmaking(len(bots), total_matches)), total=total_matches):
            stats.update(res[0][0], res[0][1], res[1])
finally:
    # flush and close the event files even if the tournament is interrupted
    for writer in stats.writers:
        writer.close()