<img src="https://image.freepik.com/free-vector/all-seeing-eye-pyramid-symbol-engraving-tattoo-style-freemason-spiritual-illuminati-religion-triangle-magic_1284-41377.jpg" width="300">

Looking at the distributions of action probabilities for geo and geo beater, with and without the 1/9 phase shift, may give us some insights. What is clearly visible is that the 1/9 shift perturbs the probabilities quite a lot, while without it they are almost flat. Another interesting pattern is that in ternary space those probabilities lie in a smaller, rotated triangle. Actually, it looks like there are many triangle orbits. Moreover, the rotation angle is the same from game to game for the original bot and its beater.

Some questions for further exploration:
- How do different phase shifts change the look of the distributions?
- Why does 1/9 perform best?
- Can we do better?
- Can this trick be applied to other bots?

In [None]:
!pip install python-ternary

In [None]:
import os
import ternary
import numpy as np
import cmath
import math
import matplotlib.pyplot as plt
import pandas as pd
from dataclasses import dataclass

%config InlineBackend.figure_format = 'retina'

In [None]:
%%writefile geo.py
import operator
import numpy as np
import cmath
from typing import List
from collections import namedtuple
import traceback
import sys

# The three cube roots of unity. Action k (0, 1, 2) is represented by the
# point basis[k] of the complex plane.
basis = np.array(
    [1, cmath.exp(2j * cmath.pi * 1 / 3), cmath.exp(2j * cmath.pi * 2 / 3)]
)

# One history match: `idx` is the position right after the matched past
# occurrence, `length` is the number of trailing items that matched.
HistMatchResult = namedtuple("HistMatchResult", "idx length")
# Last action returned by the agent. Module-level, so shared by all instances.
output = None
# Action distribution computed by the latest Agent.action call; None until then.
probs = None

def find_all_longest(seq, max_len=None) -> List[HistMatchResult]:
    """
    Locate every earlier position where the tail of `seq` re-occurs.

    For each candidate end position (from `len(seq) - 2` down to 1) the
    sequence is compared backwards against its own tail. Every candidate with
    at least one matching item yields `HistMatchResult(idx, length)`, where
    `idx` is the position right after the candidate and `length` the number of
    matched items. Comparison stops once `length` exceeds `max_len`.

    The result is ordered by decreasing `length`; equal lengths keep the most
    recent candidate first.
    """
    matches = []

    for end in range(len(seq) - 2, 0, -1):
        run = 0

        # Walk backwards from the tail and from `end` in lockstep.
        while run <= end and seq[-1 - run] == seq[end - run]:
            run += 1

            if max_len is not None and run > max_len:
                break

        if run:
            matches.append(HistMatchResult(end + 1, run))

    matches.sort(key=operator.attrgetter("length"), reverse=True)

    return matches


def probs_to_complex(p):
    """Collapse a weight vector over the three actions into one complex number.

    The result is the sum of the `basis` points weighted by the entries of `p`.
    """
    z = p @ basis
    return z


def _fix_probs(probs):
    """
    Put probs back into the triangle. They can end up outside because of
    rounding errors or when the complex number they came from lies outside
    the triangle.

    If any entry is negative, all entries are shifted up so that the smallest
    becomes 0; the result is then scaled to sum to 1.

    The computation runs on a float copy, so the caller's array is never
    modified and plain lists or integer arrays are accepted as well.

    Returns a new 1-D float array.
    """
    fixed = np.array(probs, dtype=float)  # always a fresh copy

    lowest = min(fixed)
    if lowest < 0:
        fixed -= lowest

    fixed /= sum(fixed)

    return fixed


def complex_to_probs(z):
    """Turn a complex number into a probability vector over the three actions.

    Each entry is derived from the real part of `z` times the conjugate of the
    matching `basis` point; `_fix_probs` then removes negative entries and
    normalises the vector.
    """
    projections = (z * basis.conjugate()).real
    return _fix_probs((2 * projections + 1) / 3)


def z_from_action(action):
    """Return the `basis` point that represents `action` (an index into `basis`)."""
    point = basis[action]
    return point


def sample_from_z(z):
    """Draw one action (0, 1 or 2) at random, weighted by the distribution that `z` encodes."""
    weights = complex_to_probs(z)
    return np.random.choice(3, p=weights)


def bound(z):
    """Round-trip `z` through probability space and back to a complex number.

    The conversion to probabilities goes through `_fix_probs`, so the returned
    point corresponds to a valid distribution.
    """
    as_probs = complex_to_probs(z)
    return probs_to_complex(as_probs)


def norm(z):
    """Scale `z` to unit modulus, then pass it through `bound`.

    `z` must be non-zero, because it is divided by `abs(z)`.
    """
    unit = z / abs(z)
    return bound(unit)


class Pred:
    """Learns an exponentially averaged complex factor that maps a feature to
    the target observed after it."""

    def __init__(self, *, alpha):
        # alpha: weight of the newest observation in the running average
        self.alpha = alpha
        self.offset = 0
        self.last_feat = None

    def train(self, target):
        """Update the running offset with `target`, relative to the feature
        stored by the previous `predict` call.

        Does nothing until `predict` has stored a feature.
        """
        previous = self.last_feat
        if previous is None:
            return

        observed = target * previous.conjugate()  # fixed

        self.offset = (1 - self.alpha) * self.offset + self.alpha * observed

    def predict(self, feat):
        """
        Return the prediction for `feat` and remember the feature for the next
        `train` call.

        feat is an arbitrary feature with a probability on 0,1,2: anything
        that can serve as a sensible starting direction. It is normalised
        first, then multiplied by the learned offset.
        """
        anchor = norm(feat)

        # `offset` is the running average of target * conj(feat), so the
        # product below applies the learned rotation/scaling to the feature.
        self.last_feat = anchor

        return anchor * self.offset


class BaseAgent:
    """Keeps the per-game histories and wraps `action()` in the
    `(obs, conf)` call protocol.

    Subclasses override `action()`. The last move is kept in the module-level
    `output`, not on the instance.
    """

    def __init__(self):
        self.step = None
        self.my_hist = []  # own past moves
        self.opp_hist = []  # opponent's past moves
        self.my_opp_hist = []  # (own move, opponent move) pairs
        self.outcome_hist = []  # 0 tie, 1 win, -1 loss, from this agent's side

    def __call__(self, obs, conf):
        global output
        try:
            if obs.step != 0:
                self.step = obs.step

                mine, theirs = output, int(obs.lastOpponentAction)
                self.my_hist.append(mine)
                self.my_opp_hist.append((mine, theirs))
                self.opp_hist.append(theirs)

                # (mine - theirs) mod 3: 0 -> tie, 1 -> win, 2 -> loss
                self.outcome_hist.append((0, 1, -1)[(mine - theirs) % 3])

                output = self.action()
            else:
                # First round: nothing to learn from, play uniformly at random.
                output = np.random.choice(3)

            return output
        except Exception:
            # Print the traceback to stderr, then re-raise.
            traceback.print_exc(file=sys.stderr)
            raise

    def action(self):
        """Choose the next move; the base implementation returns None."""
        pass


class Agent(BaseAgent):
    """History-matching agent.

    Each turn it trains the predictor on the move that would have beaten the
    opponent's last move, builds a feature from the longest history match, and
    samples its move from the predicted distribution.
    """

    def __init__(self, alpha=0.01):
        super().__init__()
        self.predictor = Pred(alpha=alpha)
        self.pred = None

    def action(self):
        """Train, predict, publish the distribution in the module-level
        `probs`, and return the sampled move."""
        global probs
        self.train()

        z = self.preds()

        chosen = sample_from_z(z)
        probs = complex_to_probs(z)

        return chosen

    def train(self):
        """Feed the predictor the action that beats the opponent's last move."""
        beats_last_opp = (self.opp_hist[-1] + 1) % 3
        self.predictor.train(z_from_action(beats_last_opp))

    def preds(self):
        """Return the predictor's output for the longest history match, or 0
        when the history contains no match."""
        matches = find_all_longest(self.my_opp_hist, max_len=20)

        if matches:
            best = matches[0]
            anchor = z_from_action(self.opp_hist[best.idx])
            return self.predictor.predict(anchor)

        return 0


# Single module-level agent instance; it keeps its histories between calls.
agent = Agent()


def call_agent(obs, conf):
    """Entry point with the `(obs, conf)` signature; forwards to the module-level `agent`."""
    return agent(obs, conf)

In [None]:
%%writefile geo_beater.py
import operator
import numpy as np
import cmath
from collections import namedtuple

# The three cube roots of unity; action k (0, 1, 2) maps to basis[k].
basis = np.array([1, cmath.exp(2j * cmath.pi * 1 / 3), cmath.exp(2j * cmath.pi * 2 / 3)])
# One history match: `idx` is the position right after the matched past
# occurrence, `length` is the number of trailing items that matched.
HistMatchResult = namedtuple("HistMatchResult", "idx length")


def find_all_longest(seq, max_len=None):
    """Locate every earlier position where the tail of `seq` re-occurs.

    Candidates run from `len(seq) - 2` down to 1. Each candidate with at least
    one matching item yields `HistMatchResult(idx, length)`: `idx` is the
    position right after the candidate, `length` the number of matched items.
    Comparison stops once `length` exceeds `max_len`. Results are ordered by
    decreasing `length`, most recent candidate first among equals.
    """
    matches = []
    for end in range(len(seq) - 2, 0, -1):
        run = 0
        # Walk backwards from the tail and from `end` in lockstep.
        while run <= end and seq[-1 - run] == seq[end - run]:
            run += 1
            if max_len is not None and run > max_len:
                break
        if run:
            matches.append(HistMatchResult(end + 1, run))

    matches.sort(key=operator.attrgetter("length"), reverse=True)
    return matches


def complex_to_probs(z):
    """Turn a complex number into a probability vector over the three actions.

    Negative entries are removed by shifting all entries up by the smallest
    one; the vector is then scaled to sum to 1.
    """
    weights = (2 * (z * basis.conjugate()).real + 1) / 3
    floor = min(weights)
    if floor < 0:
        weights = weights - floor
    total = sum(weights)
    return weights / total


opp_hist = []  # this bot's own past moves, i.e. what its opponent records as opponent history
my_opp_hist = []  # (opponent's move, this bot's move) pairs, one per finished round
offset = 0  # exponentially averaged product of target and conjugated feature (updated in agent)
last_feat = None  # feature used for the previous prediction; None until a history match is found
output = None  # this bot's most recent move
probs = None  # last distribution computed in agent(); None before the first prediction
phase_shift = 1/9  # rotation applied to the prediction, as a fraction of a full turn


def agent(obs, conf):
    """Play one round against a history-matching opponent by re-running its
    prediction from the opponent's point of view.

    The histories are stored mirrored (opponent's move first, own move
    second). The prediction is rotated by `phase_shift` of a full turn, and
    the returned move is the one that beats the most probable action of the
    resulting distribution. The module-level state is updated on every call.
    """
    global output, opp_hist, my_opp_hist, offset, last_feat, probs

    if obs.step == 0:
        # No history yet: open with a uniformly random move.
        output = np.random.choice(3)
        return output

    my_move = output
    my_opp_hist.append((obs.lastOpponentAction, my_move))
    opp_hist.append(my_move)

    # Training step: the target is the action that beats this bot's last move.
    if last_feat is not None:
        target = basis[(my_move + 1) % 3]
        this_offset = target * last_feat.conjugate()
        offset = (1 - .01) * offset + .01 * this_offset

    matches = find_all_longest(my_opp_hist, 20)
    if matches:
        feat = basis[opp_hist[matches[0].idx]]
        last_feat = complex_to_probs(feat / abs(feat)) @ basis
        rotation = cmath.exp(2j * cmath.pi * phase_shift)
        pred = last_feat * offset * rotation
    else:
        pred = 0

    probs = complex_to_probs(pred)
    output = (int(np.argmax(probs)) + 1) % 3
    return output


In [None]:
%%writefile otm.py

import numpy as np
import pandas as pd
import random

# T[i, j] counts how often opponent action j followed opponent action i.
T = np.zeros((3, 3))
# T with every row divided by its total (rows without data stay all zero).
P = np.zeros((3, 3))

# a1 is the action of the opponent 1 step ago
# a2 is the action of the opponent 2 steps ago
a1, a2 = None, None
# Row of P for the latest opponent action; stays None during the first two steps.
probs = None

def transition_agent(observation, configuration):
    """First-order transition-matrix agent.

    From step 2 on, it counts the opponent's action-to-action transitions,
    samples the opponent's next action from the row for its latest action, and
    returns the action that beats the sample. It plays uniformly at random in
    the first two steps and whenever that row does not sum to exactly 1.
    """
    global T, P, a1, a2, probs

    if observation.step <= 1:
        if observation.step == 1:
            # Remember the opponent's first move as the start of a transition.
            a2 = observation.lastOpponentAction
        return int(np.random.randint(3))

    a1 = observation.lastOpponentAction
    T[a2, a1] += 1
    row_totals = np.maximum(1, T.sum(axis=1)).reshape(-1, 1)
    P = np.divide(T, row_totals)
    a2 = a1
    probs = P[a1, :]

    if np.sum(probs) != 1:
        return int(np.random.randint(3))

    predicted = np.random.choice([0, 1, 2], p=probs)
    return int((predicted + 1) % 3)

In [None]:
%%writefile otm_beater.py

import numpy as np
import pandas as pd
import random

# T[i, j] counts how often this bot's own action j followed its own action i.
T = np.zeros((3, 3))
# T with every row divided by its total (rows without data stay all zero).
P = np.zeros((3, 3))

# a1 / a2: this bot's own action one and two steps ago.
a1, a2 = None, None
last_action = None # track my action.
# Last distribution sampled from in anti_transition_agent; None until then.
probs = None
# Rotation applied to the modelled distribution, as a fraction of a full turn.
phase_shift = 1/9


# Some helper functions to do probability calculations with complex numbers
import cmath

# The three cube roots of unity; action k (0, 1, 2) maps to basis[k].
basis = np.exp(2j * cmath.pi * np.arange(3) / 3)


def probs_to_complex(p):
    """Collapse a weight vector over the three actions into one complex number.

    The result is the sum of the `basis` points weighted by the entries of `p`.
    """
    z = p @ basis
    return z


def _fix_probs(probs):
    """
    Put probs back into the triangle. They can end up outside because of
    rounding errors or when the complex number they came from lies outside
    the triangle.

    If any entry is negative, all entries are shifted up so that the smallest
    becomes 0; the result is then scaled to sum to 1.

    The computation runs on a float copy, so the caller's array is never
    modified and plain lists or integer arrays are accepted as well.

    Returns a new 1-D float array.
    """
    fixed = np.array(probs, dtype=float)  # always a fresh copy
    lowest = min(fixed)
    if lowest < 0:
        fixed -= lowest
    fixed /= sum(fixed)
    return fixed


def complex_to_probs(z):
    """Turn a complex number into a probability vector over the three actions.

    Each entry is derived from the real part of `z` times the conjugate of the
    matching `basis` point; `_fix_probs` then removes negative entries and
    normalises the vector.
    """
    projections = (z * basis.conjugate()).real
    return _fix_probs((2 * projections + 1) / 3)


###########################################
# Original agent with modifications marked ->
###########################################

def anti_transition_agent(observation, configuration):
    """Counter to the transition-matrix agent.

    It runs the same transition model on its OWN move history (the mirrored
    view of the game), rotates the modelled distribution by `phase_shift` of a
    full turn, samples the move the opponent would pick from it, and plays the
    move that beats that one. `last_action` is updated on every call.
    """
    global T, P, a1, a2, last_action, probs

    if observation.step <= 1:
        if observation.step == 1:
            a2 = last_action  # on me only
        simulated = int(np.random.randint(3))
    else:
        a1 = last_action  # on me only; take mirrored view on game
        T[a2, a1] += 1
        P = np.divide(T, np.maximum(1, T.sum(axis=1)).reshape(-1, 1))
        a2 = a1
        if np.sum(P[a1, :]) == 1:
            probs = P[a1, :]

            # Add the phase and normalise to unit modulus (no random mix-in).
            z = probs_to_complex(probs) * cmath.exp(2j * cmath.pi * phase_shift)
            if z != 0:
                z = z / abs(z)
            probs = complex_to_probs(z)

            simulated = int((np.random.choice(3, p=probs) + 1) % 3)
        else:
            simulated = int(np.random.randint(3))

    # Beat what the opponent would have played.
    last_action = (simulated + 1) % 3

    return last_action

In [None]:
%%writefile otm_beater_constant.py

# This scripted bot never computes a distribution; the name exists because the
# match harness reads `probs` from every bot's namespace.
probs = None

moves = [1, 0, 0, 0, 0, 2, 0, 2, 0, 2, 0, 2, 2, 2, 2, 2, 1, 0, 1, 0, 1, 2, 1, 2, 1, 2, 1, 2, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 0, 2, 1, 0, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 2, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 1, 0, 2, 1, 0, 2, 1, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 0, 2, 1, 0, 2, 1, 0, 2, 1, 0, 2, 1, 0, 2, 1, 0, 2, 1, 0, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 2, 1, 0, 2, 1, 0, 2, 1, 0, 2, 1, 0, 2, 1, 0, 2, 1, 0, 2, 1, 0, 2, 1, 0, 2, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 
1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 1, 0, 2, 1, 0, 2, 1, 0, 2, 1, 0, 2, 1, 0, 2, 1, 0, 2, 1, 0, 2, 1, 0, 2, 1, 0, 2, 1, 0, 2, 1, 0, 2, 1, 0, 2, 1, 0, 2, 1, 0, 2, 1, 0, 2, 1, 0, 2, 1, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0]

def agent(o, c):
    """Replay the pre-recorded move for the current step.

    `o.step` must be a valid index into `moves`.
    """
    step = o.step
    return moves[step]

In [None]:
%%writefile statistical.py
import numpy as np

# action_histogram[k] counts how many times the opponent has played action k.
action_histogram = np.zeros(3)
# Opponent's empirical action frequencies; None until the first opponent move is seen.
probs = None


def statistical(observation, configuration):
    """Play the action that beats the opponent's most frequent action so far.

    Step 0 always returns 0. Later steps update the histogram with the
    opponent's last action and publish the frequencies in the module-level
    `probs`.
    """
    global action_histogram, probs
    if observation.step != 0:
        action_histogram[observation.lastOpponentAction] += 1
        probs = action_histogram / action_histogram.sum()

        most_frequent = action_histogram.argmax()
        return (most_frequent + 1) % 3
    return 0

In [None]:
%%writefile statistical_beater.py

# This scripted bot never computes a distribution; the name exists because the
# match harness reads `probs` from every bot's namespace.
probs = None

moves = [0, 0, 2, 2, 1, 2, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2,
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0,
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 1]

def agent(o, c):
    """Replay the pre-recorded move for the current step.

    `o.step` must be a valid index into `moves`.
    """
    step = o.step
    return moves[step]

In [None]:
@dataclass
class Observation:
    """What a bot is shown on each turn; built by Bot.move."""
    lastOpponentAction: int  # opponent's previous move; play_match passes None on the first round
    step: int  # zero-based index of the current round


@dataclass
class Configuration:
    """Settings object passed to every bot call as the second argument."""
    episode_steps: int = 1000  # presumably the nominal episode length; not read by the harness shown here
    signs: int = 3  # presumably the number of distinct actions; not read by the harness shown here

# REWARDS[a][b] is the score of the player who chose action a against action b:
# 0 for a tie, +1 for a win, -1 for a loss.
REWARDS = [[0, -1, 1], [1, 0, -1], [-1, 1, 0]]

class Bot:
    """Loads an agent from a Python source file and drives it one move at a time.

    The file is executed in a private namespace (`self.context`), so the bot's
    module-level state (for example `probs` or `phase_shift`) can be read or
    overridden from outside. The agent is the last callable the file defines.

    Args:
        path: path of the bot's source file.
        name: display name; defaults to the file name without its extension.

    Raises:
        ValueError: if the file defines no callable at all.
    """

    def __init__(self, path, name=None):
        self.path = path
        self.step = 0
        if name is None:
            name = os.path.splitext(os.path.basename(path))[0]
        self.name = name
        self._load(path)

    def _load(self, path):
        """Execute the file in a fresh namespace and pick the agent callable."""
        with open(path, encoding="utf-8") as f:
            source = f.read()
        self.context = {}
        # Compile under the real file name so that tracebacks raised inside
        # the bot point at its file instead of "<string>".
        exec(compile(source, path, "exec"), self.context)
        callables = [v for v in self.context.values() if callable(v)]
        if not callables:
            raise ValueError(f"{path}: no callable found to use as the agent")
        self.bot_method = callables[-1]

    def move(self, prev):
        """Ask the bot for its next move, given the opponent's previous move
        (`None` on the first round)."""
        c = Configuration()
        o = Observation(lastOpponentAction=prev, step=self.step)
        self.step += 1
        return self.bot_method(o, c)

def play_match(bot1, bot2, rounds, quiet=True):
    """Play `rounds` rounds between two bots and record what happened.

    Each bot receives the other's previous move (`None` on the first round).
    After every round, each bot's module-level `probs` is collected if it is
    set; a bot that never defines `probs` simply contributes nothing.

    Args:
        bot1, bot2: objects with a `move(prev)` method and a `context` dict.
        rounds: number of rounds to play.
        quiet: accepted for compatibility; currently unused.

    Returns:
        (moves, rewards, probs1, probs2): `moves` is a list of
        `[bot1_move, bot2_move]` pairs, `rewards` is bot1's cumulative score
        starting with 0, and `probs1` / `probs2` are the collected
        distributions of each bot.
    """
    rewards = [0]
    moves = []
    probs1 = []
    probs2 = []
    prev = [None, None]
    for _ in range(rounds):
        prev = [bot1.move(prev[1]), bot2.move(prev[0])]
        for bot, history in ((bot1, probs1), (bot2, probs2)):
            # .get(): a bot without a `probs` global is treated like one that
            # has not computed a distribution yet.
            current = bot.context.get('probs')
            if current is not None:
                history.append(current)
        rewards.append(rewards[-1] + REWARDS[prev[0]][prev[1]])
        moves.append(prev)

    return moves, rewards, probs1, probs2

# The three cube roots of unity; action k (0, 1, 2) maps to basis[k].
basis = np.array(
    [1, cmath.exp(2j * cmath.pi * 1 / 3), cmath.exp(2j * cmath.pi * 2 / 3)]
)

def probs_to_complex(p):
    """Collapse a weight vector over the three actions into one complex number.

    The result is the sum of the `basis` points weighted by the entries of `p`.
    """
    z = p @ basis
    return z

def plot_rewards(df_rewards, title):
    """Plot `df_rewards` as a wide chart titled `title`, without a legend, and show it."""
    options = dict(figsize=(15, 5), title=title, legend=False)
    df_rewards.plot(**options)
    plt.show()

def plot_probs_area(df_probs, title):
    """Show an area chart of the columns of `df_probs`; do nothing when the frame has no rows."""
    if len(df_probs) == 0:
        return

    df_probs.plot(kind='area', figsize=(15, 5), title=title, linewidth=0)
    # Put the legend outside the axes, to the right.
    plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5))
    plt.show()
    
def plot_probs_ternary(df_probs, title):
    """Scatter the rows of `df_probs` on a ternary diagram with corners R, S
    and P; do nothing when the frame has no rows.

    Point colour is the row's relative position in the frame.
    """
    n_points = len(df_probs)
    if n_points == 0:
        return

    fig, tax = ternary.figure(scale=1)
    fig.set_size_inches(7, 7)

    tax.gridlines(multiple=0.1)

    corner_labels = (
        (tax.right_corner_label, 'P'),
        (tax.top_corner_label, 'S'),
        (tax.left_corner_label, 'R'),
    )
    for put_label, text in corner_labels:
        put_label(text, fontsize=20)

    progress = np.arange(n_points) / n_points
    tax.scatter(df_probs.values, linewidth=.5, s=3, c=progress)
    tax.ticks(axis='lbr', multiple=0.1, linewidth=1, tick_formats="%.1f", offset=0.02)

    # Hide the default matplotlib frame and ticks; only the triangle remains.
    tax.get_axes().axis('off')
    tax.clear_matplotlib_ticks()
    plt.title(title, pad=50, fontsize=16)
    tax.boundary(linewidth=0.5)
    tax.show()
    
def plot_probs_product(df_probs_original, df_probs_beater, original_bot_name, beater_bot_name):
    """Plot the per-row product R*P*S for the original bot and, when it has
    any rows, for the beater, on the same axes."""
    product_expr = 'R * P * S'

    original_product = df_probs_original.eval(product_expr)
    original_product.plot(figsize=(15, 5), title='Product of action probabilities (R*P*S)')

    if len(df_probs_beater) > 0:
        beater_product = df_probs_beater.eval(product_expr)
        beater_product.plot()

    labels = [original_bot_name, beater_bot_name]
    plt.legend(labels, loc='center left', bbox_to_anchor=(1.0, 0.5))
    plt.show()
    
def make_plots(original_bot, beater_bot, num_rounds, plot_original=True, plot_beater=True):
    """Play `beater_bot` against `original_bot` for `num_rounds` rounds and
    show all the charts.

    The reward curve is the beater's. `plot_original` and `plot_beater` are
    accepted but not used.
    """
    _, rewards, probs_beater, probs_original = play_match(beater_bot, original_bot, num_rounds)

    columns = list('RPS')
    df_probs_original = pd.DataFrame(probs_original, columns=columns)
    df_probs_beater = pd.DataFrame(probs_beater, columns=columns)

    plot_rewards(pd.DataFrame(rewards, columns=['rewards']), f'{beater_bot.name} reward')

    # Same two frames through both views: area charts first, then ternary.
    for plot in (plot_probs_area, plot_probs_ternary):
        plot(df_probs_original, f'{original_bot.name} action probabilities distribution')
        plot(df_probs_beater, f'{beater_bot.name} action probabilities distribution')

    plot_probs_product(df_probs_original, df_probs_beater, original_bot.name, beater_bot.name)

## geo vs geo_beater without phase shift

In [None]:
# Baseline: geo_beater with its rotation disabled (phase shift 0).
PHASE_SHIFT = 0
original_bot = Bot('geo.py')
beater_bot = Bot('geo_beater.py', name=f'geo_beater with {PHASE_SHIFT:+.3f}')
# Override the module-level `phase_shift` inside the loaded bot's namespace.
beater_bot.context['phase_shift'] = PHASE_SHIFT
make_plots(original_bot, beater_bot, 1000)

## geo vs geo_beater with +1/9 phase shift

In [None]:
# geo_beater rotated by 1/9 of a full turn (the value written in geo_beater.py).
PHASE_SHIFT = 1/9
original_bot = Bot('geo.py')
beater_bot = Bot('geo_beater.py', name=f'geo_beater with {PHASE_SHIFT:+.3f}')
# Override the module-level `phase_shift` inside the loaded bot's namespace.
beater_bot.context['phase_shift'] = PHASE_SHIFT
make_plots(original_bot, beater_bot, 1000)

## geo vs geo_beater with +1/7 phase shift

In [None]:
# geo_beater rotated by 1/7 of a full turn.
PHASE_SHIFT = 1/7
original_bot = Bot('geo.py')
beater_bot = Bot('geo_beater.py', name=f'geo_beater with {PHASE_SHIFT:+.3f}')
# Override the module-level `phase_shift` inside the loaded bot's namespace.
beater_bot.context['phase_shift'] = PHASE_SHIFT
make_plots(original_bot, beater_bot, 1000)

## geo vs geo_beater with +1/11 phase shift

In [None]:
# geo_beater rotated by 1/11 of a full turn.
PHASE_SHIFT = 1/11
original_bot = Bot('geo.py')
beater_bot = Bot('geo_beater.py', name=f'geo_beater with {PHASE_SHIFT:+.3f}')
# Override the module-level `phase_shift` inside the loaded bot's namespace.
beater_bot.context['phase_shift'] = PHASE_SHIFT
make_plots(original_bot, beater_bot, 1000)

## geo vs geo_beater with +1/6 phase shift

In [None]:
# geo_beater rotated by 1/6 of a full turn.
PHASE_SHIFT = 1/6
original_bot = Bot('geo.py')
beater_bot = Bot('geo_beater.py', name=f'geo_beater with {PHASE_SHIFT:+.3f}')
# Override the module-level `phase_shift` inside the loaded bot's namespace.
beater_bot.context['phase_shift'] = PHASE_SHIFT
make_plots(original_bot, beater_bot, 1000)

## otm vs otm_beater without phase shift

In [None]:
# Baseline: otm_beater with its rotation disabled (phase shift 0).
PHASE_SHIFT = 0
original_bot = Bot('otm.py')
beater_bot = Bot('otm_beater.py', name=f'otm_beater with {PHASE_SHIFT:+.3f}')
# Override the module-level `phase_shift` inside the loaded bot's namespace.
beater_bot.context['phase_shift'] = PHASE_SHIFT
make_plots(original_bot, beater_bot, 1000)

## otm vs otm_beater with +1/9 phase shift

In [None]:
# otm_beater rotated by 1/9 of a full turn (the value written in otm_beater.py).
PHASE_SHIFT = 1/9
original_bot = Bot('otm.py')
beater_bot = Bot('otm_beater.py', name=f'otm_beater with {PHASE_SHIFT:+.3f} phase shift')
# Override the module-level `phase_shift` inside the loaded bot's namespace.
beater_bot.context['phase_shift'] = PHASE_SHIFT
make_plots(original_bot, beater_bot, 1000)

## otm vs otm_beater_constant

In [None]:
# Scripted counter to otm: the beater replays a fixed list of moves.
original_bot = Bot('otm.py')
beater_bot = Bot('otm_beater_constant.py')
# NOTE(review): 999 rounds here, unlike the other cells; presumably bounded by
# the length of the scripted `moves` list. Confirm.
make_plots(original_bot, beater_bot, 999)

## otm vs geo

In [None]:
# Two learning bots against each other: otm is passed in the "original" role
# and geo in the "beater" role, so the reward curve is geo's.
bot1 = Bot('otm.py')
bot2 = Bot('geo.py')
make_plots(bot1, bot2, 1000)

## statistical vs statistical_beater

In [None]:
# Scripted counter to the statistical bot: the beater replays a fixed list of moves.
original_bot = Bot('statistical.py')
beater_bot = Bot('statistical_beater.py')
make_plots(original_bot, beater_bot, 1000)