In [None]:
%%writefile my_agent.py

from collections import defaultdict
import operator
import random
import numpy as np
from sklearn.neural_network import MLPClassifier
import operator
import numpy as np
import cmath
from typing import List
from collections import namedtuple
import traceback
import sys


# The three cube roots of unity: action k (0, 1, 2) is represented by the
# complex number exp(2*pi*i*k/3), so a probability vector over the three
# actions maps to a single complex number via `p @ basis`.
basis = np.array([cmath.exp(2j * cmath.pi * k / 3) for k in range(3)])


# idx:    position in the sequence immediately after an earlier occurrence
# length: how many trailing elements of the sequence that occurrence matches
HistMatchResult = namedtuple("HistMatchResult", "idx length")


def find_all_longest(seq, max_len=None) -> List[HistMatchResult]:
    """
    Find every earlier position where the tail of `seq` re-occurs.

    For each anchor position from ``len(seq) - 2`` down to 1, the elements
    ending at the anchor are compared backwards against the elements ending
    at the last position of `seq`. Every anchor with at least one matching
    element yields ``HistMatchResult(anchor + 1, length)``.

    Args:
        seq: indexable sequence of comparable items.
        max_len: optional cap on the reported match length. A match is never
            reported as longer than `max_len` (the previous implementation
            could report ``max_len + 1``).

    Returns:
        Matches sorted by decreasing length; matches of equal length keep
        their discovery order (most recent anchor first). Empty list when
        nothing matches or `seq` has fewer than three elements.
    """
    matches = []
    last = len(seq) - 1

    # Anchor 0 is intentionally not examined, mirroring the original scan range.
    for anchor in range(last - 1, 0, -1):
        length = 0

        while (
            anchor - length >= 0
            and (max_len is None or length < max_len)
            and seq[last - length] == seq[anchor - length]
        ):
            length += 1

        if length > 0:
            matches.append(HistMatchResult(anchor + 1, length))

    # list.sort is stable, so equal-length matches stay in discovery order.
    matches.sort(key=operator.attrgetter("length"), reverse=True)

    return matches


def probs_to_complex(p):
    """Collapse a weight vector over the three actions into one complex number
    by taking its matrix product with `basis`."""
    z = np.matmul(p, basis)
    return z


def _fix_probs(probs):
    """
    Put probs back into triangle. Sometimes this happens due to rounding errors or if you
    use complex numbers which are outside the triangle.
    """
    if min(probs) < 0:
        probs -= min(probs)

    probs /= sum(probs)

    return probs


def complex_to_probs(z):
    """Turn a complex number into a probability vector over the three actions.

    Each entry is derived from the real part of `z` times the conjugate of the
    corresponding `basis` element; the result is then passed through
    `_fix_probs`.
    """
    projections = (z * basis.conjugate()).real
    return _fix_probs((2 * projections + 1) / 3)


def z_from_action(action):
    """Return the `basis` element that represents `action` (0, 1 or 2)."""
    z = basis[action]
    return z


def sample_from_z(z):
    """Draw one action (0, 1 or 2) from the distribution encoded by `z`."""
    return np.random.choice(3, p=complex_to_probs(z))


def bound(z):
    """Round-trip `z` through `complex_to_probs` and back to a complex number,
    so the result corresponds to a vector that `_fix_probs` has normalised."""
    probs = complex_to_probs(z)
    return probs_to_complex(probs)


def norm(z):
    """Scale `z` to unit magnitude and then pass it through `bound`.

    A zero input has no direction to preserve, so it is passed to `bound`
    unchanged instead of being divided by zero (which raises for Python
    complex numbers and produces NaN for NumPy scalars).
    """
    magnitude = abs(z)
    if magnitude == 0:
        return bound(z)
    return bound(z / magnitude)


class Pred:
    """Online predictor that learns a complex multiplier between a feature and
    the target that followed it.

    `offset` is an exponential moving average (rate `alpha`) of
    ``target * conj(feature)``; a prediction is ``feature * offset``.
    """

    def __init__(self, *, alpha):
        self.alpha = alpha
        self.offset = 0
        # Feature used for the most recent prediction; None until predict() runs.
        self.last_feat = None

    def train(self, target):
        """Fold the observed `target` for the last predicted feature into `offset`.

        Does nothing when no prediction has been made yet.
        """
        if self.last_feat is None:
            return

        observed = target * self.last_feat.conjugate()
        self.offset = (1 - self.alpha) * self.offset + self.alpha * observed

    def predict(self, feat):
        """
        Predict from `feat`, an arbitrary feature with a probability on 0,1,2:
        anything that could serve as a useful anchor pointing in some sensible
        direction.

        The feature is normalised with `norm`, remembered for the next
        `train` call, and multiplied by the learned offset.
        """
        self.last_feat = norm(feat)

        # The learned offset captures the correlation between past targets and
        # past features; applying it to the new feature is the whole prediction.
        return self.last_feat * self.offset
    
    
class BaseAgent:
    """Callable agent skeleton that records both players' moves and outcomes.

    Subclasses override `action()` to choose a move from the recorded history.
    """

    def __init__(self):
        self.my_hist = []       # actions returned by this agent
        self.opp_hist = []      # opponent actions, one per completed round
        self.my_opp_hist = []   # (my, opp) pairs, one per completed round
        self.outcome_hist = []  # 1 / 0 / -1 per round, from (my - opp) % 3
        self.step = None        # last non-zero step seen

    def __call__(self, obs, conf):
        """Process one observation and return the next action.

        `obs` must expose `.step` and, after step 0, `.lastOpponentAction`.
        Any exception is printed to stderr and re-raised.
        """
        try:
            if obs.step == 0:
                # No history yet: open with a uniformly random move.
                opening = np.random.choice(3)
                self.my_hist.append(opening)
                return opening

            self.step = obs.step

            opp = int(obs.lastOpponentAction)
            my = self.my_hist[-1]

            self.my_opp_hist.append((my, opp))
            self.opp_hist.append(opp)

            # (my - opp) % 3: 0 -> 0, 1 -> 1, 2 -> -1
            self.outcome_hist.append((0, 1, -1)[(my - opp) % 3])

            chosen = self.action()
            self.my_hist.append(chosen)

            return chosen
        except Exception:
            traceback.print_exc(file=sys.stderr)
            raise

    def action(self):
        """Hook for subclasses; the base implementation returns None."""
        return None


class Agent(BaseAgent):
    """History-matching agent: finds where the recent (my, opp) history occurred
    before, uses the opponent move found there as a feature, and lets a `Pred`
    map that feature to a move distribution."""

    def __init__(self, alpha=0.01):
        super().__init__()

        self.predictor = Pred(alpha=alpha)

    def action(self):
        """Train on the last round, then sample a move from the new prediction."""
        self.train()

        return sample_from_z(self.preds())

    def train(self):
        """Teach the predictor the move that would have beaten the opponent's
        last action, i.e. ``(opp + 1) % 3``."""
        winning_reply = (self.opp_hist[-1] + 1) % 3
        self.predictor.train(z_from_action(winning_reply))

    def preds(self):
        """Return the predictor's complex output for the best history match,
        or 0 when the history contains no match."""
        matches = find_all_longest(self.my_opp_hist, max_len=20)

        if not matches:
            return 0

        # The first entry is the longest match; its idx addresses the opponent
        # move taken as the feature.
        longest = matches[0]
        feat = z_from_action(self.opp_hist[longest.idx])

        return self.predictor.predict(feat)
    
    

class GeomAgent:
    """Adapter exposing `Agent` through the `action(state)` strategy interface."""

    def __init__(self):
        self.agent = Agent()

    def action(self, state):
        """Forward the observation and configuration stored in `state` to the
        wrapped agent and return its move."""
        obs, conf = state["obs"], state["conf"]
        return self.agent(obs, conf)

#def call_agent(obs, conf):
#    return agent(obs, conf)

# Probability with which my_agent replaces its chosen move by a uniformly random one.
RANDOM_PROBA = 0.0 / 3
# Number of past rounds fed to the MLP strategies as one training sample.
WINDOW_SIZE = 20
# Default hidden layer sizes for MLPAgent.
HIDDEN_LAYERS = (100, 100)


def make_X(start, stop):
    """Build a single-row feature matrix from the global move history.

    The slice ``[start:stop]`` of my moves and of the opponent's moves is
    one-hot encoded (3 columns per move), flattened, and concatenated with my
    moves first.
    """
    one_hot = np.eye(3)
    encoded = [
        one_hot[np.array(state["moves"][player][start:stop])].ravel()
        for player in ("my", "his")
    ]

    return np.array([np.hstack(encoded)])

class MLPAgent:
    """Strategy that incrementally trains an MLPClassifier to predict the
    opponent's next move from the last WINDOW_SIZE rounds."""

    def __init__(self, layers=HIDDEN_LAYERS, stochastic=True):
        self.clf = MLPClassifier(hidden_layer_sizes=layers)
        self.stochastic = stochastic

    def action(self, state):
        """Return a move (0, 1 or 2).

        While my history has at most WINDOW_SIZE moves a random move is played.
        Afterwards the classifier is updated with the window that preceded the
        opponent's latest move, then queried with the most recent window.
        """
        if len(state["moves"]["my"]) <= WINDOW_SIZE:
            return random.choice([0, 1, 2])

        # Training sample: the window ending just before the opponent's last
        # move, labelled with that move.
        train_X = make_X(-WINDOW_SIZE - 1, -1)
        train_y = np.array([state["moves"]["his"][-1]])
        self.clf.partial_fit(train_X, train_y, classes=np.array([0, 1, 2]))

        latest_X = make_X(-WINDOW_SIZE, None)
        his_proba = np.array(self.clf.predict_proba(latest_X))[0]

        # Expected reward of each of my moves under the predicted distribution.
        EVs = [rewards[name].dot(his_proba) for name in ("r", "p", "s")]

        if not self.stochastic:
            return int(np.argmax(EVs))
        return random.choices([0, 1, 2], weights=EVs, k=1)[0]

# Shared mutable game state.
#   "moves": per-name move lists ("my", "his", one per strategy / meta strategy).
#   Every other key is a running statistic, kept once for the opponent and once
#   (suffix "_my") for my own moves. The numeric suffix names the decay factor
#   applied on each update (1 = plain counts, 095 = 0.95, ...).
state = {"moves": defaultdict(list)}
state.update(
    {
        f"{stat}{owner}": np.zeros(shape)
        for owner in ("", "_my")
        for stat, shape in (
            ("freqs_1", 3),
            ("freqs_095", 3),
            ("rolls_1", 3),
            ("rolls_095", 3),
            ("mc_1", (3, 3)),
            ("mc_098", (3, 3)),
            ("mc_095", (3, 3)),
            ("mc_085", (3, 3)),
            ("tm_1", (3, 3)),
            ("tm_095", (3, 3)),
        )
    }
)


# Reward of playing rock / paper / scissors, indexed by the opponent's move:
# 2 = win, 1 = tie, 0 = loss.
rewards = dict(
    r=np.array([1, 0, 2]),
    p=np.array([2, 1, 0]),
    s=np.array([0, 2, 1]),
)


# Registries: name -> strategy object, and name -> MetaStrategy object.
strategies = {}
meta_strategies = {}

def getRoll(first, second):
    """Return how far `second` is rotated forward from `first` in the cycle
    0 -> 1 -> 2 -> 0 (a value in 0..2 for moves in 0..2)."""
    delta = second - first
    if delta == 0:
        return 0
    # A negative difference wraps around the three-move cycle.
    return delta if delta > 0 else delta + 3

def applyRoll(act, roll):
    """Rotate `act` forward by `roll` steps modulo 3.

    A falsy `roll` returns `act` exactly as given, without the modulo.
    """
    return (act + roll) % 3 if roll else act

def result(my, his):
    """Score a round from the first argument's point of view:
    2 = win, 1 = tie, 0 = loss."""
    # Roll from `his` to `my`: 0 means same move, 1 means `my` is one step
    # ahead of `his`, 2 means one step behind.
    score_by_roll = {0: 1, 1: 2, 2: 0}
    return score_by_roll[getRoll(his, my)]
    
class Rock:
    """Constant strategy: always plays move 0."""

    def action(self, state):
        move = 0
        return move

class Paper:
    """Constant strategy: always plays move 1."""

    def action(self, state):
        move = 1
        return move

class Scissors:
    """Constant strategy: always plays move 2."""

    def action(self, state):
        move = 2
        return move

class Random:
    """Strategy that ignores the state and plays a uniformly random move."""

    def action(self, state):
        moves = [0, 1, 2]
        return random.choice(moves)

class SameRoll:
    """Strategy that rotates my own previous move by a fixed roll."""

    def __init__(self, roll):
        self.roll = roll

    def action(self, state):
        my_last = state["moves"]["my"][-1]
        return applyRoll(my_last, self.roll)

class Freq:
    """Strategy driven by a move-frequency vector stored in `state[key]`."""

    def __init__(self, key, stochastic=True):
        self.key = key
        self.stochastic = stochastic

    def action(self, state):
        """Return a move weighted by (or, if not stochastic, maximising) the
        expected reward against the frequency vector."""
        counts = state[self.key]
        ev_freq = [rewards[name].dot(counts) for name in ("r", "p", "s")]

        if not self.stochastic:
            return int(np.argmax(ev_freq))
        return random.choices([0, 1, 2], weights=ev_freq, k=1)[0]

class Roll:
    """Strategy driven by a roll-frequency vector stored in `state[key]`.

    The vector counts how often the tracked player rotated by 0, 1 or 2 steps
    between consecutive moves; shifting it by the opponent's last move turns
    it into weights over the opponent's next move.
    """

    def __init__(self, key, stochastic=True):
        self.key = key
        self.stochastic = stochastic

    def action(self, state):
        """Return a move weighted by (or, if not stochastic, maximising) the
        expected reward against the predicted next-move weights.

        When no roll has been recorded yet every weight is zero;
        `random.choices` rejects that on Python 3.9+, so a uniformly random
        move is played instead.
        """
        mat = state[self.key]
        hisprev = state["moves"]["his"][-1]
        # hismat[m] = count of the roll that leads from hisprev to move m.
        hismat = np.roll(mat, hisprev)
        ev_freq = [rewards[name].dot(hismat) for name in ("r", "p", "s")]

        if not self.stochastic:
            return int(np.argmax(ev_freq))
        if sum(ev_freq) <= 0:
            # No statistics yet: nothing to weight by.
            return random.choice([0, 1, 2])
        return random.choices([0, 1, 2], weights=ev_freq, k=1)[0]

class RollMirror:
    """Strategy that rotates the opponent's previous move by a fixed roll."""

    def __init__(self, roll):
        self.roll = roll

    def action(self, state):
        his_last = state["moves"]["his"][-1]
        return applyRoll(his_last, self.roll)

class RollFreq:
    """Strategy driven by a 3x3 matrix in `state[key]` whose rows are selected
    by the outcome of the last round and whose columns count rolls."""

    def __init__(self, key, stochastic=True):
        self.key = key
        self.stochastic = stochastic

    def action(self, state):
        """Return a move weighted by (or, if not stochastic, maximising) the
        expected reward against the roll counts recorded after the last
        round's outcome.

        The selected row is all zeros until that outcome has been followed by
        at least one recorded roll; `random.choices` rejects all-zero weights
        on Python 3.9+, so a uniformly random move is played in that case.
        """
        mat_mc = state[self.key]
        hisprev = state["moves"]["his"][-1]
        # Outcome of the last round with the opponent's move as first argument.
        rst = int(result(hisprev, state["moves"]["my"][-1]))
        # Shift roll counts so that index m weights "next move is m".
        hismat = np.roll(mat_mc[rst], hisprev)
        ev_freq = [rewards[name].dot(hismat) for name in ("r", "p", "s")]

        if not self.stochastic:
            return int(np.argmax(ev_freq))
        if sum(ev_freq) <= 0:
            # This outcome has no statistics yet: nothing to weight by.
            return random.choice([0, 1, 2])
        return random.choices([0, 1, 2], weights=ev_freq, k=1)[0]
    
class TM:
    """Strategy driven by a 3x3 transition matrix in `state[key]`
    (row = previous move, column = following move)."""

    def __init__(self, key, stochastic=True):
        self.key = key
        self.stochastic = stochastic

    def action(self, state):
        """Return a move weighted by (or, if not stochastic, maximising) the
        expected reward against the transition counts out of the opponent's
        last move.

        That row is all zeros until a transition out of the move has been
        recorded; `random.choices` rejects all-zero weights on Python 3.9+,
        so a uniformly random move is played in that case.
        """
        mat_mc = state[self.key]
        hismat = mat_mc[state["moves"]["his"][-1]]
        ev_freq = [rewards[name].dot(hismat) for name in ("r", "p", "s")]

        if not self.stochastic:
            return int(np.argmax(ev_freq))
        if sum(ev_freq) <= 0:
            # No transition out of this move seen yet: nothing to weight by.
            return random.choice([0, 1, 2])
        return random.choices([0, 1, 2], weights=ev_freq, k=1)[0]
    
class CounterStrategy:
    """Wrapper that rotates another strategy's move by a fixed roll."""

    def __init__(self, strategy, roll=2):
        self.strategy = strategy
        self.roll = roll

    def action(self, state):
        """Ask the wrapped strategy for its move and return it rotated by `roll`."""
        inner_move = self.strategy.action(state)
        return applyRoll(inner_move, self.roll)




# The triple-quoted block below is a bare string: these registrations are disabled.
'''
strategies["rock"] = Rock()
strategies["paper"] = Paper()
strategies["scissors"] = Scissors()
strategies["random"] = Random()
strategies["sameroll0"] = SameRoll(0)
strategies["sameroll1"] = SameRoll(1)
strategies["sameroll2"] = SameRoll(2)
strategies["freq1"] = Freq("freqs_1")
strategies["freq095"] = Freq("freqs_095")

strategies["rollmirror0"] = RollMirror(0)
strategies["rollmirror1"] = RollMirror(1)
strategies["rollmirror2"] = RollMirror(2)
'''

# Active strategies. Insertion order is kept exactly as before because the
# meta strategies iterate over this dict.
strategies.update(
    {
        # Opponent statistics.
        "rolls1": Roll("rolls_1"),
        "rolls095": Roll("rolls_095"),
        "rollfreq1": RollFreq("mc_1"),
        "rollfreq095": RollFreq("mc_095"),
        "rollfreq098": RollFreq("mc_098"),
        "rollfreq085": RollFreq("mc_085"),
        "tm1": TM("tm_1"),
        "tm095": TM("tm_095"),
        #strategies["random"] = Random()
        #strategies["rollfreq095"] = RollFreq("mc_095")
        "freq095": Freq("freqs_095"),
        "freq1": Freq("freqs_1"),
        # Neural-network predictors with different hidden layer layouts.
        "mlp": MLPAgent(),
        "mlp2": MLPAgent([100]),
        "mlp3": MLPAgent([1000]),
        "mlp4": MLPAgent([100, 100, 100]),
        # History-matching agent.
        "geom": GeomAgent(),
        # Strategies fed with my own statistics, rotated by one step.
        "counter_freq095": CounterStrategy(Freq("freqs_095_my"), 1),
        "counter_freq1": CounterStrategy(Freq("freqs_1_my"), 1),
        "counter_tm1": CounterStrategy(TM("tm_1_my"), 1),
        "counter_tm095": CounterStrategy(TM("tm_095_my"), 1),
        "counter_rollfreq_1": CounterStrategy(RollFreq("mc_1_my"), 1),
        "counter_rollfreq_095": CounterStrategy(RollFreq("mc_095_my"), 1),
    }
)



'''
for k in list(strategies.keys()):
    if k == "random":
        continue
    for roll in [1, 2]:
        k_new = k + f"counter_{roll}"
        strategies[k_new] = CounterStrategy(strategies[k], roll)
'''

class MetaStrategy:
    """Selects which registered strategy to follow, based on how each one
    would have scored against the opponent over a recent window."""

    def __init__(self, nitems, EMA=0.85, meta_meta=False, verbose=False):
        self.nitems = nitems
        # Per-round weights, oldest first: EMA**(nitems-1), ..., EMA, 1.
        self.beta = EMA ** np.arange(nitems)[::-1]
        # A meta-meta strategy chooses among meta strategies instead.
        self.strategies = meta_strategies if meta_meta else strategies
        self.verbose = verbose

    def best(self, state):
        """Return the name of a candidate, drawn with probability proportional
        to its non-negative weighted income.

        Income is the weighted sum of ``result(move, opp_move) - 1`` over the
        window. Each candidate's most recent move is excluded because the
        opponent has not answered it yet. If no candidate has positive income
        the draw is uniform.
        """
        opp_moves = state["moves"]["his"]
        incomes = {}
        for name in self.strategies.keys():
            played = state["moves"][name][:-1]

            window = min(len(played), self.nitems)
            window = min(len(opp_moves), window)

            recent = zip(
                played[-window:],
                opp_moves[-window:],
                self.beta[-window:],
            )
            incomes[name] = sum(
                [(result(mine, theirs) - 1) * weight for (mine, theirs, weight) in recent]
            )

        names = list(incomes.keys())
        scores = np.clip(np.array([incomes[name] for name in names]), 0, None)
        total = scores.sum()
        if total:
            weights = scores / scores.sum()
        else:
            weights = np.ones(len(scores))

        chosen = random.choices(names, weights=weights, k=1)[0]
        if self.verbose:
            print(chosen)
        return chosen

    
# One meta strategy per look-back window, all with decay 0.99.
verbose = False
for nitems in (5, 10, 20, 50, 100, 200, 300, 500, 1000):
    ms = MetaStrategy(nitems, EMA=0.99, verbose=verbose)
    meta_strategies[f"meta_{nitems}"] = ms

# Top-level selector that chooses among the meta strategies above.
mms = MetaStrategy(nitems=1000, EMA=0.99, meta_meta=True)

def processOppActions(act):
    """Record the opponent's move `act` and update every opponent statistic.

    Updates, in the global `state`:
      * "freqs_*": move counts,
      * "rolls_*": counts of the rotation between the opponent's last two moves,
      * "mc_*":    roll counts, one row per outcome of the previous round,
      * "tm_*":    transition counts (previous move -> current move).
    Keys ending in "_1" are plain counts; the others decay by the factor in
    their name before the new observation is added.
    """
    his_moves = state["moves"]["his"]
    his_moves.append(act)

    addfreq = np.zeros(3)
    addfreq[act] = 1
    state["freqs_1"] = state["freqs_1"] + addfreq
    state["freqs_095"] = state["freqs_095"] * 0.95 + addfreq

    # The remaining statistics describe a transition, so two moves are needed.
    if len(his_moves) > 1:
        prev_move, cur_move = his_moves[-2], his_moves[-1]

        roll = getRoll(prev_move, cur_move)
        addroll = np.zeros(3)
        addroll[roll] = 1
        state["rolls_1"] = state["rolls_1"] + addroll
        state["rolls_095"] = state["rolls_095"] * 0.95 + addroll

        # Outcome of the round that preceded this transition, with the
        # opponent's move as first argument of result().
        rst = int(result(prev_move, state["moves"]["my"][-2]))

        addmc = np.zeros((3, 3))
        addmc[rst, roll] = 1
        state["mc_1"] = state["mc_1"] + addmc
        state["mc_098"] = state["mc_098"] * 0.98 + addmc
        state["mc_095"] = state["mc_095"] * 0.95 + addmc
        state["mc_085"] = state["mc_085"] * 0.85 + addmc

        addtrans = np.zeros((3, 3))
        addtrans[prev_move, cur_move] = 1
        state["tm_1"] = state["tm_1"] + addtrans
        # Decay the 0.95 matrix itself; it used to be rebuilt from "tm_1",
        # which made it a scaled copy of the undecayed counts.
        state["tm_095"] = state["tm_095"] * 0.95 + addtrans


def processMyActions():
    """Update every "_my" statistic from my most recent move.

    Mirrors processOppActions for my own moves: "freqs_*_my" move counts,
    "rolls_*_my" rotation counts, "mc_*_my" roll counts per outcome row, and
    "tm_*_my" transition counts. Keys ending in "_1_my" are plain counts; the
    others decay by the factor in their name before the new observation is
    added.
    """
    my_moves = state["moves"]["my"]
    his_moves = state["moves"]["his"]

    act = my_moves[-1]
    addfreq = np.zeros(3)
    addfreq[act] = 1
    state["freqs_1_my"] = state["freqs_1_my"] + addfreq
    state["freqs_095_my"] = state["freqs_095_my"] * 0.95 + addfreq

    if len(my_moves) > 1 and len(his_moves) > 1:
        prev_move, cur_move = my_moves[-2], my_moves[-1]

        roll = getRoll(prev_move, cur_move)
        addroll = np.zeros(3)
        addroll[roll] = 1
        state["rolls_1_my"] = state["rolls_1_my"] + addroll
        state["rolls_095_my"] = state["rolls_095_my"] * 0.95 + addroll

        # NOTE(review): my_agent appends my move before calling this function,
        # so my list is one longer than the opponent's here and my[-2] is
        # paired with his[-2] from one round earlier. Confirm whether his[-1]
        # was intended; behaviour is left unchanged.
        rst = int(result(prev_move, his_moves[-2]))

        addmc = np.zeros((3, 3))
        addmc[rst, roll] = 1
        state["mc_1_my"] = state["mc_1_my"] + addmc
        state["mc_098_my"] = state["mc_098_my"] * 0.98 + addmc
        state["mc_095_my"] = state["mc_095_my"] * 0.95 + addmc
        state["mc_085_my"] = state["mc_085_my"] * 0.85 + addmc

        addtrans = np.zeros((3, 3))
        addtrans[prev_move, cur_move] = 1
        state["tm_1_my"] = state["tm_1_my"] + addtrans
        # Decay the 0.95 matrix itself; it used to be rebuilt from "tm_1_my",
        # which made it a scaled copy of the undecayed counts.
        state["tm_095_my"] = state["tm_095_my"] * 0.95 + addtrans
def my_agent(obs, conf):
    """Agent entry point: return the move (0, 1 or 2) for this step.

    Step 0 plays a random move. On later steps the opponent's last move is
    recorded, every strategy proposes a move, every meta strategy adopts the
    latest move of the strategy it selects, and the top-level selector `mms`
    picks which meta strategy to follow. With probability RANDOM_PROBA the
    result is replaced by a uniformly random move.
    """
    state["obs"] = obs
    state["conf"] = conf
    moves = state["moves"]

    if obs["step"] == 0:
        opening = random.choice([0, 1, 2])
        moves["my"].append(opening)
        # Let the history-matching agent see step 0 so it records its own
        # opening move; the returned value is not used.
        strategies["geom"].action(state)
        processMyActions()
        return opening

    processOppActions(obs["lastOpponentAction"])

    for name, strategy in strategies.items():
        proposal = strategy.action(state)
        moves[name].append(proposal)

    for meta_name, meta in meta_strategies.items():
        followed = meta.best(state)
        adopted = moves[followed][-1]
        moves[meta_name].append(adopted)

    followed_meta = mms.best(state)
    bestmove = moves[followed_meta][-1]

    # Optional exploration: swap in a random move with probability RANDOM_PROBA.
    random_move = int(np.random.choice(range(3)))
    keep_or_swap = [1 - RANDOM_PROBA, RANDOM_PROBA]
    bestmove = int(np.random.choice([bestmove, random_move], p=keep_or_swap))

    moves["my"].append(bestmove)
    processMyActions()
    return bestmove


In [None]:
%%writefile simple_rolling.py

def my_agent(obs, conf):
    """Cycle through the moves 0, 1, 2 in step order."""
    step = obs["step"]
    return step % 3

In [None]:
%%writefile rolling.py

def my_agent(obs, conf):
    """Cycle through the moves, skipping ahead by one extra position for every
    ten completed steps."""
    step = obs["step"]
    extra_shift = int(step / 10)
    return (step + extra_shift) % 3

In [None]:
from kaggle_environments import evaluate, make, utils
# Play the meta agent against the rolling opponent and render the match inline.
env = make("rps", debug=True)
agent_files = ["my_agent.py", "rolling.py"]
env.run(agent_files)
env.render(mode="ipython", width=500, height=450)