This notebook isn't my highest-scoring one (current 1079.4), but its base architecture is almost the same as the highest one's.
The difference is that this notebook uses only [RPS Geometry🦇](https://www.kaggle.com/superant/rps-geometry-silver-rank-by-minimal-logic) agents and is much cleaner.

This notebook borrows many ideas from [Going meta with Kumoko](https://www.kaggle.com/chankhavu/going-meta-with-kumoko) and [RPS Geometry🦇](https://www.kaggle.com/superant/rps-geometry-silver-rank-by-minimal-logic).
Thanks!


In [None]:
%%writefile geometry.py

import operator
import numpy as np
import cmath
from typing import List
from collections import namedtuple
import traceback
import sys
import random


# Moves are encoded as 0 = Rock, 1 = Paper, 2 = Scissors.
# BEAT[m] is the move that defeats m; CEDE[m] is the move that loses to m.
BEAT = [(move + 1) % 3 for move in range(3)]
CEDE = [(move + 2) % 3 for move in range(3)]

# Round outcome keyed by "<first move><second move>" letters:
# 1 when the first move wins, -1 when it loses, 0 on a draw.
who_win = {
    "RR": 0, "RP": -1, "RS": 1,
    "PR": 1, "PP": 0, "PS": -1,
    "SR": -1, "SP": 1, "SS": 0,
}

# The three cube roots of unity; move k is represented by basis[k].
basis = np.array([cmath.exp(2j * cmath.pi * k / 3) for k in range(3)])


# idx: position right after a past occurrence of the current suffix;
# length: how many trailing elements of the sequence that occurrence matched.
HistMatchResult = namedtuple("HistMatchResult", ["idx", "length"])


def find_all_longest(seq, max_len=None) -> List[HistMatchResult]:
    """
    Locate earlier positions whose preceding elements repeat the tail of `seq`.

    Every candidate end position from `len(seq) - 2` down to 1 is compared
    backwards against the end of `seq`. Each candidate with at least one
    matching element yields a `HistMatchResult(idx, length)` where `idx` is the
    index following the candidate. Comparison of a candidate stops as soon as
    its match length exceeds `max_len` (so a reported length can be
    `max_len + 1`); `max_len=None` means no cap.

    The result is ordered by decreasing length; ties keep the more recent
    candidate first.
    """
    matches = []

    for start in range(len(seq) - 2, 0, -1):
        run = 0
        pos = start

        # Walk backwards in lockstep from the candidate and from the tail.
        while pos >= 0 and seq[-1 - run] == seq[pos]:
            run += 1
            pos -= 1

            if max_len is not None and run > max_len:
                break

        if run:
            matches.append(HistMatchResult(idx=start + 1, length=run))

    # list.sort is stable, also with reverse=True, so ties keep scan order.
    matches.sort(key=operator.attrgetter("length"), reverse=True)

    return matches


def probs_to_complex(p):
    """Map a weight vector over the three moves to the complex plane.

    The result is the weighted sum of the module-level `basis` points.
    """
    return np.matmul(p, basis)


def _fix_probs(probs):
    """
    Put probs back into triangle. Sometimes this happens due to rounding errors or if you
    use complex numbers which are outside the triangle.
    """
    if min(probs) < 0:
        probs -= min(probs)

    probs /= sum(probs)

    return probs


def complex_to_probs(z):
    """Convert a complex number into a probability vector over the three moves.

    Each entry is derived from the real part of `z` multiplied by the conjugate
    of the corresponding `basis` point; the result is passed through
    `_fix_probs` so it is non-negative and sums to 1.
    """
    projections = (z * basis.conjugate()).real
    return _fix_probs((2 * projections + 1) / 3)


def z_from_action(action):
    """Return the `basis` point that represents the move index `action`."""
    point = basis[action]
    return point


def sample_from_z(z, deterministic=False):
    """Pick a move (0, 1 or 2) from the distribution encoded by `z`.

    With `deterministic=True` the most probable move is returned; otherwise a
    move is drawn at random according to `complex_to_probs(z)`.

    Always returns a plain Python `int`.
    """
    probs = complex_to_probs(z)
    if deterministic:
        return int(np.argmax(probs))
    # np.random.choice yields a NumPy integer; cast it so both branches hand
    # back the same type.
    return int(np.random.choice(3, p=probs))

def bound(z):
    """Round-trip `z` through `complex_to_probs` and back.

    Because `complex_to_probs` repairs invalid probabilities, the returned
    complex number corresponds to a valid probability vector.
    """
    valid_probs = complex_to_probs(z)
    return probs_to_complex(valid_probs)


def norm(z):
    """Scale `z` to unit magnitude, then pass it through `bound`.

    A zero `z` has no direction, so it is passed to `bound` unscaled instead
    of being divided by zero.
    """
    magnitude = abs(z)
    if magnitude == 0:
        return bound(z)
    return bound(z / magnitude)


class Pred:
    """Online predictor that learns a complex multiplier between feature and target.

    `offset` is an exponential moving average (rate `alpha`) of
    `target * conj(feature)` over the features previously passed to `predict`.
    """

    def __init__(self, *, alpha):
        self.alpha = alpha
        self.offset = 0
        # Normalised feature from the latest predict() call; None until then.
        self.last_feat = None

    def train(self, target):
        """Blend the multiplier observed for the last feature into `offset`.

        Does nothing when `predict` has not been called yet.
        """
        if self.last_feat is None:
            return

        observed = target * self.last_feat.conjugate()
        self.offset = (1 - self.alpha) * self.offset + self.alpha * observed

    def predict(self, feat):
        """
        Return the normalised `feat` multiplied by the learned offset.

        `feat` is an arbitrary feature with a probability on 0,1,2 - anything
        that could serve as a sensible anchor direction. The normalised feature
        is remembered for the next `train` call.
        """
        self.last_feat = norm(feat)
        return self.last_feat * self.offset
    
    
class BaseAgent:
    """Bookkeeping shared by agents: move histories and per-round outcomes.

    Subclasses override `action()` to choose a move; the default returns None.
    """

    def __init__(self):
        self.step = None
        self.my_hist = []
        self.opp_hist = []
        self.my_opp_hist = []
        self.outcome_hist = []

    def __call__(self, obs, conf):
        """Record the previous round (if any) and return the next move.

        On step 0 a uniformly random move is played. Afterwards the opponent's
        last move is read from `obs.lastOpponentAction`, the histories are
        extended and `action()` decides. Any exception is printed to stderr
        and re-raised.
        """
        try:
            if obs.step != 0:
                self.step = obs.step

                theirs = int(obs.lastOpponentAction)
                mine = self.my_hist[-1]

                self.my_opp_hist.append((mine, theirs))
                self.opp_hist.append(theirs)

                # (mine - theirs) mod 3: 0 -> draw, 1 -> win (+1), 2 -> loss (-1)
                self.outcome_hist.append((0, 1, -1)[(mine - theirs) % 3])

                chosen = self.action()
            else:
                chosen = np.random.choice(3)

            self.my_hist.append(chosen)
            return chosen
        except Exception:
            traceback.print_exc(file=sys.stderr)
            raise

    def action(self):
        """Choose the next move; meant to be overridden."""
        return None

    def set_my_action(self, my_last_action):
        """Overwrite the most recently recorded own move."""
        self.my_hist[-1] = my_last_action

class Agent(BaseAgent):
    """History-matching agent driven by a `Pred` predictor.

    alpha: learning rate handed to `Pred`.
    max_len: cap forwarded to `find_all_longest` (None = no cap).
    deterministic: forwarded to `sample_from_z`.
    """

    def __init__(self, alpha=0.01, max_len=20, deterministic=False):
        super().__init__()
        self.deterministic = deterministic
        self.max_len = max_len
        self.predictor = Pred(alpha=alpha)

    def action(self):
        """Train on the last round, then sample a move from the new prediction."""
        self.train()
        return sample_from_z(self.preds(), self.deterministic)

    def train(self):
        """Feed the predictor the move that would have beaten the opponent's last move."""
        beats_last_opp = (self.opp_hist[-1] + 1) % 3
        self.predictor.train(z_from_action(beats_last_opp))

    def preds(self):
        """Predict from the opponent move that followed the best history match.

        Returns 0 when no earlier position matches the end of the joint history.
        """
        matches = find_all_longest(self.my_opp_hist, max_len=self.max_len)

        if not matches:
            return 0

        best = matches[0]
        anchor = z_from_action(self.opp_hist[best.idx])

        return self.predictor.predict(anchor)
    
class AttrDict(dict):
    """A dict whose keys can also be read as attributes (`d.key`)."""

    def __getattr__(self, name):
        # __getattr__ must signal a missing attribute with AttributeError;
        # leaking KeyError breaks hasattr() and getattr(obj, name, default).
        try:
            return self[name]
        except KeyError:
            raise AttributeError(name) from None
    
class AgentWrapper:
    """Adapts an agent to the three-argument call used by the ensemble.

    The wrapped agent must provide `set_my_action` and be callable with
    `(observation, configuration)`.
    """

    def __init__(self, agent):
        self.agent = agent

    def __call__(self, observation, configuration, my_last_action):
        """Tell the agent which move was really played, then ask for its next one."""
        already_played = observation.step > 0
        if already_played:
            self.agent.set_my_action(my_last_action)
        proposal = self.agent(observation, configuration)
        return proposal

class MirrorAgentWrapper:
    """Runs an agent from the opponent's point of view and counters its choice.

    After step 0 the wrapped agent is shown our last move as its
    "opponent" move and the opponent's last move as its own. The move it then
    proposes is answered with the move that beats it.
    """

    def __init__(self, agent):
        self.agent = agent

    def __call__(self, observation, configuration, my_last_action):
        mirrored = observation
        if observation.step != 0:
            # Swap roles: our move becomes what the wrapped agent "faced".
            mirrored = AttrDict({'step': observation.step, 'lastOpponentAction': my_last_action})
            self.agent.set_my_action(observation.lastOpponentAction)
        simulated_move = self.agent(mirrored, configuration)
        return BEAT[simulated_move]

class GeobotBeater:
    """Agent that models a history-matching predictor with the roles swapped.

    It keeps the joint history as (opponent move, own move) and tracks its own
    moves in `opp_hist`, learns a complex offset the same way `Pred` does,
    rotates the prediction by 2*pi/9 and finally plays the move one step above
    the predicted one.

    alpha: learning rate of the offset's moving average.
    max_len: cap forwarded to `find_all_longest` (None = no cap).
    """

    def __init__(self, alpha=0.01, max_len=20):
        self.alpha = alpha
        self.max_len = max_len
        self.offset = 0
        self.last_feat = None
        self.my_last_action = None
        self.opp_hist = []
        self.my_opp_hist = []

    def __call__(self, obs, conf):
        if obs.step == 0:
            # Nothing to learn from yet: open with a uniformly random move.
            action = np.random.choice(3)
            self.my_last_action = action
            return action

        mine = self.my_last_action
        theirs = obs["lastOpponentAction"]
        self.my_opp_hist.append((theirs, mine))
        self.opp_hist.append(mine)

        if self.last_feat is not None:
            # Update the moving-average offset with the move beating our last one.
            sample = (basis[(mine + 1) % 3]) * self.last_feat.conjugate()
            self.offset = (1 - self.alpha) * self.offset + self.alpha * sample

        matches = find_all_longest(self.my_opp_hist, self.max_len)
        if matches:
            feat = basis[self.opp_hist[matches[0].idx]]
            self.last_feat = complex_to_probs(feat / abs(feat)) @ basis
            rotation = cmath.exp(2j * cmath.pi * 1/9)
            pred = self.last_feat * self.offset * rotation
        else:
            pred = 0

        probs = complex_to_probs(pred)
        top = np.argmax(probs)
        if probs[top] > .334:
            # A clear favourite exists: use it directly.
            picked = int(top)
        else:
            picked = np.random.choice(3, p=probs)

        action = (picked + 1) % 3
        self.my_last_action = action
        return action

    def set_my_action(self, my_last_action):
        """Overwrite the remembered last own move."""
        self.my_last_action = my_last_action
class AntiGeoAgentWrapper:
    """Adapts an anti-geometry agent to the three-argument call used by the ensemble.

    The wrapped agent must provide `set_my_action` and be callable with
    `(observation, configuration)`.
    """

    def __init__(self, agent):
        self.agent = agent

    def __call__(self, observation, configuration, my_last_action):
        """Tell the agent which move was really played, then ask for its next one."""
        already_played = observation.step > 0
        if already_played:
            self.agent.set_my_action(my_last_action)
        proposal = self.agent(observation, configuration)
        return proposal

class AntiGeoMirrorAgentWrapper:
    """Runs an anti-geometry agent from the opponent's side and counters its choice.

    After step 0 the wrapped agent is shown our last move as its
    "opponent" move and the opponent's last move as its own. The move it then
    proposes is answered with the move that beats it.
    """

    def __init__(self, agent):
        self.agent = agent

    def __call__(self, observation, configuration, my_last_action):
        mirrored = observation
        if observation.step != 0:
            # Swap roles: our move becomes what the wrapped agent "faced".
            mirrored = AttrDict({'step': observation.step, 'lastOpponentAction': my_last_action})
            self.agent.set_my_action(observation.lastOpponentAction)
        simulated_move = self.agent(mirrored, configuration)
        return BEAT[simulated_move]

class ScoringFunc:
    """Decayed running score for one move proposer.

    decay: factor applied to the previous score every round.
    win_value / draw_value / lose_value: amount added for the round's outcome.
    drop_prob: probability that a loss (or a draw, with `drop_draw`) resets
        the score to zero.
    drop_draw: whether draws are also eligible for the reset.
    clip_zero: when true, the returned score is never negative.
    """

    def __init__(self,
                 decay=1.,
                 win_value=1.,
                 draw_value=0.,
                 lose_value=-1.,
                 drop_prob=0.,
                 drop_draw=False,
                 clip_zero=False):
        super().__init__()
        self.decay = decay
        self.win_value = win_value
        self.draw_value = draw_value
        self.lose_value = lose_value
        self.drop_prob = drop_prob
        self.drop_draw = drop_draw
        self.clip_zero = clip_zero

    def __call__(self, score, our_move, his_move):
        """Return the updated score after proposing `our_move` against `his_move`.

        Moves are 0 (rock), 1 (paper), 2 (scissors).
        """
        # (our - his) mod 3: 0 -> draw, 1 -> we beat him, 2 -> he beats us.
        outcome = (our_move - his_move) % 3
        is_draw = outcome == 0
        is_loss = outcome == 2

        if is_draw:
            gain = self.draw_value
        elif is_loss:
            gain = self.lose_value
        else:
            gain = self.win_value
        retval = self.decay * score + gain

        if self.drop_prob > 0. and random.random() < self.drop_prob:
            # The reset has to hit the value that is returned; assigning to
            # the `score` argument here would be a dead store.
            if is_loss or (self.drop_draw and is_draw):
                retval = 0.

        if self.clip_zero:
            retval = max(0., retval)
        return retval

def random_agent(observation, configuration, my_last_action):
    """Ignore all inputs and return a uniformly random move in {0, 1, 2}."""
    return random.randrange(3)

class MixedAgent:
    """Ensemble that plays the proposal of a score-weighted, randomly chosen sub-agent.

    Every entry of `agents` is called as `agent(observation, configuration,
    last_action)` each step. With `use_cede` / `use_beat`, each agent's
    proposal additionally spawns a derived proposal (`CEDE[p]` / `BEAT[p]`)
    that is scored on its own. Scores are updated by one `ScoringFunc` per
    proposal, built from `dllu_scoring_configs` (one positional-argument list
    per proposal, base agents first, then cede, then beat).

    `anti_geo_agents` form a second, separately scored pool. It is only
    consulted once its best score clearly exceeds the thresholds checked in
    `__call__`; from then on the ensemble plays from that pool exclusively.

    `generate_random_dllu_scoring_configs` is stored but not used by this class.
    """

    def __init__(self, agents, dllu_scoring_configs, use_cede=False, use_beat=False, generate_random_dllu_scoring_configs=None, anti_geo_agents=None):
        # None instead of a `[]` default: a list default would be one object
        # shared by every instance created without the argument.
        if anti_geo_agents is None:
            anti_geo_agents = []

        self.agents = agents
        self.scoring_funcs = [ScoringFunc(*cfg) for cfg in dllu_scoring_configs]
        self.last_action = None

        self.score_history = []
        self.use_cede = use_cede
        self.use_beat = use_beat
        self.agent_num = len(self.agents)
        if self.use_cede:
            self.agent_num += len(self.agents)
        if self.use_beat:
            self.agent_num += len(self.agents)
        self.last_proposed_actions = [None] * self.agent_num
        self.selection_log = [0] * self.agent_num
        self.scores = 3. * np.ones(self.agent_num)
        # (sic) attribute name kept so existing external references keep working.
        self.valied_score_num = self.agent_num
        self.generate_random_dllu_scoring_configs = generate_random_dllu_scoring_configs

        self.anti_geo_agents = anti_geo_agents
        # Always defined, also when there is no anti-geo pool.
        self.use_anti_geo = False
        if len(anti_geo_agents) > 0:
            anti_geo_dllu_scoring_configs = [
                [1.00,  1.00,    0.00,     -1.00,    0.00,      False,     False]
            ] * len(anti_geo_agents)
            self.anti_geo_scoring_funcs = [ScoringFunc(*cfg) for cfg in anti_geo_dllu_scoring_configs]
            self.anti_geo_last_proposed_actions = [None] * (len(anti_geo_agents))
            self.anti_geo_scores = 1. * np.ones(len(self.anti_geo_last_proposed_actions))

    @staticmethod
    def _pick_weighted(scores):
        """Draw an index with probability proportional to max(0, score).

        Falls back to a uniform draw when no score is positive, because
        `random.choices` rejects weights that sum to zero.
        """
        weights = [max(0, score) for score in scores]
        if not any(weight > 0 for weight in weights):
            return random.randrange(len(weights))
        return random.choices(range(len(weights)), weights=weights)[0]

    def _rescore(self, idx, opp_action):
        """Update proposal `idx`'s score with how its last proposal fared."""
        self.scores[idx] = self.scoring_funcs[idx](
            self.scores[idx], self.last_proposed_actions[idx], opp_action)

    def __call__(self, observation, configuration):
        """Score last round's proposals, collect new ones and return the move to play."""
        step = observation.step
        # lastOpponentAction is only read after the first round.
        opp_action = observation.lastOpponentAction if step > 0 else None

        if step > 0:
            letters = ['R', 'P', 'S']
            self.score_history.append(who_win[letters[self.last_action] + letters[opp_action]])

        n_base = len(self.agents)
        proposed_actions = [None] * self.agent_num
        for i, agent in enumerate(self.agents):
            cede_idx = i + n_base
            # Beat-proposals follow the cede block when it exists, else the base block.
            beat_idx = i + n_base * (2 if self.use_cede else 1)
            if step > 0:
                self._rescore(i, opp_action)
                if self.use_cede:
                    self._rescore(cede_idx, opp_action)
                if self.use_beat:
                    self._rescore(beat_idx, opp_action)
            proposed_actions[i] = agent(observation, configuration, self.last_action)
            if self.use_cede:
                proposed_actions[cede_idx] = CEDE[proposed_actions[i]]
            if self.use_beat:
                proposed_actions[beat_idx] = BEAT[proposed_actions[i]]

        if len(self.anti_geo_agents) > 0:
            anti_geo_proposed_actions = [None] * len(self.anti_geo_agents)
            for i, agent in enumerate(self.anti_geo_agents):
                if step > 0:
                    self.anti_geo_scores[i] = self.anti_geo_scoring_funcs[i](
                        self.anti_geo_scores[i], self.anti_geo_last_proposed_actions[i], opp_action)
                anti_geo_proposed_actions[i] = agent(observation, configuration, self.last_action)
            self.anti_geo_last_proposed_actions = anti_geo_proposed_actions

            if self.use_anti_geo:
                # Once switched, only the anti-geo pool decides the move.
                best_index = self._pick_weighted(self.anti_geo_scores)
                self.last_action = anti_geo_proposed_actions[best_index]
                return self.last_action

            # Decide whether to switch to the anti-geo pool from the next step on.
            if 300 < step:
                max_anti_geo_score = self.anti_geo_scores[np.argmax(self.anti_geo_scores)]
                if max_anti_geo_score > 70 or (step < 600 and max_anti_geo_score > 40 and max_anti_geo_score - sum(self.score_history) > 30):
                    self.use_anti_geo = True

        best_index = self._pick_weighted(self.scores[:self.valied_score_num])

        my_action = proposed_actions[best_index]
        self.selection_log[best_index] += 1

        self.last_action = my_action
        self.last_proposed_actions = proposed_actions

        return my_action
    

    
# Secondary pool handed to MixedAgent as `anti_geo_agents`.
# anti_geo_agents = [AntiGeoAgentWrapper(GeobotBeater(0.01, 20)), AntiGeoAgentWrapper(GeobotBeater(0.03, 20)), AntiGeoAgentWrapper(GeobotBeater(0.07, None)), AntiGeoAgentWrapper(GeobotBeater(0.12, None)),  AntiGeoAgentWrapper(GeobotBeater(0.3, None)),  AntiGeoAgentWrapper(GeobotBeater(0.8, None))]
anti_geo_agents = [
    AntiGeoAgentWrapper(GeobotBeater(alpha, max_len))
    for alpha, max_len in [(0.01, 20), (0.03, 20)]
]

# Main pool: the same four (alpha, max_len) settings, first wrapped directly,
# then wrapped as mirrors.
# agents = [AgentWrapper(Agent(0.01, None)), AgentWrapper(Agent(0.07, None)), AgentWrapper(Agent(0.05, 40)),AgentWrapper(Agent(0.03, 20)), AgentWrapper(Agent(0.05, 40)), AgentWrapper(Agent(0.12, None)),AgentWrapper(Agent(0.8, 20)), MirrorAgentWrapper(Agent(0.01, None)), MirrorAgentWrapper(Agent(0.07, None)),MirrorAgentWrapper(Agent(0.05, 40)),MirrorAgentWrapper(Agent(0.03, 20)), MirrorAgentWrapper(Agent(0.05, 40)), MirrorAgentWrapper(Agent(0.12, None)),MirrorAgentWrapper(Agent(0.8, 20))]
agents = [
    wrapper(Agent(alpha, max_len))
    for wrapper in (AgentWrapper, MirrorAgentWrapper)
    for alpha, max_len in [(0.07, None), (0.05, 40), (0.03, 20), (0.12, None)]
]

# One ScoringFunc argument list per proposal: the base agents plus their
# cede- and beat-derived proposals, hence three entries per agent.
# Alternative tried: * (len(agents)-1) + [[0.30,  3.00,    0.00,     -3.00,    0.05,      False,     True]]
dllu_scoring_configs = [
    [0.60,  3.00,    0.00,     -3.00,    0.00,      False,     False]
] * (3 * len(agents))
# def generate_random_dllu_scoring_configs():  
#     return [
#         [random.randrange(5)/10 + 0.3,  3.00,    0.00,     -3.00,    0.00,      False,     False],
#     ] * len(dllu_scoring_configs)

main_agent = MixedAgent(
    agents,
    dllu_scoring_configs,
    use_cede=True,
    use_beat=True,
    anti_geo_agents=anti_geo_agents,
)



def call_agent(obs, conf):
    """Module-level entry point: delegate the step to the shared `main_agent`."""
    chosen_action = main_agent(obs, conf)
    return chosen_action


In [None]:
# Smoke test: play the agent file written above against a second agent file
# and render the match inside the notebook.
from kaggle_environments import evaluate, make, utils

env = make("rps", debug=True)
# The trailing semicolon keeps the notebook from echoing run()'s return value.
env.run(["geometry.py", "../input/rps-geometry2/geometry.py"]);
env.render(mode="ipython", width=600, height=450)

In [None]:
# import os
# import pandas as pd
# import kaggle_environments
# from datetime import datetime
# import multiprocessing as pymp
# from tqdm import tqdm
# import ray.util.multiprocessing as raymp
# pd.set_option('display.max_rows', 100)

# # function to return score
# def get_result(match_settings):
#     start = datetime.now()
#     outcomes = kaggle_environments.evaluate(
#         'rps', [match_settings[0], match_settings[1]], num_episodes=match_settings[2])
#     won, lost, tie, avg_score = 0, 0, 0, 0.
#     for outcome in outcomes:
#         score = outcome[0]
#         if score > 0: won += 1
#         elif score < 0: lost += 1
#         else: tie += 1
#         avg_score += score
#     elapsed = datetime.now() - start
#     return match_settings[1], won, lost, tie, elapsed, float(avg_score) / float(match_settings[2])


# def eval_agent_against_baselines(agent, baselines, num_episodes=20, use_ray=False):
#     df = pd.DataFrame(
#         columns=['wins', 'loses', 'ties', 'total time', 'avg. score'],
#         index=range(len(baselines))
#     )
    
#     if use_ray:
#         pool = raymp.Pool()
#     else:
#         pool = pymp.Pool()
        
#     matches = [[agent, baseline, num_episodes] for baseline in baselines]

#     results = []
#     for content in pool.imap_unordered(get_result, matches):
#         results.append(content)
        
#     for i , (baseline_agent, won, lost, tie, elapsed, avg_score) in enumerate(results):
#         df.loc[i, 'wins'] = won
#         df.loc[i, 'loses'] = lost
#         df.loc[i, 'ties'] = tie
#         df.loc[i, 'total time'] = elapsed
#         df.loc[i, 'avg. score'] = avg_score
        
#     return df

In [None]:
# agents = [AgentWrapper(Agent(0.07, None)), AgentWrapper(Agent(0.05, 40)),AgentWrapper(Agent(0.03, 20)), MirrorAgentWrapper(Agent(0.07, None)),MirrorAgentWrapper(Agent(0.05, 40)),MirrorAgentWrapper(Agent(0.03, 20))]
# dllu_scoring_configs =  [
#         [0.60,  3.00,    0.00,     -3.00,    0.00,      False,     False]
#     ] * len(agents)
# dllu_scoring_configs *= 3
# use_cede_beat_agent = MixedAgent(agents, dllu_scoring_configs, use_cede=True, use_beat=True)

# agents = [AgentWrapper(Agent(0.07, None)), AgentWrapper(Agent(0.05, 40)),AgentWrapper(Agent(0.03, 20)), MirrorAgentWrapper(Agent(0.07, None)),MirrorAgentWrapper(Agent(0.05, 40)),MirrorAgentWrapper(Agent(0.03, 20))]
# dllu_scoring_configs =  [
#         [0.60,  3.00,    0.00,     -3.00,    0.00,      False,     False]
#     ] * len(agents)
# dllu_scoring_configs *= 2
# use_beat_agent = MixedAgent(agents, dllu_scoring_configs, use_cede=False, use_beat=True)

# agents = [AgentWrapper(Agent(0.07, None)), MirrorAgentWrapper(Agent(0.07, None))]
# dllu_scoring_configs =  [
#         [0.60,  3.00,    0.00,     -3.00,    0.00,      False,     False]
#     ] * len(agents)
# dllu_scoring_configs *= 2
# mixed_agent_cede_no_random_1 = MixedAgent(agents, dllu_scoring_configs, use_cede=True)


# agents = [AgentWrapper(Agent(0.07, None)), AgentWrapper(Agent(0.05, 40)),AgentWrapper(Agent(0.03, 20)), MirrorAgentWrapper(Agent(0.07, None)),MirrorAgentWrapper(Agent(0.05, 40)),MirrorAgentWrapper(Agent(0.03, 20))]
# dllu_scoring_configs =  [
#         [0.60,  3.00,    0.00,     -3.00,    0.00,      False,     False]
#     ] * len(agents)
# dllu_scoring_configs *= 2
# mixed_agent_add_cede_no_random = MixedAgent(agents, dllu_scoring_configs, use_cede=True)

# base_lines = [Agent(0.07, None), mixed_agent_cede_no_random_1, use_cede_beat_agent, use_beat_agent]

# eval_agent_against_baselines(mixed_agent_add_cede_no_random, base_lines , 30)

In [None]:
# from kaggle_environments import evaluate, make, utils
# env = make("rps", debug=True)

# from geometry import AgentWrapper, MirrorAgentWrapper, Agent
# from anti_geo import AntiGeoAgentWrapper, AntiGeoMirrorAgentWrapper, GeobotBeater

# num_win=0
# num_loss=0
# num_matches=0
# # 0.02, 0.05, 0.07, 0.1 , 0.15

# for _ in range(5):
#     env.reset()
# #     result=env.run([GeobotBeater(0.02, None), Agent(0.01, 20)])
#     result=env.run(["geometry.py", Agent(0.01, 20)])
#     result=env.run(["geometry.py", "../input/rps-geometry2/geometry.py"])
#     reward=result[-1][0]["observation"]["reward"]
#     if reward>20:
#         num_win+=1
#     if reward<-20:
#         num_loss+=1
#     num_matches+=1
    
#     print(f"{reward:+4.0f}, {num_matches:2d} matches, {num_win/num_matches:5.1%} win, {num_loss/num_matches:5.1%} loss")