How to create a strategy __Meta(S)__ that beats an agent that uses a deterministic strategy __S__

In [None]:
%%writefile copy_opponent_agent.py
    
def copy_opponent_agent(observation, configuration):
    """Replay the opponent's previous action.

    While ``observation.step > 0`` holds, the value of
    ``observation.lastOpponentAction`` is returned unchanged. Otherwise
    (the opening step) there is nothing to copy yet and 0 is returned.
    ``configuration`` is accepted but not used.
    """
    has_history = observation.step > 0
    return observation.lastOpponentAction if has_history else 0

In [None]:
%%writefile meta_copy_opponent.py

from abc import ABCMeta, abstractmethod
from dataclasses import dataclass
import numpy as np

class Agent(metaclass=ABCMeta):
    """Abstract base for the strategies defined in this file.

    Subclasses must implement :meth:`play`; :attr:`name` is provided.
    """

    @abstractmethod
    def play(self, observation, configuration, last_my_hand: int) -> int:
        """Return the hand to play for the current step.

        ``last_my_hand`` is the hand this agent returned on the previous
        call (callers in this file pass ``None`` before the first call).
        """

    @property
    def name(self) -> str:
        """Name of the concrete class, used as a display label."""
        return type(self).__name__

@dataclass
class Meta(Agent):
    """Wrap another agent and answer with the hand one step after its hand.

    Each call replays the wrapped agent from the opponent's point of view
    and returns ``(hand + 1) % 3`` for the hand it produces. Under the rps
    encoding this is presumably the hand that beats it (0=rock, 1=paper,
    2=scissors) -- confirm against the environment specification.
    """

    agent: Agent  # the strategy being simulated

    def play(self, observation, configuration, last_my_hand):
        """Simulate the wrapped agent and return the next hand in the cycle.

        The wrapped agent is shown a rebuilt observation in which *our*
        previous hand appears as its ``lastOpponentAction``, and it is
        handed the real opponent's previous action as its own last hand
        (``None`` while ``observation.step`` is not greater than 0).
        Only ``lastOpponentAction`` and ``step`` are carried over into the
        rebuilt observation.
        """
        # swap my hand and opponent's hand
        if observation.step > 0:
            opponent_previous = observation.lastOpponentAction
        else:
            opponent_previous = None
        mirrored = observation.__class__(
            lastOpponentAction=last_my_hand,
            step=observation.step,
        )
        predicted = self.agent.play(mirrored, configuration, opponent_previous)
        return (predicted + 1) % 3

    @property
    def name(self):
        """Label of the form ``Meta(<wrapped agent name>)``."""
        return "Meta(%s)" % self.agent.name
    
class CopyOpponent(Agent):
    """Strategy that repeats the opponent's previous action."""

    def play(self, observation, configuration, last_my_hand: int) -> int:
        """Return ``observation.lastOpponentAction`` once ``step > 0``, else 0.

        ``configuration`` and ``last_my_hand`` are not consulted.
        """
        return observation.lastOpponentAction if observation.step > 0 else 0

# Module-level state shared across calls to the entry point below.
agent = Meta(CopyOpponent())  # just wrap the agent you want to beat
last_my_hand = None  # hand returned by the previous call; None before the first


def meta_copy_opponent(observation, configuration):
    """Entry point: ask the wrapped agent for a hand and remember it.

    The returned hand is stored in the module-level ``last_my_hand`` so
    that it can be passed to ``agent.play`` on the next call.
    """
    global last_my_hand  # rebound below; ``agent`` is only read
    chosen = agent.play(observation, configuration, last_my_hand)
    last_my_hand = chosen
    return chosen

In [None]:
# Notebook-side setup: build the "rps" environment object that the match
# cells further down call reset/run/render on.
from kaggle_environments import evaluate, make, utils
# NOTE(review): debug=True presumably surfaces agent errors/output while
# running -- confirm against the kaggle_environments documentation.
env = make("rps", debug=True)

In [None]:
# Reset the environment, then run the Meta wrapper (listed first) against the
# plain copy-opponent agent it wraps.
env.reset()
env.run(["meta_copy_opponent.py", "copy_opponent_agent.py"])
# Display the result in the notebook.
env.render(mode="ipython", width=800, height=600)

In [None]:
%%writefile beat_popular.py

from abc import ABCMeta, abstractmethod
from dataclasses import dataclass
import numpy as np

class Agent(metaclass=ABCMeta):
    """Interface shared by every strategy class in this file.

    Concrete strategies override :meth:`play`; :attr:`name` comes for free.
    """

    @abstractmethod
    def play(self, observation, configuration, last_my_hand: int) -> int:
        """Pick the hand for this step.

        ``last_my_hand`` carries the hand returned by the previous call
        (the module-level driver in this file starts it at ``None``).
        """

    @property
    def name(self) -> str:
        """The concrete class's ``__name__``."""
        return type(self).__name__

@dataclass
class Meta(Agent):
    """Counter-strategy built around another :class:`Agent`.

    It runs the wrapped agent on a role-swapped view of the game and
    returns that agent's hand advanced by one, modulo 3.
    """

    agent: Agent  # strategy whose hand is simulated each step

    def play(self, observation, configuration, last_my_hand):
        """Return ``(h + 1) % 3`` where ``h`` is the wrapped agent's hand.

        The wrapped agent receives a new observation of the same class as
        ``observation`` carrying only ``lastOpponentAction`` (set to our
        previous hand) and ``step``. Its own "last hand" argument is the
        real ``observation.lastOpponentAction`` when ``step > 0`` and
        ``None`` otherwise.
        """
        # swap my hand and opponent's hand
        their_last = None
        if observation.step > 0:
            their_last = observation.lastOpponentAction
        swapped_view = observation.__class__(
            lastOpponentAction=last_my_hand,
            step=observation.step,
        )
        simulated_hand = self.agent.play(swapped_view, configuration, their_last)
        return (simulated_hand + 1) % 3

    @property
    def name(self):
        """``Meta(...)`` wrapped around the inner agent's name."""
        return "Meta(%s)" % self.agent.name

##################################################################
class BeatPopular(Agent):
    """Frequency-counting strategy over the opponent's observed actions.

    NOTE: despite the class name, the value returned is the opponent's most
    frequent action itself (the argmax of the counts), not that action
    shifted by one.
    """

    def __init__(self):
        # population[a] counts how often action ``a`` was seen as
        # ``observation.lastOpponentAction``.
        self.population = np.zeros(3)

    def play(self, observation, configuration, last_my_hand):
        """Return a random hand on step 0, else the most-counted action.

        From step 1 onward the count for ``observation.lastOpponentAction``
        is incremented first; ties go to the lowest index because
        ``np.argmax`` returns the first maximum.
        """
        is_opening = observation.step == 0
        if is_opening:
            return np.random.randint(3)
        seen = observation.lastOpponentAction
        self.population[seen] += 1
        most_frequent = np.argmax(self.population)
        return int(most_frequent)
    
# Module-level state kept between calls to the entry point below.
agent = BeatPopular()
last_my_hand = None  # hand returned by the previous call; None before the first


def beat_popular(observation, configuration):
    """Entry point: delegate to the module-level agent and record its hand.

    The hand is saved in the module-level ``last_my_hand`` and passed back
    to ``agent.play`` on the following call.
    """
    global last_my_hand  # rebound below; ``agent`` is only read
    current_hand = agent.play(observation, configuration, last_my_hand)
    last_my_hand = current_hand
    return current_hand

In [None]:
%%writefile meta_beat_popular.py

from abc import ABCMeta, abstractmethod
from dataclasses import dataclass
import numpy as np

class Agent(metaclass=ABCMeta):
    """Base class every strategy in this file derives from.

    :meth:`play` is abstract; :attr:`name` reports the subclass name.
    """

    @abstractmethod
    def play(self, observation, configuration, last_my_hand: int) -> int:
        """Decide which hand to play on this step.

        ``last_my_hand`` is whatever this agent returned last time
        (``None`` on the first call made by this file's driver).
        """

    @property
    def name(self) -> str:
        """Class name of the concrete strategy."""
        return type(self).__name__

@dataclass
class Meta(Agent):
    """Agent that simulates a wrapped agent and plays one hand past it.

    The wrapped agent is evaluated as if it were sitting in the opponent's
    seat, and its hand ``h`` is answered with ``(h + 1) % 3``.
    """

    agent: Agent  # the agent being simulated

    def play(self, observation, configuration, last_my_hand):
        """Run the wrapped agent on a role-swapped observation.

        A fresh observation of ``observation``'s class is built with
        ``lastOpponentAction`` set to our previous hand and the same
        ``step``; no other fields are copied. The wrapped agent's
        ``last_my_hand`` argument is the real opponent's previous action,
        or ``None`` when ``observation.step > 0`` is false.
        """
        # swap my hand and opponent's hand
        if observation.step > 0:
            opp_prev_hand = observation.lastOpponentAction
        else:
            opp_prev_hand = None
        opp_view = observation.__class__(
            lastOpponentAction=last_my_hand,
            step=observation.step,
        )
        opp_hand = self.agent.play(opp_view, configuration, opp_prev_hand)
        return (opp_hand + 1) % 3

    @property
    def name(self):
        """Inner agent's name wrapped as ``Meta(<name>)``."""
        return "Meta(%s)" % self.agent.name

##################################################################
class BeatPopular(Agent):
    """Strategy that tallies the opponent's actions and returns the top one.

    NOTE: the name suggests countering the popular action, but the code
    returns the most frequent action unchanged (the argmax of the tally).
    """

    def __init__(self):
        # One counter per action index 0..2, updated from
        # ``observation.lastOpponentAction``.
        self.population = np.zeros(3)

    def play(self, observation, configuration, last_my_hand):
        """Random hand on step 0; afterwards the action with the top count.

        The tally is updated with ``observation.lastOpponentAction`` before
        the argmax is taken. ``np.argmax`` returns the first maximum, so a
        tie resolves to the smallest action index.
        """
        if observation.step == 0:
            return np.random.randint(3)
        previous_action = observation.lastOpponentAction
        self.population[previous_action] += 1
        top_action = np.argmax(self.population)
        return int(top_action)
    
# Module-level state kept between calls to the entry point below.
agent = Meta(BeatPopular())
last_my_hand = None  # hand returned by the previous call; None before the first


def meta_beat_popular(observation, configuration):
    """Entry point: get a hand from the Meta-wrapped agent and remember it.

    The hand is written to the module-level ``last_my_hand`` so the next
    call can pass it to ``agent.play``.
    """
    global last_my_hand  # rebound below; ``agent`` is only read
    picked = agent.play(observation, configuration, last_my_hand)
    last_my_hand = picked
    return picked

In [None]:
# Reset the environment, then run the plain BeatPopular agent (listed first)
# against its Meta-wrapped counterpart.
env.reset()
env.run(["beat_popular.py", "meta_beat_popular.py"])
# Display the result in the notebook.
env.render(mode="ipython", width=800, height=600)