# Playground

## Setup

In [1]:
# Enable IPython's autoreload extension. Mode 2 reloads all imported modules
# before each cell execution, so edits to the project's modules are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import os
import sys
# Make the parent of the current working directory importable so that the
# `src` and `examples` packages imported below can be resolved.
# The path is normalised and only appended once, so re-running this cell does
# not keep growing sys.path with duplicate entries.
parent_dir = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
if parent_dir not in sys.path:
    sys.path.append(parent_dir)

In [3]:
from gymnasium.spaces import Discrete

from src.wrapper import RestrictionWrapper
from examples.envs.rps import RPSEnvironment
from examples.utils import play, restriction_aware_random_policy

## Rock/Paper/Scissors

### Restrictor

In [4]:
from src.restrictions import DiscreteSetRestriction
from src.restrictors import RestrictorActionSpace, Restrictor


class RPSRestrictor(Restrictor):
    """Rock/Paper/Scissors restrictor that forbids one action per turn.

    The forbidden action is the ``last_action`` value produced by
    ``preprocess_observation``; all other actions in ``{0, 1, 2}`` remain
    allowed.
    """

    def __init__(self) -> None:
        super().__init__(observation_space=Discrete(1), action_space=RestrictorActionSpace(Discrete(3)))

    def preprocess_observation(self, env):
        """Flatten the environment into the observation consumed by ``act``.

        Args:
            env: Environment exposing ``agent_selection``, ``possible_agents``,
                ``agent_name_mapping`` and ``observe``.

        Returns:
            A dict with the keys ``'agent'`` (the currently selected agent) and
            ``'last_action'`` (``int`` of what ``env.observe`` returns for the
            other agent).
        """
        # This function 'flattens' the environment into a valid space while
        # preserving all information that the restrictor needs
        current_agent = env.agent_selection

        # `1 - index` selects the other agent, which assumes exactly two agents
        # mapped to the indices 0 and 1 -- TODO confirm against RPSEnvironment.
        other_agent = env.possible_agents[1 - env.agent_name_mapping[current_agent]]

        return {'agent': current_agent, 'last_action': int(env.observe(other_agent))}

    def act(self, observation):
        """Build the restriction for the current turn.

        Args:
            observation: Dict produced by ``preprocess_observation``; only the
                ``'last_action'`` entry is used here.

        Returns:
            A ``DiscreteSetRestriction`` over ``Discrete(3)`` whose allowed
            actions are ``{0, 1, 2}`` minus ``last_action``. If ``last_action``
            is not one of 0, 1 or 2, the set difference removes nothing and all
            three actions stay allowed.
        """
        # Structure of observation is defined by self.preprocess_observation
        last_action = observation['last_action']

        return DiscreteSetRestriction(Discrete(3), allowed_actions={0, 1, 2} - {last_action})
    
class DummyRPSRestrictor(Restrictor):
    """Restrictor that ignores its observation and always returns the same restriction.

    The returned ``DiscreteSetRestriction`` is built without an explicit
    ``allowed_actions`` argument -- presumably this leaves every action
    allowed; confirm against ``DiscreteSetRestriction``'s defaults.
    """

    def __init__(self) -> None:
        # Same spaces as RPSRestrictor: a single-valued observation space and a
        # restrictor action space over the three game actions.
        obs_space = Discrete(1)
        act_space = RestrictorActionSpace(Discrete(3))
        super().__init__(observation_space=obs_space, action_space=act_space)

    def act(self, observation):
        """Return a default ``DiscreteSetRestriction`` over ``Discrete(3)``.

        Args:
            observation: Unused.
        """
        base_space = Discrete(3)
        return DiscreteSetRestriction(base_space)

### Action Policies

In [5]:
def create_rps_policies(env, restrictors):
    """Assemble the policy lookup used to play the game.

    Args:
        env: Environment whose ``possible_agents`` lists the player ids.
        restrictors: Mapping from restrictor id to restrictor object.

    Returns:
        A dict that maps every id in ``env.possible_agents`` to
        ``restriction_aware_random_policy`` and every restrictor id to that
        restrictor's ``act`` method. If an id occurs in both, the restrictor
        entry takes precedence.
    """
    policies = {}

    # Players first, so that their keys keep the leading positions.
    for player in env.possible_agents:
        policies[player] = restriction_aware_random_policy

    # Restrictor entries come second and overwrite any clashing player id.
    for restrictor_id, restrictor_obj in restrictors.items():
        policies[restrictor_id] = restrictor_obj.act

    return policies

### Execution

#### RestrictionWrapper

In [6]:
# One restrictor governs both players; per the note in this notebook it blocks
# each player's last action.
env = RPSEnvironment(render_mode='human')
restrictor = RPSRestrictor()
wrapper = RestrictionWrapper(
    env,
    restrictor,
    preprocess_restrictor_observation_fns=restrictor.preprocess_observation,
)

# The restrictor's policy is registered under the id 'restrictor_0'.
play(wrapper, create_rps_policies(env, {'restrictor_0': restrictor}))

player_0: NONE, player_1: NONE
player_0: PAPER, player_1: NONE
player_0: PAPER, player_1: SCISSORS
player_0: SCISSORS, player_1: NONE
player_0: SCISSORS, player_1: PAPER
player_0: ROCK, player_1: NONE
player_0: ROCK, player_1: SCISSORS
player_0: SCISSORS, player_1: NONE
player_0: SCISSORS, player_1: PAPER
player_0: PAPER, player_1: NONE
player_0: PAPER, player_1: ROCK
player_0: SCISSORS, player_1: NONE
player_0: SCISSORS, player_1: PAPER
player_0: ROCK, player_1: NONE
player_0: ROCK, player_1: ROCK
player_0: PAPER, player_1: NONE
player_0: PAPER, player_1: PAPER
player_0: SCISSORS, player_1: NONE
player_0: SCISSORS, player_1: ROCK
player_0: ROCK, player_1: NONE
player_0: ROCK, player_1: SCISSORS
Game over


#### RestrictionWrapper with multiple restrictors

In [7]:
# Two restrictors, one per player (see agent_restrictor_mapping below):
# player_0 is handled by RPSRestrictor, player_1 by DummyRPSRestrictor.
env = RPSEnvironment(render_mode='human')
restrictors = {
    'restrictor_0': RPSRestrictor(),
    'restrictor_1': DummyRPSRestrictor(),
}

# Only 'restrictor_0' gets a custom observation preprocessing function.
wrapper = RestrictionWrapper(
    env,
    restrictors,
    agent_restrictor_mapping={'player_0': 'restrictor_0', 'player_1': 'restrictor_1'},
    preprocess_restrictor_observation_fns={
        'restrictor_0': restrictors['restrictor_0'].preprocess_observation,
    },
)

play(wrapper, create_rps_policies(env, restrictors))

player_0: NONE, player_1: NONE
player_0: SCISSORS, player_1: NONE
player_0: SCISSORS, player_1: PAPER
player_0: ROCK, player_1: NONE
player_0: ROCK, player_1: SCISSORS
player_0: SCISSORS, player_1: NONE
player_0: SCISSORS, player_1: PAPER
player_0: ROCK, player_1: NONE
player_0: ROCK, player_1: PAPER
player_0: SCISSORS, player_1: NONE
player_0: SCISSORS, player_1: ROCK
player_0: PAPER, player_1: NONE
player_0: PAPER, player_1: SCISSORS
player_0: SCISSORS, player_1: NONE
player_0: SCISSORS, player_1: SCISSORS
player_0: ROCK, player_1: NONE
player_0: ROCK, player_1: SCISSORS
player_0: SCISSORS, player_1: NONE
player_0: SCISSORS, player_1: ROCK
player_0: PAPER, player_1: NONE
player_0: PAPER, player_1: ROCK
Game over
