In [None]:
#| default_exp environments

# Imports

In [None]:
#| export
import numpy as np
from abc import ABC, abstractmethod

# Abstract Environment

In [None]:
#| export
class Abstract_Env(ABC):
    """Minimal environment interface; every concrete environment should derive from this class.

    Subclasses must implement `transition` and `get_observation`.

    Examples:
    >>> class Counter(Abstract_Env):
    ...     def transition(self, action):
    ...         self.state += action
    ...     def get_observation(self):
    ...         return self.state
    >>> env = Counter(0)
    >>> env.transition(2)
    >>> env.get_observation()
    2
    """

    def __init__(self, state: object):
        """
        Args:
            state: an object that defines the state of the environment
        """
        self.state = state

    @abstractmethod
    def transition(self, action):
        """
        Process an action (or actions); concrete environments override this.

        Args:
            action: an action (or actions) to process
        """
        raise NotImplementedError()

    @abstractmethod
    def get_observation(self):
        """
        Determine and return an observation for an agent or agents as a function of self.state;
        concrete environments override this.
        """
        raise NotImplementedError()

# Cyclic Environment

This environment defines a fixed sequence of percepts that may be passed to an agent. It is primarily useful for testing predictive ECMs in Hidden Markov Processes with no observation error.

In [None]:
#| export
class Cyclic_Env(Abstract_Env):
    """
    An environment that cycles deterministically through a sequence of percepts that may be passed to an agent.

    The state is an integer index into `percept_cycle`; each transition advances it by one,
    wrapping back to 0 after the last percept.
    """
    def __init__(self,
                 percept_cycle: np.ndarray,
                 initial_state: int = 0):
        """
        Args:
            percept_cycle: the ordered sequence of percepts to cycle through (must be non-empty)
            initial_state: index into percept_cycle at which the environment starts; values outside
                [0, len(percept_cycle)) are wrapped around the cycle, the same way transition wraps

        Raises:
            ValueError: if percept_cycle is empty
        """
        cycle_length = len(percept_cycle)
        if cycle_length == 0:
            raise ValueError("percept_cycle must contain at least one percept")
        self.percept_cycle = percept_cycle
        # Wrap the starting index so that get_observation never slices outside the cycle
        # (an unwrapped -1 or len(percept_cycle) would yield an empty observation).
        super().__init__(state = initial_state % cycle_length)

    def transition(self, action = None):
        """
        This environment has deterministic transitions and does not take actions as input.

        Args:
            action: ignored; accepted only so the signature is compatible with Abstract_Env.transition
        """
        self.state = (self.state + 1) % len(self.percept_cycle)

    def get_observation(self):
        """
        Returns:
            the percept at the current state, as a length-1 slice of percept_cycle
        """
        return self.percept_cycle[self.state:self.state+1] #slicing returns array instead of scalar

## Example
In this example, we set up a cyclical environment in which a light turns green, turns off, turns blue, turns off, and then repeats.

In [None]:
percept_cycle = np.array(["green", "off", "blue", "off"])
light_cycle_instance1 = Cyclic_Env(percept_cycle)
T = 8 #total time steps to simulate
observed_percepts = [] #observations collected in order, one per time step

#at each step record the current percept, then advance the environment
for t in range(T):
    observed_percepts.append(light_cycle_instance1.get_observation())
    light_cycle_instance1.transition()

observed_percepts

[array(['green'], dtype='<U5'),
 array(['off'], dtype='<U5'),
 array(['blue'], dtype='<U5'),
 array(['off'], dtype='<U5'),
 array(['green'], dtype='<U5'),
 array(['off'], dtype='<U5'),
 array(['blue'], dtype='<U5'),
 array(['off'], dtype='<U5')]

We can also start anywhere in the cycle by passing the desired index as `initial_state`. In this example, we start in State 2, which returns the "blue" percept.

In [None]:
light_cycle_instance2 = Cyclic_Env(percept_cycle, initial_state = 2)
T = 8 #total time steps to simulate
observed_percepts = [] #observations collected in order, one per time step

#at each step record the current percept, then advance the environment
for t in range(T):
    observed_percepts.append(light_cycle_instance2.get_observation())
    light_cycle_instance2.transition()

observed_percepts

[array(['blue'], dtype='<U5'),
 array(['off'], dtype='<U5'),
 array(['green'], dtype='<U5'),
 array(['off'], dtype='<U5'),
 array(['blue'], dtype='<U5'),
 array(['off'], dtype='<U5'),
 array(['green'], dtype='<U5'),
 array(['off'], dtype='<U5')]

# RLGL

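A two-state environment whose state is either "red" (0) or "green" (1). At each step the state is resampled from the row of a 2×2 transition matrix corresponding to the current state (uniform by default); the agent's action does not influence the state. The agent receives a reward of 1 when its action equals the current state and 0 otherwise.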

In [None]:
#| export
class RLGL(Abstract_Env):
    """
    A two-state environment with states 0 ("red") and 1 ("green").

    The state evolves as a Markov chain given by `transition_matrix`, independently of the
    agent's action; the action only determines the reward.
    """
    def __init__(self, state = 0, transition_matrix = None):
        """
        Args:
            state: the initial state, 0 ("red") or 1 ("green")
            transition_matrix: 2x2 array-like where row i holds the probabilities of moving from
                state i to states 0 and 1; defaults to uniform probabilities
        """
        self.state_labels = {0: "red", 1: "green"}
        if transition_matrix is None:
            #create random uniform transition probabilities
            transition_matrix = np.array([[0.5,0.5],[0.5,0.5]])
        # Accept nested lists as well as arrays: row lookup in transition needs array indexing.
        transition_matrix = np.asarray(transition_matrix)
        assert np.shape(transition_matrix) == (2,2), "transition_matrix must have shape (2, 2)"
        self.transition_matrix = transition_matrix
        super().__init__(state = state)

    def transition(self, action):
        '''
        In this environment the agents action determines the reward but does not determine the state

        Args:
            action: ignored when sampling the next state
        '''
        next_state_probabilities = self.transition_matrix[self.state]
        # Cast to int so the state stays a plain Python int rather than a NumPy scalar.
        self.state = int(np.random.choice(range(2), p = next_state_probabilities))

    def get_observation(self):
        """
        Returns:
            the label of the current state: "red" for 0, "green" for 1
        """
        return self.state_labels[self.state]

    def get_reward(self, action):
        """
        Args:
            action: the agent's action, compared against the current state

        Returns:
            1 if action equals the current state, otherwise 0
        """
        return 1 if action == self.state else 0

A minimal example