In [1]:
#| default_exp environments

# Imports

In [2]:
#| export
import numpy as np
from abc import ABC, abstractmethod

# Abstract Environment

In [3]:
#| export
class Abstract_Env(ABC):
    """Minimal base class for environments; every environment should derive from it.

    A concrete environment keeps its current condition in ``self.state`` and must
    implement both ``transition`` and ``get_observation``.
    """

    def __init__(self, state: object):
        """
        Args:
            state: the object that describes the state of the environment
        """
        self.state = state

    @abstractmethod
    def get_observation(self):
        """
        Subclasses should compute and return the observation for an agent (or agents)
        as a function of self.state
        """
        raise NotImplementedError

    @abstractmethod
    def transition(self, action):
        """
        Subclasses should process the given action(s).

        Args:
            action: the action (or actions) to process
        """
        raise NotImplementedError

# Cyclic Environment

This environment defines a fixed sequence of percepts that may be passed to an agent. It is primarily useful for testing predictive ECMs in Hidden Markov Processes with no observation error.

In [4]:
#| export
class Cyclic_Env(Abstract_Env):
    """
    An environment that cycles deterministically through a sequence of percepts that may be passed to an agent.

    The state is an integer index into ``percept_cycle``. Each transition advances the
    index by one and wraps around to 0 after the last percept.
    """
    def __init__(self,
                 percept_cycle: np.ndarray,
                 initial_state: int = 0):
        """
        Args:
            percept_cycle: the ordered sequence of percepts to cycle through; must not be empty
            initial_state: index into percept_cycle at which the cycle starts

        Raises:
            ValueError: if percept_cycle is empty
        """
        #an empty cycle would otherwise only fail later, with a ZeroDivisionError in transition
        if len(percept_cycle) == 0:
            raise ValueError("'percept_cycle' must contain at least one percept")
        self.percept_cycle = percept_cycle
        super().__init__(state = initial_state)

    def transition(self, action = None):
        """
        This environment has deterministic transitions, so the action is ignored.

        Args:
            action: accepted only so that the environment can be driven through the
                same ``transition(action)`` interface as other environments; unused
        """
        self.state = (self.state + 1) % len(self.percept_cycle)

    def get_observation(self):
        """
        Returns the percept at the current position of the cycle as a length-one slice
        of percept_cycle (so an array input yields an array rather than a scalar).
        """
        return self.percept_cycle[self.state:self.state+1]

## Example
In this example, we set up a cyclical environment in which a light turns green, turns off, turns blue, turns off, and then repeats.

In [5]:
percept_cycle = np.array(["green", "off", "blue", "off"])
light_cycle_instance1 = Cyclic_Env(percept_cycle)
T = 8 #total time steps to simulate

#at every step record the current percept, then advance the environment
observed_percepts = []
for t in range(T):
    observed_percepts.append(light_cycle_instance1.get_observation())
    light_cycle_instance1.transition()

observed_percepts

[array(['green'], dtype='<U5'),
 array(['off'], dtype='<U5'),
 array(['blue'], dtype='<U5'),
 array(['off'], dtype='<U5'),
 array(['green'], dtype='<U5'),
 array(['off'], dtype='<U5'),
 array(['blue'], dtype='<U5'),
 array(['off'], dtype='<U5')]

We can also choose to initiate anywhere in the cycle by giving the desired index. In this example, we start in State 2, which returns the "blue" percept.

In [6]:
light_cycle_instance2 = Cyclic_Env(percept_cycle, initial_state = 2)
T = 8 #total time steps to simulate

#at every step record the current percept, then advance the environment
observed_percepts = []
for t in range(T):
    observed_percepts.append(light_cycle_instance2.get_observation())
    light_cycle_instance2.transition()

observed_percepts

[array(['blue'], dtype='<U5'),
 array(['off'], dtype='<U5'),
 array(['green'], dtype='<U5'),
 array(['off'], dtype='<U5'),
 array(['blue'], dtype='<U5'),
 array(['off'], dtype='<U5'),
 array(['green'], dtype='<U5'),
 array(['off'], dtype='<U5')]

# RLGL

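A two-state environment: the state is either 0 ("red") or 1 ("green") and evolves as a Markov chain governed by a 2x2 transition matrix (uniform by default). The agent's action does not influence the state; it only determines the reward, which is 1 when the action equals the current state and 0 otherwise.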

In [7]:
#| export
class RLGL(Abstract_Env):
    """
    A two-state environment whose state is 0 ("red") or 1 ("green").

    The state evolves as a Markov chain defined by ``transition_matrix``; the agent's
    action never affects the state, only the reward returned by ``get_reward``.
    """
    def __init__(self, state = 0, transition_matrix = None):
        """
        Args:
            state: the initial state, 0 ("red") or 1 ("green")
            transition_matrix: 2x2 array-like; row s holds the probabilities of moving
                from state s to states 0 and 1. Defaults to uniform probabilities.
        """
        self.state = state
        self.state_labels = {0: "red", 1: "green"}
        if transition_matrix is None:
            #create random uniform transition probabilities
            transition_matrix = np.array([[0.5,0.5],[0.5,0.5]])
        #nested lists pass the shape check below but cannot be indexed with [state,], so convert first
        transition_matrix = np.asarray(transition_matrix)
        assert np.shape(transition_matrix) == (2,2), "transition_matrix must have shape (2,2)"
        self.transition_matrix = transition_matrix

    def transition(self, action = None):
        '''
        In this environment the agents action determines the reward but does not determine the state

        Args:
            action: unused; accepted for compatibility with the Abstract_Env interface
        '''
        self.state = np.random.choice(range(2), p = self.transition_matrix[self.state,])

    def get_observation(self):
        """Returns the label ("red" or "green") of the current state."""
        return self.state_labels[self.state]

    def get_reward(self, action):
        """
        Args:
            action: the agent's action, compared against the current state

        Returns:
            1 if the action equals the current state, otherwise 0
        """
        return 1 if action == self.state else 0

A minimal example

# Causal Dynamic Bayesian Network
This environment implements a specific kind of Dynamic Bayesian Network in which variables are not allowed to have parents in the same time step. Thus, the states of all variables at a given time are conditionally independent given the state of the system in the previous time step.

In [39]:
#|export
import inspect
class Causal_DBN(Abstract_Env):
    """
    A Dynamic Bayesian Network in which the value of every variable at t+1 is computed
    from the values of its parents at t; variables have no parents in the same time step.
    """
    def __init__(self,
                 state: np.ndarray, #a one dimensional array. Each element gives the state of a variable.
                 causal_network: np.ndarray, #a square boolean array that indicates whether a variable at t is a parent of another variable at t+1
                 update_functions: dict, #a dictionary of the functions used to update each variable
                 variable_names: np.ndarray = None, #an optional list of variables names. Default is integers. Must match keys of update functions
                 action_variables: np.ndarray = None #indicates which system variables are under the control of an agent. Used to ensure inputs to transition function are correct
                ):
        """
        Args:
            state: one dimensional array holding the initial value of each variable (copied, so the caller's array is never modified)
            causal_network: square boolean array; entry [i, j] is True when variable i at t is a parent of variable j at t+1
            update_functions: maps a variable name to the function that computes its next value. The parameter
                names of each function must be exactly the names of that variable's parents (copied, so the
                caller's dictionary is never modified)
            variable_names: names of the variables, in the order used by state. Defaults to the integers 0..n-1
            action_variables: mask selecting the variables controlled by an agent. These variables need no
                update function; they keep whatever value was last set through ``transition``

        Raises:
            ValueError: if the inputs are inconsistent with each other (shapes, names, missing update functions,
                or update function parameters that do not match the parents given by causal_network)
        """
        #check variables
        if not state.ndim == 1:
            raise ValueError("'state' must be a numpy array with a single dimension")
        self.num_variables = np.shape(state)[0]

        #the default names depend on num_variables, so they can only be built after it is known
        if variable_names is None:
            variable_names = np.arange(self.num_variables)
        else:
            variable_names = np.asarray(variable_names)
        if not np.shape(variable_names) == (self.num_variables,):
            raise ValueError("'variable_names' must contain exactly one name for each variable in 'state'")

        assert causal_network.dtype == np.bool_
        if not np.shape(causal_network) == (self.num_variables, self.num_variables):
            raise ValueError("causal network must be a square matrix with each dimension equal to the number of variables given by the state input")

        if action_variables is None:
            action_variables = np.full(self.num_variables, fill_value = False)
        action_names = variable_names[action_variables].tolist()

        #work on a copy so that registering the action placeholders does not alter the caller's dictionary
        update_functions = dict(update_functions)
        for action_variable in action_names:
            update_functions[action_variable] = self.action_function
        for key, update_f in update_functions.items():
            if key not in variable_names:
                raise ValueError("Keys of update_function dictionary must correspond to variable names. Default variable names are integer indices")
            assert callable(update_f)
            if key in action_names:
                #action variables are set by the agent; the placeholder's parameter is not a parent name
                continue
            ## Check that update function inputs match causal_network
            function_parents = list(inspect.signature(update_f).parameters)
            i = np.where(variable_names == key)[0][0] #index of the variable being updated (first match in first dimension)
            if not set(function_parents) == set(variable_names[causal_network[:,i]]): #compare input variable names to the variable's parents in the DBN
                raise ValueError(f'The update function for {variable_names[i]} does not have input variables that match parents in causal_network')

        if not len(update_functions) == self.num_variables:
            raise ValueError("there must be an update function for each variable in 'state'")

        self.state = state.copy() #transition writes action values in place, which must not leak into the caller's array
        self.causal_network = causal_network
        self.update_functions = update_functions
        self.variable_names = variable_names
        self.action_variables = action_variables

    def transition(self, action: dict = None):
        """
        Applies the given actions to the current state and then advances every variable by one time step.

        Args:
            action: maps variable names to the values they should take before the update is computed.
                Defaults to no actions.

        Raises:
            ValueError: if a key of action is not a variable name
        """
        if action is None:
            action = {}

        #update action states
        for key, value in action.items():
            if key not in self.variable_names:
                raise ValueError("keys of action dictionary must be environment variable names")
            self.state[self.variable_names == key] = value

        #apply transition functions
        action_names = set(self.variable_names[self.action_variables].tolist())
        new_states = np.zeros(self.num_variables)
        for variable, update_f in self.update_functions.items():
            target = self.variable_names == variable
            if variable in action_names:
                #agent-controlled variables are not updated by the network; they keep their current value
                new_states[target] = self.state[target]
                continue
            required_args = set(inspect.signature(update_f).parameters.keys())
            input_dict = {k: v for k, v in zip(self.variable_names, self.state) if k in required_args}
            new_states[target] = update_f(**input_dict)
        self.state = new_states

    def get_observation(self):
        """Returns the array holding the current value of every variable."""
        return self.state

    def action_function(self, x): #as actions are given as input, the stored update functions for these variables should not do anything
        """Identity placeholder stored as the update function of action variables."""
        return x

### Example

In [49]:
#initial values of the variables "a", "b" and "test" (True is stored as 1.0 in the float array)
state = np.array([1.0, 2.0, True])

#entry [i, j] is True when variable i at t is a parent of variable j at t+1: "a" and "b" are the parents of "test"
causal_network = np.array([
    [False, False, True],
    [False, False, True],
    [False, False, False],
])

def Bernoulli():
    """Draw 0 or 1 with equal probability; takes no inputs because "a" and "b" have no parents."""
    return np.random.binomial(n = 1, p = 0.5)

def Pair_Match(a, b):
    """Report whether a and b are equal; the parameter names must equal the names of the parents of "test"."""
    return a == b

variable_names = np.array(["a", "b", "test"])
update_functions = dict(a = Bernoulli, b = Bernoulli, test = Pair_Match)

test_DBN = Causal_DBN(state, causal_network, update_functions, variable_names)
test_DBN.transition()
test_DBN.state

array([1., 0., 0.])

In [46]:
#sanity check of the helper: unequal inputs should compare as False
Pair_Match(1,2)

False

In [47]:
#NOTE(review): the execution counts in this section are out of order and the recorded output below is an
#integer array, whereas the example cell above defines `state` with floats - the output appears stale; re-run to confirm
state

array([1, 0, 1])