### Observations

Observation features, which will be available to the active agent while interacting with the environment, are handled by the `Observations` class.

In [19]:
from maenvs4vrp.environments.cvrptw.observations import Observations
from maenvs4vrp.environments.cvrptw.instances_generator import InstanceGenerator
from maenvs4vrp.environments.cvrptw.env_agent_selector import AgentSelector
from maenvs4vrp.environments.cvrptw.env_agent_reward import DenseReward
from maenvs4vrp.environments.cvrptw.env import Environment

In [20]:
# Instantiate the observation builder with no arguments (no custom feature list).
obs = Observations()

The class has a `default_feature_list` attribute where the default configuration dictionary is defined.

In [21]:
# Display the default feature configuration dictionary.
obs.default_feature_list

{'nodes_static': {'x_coordinate': {'feat': 'x_coordinate', 'norm': None},
  'y_coordinate': {'feat': 'y_coordinate', 'norm': None},
  'tw_low': {'feat': 'tw_low', 'norm': None},
  'tw_high': {'feat': 'tw_high', 'norm': None},
  'demand': {'feat': 'demand', 'norm': None},
  'service_time': {'feat': 'service_time', 'norm': 'min_max'},
  'is_depot': {'feat': 'is_depot', 'norm': None}},
 'nodes_dynamic': ['time2open_div_end_time',
  'time2close_div_end_time',
  'arrive2node_div_end_time',
  'time2open_after_step_div_end_time',
  'time2close_after_step_div_end_time',
  'time2end_after_step_div_end_time',
  'fract_time_after_step_div_end_time',
  'reachable_frac_agents'],
 'agent': ['x_coordinate',
  'y_coordinate',
  'frac_current_time',
  'frac_current_load',
  'arrivedepot_div_end_time',
  'frac_feasible_nodes'],
 'other_agents': ['x_coordinate',
  'y_coordinate',
  'frac_current_time',
  'frac_current_load',
  'frac_feasible_nodes',
  'dist2agent_div_end_time',
  'time_delta2agent_div_ma

In addition, five lists detail the features available in the class: `POSSIBLE_NODES_STATIC_FEATURES`, `POSSIBLE_NODES_DYNAMIC_FEATURES`, `POSSIBLE_SELF_FEATURES`, `POSSIBLE_AGENTS_FEATURES`, `POSSIBLE_GLOBAL_FEATURES`. For example:

In [22]:
# Display the names of the static node features the class can provide.
obs.POSSIBLE_NODES_STATIC_FEATURES

['x_coordinate',
 'y_coordinate',
 'tw_low',
 'tw_high',
 'demand',
 'service_time',
 'tw_high_minus_tw_low_div_max_dur',
 'x_coordinate_min_max',
 'y_coordinate_min_max',
 'is_depot']

In [23]:
# Display the names of the global features the class can provide.
obs.POSSIBLE_GLOBAL_FEATURES

['frac_demands', 'frac_fleet_load_capacity', 'frac_done_agents']

When instantiating the `Observations` class, we can pass a feature list dictionary specifying which features will be available to the agent:

In [24]:
import yaml

In [25]:
# Custom feature configuration, parsed from YAML into a plain dict.
# Top-level sections: nodes_static, nodes_dynamic, agent, other_agents, global.
# Each `nodes_static` entry gives the source feature (`feat`) and its
# normalisation (`norm`); the remaining sections are plain lists of feature names.
# Mapping keys must be unique: PyYAML silently keeps only the last value of a
# duplicated key, so the x and y coordinates each need their own entry.
# NOTE(review): `frac_not_done_nodes` and `frac_used_agents` do not appear in the
# `POSSIBLE_GLOBAL_FEATURES` output shown earlier -- confirm they are supported.
feature_list = yaml.safe_load("""
    nodes_static:
        x_coordinate_min_max:
            feat: x_coordinate_min_max
            norm: min_max
        y_coordinate_min_max:
            feat: y_coordinate_min_max
            norm: min_max
        tw_low_mm:
            feat: tw_low
            norm: min_max
        tw_high:
            feat: tw_high
            norm: min_max

    nodes_dynamic:
        - time2open_div_end_time
        - time2close_div_end_time
        - time2open_after_step_div_end_time
        - time2close_after_step_div_end_time
        - fract_time_after_step_div_end_time

    agent:
        - x_coordinate_min_max
        - y_coordinate_min_max
        - frac_current_time
        - frac_current_load

    other_agents:
        - x_coordinate_min_max
        - y_coordinate_min_max
        - frac_current_time
        - frac_current_load
        - dist2agent_div_end_time

    global:
        - frac_demands
        - frac_fleet_load_capacity
        - frac_done_agents
        - frac_not_done_nodes
        - frac_used_agents
""")

In [26]:
# Build an observation builder from the custom feature configuration parsed above.
obs = Observations(feature_list)

We can now test the observations in an environment (the cell below re-instantiates `Observations` with its default feature configuration):

In [27]:
# Components the environment is assembled from.
gen = InstanceGenerator(batch_size = 8)
# NOTE: `obs` is rebuilt here without arguments, so the custom `feature_list`
# defined earlier is not used by this environment.
obs = Observations()
sel = AgentSelector()
rew = DenseReward()

# Wire the components into the environment; seed=0 is passed explicitly.
env = Environment(instance_generator_object=gen,  
                  obs_builder_object=obs,
                  agent_selector_object=sel,
                  reward_evaluator=rew,
                  seed=0)

In [28]:
# Reset the environment with batch_size=8, num_agents=4 and num_nodes=16;
# these sizes show up in the observation tensor shapes displayed further down.
td = env.reset(batch_size = 8, num_agents=4, num_nodes=16)

In [29]:
# Collect the current observation features from the environment.
td_observation = env.observe()

In [30]:
# Display the observation returned by env.observe() (a TensorDict, per the output).
td_observation

TensorDict(
    fields={
        action_mask: Tensor(shape=torch.Size([8, 16]), device=cpu, dtype=torch.bool, is_shared=False),
        agent_obs: Tensor(shape=torch.Size([8, 6]), device=cpu, dtype=torch.float32, is_shared=False),
        agents_mask: Tensor(shape=torch.Size([8, 4]), device=cpu, dtype=torch.bool, is_shared=False),
        global_obs: Tensor(shape=torch.Size([8, 3]), device=cpu, dtype=torch.float32, is_shared=False),
        node_dynamic_obs: Tensor(shape=torch.Size([8, 16, 8]), device=cpu, dtype=torch.float32, is_shared=False),
        other_agents_obs: Tensor(shape=torch.Size([8, 4, 8]), device=cpu, dtype=torch.float32, is_shared=False)},
    batch_size=torch.Size([8]),
    device=cpu,
    is_shared=False)