In [1]:
from pyperplan import _parse, _ground
import numpy as np
from copy import deepcopy
from abc import ABC, abstractmethod
from embedding import NaiveEmb, IntegerEmb

### Gym Env Basic

In [2]:
import gym, gym.spaces as spaces
from gym.utils import seeding
from pyperplan import _parse, _ground
from embedding import NaiveEmb, IntegerEmb
from collections import defaultdict

In [3]:
from pddl2env import PddlBasicEnv

In [4]:
# Build the basic PDDL environment from the logistics domain and problem files.
env = PddlBasicEnv(
    'pddl_files/logistics/domain.pddl',
    'pddl_files/logistics/problogistics-50-0.pddl',
)

In [5]:
# Group the grounded facts by their first two space-separated tokens and
# collect the third token of every fact under that key.
# Presumably a fact looks like '(at obj1 pos1)', giving
# mutexes['at obj1'] == {'pos1', ...} -- confirm against the task's fact format.
mutexes = defaultdict(set)
for f in env.task.facts:
  # Drop the surrounding parentheses before tokenising.
  a = f.replace('(','').replace(')','')
  key_val = a.split(' ')
  # NOTE: key_val[2] raises IndexError for any fact with fewer than three tokens.
  mutexes[' '.join(key_val[:2])].add(key_val[2])

In [6]:
# Sanity check: dtype of the observation returned by reset().
env.reset().dtype

dtype('int32')

In [11]:
# Number of entries get_actions() returns for the environment's current state.
len(env.get_actions())

147

In [8]:
# Take one step, passing the embedding of the first entry of env._actions as the action.
env.step(env.E.state_to_emb(env._actions[0]))

(array([  13,   34,   49,   79,  108,  199,  201,  252,  335,  354,  365,
         411,  420,  495,  499,  599,  609,  646,  666,  689,  719,  833,
         896,  898,  950,  992, 1057, 1151, 1213, 1382, 1418, 1431, 1473,
        1491, 1509, 1642, 1654, 1665, 1727, 1870, 1877, 1948, 1966, 1982,
        2048, 2103, 2128, 2173, 2255, 2436, 2467, 2520, 2526, 2530, 2580,
        2654, 2709, 2773, 2780, 2799, 2812, 2815, 2818, 2893, 2934, 2997,
        3055, 3119, 3214, 3337, 3434, 3577, 3597]), -1, 0, None)

In [39]:
class PddlSimpleMultiGoalEnv(gym.GoalEnv):
  """Goal-conditioned gym environment over a grounded PDDL task.

  States are sets of grounded facts. An action is the *embedding of a
  successor state* of the current state (see `get_actions`), not an index.
  A goal assigns one value to every key in `self.mutexes`; both the desired
  goal and the initial values of those keys are re-sampled on every `reset`.
  """

  def __init__(self, domain, instance, embedding_fn=IntegerEmb):
    """Parse and ground the PDDL problem and set up goal bookkeeping.

    Args:
      domain: path of the PDDL domain file.
      instance: path of the PDDL problem file.
      embedding_fn: callable that takes the grounded task and returns an
        object providing `state_to_emb` and `emb_to_state`.
    """
    self.problem = _parse(domain_file=domain, problem_file=instance)
    self.task = _ground(self.problem)
    self.E = embedding_fn(self.task)

    # Group facts by their first two tokens; the third token of each fact is
    # one of the values that key can take.
    mutexes = defaultdict(set)
    for f in self.task.facts:
      a = f.replace('(','').replace(')','')
      key_val = a.split(' ')
      mutexes[' '.join(key_val[:2])].add(key_val[2])

    # Second token of the first goal fact with its digits removed; used below
    # as a substring filter to pick out the goal-relevant facts.
    # (Presumably the goal object's type prefix, e.g. 'obj' from 'obj11'.)
    special_obj = ''.join([i for i in list(self.task.goals)[0].split(' ')[1] if not i.isdigit()])

    self.mutexes = {k:v for k, v in mutexes.items() if special_obj in k}
    self._goal_set = [f for f in self.task.facts if special_obj in f]

    self.goal_len = sum(len(v) for v in self.mutexes.values())
    # Both filters above must select exactly the same facts.
    assert len(self._goal_set) == self.goal_len

    # Facts of the initial state that do not mention the goal object.
    # Uses this instance's own task (the original read a notebook-global
    # `env`, which breaks on a fresh kernel or with a different problem).
    self.basic_init = frozenset([f for f in self.task.initial_state if special_obj not in f])

    # NOTE(review): step() consumes state embeddings, not indices into this
    # space, and 1000 is a fixed bound -- confirm the intended action space.
    self.action_space = spaces.Discrete(1000)
    # NOTE(review): spaces are declared MultiBinary while the default
    # embedding is IntegerEmb -- confirm they match the chosen embedding.
    self.observation_space = spaces.Dict(dict(
            desired_goal=spaces.MultiBinary(self.goal_len),
            achieved_goal=spaces.MultiBinary(self.goal_len),
            observation=spaces.MultiBinary(len(self.task.facts)),
        ))

    self.reward_range = (-1., 0.)

    self.seed()
    self._state = None
    self._goal = None
    self._goal_emb = None
    self._actions = None
    self.reset()

  def seed(self, seed=None):
    """Seed the environment's RNG (used for goal sampling); returns [seed]."""
    self.np_random, seed = seeding.np_random(seed)
    return [seed]

  def _successor_states(self, state):
    """Return the list of states reachable from `state` by one operator."""
    return [
      next_state
      for _, next_state in self.task.get_successor_states(state)
    ]

  def get_actions(self):
    """Return the embeddings of all successor states of the current state."""
    return [self.E.state_to_emb(a) for a in self._actions]

  def get_actions_for_emb(self, state_emb):
    """Return the embeddings of all successor states of the embedded state."""
    actions = self._successor_states(self.E.emb_to_state(state_emb))
    return [self.E.state_to_emb(a) for a in actions]

  def _sample_goal(self):
    """Sample one fact per mutex key, formatted as '(<key> <value>)'.

    Draws from `self.np_random` so that `seed()` controls the result; the
    candidates are sorted because set iteration order is not stable across
    interpreter runs.
    """
    return frozenset(
      '({} {})'.format(k, self.np_random.choice(sorted(v)))
      for k, v in self.mutexes.items()
    )

  def reset(self):
    """Sample a new start state and goal; return the initial observation."""
    # The start state is the fixed part of the initial state plus a random
    # assignment of the goal-relevant facts.
    self._state = self.basic_init.union(self._sample_goal())
    self._goal = self._sample_goal()
    self._goal_emb = self.E.state_to_emb(self._goal)
    self._actions = self._successor_states(self._state)
    return self._get_obs()

  def _get_obs(self):
    """Build the GoalEnv observation dict for the current state."""
    return {
      'observation': self.E.state_to_emb(self._state),
      'achieved_goal': self.E.state_to_emb(self._state.intersection(self._goal_set)),
      'desired_goal': self._goal_emb
    }

  def render(self, mode=None):
    """Rendering is not supported."""
    raise NotImplementedError

  def close(self):
    """Nothing to release."""
    pass

  def step(self, action):
    """Move to the successor state whose embedding is `action`.

    Returns:
      (observation dict, reward, done flag, None). The reward is 0 when every
      goal fact holds in the new state and -1 otherwise; the done flag is
      `reward + 1`, i.e. 1 exactly when the goal is reached.

    Raises:
      ValueError: if `action` does not decode to a successor of the current
        state.
    """
    next_state = self.E.emb_to_state(action)
    if next_state not in self._actions:
      raise ValueError('bad action!')
    self._state = next_state
    self._actions = self._successor_states(self._state)

    reward = 0 if self._goal.issubset(self._state) else -1

    return self._get_obs(), reward, reward + 1, None

In [40]:
# Instantiate the multi-goal environment on the same logistics problem.
e = PddlSimpleMultiGoalEnv(
    'pddl_files/logistics/domain.pddl',
    'pddl_files/logistics/problogistics-50-0.pddl',
)

### Continuous embeddings

In [7]:
# We can now get an N-dimensional continuous embedding using a trainable embedding matrix.
# This is equivalent to having an N-dimensional embedding for each "fact/fluent", and then adding them together.
N = 16

# `task` and `naive_emb` were not defined anywhere in this notebook (they only
# existed as leftover kernel state), so bind them explicitly here.
task = env.task
# NOTE(review): assumes NaiveEmb follows the same interface as the embedding
# used by the environments (constructed from the task, exposes `state_to_emb`)
# and yields a vector of length len(task.facts) -- confirm against embedding.py.
naive_emb = NaiveEmb(task).state_to_emb

W_emb = np.random.normal(0., 1e-2, (len(task.facts), N))

# So now the embedding of the initial state is:
np.expand_dims(naive_emb(task.initial_state), 0).dot(W_emb)

array([[-0.05030386, -0.01080087, -0.01502595,  0.04162736,  0.07952476,
         0.0474224 , -0.0305328 ,  0.04548838,  0.06364573,  0.0182024 ,
        -0.04808633,  0.00967416, -0.05022812, -0.01181706,  0.07991284,
         0.03560514]])

## Smarter embedding

- **TODO:** This is non-trivial; we should discuss how to design a smarter embedding.