Add tfmdp.model.cell.ReparameterizationCell class

thiagopbueno · Apr 17, 2019 · baac40c
1 parent 0e5c1b5
commit baac40c
Show file tree

Hide file tree

Showing 7 changed files with 204 additions and 8 deletions.
diff --git a/requirements.txt b/requirements.txt
@@ -3,7 +3,7 @@ numpy==1.14.2
 ply==3.11
 pyrddl>=0.1.8
 rddlgym>=0.5.8
-rddl2tf>=0.5.2
+rddl2tf>=0.5.3
 tensorflow==1.5.0
 tensorflow-tensorboard==1.5.1
 tfrddlsim>=0.7.0

diff --git a/setup.py b/setup.py
@@ -24,7 +24,7 @@ def read(filename):
     packages=find_packages(),
     scripts=['scripts/tfmdp'],
     install_requires=[
-        'rddl2tf>=0.5.2',
+        'rddl2tf>=0.5.3',
         'rddlgym>=0.5.8',
         'tfrddlsim>=0.7.0',
         'tensorflow',

diff --git a/tests/test_model_utils.py b/tests/test_model_utils.py
@@ -78,7 +78,7 @@ def test_noise_decoding(self):
         for t in range(self.horizon):
             inputs_per_timestep = self.inputs[:, t, :]
 
-            noise_variables_per_timestep = utils.decode_inputs_as_noise(self.inputs, self.encoding)
+            noise_variables_per_timestep = utils.decode_inputs_as_noise(inputs_per_timestep, self.encoding)
             self.assertIsInstance(noise_variables_per_timestep, list)
             self.assertEqual(len(noise_variables_per_timestep), len(noise_variables_lst))
             for xi_per_timestep, xi in zip(noise_variables_per_timestep, noise_variables_lst):

diff --git a/tests/test_reparameterization_cell.py b/tests/test_reparameterization_cell.py
@@ -0,0 +1,90 @@
+# This file is part of tf-mdp.
+
+# tf-mdp is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# tf-mdp is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with tf-mdp. If not, see <http://www.gnu.org/licenses/>.
+
+import rddlgym
+
+import rddl2tf.reparam
+
+from tfmdp.policy.feedforward import FeedforwardPolicy
+from tfmdp.model.cell.reparameterization_cell import ReparameterizationCell, OutputTuple
+
+from tfmdp.model import utils
+
+import tensorflow as tf
+import unittest
+
+
+class TestReparameterizationCell(unittest.TestCase):
+
+    @classmethod
+    def setUpClass(cls):
+
+        # hyper-parameters
+        cls.horizon = 40
+        cls.batch_size = 16
+
+        # rddl
+        cls.compiler = rddlgym.make('Navigation-v2', mode=rddlgym.SCG)
+        cls.compiler.batch_mode_on()
+
+        # initial state
+        cls.initial_state = cls.compiler.compile_initial_state(cls.batch_size)
+
+        # default action
+        cls.default_action = cls.compiler.compile_default_action(cls.batch_size)
+
+        # policy
+        cls.policy = FeedforwardPolicy(cls.compiler, {'layers': [64, 64], 'activation': 'relu', 'input_layer_norm': True})
+        cls.policy.build()
+
+        with cls.compiler.graph.as_default():
+
+            # reparameterization
+            cls.noise_shapes = rddl2tf.reparam.get_cpfs_reparameterization(cls.compiler.rddl)
+            cls.noise_variables = utils.get_noise_variables(cls.noise_shapes, cls.batch_size, cls.horizon)
+            cls.noise_inputs, cls.encoding = utils.encode_noise_as_inputs(cls.noise_variables)
+
+            # timestep
+            cls.timestep = tf.constant(cls.horizon, dtype=tf.float32)
+            cls.timestep = tf.expand_dims(cls.timestep, -1)
+            cls.timestep = tf.stack([cls.timestep] * cls.batch_size)
+
+            # inputs
+            cls.inputs = tf.concat([cls.timestep, cls.noise_inputs[:, 0, :]], axis=1)
+
+        # cell
+        cls.config = { 'encoding': cls.encoding }
+        cls.cell = ReparameterizationCell(cls.compiler, cls.policy, cls.config)
+
+    def test_call(self):
+        output, next_state = self.cell(self.inputs, self.initial_state)
+
+        self.assertIsInstance(output, OutputTuple)
+        self.assertEqual(len(output), 4)
+
+        self.assertEqual(output.state, output[0])
+        self.assertEqual(output.action, output[1])
+        self.assertEqual(output.interms, output[2])
+        self.assertEqual(output.reward, output[3])
+
+        self.assertEqual(output.state, next_state)
+
+        for action_fluent, default_action_fluent in zip(output.action, self.default_action):
+            self.assertEqual(action_fluent.shape, default_action_fluent.shape)
+
+        self.assertListEqual(output.reward.shape.as_list(), [self.batch_size, 1])
+
+        for fluent, next_fluent in zip(self.initial_state, next_state):
+            self.assertEqual(fluent.shape, next_fluent.shape)
diff --git a/tfmdp/__init__.py b/tfmdp/__init__.py
@@ -1,2 +1,2 @@
-__version__ = '0.5.2'
-__release__ = 'v0.5.2-alpha'
+__version__ = '0.5.3'
+__release__ = 'v0.5.3-alpha'
diff --git a/tfmdp/model/cell/reparameterization_cell.py b/tfmdp/model/cell/reparameterization_cell.py
@@ -0,0 +1,103 @@
+# This file is part of tf-mdp.
+
+# tf-mdp is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# tf-mdp is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with tf-mdp. If not, see <http://www.gnu.org/licenses/>.
+
+
+import rddl2tf
+
+from tfmdp.policy.drp import DeepReactivePolicy
+
+from tfmdp.model.cell.basic_cell import BasicMarkovCell
+from tfmdp.model import utils
+
+import collections
+import tensorflow as tf
+
+from typing import Dict, Optional, Sequence, Tuple, Union
+
+Shape = Sequence[int]
+FluentPair = Tuple[str, rddl2tf.fluent.TensorFluent]
+
+NonFluentsTensor = Sequence[tf.Tensor]
+StateTensor = Sequence[tf.Tensor]
+StatesTensor = Sequence[tf.Tensor]
+ActionsTensor = Sequence[tf.Tensor]
+IntermsTensor = Sequence[tf.Tensor]
+
+CellOutput = Tuple[StatesTensor, ActionsTensor, IntermsTensor, tf.Tensor]
+CellState = Sequence[tf.Tensor]
+
+OutputTuple = collections.namedtuple('OutputTuple', 'state action interms reward')
+
+
+class ReparameterizationCell(BasicMarkovCell):
+    '''ReparameterizationCell extends tfmdp.model.cell.basic_cell.BasicMarkovCell
+    to implement a version of a MarkovCell where all stochastic nodes
+    are reparameterized. Noise variables are given as inputs.
+
+    Args:
+        compiler (:obj:`rddl2tf.compiler.Compiler`): RDDL2TensorFlow compiler.
+        config (Dict): The cell configuration parameters.
+    '''
+
+    def __init__(self,
+                 compiler: rddl2tf.compiler.Compiler,
+                 policy: DeepReactivePolicy,
+                 config: Optional[Dict] = None):
+        self.compiler = compiler
+        self.policy = policy
+        self.config = config
+
+    def __call__(self,
+                 inputs: tf.Tensor,
+                 state: Sequence[tf.Tensor],
+                 scope: Optional[str] = None) -> Tuple[CellOutput, CellState]:
+        '''Returns the cell's output tuple and next state tensors.
+
+        Output tuple packs together the next state, action, interms,
+        and reward tensors in order.
+
+        Args:
+            inputs (tf.Tensor): The encoded (timestep, noise) input tensor.
+            state (Sequence[tf.Tensor]): The current state tensors.
+            scope (Optional[str]): The cell name scope.
+
+        Returns:
+            (CellOutput, CellState): A pair with the cell's output tuple and next state.
+        '''
+
+        # inputs
+        timestep = tf.expand_dims(inputs[:, 0], -1)
+
+        noise = inputs[:, 1:]
+
+        # noise
+        noise = utils.decode_inputs_as_noise(noise, self.config['encoding'])
+        noise = dict(noise)
+
+        # action
+        action = self.policy(state, timestep)
+
+        # next state
+        interms, next_state = self.compiler.cpfs(state, action, noise=noise)
+
+        # reward
+        reward = self.compiler.reward(state, action, next_state)
+
+        # outputs
+        next_state = utils.to_tensor(next_state)
+        interms = utils.to_tensor(interms)
+        output = OutputTuple(next_state, action, interms, reward)
+
+        return (output, next_state)
diff --git a/tfmdp/model/utils.py b/tfmdp/model/utils.py
@@ -89,12 +89,15 @@ def encode_noise_as_inputs(noise_variables: Noise) -> Tuple[tf.Tensor, NoiseEnco
 
 def decode_inputs_as_noise(inputs: tf.Tensor, encoding: NoiseEncoding) -> Noise:
     noise_variables = []
+
     for name, slices in encoding:
         xi_lst = []
+
         for start, end, shape in slices:
-            xi = inputs[:, start:end]
-            batch_size = int(xi.shape[0])
-            xi = tf.reshape(xi, [batch_size, *shape])
+            xi = inputs[:, start:end+1]
+            xi = tf.reshape(xi, [-1, *shape])
             xi_lst.append(xi)
+
         noise_variables.append((name, xi_lst))
+
     return noise_variables