Skip to content

Commit

Permalink
Add tfmdp.model.cell.ReparameterizationCell class
Browse files Browse the repository at this point in the history
  • Loading branch information
thiagopbueno committed Apr 17, 2019
1 parent 0e5c1b5 commit baac40c
Show file tree
Hide file tree
Showing 7 changed files with 204 additions and 8 deletions.
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ numpy==1.14.2
ply==3.11
pyrddl>=0.1.8
rddlgym>=0.5.8
rddl2tf>=0.5.2
rddl2tf>=0.5.3
tensorflow==1.5.0
tensorflow-tensorboard==1.5.1
tfrddlsim>=0.7.0
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ def read(filename):
packages=find_packages(),
scripts=['scripts/tfmdp'],
install_requires=[
'rddl2tf>=0.5.2',
'rddl2tf>=0.5.3',
'rddlgym>=0.5.8',
'tfrddlsim>=0.7.0',
'tensorflow',
Expand Down
2 changes: 1 addition & 1 deletion tests/test_model_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ def test_noise_decoding(self):
for t in range(self.horizon):
inputs_per_timestep = self.inputs[:, t, :]

noise_variables_per_timestep = utils.decode_inputs_as_noise(self.inputs, self.encoding)
noise_variables_per_timestep = utils.decode_inputs_as_noise(inputs_per_timestep, self.encoding)
self.assertIsInstance(noise_variables_per_timestep, list)
self.assertEqual(len(noise_variables_per_timestep), len(noise_variables_lst))
for xi_per_timestep, xi in zip(noise_variables_per_timestep, noise_variables_lst):
Expand Down
90 changes: 90 additions & 0 deletions tests/test_reparameterization_cell.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
# This file is part of tf-mdp.

# tf-mdp is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.

# tf-mdp is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with tf-mdp. If not, see <http://www.gnu.org/licenses/>.

import rddlgym

import rddl2tf.reparam

from tfmdp.policy.feedforward import FeedforwardPolicy
from tfmdp.model.cell.reparameterization_cell import ReparameterizationCell, OutputTuple

from tfmdp.model import utils

import tensorflow as tf
import unittest


class TestReparameterizationCell(unittest.TestCase):
    '''Checks the structure and shapes of a single ReparameterizationCell step.'''

    @classmethod
    def setUpClass(cls):
        '''Builds the compiler, policy, encoded noise inputs and cell once for all tests.'''

        # hyper-parameters
        cls.horizon = 40
        cls.batch_size = 16

        # rddl
        cls.compiler = rddlgym.make('Navigation-v2', mode=rddlgym.SCG)
        cls.compiler.batch_mode_on()

        # initial state and default action
        cls.initial_state = cls.compiler.compile_initial_state(cls.batch_size)
        cls.default_action = cls.compiler.compile_default_action(cls.batch_size)

        # policy
        policy_config = {
            'layers': [64, 64],
            'activation': 'relu',
            'input_layer_norm': True
        }
        cls.policy = FeedforwardPolicy(cls.compiler, policy_config)
        cls.policy.build()

        with cls.compiler.graph.as_default():

            # reparameterization
            cls.noise_shapes = rddl2tf.reparam.get_cpfs_reparameterization(cls.compiler.rddl)
            cls.noise_variables = utils.get_noise_variables(
                cls.noise_shapes, cls.batch_size, cls.horizon)
            cls.noise_inputs, cls.encoding = utils.encode_noise_as_inputs(cls.noise_variables)

            # timestep: one copy of the horizon value per batch element
            step = tf.constant(cls.horizon, dtype=tf.float32)
            step = tf.expand_dims(step, -1)
            cls.timestep = tf.stack([step] * cls.batch_size)

            # inputs: timestep column followed by the noise of the first step
            first_step_noise = cls.noise_inputs[:, 0, :]
            cls.inputs = tf.concat([cls.timestep, first_step_noise], axis=1)

            # cell
            cls.config = {'encoding': cls.encoding}
            cls.cell = ReparameterizationCell(cls.compiler, cls.policy, cls.config)

    def test_call(self):
        '''The cell returns an OutputTuple plus a next state with consistent shapes.'''
        output, next_state = self.cell(self.inputs, self.initial_state)

        self.assertIsInstance(output, OutputTuple)
        self.assertEqual(len(output), 4)

        # named fields must match their positional counterparts, in order
        named_fields = (output.state, output.action, output.interms, output.reward)
        for position, field in enumerate(named_fields):
            self.assertEqual(field, output[position])

        self.assertEqual(output.state, next_state)

        for action_fluent, default_action_fluent in zip(output.action, self.default_action):
            self.assertEqual(action_fluent.shape, default_action_fluent.shape)

        expected_reward_shape = [self.batch_size, 1]
        self.assertListEqual(output.reward.shape.as_list(), expected_reward_shape)

        for fluent, next_fluent in zip(self.initial_state, next_state):
            self.assertEqual(fluent.shape, next_fluent.shape)
4 changes: 2 additions & 2 deletions tfmdp/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
__version__ = '0.5.2'
__release__ = 'v0.5.2-alpha'
__version__ = '0.5.3'
__release__ = 'v0.5.3-alpha'
103 changes: 103 additions & 0 deletions tfmdp/model/cell/reparameterization_cell.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
# This file is part of tf-mdp.

# tf-mdp is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.

# tf-mdp is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with tf-mdp. If not, see <http://www.gnu.org/licenses/>.


import rddl2tf

from tfmdp.policy.drp import DeepReactivePolicy

from tfmdp.model.cell.basic_cell import BasicMarkovCell
from tfmdp.model import utils

import collections
import tensorflow as tf

from typing import Dict, Optional, Sequence, Tuple, Union

# Type aliases used in this module's annotations.
Shape = Sequence[int]
FluentPair = Tuple[str, rddl2tf.fluent.TensorFluent]

# Sequences of tensors (presumably one tensor per fluent -- confirm against rddl2tf).
NonFluentsTensor = Sequence[tf.Tensor]
StateTensor = Sequence[tf.Tensor]
StatesTensor = Sequence[tf.Tensor]
ActionsTensor = Sequence[tf.Tensor]
IntermsTensor = Sequence[tf.Tensor]

# Return types of ReparameterizationCell.__call__: the output tuple and the next state.
CellOutput = Tuple[StatesTensor, ActionsTensor, IntermsTensor, tf.Tensor]
CellState = Sequence[tf.Tensor]

# Named container for the cell output; fields are, in order: state, action, interms, reward.
OutputTuple = collections.namedtuple('OutputTuple', 'state action interms reward')


class ReparameterizationCell(BasicMarkovCell):
    '''ReparameterizationCell extends the tfmdp.model.cell.basic_cell class
    to implement a version of a MarkovCell where all stochastic nodes
    are reparameterized. Noise variables are given as inputs.

    Args:
        compiler (:obj:`rddl2tf.compiler.Compiler`): RDDL2TensorFlow compiler.
        policy (:obj:`tfmdp.policy.drp.DeepReactivePolicy`): The policy called
            with the current state and timestep to produce the action.
        config (Dict): The cell configuration parameters. It must provide the
            'encoding' key (the noise encoding passed to
            `utils.decode_inputs_as_noise`) by the time the cell is called.
    '''

    def __init__(self,
                 compiler: rddl2tf.compiler.Compiler,
                 policy: DeepReactivePolicy,
                 config: Optional[Dict] = None):
        self.compiler = compiler
        self.policy = policy
        self.config = config

    def __call__(self,
                 inputs: tf.Tensor,
                 state: Sequence[tf.Tensor],
                 scope: Optional[str] = None) -> Tuple[CellOutput, CellState]:
        '''Returns the cell's output tuple and next state tensors.

        Output tuple packs together the next state, action, interms,
        and reward tensors in order.

        Args:
            inputs (tf.Tensor): The encoded (timestep, noise) input tensor.
                The first column is read as the timestep and the remaining
                columns as the encoded noise.
            state (Sequence[tf.Tensor]): The current state tensors.
            scope (Optional[str]): The cell name scope (not used by this method).

        Returns:
            (CellOutput, CellState): A pair with the cell's output tuple and next state.

        Raises:
            ValueError: If the cell config is None or has no 'encoding' entry.
        '''

        # fail early, before any op is created, with an actionable message
        # instead of an opaque "'NoneType' object is not subscriptable"
        try:
            encoding = self.config['encoding']
        except (TypeError, KeyError) as err:
            raise ValueError(
                "ReparameterizationCell requires config['encoding'] "
                "to decode the noise inputs.") from err

        # inputs
        timestep = tf.expand_dims(inputs[:, 0], -1)

        noise = inputs[:, 1:]

        # noise: decode into (name, tensors) pairs and index them by name
        noise = utils.decode_inputs_as_noise(noise, encoding)
        noise = dict(noise)

        # action
        action = self.policy(state, timestep)

        # next state
        interms, next_state = self.compiler.cpfs(state, action, noise=noise)

        # reward (computed from the cpfs results before they are converted below)
        reward = self.compiler.reward(state, action, next_state)

        # outputs
        next_state = utils.to_tensor(next_state)
        interms = utils.to_tensor(interms)
        output = OutputTuple(next_state, action, interms, reward)

        return (output, next_state)
9 changes: 6 additions & 3 deletions tfmdp/model/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,12 +89,15 @@ def encode_noise_as_inputs(noise_variables: Noise) -> Tuple[tf.Tensor, NoiseEnco

def decode_inputs_as_noise(inputs: tf.Tensor, encoding: NoiseEncoding) -> Noise:
    '''Decodes a flat inputs tensor back into a list of named noise variables.

    Args:
        inputs (tf.Tensor): The encoded noise tensor; its second axis is
            sliced according to the encoding.
        encoding (NoiseEncoding): Pairs of a name and a list of
            (start, end, shape) slices, where `end` is treated as inclusive.

    Returns:
        Noise: A list of (name, tensors) pairs, with one reshaped tensor
        per slice of the encoding.
    '''
    noise_variables = []

    for name, slices in encoding:
        xi_lst = []

        for start, end, shape in slices:
            # `end` is inclusive, hence the +1 in the slice
            xi = inputs[:, start:end+1]
            # -1 lets TensorFlow infer the leading (batch) dimension, so the
            # batch size does not need to be statically known
            xi = tf.reshape(xi, [-1, *shape])
            xi_lst.append(xi)

        noise_variables.append((name, xi_lst))

    return noise_variables

0 comments on commit baac40c

Please sign in to comment.