Skip to content

Commit

Permalink
refactor: overall update to use rddl2tf=0.5.11
Browse files: browse the repository at this point in the history
Signed-off-by: Thiago P. Bueno <thiago.pbueno@gmail.com>
  • Loading branch information
thiagopbueno committed Sep 14, 2020
1 parent 3281639 commit d7315c6
Show file tree
Hide file tree
Showing 8 changed files with 170 additions and 162 deletions.
61 changes: 32 additions & 29 deletions tests/test_policy.py
Expand Up @@ -13,42 +13,44 @@
# You should have received a copy of the GNU General Public License
# along with tf-rddlsim. If not, see <http://www.gnu.org/licenses/>.


import rddlgym

import rddl2tf
from rddl2tf.compiler import Compiler

from tfrddlsim.policy import DefaultPolicy, RandomPolicy
import unittest

import numpy as np
import tensorflow as tf

import unittest
import rddlgym
import rddl2tf
from rddl2tf.compilers import DefaultCompiler as Compiler
from tfrddlsim.policy import DefaultPolicy, RandomPolicy


class TestDefaultPolicy(unittest.TestCase):

@classmethod
def setUpClass(cls):
batch_size = 128

cls.rddl1 = rddlgym.make('Reservoir-8', mode=rddlgym.AST)
cls.compiler1 = Compiler(cls.rddl1, batch_size)
cls.compiler1.init()

cls.rddl2 = rddlgym.make('Mars_Rover', mode=rddlgym.AST)
cls.compiler1 = Compiler(cls.rddl1)
cls.compiler2 = Compiler(cls.rddl2)
cls.compiler2 = Compiler(cls.rddl2, batch_size)
cls.compiler2.init()

def test_default_policy(self):
for compiler in [self.compiler1, self.compiler2]:
with compiler.graph.as_default():
default = compiler.compile_default_action()
batch_size = 1000
policy = DefaultPolicy(compiler, batch_size)
default_action = compiler.default_action_fluents
policy = DefaultPolicy(compiler, compiler.batch_size)

state1 = compiler.compile_initial_state()
state1 = compiler.initial_state()
action1 = policy(state1, None)

self.assertIsInstance(action1, tuple)
for af, (_, t) in zip(action1, default):
for af, (_, t) in zip(action1, default_action):
shape = af.shape.as_list()
self.assertEqual(shape[0], batch_size)
self.assertEqual(shape[0], compiler.batch_size)
actual_shape = shape[1:]
expected_shape = t.shape.as_list()
self.assertListEqual(actual_shape, expected_shape)
Expand All @@ -62,52 +64,53 @@ class TestRandomPolicy(unittest.TestCase):

@classmethod
def setUpClass(cls):
batch_size = 128

cls.rddl1 = rddlgym.make('Reservoir-8', mode=rddlgym.AST)
cls.compiler1 = Compiler(cls.rddl1, batch_size)
cls.compiler1.init()

cls.rddl2 = rddlgym.make('Mars_Rover', mode=rddlgym.AST)
cls.compiler1 = Compiler(cls.rddl1, batch_mode=True)
cls.compiler2 = Compiler(cls.rddl2, batch_mode=True)
cls.compiler2 = Compiler(cls.rddl2, batch_size)
cls.compiler2.init()

def test_random_policy(self):
batch_size = 1000

compilers = [self.compiler1, self.compiler2]
for i, compiler in enumerate(compilers):

with compiler.graph.as_default():

policy = RandomPolicy(compiler, batch_size)
policy = RandomPolicy(compiler, compiler.batch_size)

state = compiler.compile_initial_state(batch_size)
state = compiler.initial_state()
action, n, checking = policy._sample_actions(state)

action_size = compiler.rddl.action_size
action_range_type = compiler.rddl.action_range_type
action_default_fluents = compiler.compile_default_action(batch_size)
action_default_fluents = compiler.default_action()

self.assertIsInstance(action, tuple)
self.assertEqual(len(action), len(action_size))
self.assertEqual(len(action), len(action_range_type))
for fluent, size, range_type, default in zip(action, action_size, action_range_type, action_default_fluents):
self.assertIsInstance(fluent, tf.Tensor)
self.assertListEqual(fluent.shape.as_list(), [batch_size] + list(size))
self.assertListEqual(fluent.shape.as_list(), [compiler.batch_size] + list(size))
self.assertEqual(fluent.dtype, rddl2tf.utils.range_type_to_dtype(range_type))
self.assertEqual(fluent.shape, default.shape)
self.assertEqual(fluent.dtype, default.dtype)

def test_random_policy_preconditions_checking(self):
batch_size = 1000

compilers = [self.compiler1, self.compiler2]
for i, compiler in enumerate(compilers):

with compiler.graph.as_default():

policy = RandomPolicy(compiler, batch_size)
policy = RandomPolicy(compiler, compiler.batch_size)

state = compiler.compile_initial_state(batch_size)
state = compiler.initial_state()
action, n, checking = policy._sample_actions(state)

with tf.Session() as sess:
with tf.compat.v1.Session() as sess:
n_, action_, checking_ = sess.run([n, action, checking])
self.assertTrue(np.all(checking_))
if i == 0: # reservoir: all preconditions are bound constraints
Expand Down

0 comments on commit d7315c6

Please sign in to comment.