refactor: overall update to use rddl2tf=0.5.11

Signed-off-by: Thiago P. Bueno <thiago.pbueno@gmail.com>
thiagopbueno · Sep 14, 2020 · d7315c6
1 parent 3281639
commit d7315c6
Show file tree

Hide file tree

Showing 8 changed files with 170 additions and 162 deletions.
diff --git a/tests/test_policy.py b/tests/test_policy.py
@@ -13,42 +13,44 @@
 # You should have received a copy of the GNU General Public License
 # along with tf-rddlsim. If not, see <http://www.gnu.org/licenses/>.
 
-
-import rddlgym
-
-import rddl2tf
-from rddl2tf.compiler import Compiler
-
-from tfrddlsim.policy import DefaultPolicy, RandomPolicy
+import unittest
 
 import numpy as np
 import tensorflow as tf
 
-import unittest
+import rddlgym
+import rddl2tf
+from rddl2tf.compilers import DefaultCompiler as Compiler
+from tfrddlsim.policy import DefaultPolicy, RandomPolicy
 
 
 class TestDefaultPolicy(unittest.TestCase):
 
     @classmethod
     def setUpClass(cls):
+        batch_size = 128
+
         cls.rddl1 = rddlgym.make('Reservoir-8', mode=rddlgym.AST)
+        cls.compiler1 = Compiler(cls.rddl1, batch_size)
+        cls.compiler1.init()
+
         cls.rddl2 = rddlgym.make('Mars_Rover', mode=rddlgym.AST)
-        cls.compiler1 = Compiler(cls.rddl1)
-        cls.compiler2 = Compiler(cls.rddl2)
+        cls.compiler2 = Compiler(cls.rddl2, batch_size)
+        cls.compiler2.init()
 
     def test_default_policy(self):
         for compiler in [self.compiler1, self.compiler2]:
             with compiler.graph.as_default():
-                default = compiler.compile_default_action()
-                batch_size = 1000
-                policy = DefaultPolicy(compiler, batch_size)
+                default_action = compiler.default_action_fluents
+                policy = DefaultPolicy(compiler, compiler.batch_size)
 
-                state1 = compiler.compile_initial_state()
+                state1 = compiler.initial_state()
                 action1 = policy(state1, None)
+
                 self.assertIsInstance(action1, tuple)
-                for af, (_, t) in zip(action1, default):
+                for af, (_, t) in zip(action1, default_action):
                     shape = af.shape.as_list()
-                    self.assertEqual(shape[0], batch_size)
+                    self.assertEqual(shape[0], compiler.batch_size)
                     actual_shape = shape[1:]
                     expected_shape = t.shape.as_list()
                     self.assertListEqual(actual_shape, expected_shape)
@@ -62,52 +64,53 @@ class TestRandomPolicy(unittest.TestCase):
 
     @classmethod
     def setUpClass(cls):
+        batch_size = 128
+
         cls.rddl1 = rddlgym.make('Reservoir-8', mode=rddlgym.AST)
+        cls.compiler1 = Compiler(cls.rddl1, batch_size)
+        cls.compiler1.init()
+
         cls.rddl2 = rddlgym.make('Mars_Rover', mode=rddlgym.AST)
-        cls.compiler1 = Compiler(cls.rddl1, batch_mode=True)
-        cls.compiler2 = Compiler(cls.rddl2, batch_mode=True)
+        cls.compiler2 = Compiler(cls.rddl2, batch_size)
+        cls.compiler2.init()
 
     def test_random_policy(self):
-        batch_size = 1000
-
         compilers = [self.compiler1, self.compiler2]
         for i, compiler in enumerate(compilers):
 
             with compiler.graph.as_default():
 
-                policy = RandomPolicy(compiler, batch_size)
+                policy = RandomPolicy(compiler, compiler.batch_size)
 
-                state = compiler.compile_initial_state(batch_size)
+                state = compiler.initial_state()
                 action, n, checking = policy._sample_actions(state)
 
                 action_size = compiler.rddl.action_size
                 action_range_type = compiler.rddl.action_range_type
-                action_default_fluents = compiler.compile_default_action(batch_size)
+                action_default_fluents = compiler.default_action()
 
                 self.assertIsInstance(action, tuple)
                 self.assertEqual(len(action), len(action_size))
                 self.assertEqual(len(action), len(action_range_type))
                 for fluent, size, range_type, default in zip(action, action_size, action_range_type, action_default_fluents):
                     self.assertIsInstance(fluent, tf.Tensor)
-                    self.assertListEqual(fluent.shape.as_list(), [batch_size] + list(size))
+                    self.assertListEqual(fluent.shape.as_list(), [compiler.batch_size] + list(size))
                     self.assertEqual(fluent.dtype, rddl2tf.utils.range_type_to_dtype(range_type))
                     self.assertEqual(fluent.shape, default.shape)
                     self.assertEqual(fluent.dtype, default.dtype)
 
     def test_random_policy_preconditions_checking(self):
-        batch_size = 1000
-
         compilers = [self.compiler1, self.compiler2]
         for i, compiler in enumerate(compilers):
 
             with compiler.graph.as_default():
 
-                policy = RandomPolicy(compiler, batch_size)
+                policy = RandomPolicy(compiler, compiler.batch_size)
 
-                state = compiler.compile_initial_state(batch_size)
+                state = compiler.initial_state()
                 action, n, checking = policy._sample_actions(state)
 
-                with tf.Session() as sess:
+                with tf.compat.v1.Session() as sess:
                     n_, action_, checking_  = sess.run([n, action, checking])
                     self.assertTrue(np.all(checking_))
                     if i == 0: # reservoir: all preconditions are bound constraints