diff --git a/test/model_level_tests/models/MLP/test1/expected.json b/test/model_level_tests/models/MLP/test1/expected.json
index 53aeed6a3..b432d0335 100644
--- a/test/model_level_tests/models/MLP/test1/expected.json
+++ b/test/model_level_tests/models/MLP/test1/expected.json
@@ -7,9 +7,9 @@
                 "num_nodes_marked_for_clustering": 0
             },
             "1": {
-                "num_ng_clusters": 17,
-                "num_nodes_in_graph": 310,
-                "num_nodes_marked_for_clustering": 244
+                "num_ng_clusters": 15,
+                "num_nodes_in_graph": 311,
+                "num_nodes_marked_for_clustering": 249
             }
         },
         "time": 10
@@ -22,9 +22,24 @@
                 "num_nodes_marked_for_clustering": 0
             },
             "1": {
-                "num_ng_clusters": 17,
-                "num_nodes_in_graph": 329,
-                "num_nodes_marked_for_clustering": 262
+                "num_ng_clusters": 15,
+                "num_nodes_in_graph": 330,
+                "num_nodes_marked_for_clustering": 267
+            }
+        },
+        "time": 10
+    },
+    "varopts": {
+        "logparse": {
+            "0": {
+                "num_ng_clusters": 0,
+                "num_nodes_in_graph": 50,
+                "num_nodes_marked_for_clustering": 0
+            },
+            "1": {
+                "num_ng_clusters": 15,
+                "num_nodes_in_graph": 312,
+                "num_nodes_marked_for_clustering": 250
             }
         },
         "time": 10
diff --git a/test/model_level_tests/models/MLP/test2/expected.json b/test/model_level_tests/models/MLP/test2/expected.json
index ffbf6c47b..b80f3f53d 100644
--- a/test/model_level_tests/models/MLP/test2/expected.json
+++ b/test/model_level_tests/models/MLP/test2/expected.json
@@ -7,9 +7,9 @@
                 "num_nodes_marked_for_clustering": 0
             },
             "1": {
-                "num_ng_clusters": 21,
-                "num_nodes_in_graph": 448,
-                "num_nodes_marked_for_clustering": 358
+                "num_ng_clusters": 19,
+                "num_nodes_in_graph": 449,
+                "num_nodes_marked_for_clustering": 363
             }
         },
         "time": 13
@@ -22,9 +22,24 @@
                 "num_nodes_marked_for_clustering": 0
             },
             "1": {
-                "num_ng_clusters": 21,
-                "num_nodes_in_graph": 478,
-                "num_nodes_marked_for_clustering": 387
+                "num_ng_clusters": 19,
+                "num_nodes_in_graph": 479,
+                "num_nodes_marked_for_clustering": 392
+            }
+        },
+        "time": 13
+    },
+    "varopts": {
+        "logparse": {
+            "0": {
+                "num_ng_clusters": 0,
+                "num_nodes_in_graph": 83,
+                "num_nodes_marked_for_clustering": 0
+            },
+            "1": {
+                "num_ng_clusters": 19,
+                "num_nodes_in_graph": 450,
+                "num_nodes_marked_for_clustering": 364
             }
         },
         "time": 13
diff --git a/test/model_level_tests/test_main.py b/test/model_level_tests/test_main.py
index 8452e0bc9..989f66142 100644
--- a/test/model_level_tests/test_main.py
+++ b/test/model_level_tests/test_main.py
@@ -434,7 +434,8 @@ def get_disabled_tests_info():
     failed_tests = {}
     skipped_tests = {}
     for test_suite in requested_test_suites:
-        print('Testing model/test-suite: ' + test_suite)
+        print('\n' + '=' * 20 + 'Testing model/test-suite: ' + test_suite +
+              '=' * 20)
         if test_suite not in disabled_test_suite:
             if args.run_basic_tests:
                 passed_tests_in_suite, failed_tests_in_suite, skipped_tests_in_suite = run_test_suite(
@@ -450,6 +451,8 @@ def get_disabled_tests_info():
     print('Passed:\n' + '\033[92m' + print_format(passed_tests) + '\033[0m')
     print('Skipped:\n' + '\033[93m' + print_format(skipped_tests) + '\033[0m')
     print('Failed:\n' + '\033[91m' + print_format(failed_tests) + '\033[0m')
+    all_tests_passed = all([len(failed_tests[k]) == 0 for k in failed_tests])
+    exit(0 if all_tests_passed else 1)
 
 # TODO add a test comparing with TF run?
 # TODO verbose or quiet?
@@ -464,21 +467,4 @@ def get_disabled_tests_info():
 # Level3: parse prints we put. These tests are run without "NGRAPH_TF_LOG_PLACEMENT=1". the framework can provide some default parsers, but users are free to add pyscripts that provide functions for custom script parsers
 # These tests can be long
 # So we can offer options to do: {1}, {1,2}, {1,2,3}, {3}  (or do we allow options for any combination of tests?)
-# NOTE: Level3 and Level1 test are same (mechanics wise). Merge them. Then we have only 2 types of tests
-
-# Each model dir represents 1 repo to download. A model dir can have multiple sub tests (each sub-test could represent a different model, or the same model tested under different settings)
-
-# Structure of "expected json"
-# dictionary of expected values. key is a config, value is the expected values json. there is a "default" config, but one can add other configs (for example for other backends etc)
-
-# Sample run script:
-# python test_main.py --run_logparse_tests --models MLP
-
-# feature 1: dumps shell script at the end. dumps shell script even when the framework crashes
-# feature 2: prints list of tests and their descriptions (--list)
-# feature 3: "expected" values can be varied by different configs
-# feature 4: cleanup script
-# feature 5: sub tests folders must start with 'test' (else ignored). Can have 'disabled' in their names to disable
-# feature 6: default and user-specified log parsers (named custom_log_parser.py, which is expected to contain a function custom_parse_logs)
-# feature 7: filename is supposed to be expected.json
-# feature 8: enable_ngraph can be placed in each test dir or in the model dir for all subtests to share. test folder's patch overrides global model folder patch
+# NOTE: Level3 and Level1 tests are the same (mechanics-wise), so there are effectively only 2 types of tests; Level2 is unimplemented for now
diff --git a/test/python/test_conv2D_KernelChecks.py b/test/python/test_conv2D_KernelChecks.py
index 5edd79478..2284478e2 100644
--- a/test/python/test_conv2D_KernelChecks.py
+++ b/test/python/test_conv2D_KernelChecks.py
@@ -63,7 +63,11 @@ def make_filter_and_backprop_args(self):
 
         return x1, x2
 
-    def test_conv2d_stride_in_batch_not_supported(self):
+    @pytest.mark.parametrize(("strides",), (
+        ([2, 1, 1, 1],),
+        ([1, 1, 1, 2],),
+    ))
+    def test_conv2d_stride_in_batch_not_supported(self, strides):
         inp_values, filt_values = self.make_filter_and_backprop_args()
 
         def run_test(sess):
@@ -80,19 +84,6 @@ def run_test(sess):
             self.with_ngraph(run_test)
         assert "Strides in batch and depth dimensions is not supported: Conv2D" in excinfo.value.message
 
-    def test_conv2d_stride_in_depth_not_supported(self):
-        inp_values, filt_values = self.make_filter_and_backprop_args()
-
-        def run_test(sess):
-            inp = array_ops.placeholder(dtypes.float32)
-            filt = array_ops.placeholder(dtypes.float32)
-            return sess.run(
-                nn_ops.conv2d(inp, filt, strides=[1, 1, 1, 2], padding="SAME"),
-                {
-                    inp: inp_values,
-                    filt: filt_values
-                })
-
-        with pytest.raises(Exception) as excinfo:
-            self.with_ngraph(run_test)
-        assert "Strides in batch and depth dimensions is not supported: Conv2D" in excinfo.value.message
+        # Stock TF (run without ngraph) is expected to raise for these strides as well
+        with pytest.raises(Exception) as excinfo1:
+            self.without_ngraph(run_test)
diff --git a/test/python/test_modeltester.py b/test/python/test_modeltester.py
index aa0cde81d..3bb3c4b0a 100644
--- a/test/python/test_modeltester.py
+++ b/test/python/test_modeltester.py
@@ -27,6 +27,7 @@
 
 from common import NgraphTest
 from tools.build_utils import command_executor
+import ngraph_bridge
 
 
 class TestModelTester(NgraphTest):
@@ -35,8 +36,18 @@ class TestModelTester(NgraphTest):
     def test_MLP(self):
         cwd = os.getcwd()
         os.chdir('../model_level_tests/')
+        grappler = ngraph_bridge.is_grappler_enabled()
+        varopts = ngraph_bridge.are_variables_enabled()
+        if grappler:
+            if varopts:
+                assert False, "Varopts and grappler does not build together right now"
+            else:
+                config = "grappler"
+        else:
+            config = "varopts" if varopts else "default"
         try:
             command_executor(
-                "python test_main.py --run_basic_tests --models MLP")
+                "python test_main.py --run_basic_tests --models MLP --configuration "
+                + config)
         finally:
             os.chdir(cwd)