diff --git a/test/expect/ModelTester.test_deeplabv3_resnet101_expect.pkl b/test/expect/ModelTester.test_deeplabv3_resnet101_expect.pkl
new file mode 100644
index 00000000000..ac9c251e4e9
Binary files /dev/null and b/test/expect/ModelTester.test_deeplabv3_resnet101_expect.pkl differ
diff --git a/test/expect/ModelTester.test_deeplabv3_resnet50_expect.pkl b/test/expect/ModelTester.test_deeplabv3_resnet50_expect.pkl
new file mode 100644
index 00000000000..70e70a92e2c
Binary files /dev/null and b/test/expect/ModelTester.test_deeplabv3_resnet50_expect.pkl differ
diff --git a/test/expect/ModelTester.test_fcn_resnet101_expect.pkl b/test/expect/ModelTester.test_fcn_resnet101_expect.pkl
new file mode 100644
index 00000000000..ae6f21bb95c
Binary files /dev/null and b/test/expect/ModelTester.test_fcn_resnet101_expect.pkl differ
diff --git a/test/expect/ModelTester.test_fcn_resnet50_expect.pkl b/test/expect/ModelTester.test_fcn_resnet50_expect.pkl
new file mode 100644
index 00000000000..29a2382ed89
Binary files /dev/null and b/test/expect/ModelTester.test_fcn_resnet50_expect.pkl differ
diff --git a/test/test_models.py b/test/test_models.py
index 28236598177..14880425aed 100644
--- a/test/test_models.py
+++ b/test/test_models.py
@@ -59,6 +59,10 @@ def get_available_video_models():
     "resnet101",
     "resnet152",
     "wide_resnet101_2",
+    "deeplabv3_resnet50",
+    "deeplabv3_resnet101",
+    "fcn_resnet50",
+    "fcn_resnet101",
 )
 
 
@@ -85,21 +89,53 @@ def _test_classification_model(self, name, input_shape, dev):
         self.assertEqual(out.shape[-1], 50)
 
     def _test_segmentation_model(self, name, dev):
-        # passing num_class equal to a number other than 1000 helps in making the test
-        # more enforcing in nature
-        model = models.segmentation.__dict__[name](num_classes=50, pretrained_backbone=False)
+        set_rng_seed(0)
+        # passing num_classes equal to a number other than 21 helps in making the test's
+        # expected file size smaller
+        model = models.segmentation.__dict__[name](num_classes=10, pretrained_backbone=False)
         model.eval().to(device=dev)
-        input_shape = (1, 3, 300, 300)
+        input_shape = (1, 3, 32, 32)
         # RNG always on CPU, to ensure x in cuda tests is bitwise identical to x in cpu tests
         x = torch.rand(input_shape).to(device=dev)
-        out = model(x)
-        self.assertEqual(tuple(out["out"].shape), (1, 50, 300, 300))
+        out = model(x)["out"]
+
+        def check_out(out):
+            prec = 0.01
+            strip_suffix = f"_{dev}"
+            try:
+                # We first try to assert the entire output if possible. This is not
+                # only the best way to assert results but also handles the cases
+                # where we need to create a new expected result.
+                self.assertExpected(out.cpu(), prec=prec, strip_suffix=strip_suffix)
+            except AssertionError:
+                # Unfortunately some segmentation models are flaky with autocast
+                # so instead of validating the probability scores, check that the class
+                # predictions match.
+                expected_file = self._get_expected_file(strip_suffix=strip_suffix)
+                expected = torch.load(expected_file)
+                self.assertEqual(out.argmax(dim=1), expected.argmax(dim=1), prec=prec)
+                return False  # Partial validation performed
+
+            return True  # Full validation performed
+
+        full_validation = check_out(out)
+
         self.check_jit_scriptable(model, (x,), unwrapper=script_model_unwrapper.get(name, None))
 
         if dev == torch.device("cuda"):
             with torch.cuda.amp.autocast():
-                out = model(x)
-                self.assertEqual(tuple(out["out"].shape), (1, 50, 300, 300))
+                out = model(x)["out"]
+                # See autocast_flaky_numerics comment at top of file.
+                if name not in autocast_flaky_numerics:
+                    full_validation &= check_out(out)
+
+        if not full_validation:
+            msg = "The output of {} could only be partially validated. " \
+                  "This is likely due to unit-test flakiness, but you may " \
+                  "want to do additional manual checks if you made " \
+                  "significant changes to the codebase.".format(self._testMethodName)
+            warnings.warn(msg, RuntimeWarning)
+            raise unittest.SkipTest(msg)
 
     def _test_detection_model(self, name, dev):
         set_rng_seed(0)