diff --git a/tests/ignite/metrics/test_loss.py b/tests/ignite/metrics/test_loss.py
index 1d855571fb6c..353933b0ee04 100644
--- a/tests/ignite/metrics/test_loss.py
+++ b/tests/ignite/metrics/test_loss.py
@@ -11,6 +11,26 @@
 from ignite.metrics import Loss


+def y_test_1(requires_grad=False, device=None):
+    return (
+        torch.tensor([[0.1, 0.4, 0.5], [0.1, 0.7, 0.2]], device=device, requires_grad=requires_grad).log(),
+        torch.tensor([2, 2], device=device).long(),
+        1.1512925625,
+    )
+
+
+def y_test_2():
+    return (
+        torch.tensor([[0.1, 0.3, 0.6], [0.6, 0.2, 0.2], [0.2, 0.7, 0.1]]).log(),
+        torch.tensor([2, 0, 2]).long(),
+        1.1253643036,
+    )
+
+
+def y_test_3():
+    return torch.tensor([[0.1, 0.3, 0.6], [0.6, 0.2, 0.2]]).log(), torch.tensor([2, 0]).long()
+
+
 def test_zero_div():
     loss = Loss(nll_loss)
     with pytest.raises(NotComputableError, match=r"Loss must have at least one example before it can be computed"):
@@ -18,38 +38,39 @@ def test_zero_div():


 def test_compute():
-    loss = Loss(nll_loss)
+    def _test(y_test_1, y_test_2):
+        loss = Loss(nll_loss)

-    y_pred = torch.tensor([[0.1, 0.4, 0.5], [0.1, 0.7, 0.2]]).log()
-    y = torch.tensor([2, 2]).long()
-    loss.update((y_pred, y))
-    assert_almost_equal(loss.compute(), 1.1512925625)
+        y_pred, y, expected_loss = y_test_1
+        loss.update((y_pred, y))
+        assert_almost_equal(loss.compute(), expected_loss)

-    y_pred = torch.tensor([[0.1, 0.3, 0.6], [0.6, 0.2, 0.2], [0.2, 0.7, 0.1]]).log()
-    y = torch.tensor([2, 0, 2]).long()
-    loss.update((y_pred, y))
-    assert_almost_equal(loss.compute(), 1.1253643036)  # average
+        y_pred, y, expected_loss = y_test_2
+        loss.update((y_pred, y))
+        assert_almost_equal(loss.compute(), expected_loss)  # average
+
+    _test(y_test_1(), y_test_2())


 def test_compute_on_criterion():
-    loss = Loss(nn.NLLLoss())
+    def _test(y_test_1, y_test_2):
+        loss = Loss(nn.NLLLoss())

-    y_pred = torch.tensor([[0.1, 0.4, 0.5], [0.1, 0.7, 0.2]]).log()
-    y = torch.tensor([2, 2]).long()
-    loss.update((y_pred, y))
-    assert_almost_equal(loss.compute(), 1.1512925625)
+        y_pred, y, expected_loss = y_test_1
+        loss.update((y_pred, y))
+        assert_almost_equal(loss.compute(), expected_loss)

-    y_pred = torch.tensor([[0.1, 0.3, 0.6], [0.6, 0.2, 0.2], [0.2, 0.7, 0.1]]).log()
-    y = torch.tensor([2, 0, 2]).long()
-    loss.update((y_pred, y))
-    assert_almost_equal(loss.compute(), 1.1253643036)  # average
+        y_pred, y, expected_loss = y_test_2
+        loss.update((y_pred, y))
+        assert_almost_equal(loss.compute(), expected_loss)  # average
+
+    _test(y_test_1(), y_test_2())


 def test_non_averaging_loss():
     loss = Loss(nn.NLLLoss(reduction="none"))

-    y_pred = torch.tensor([[0.1, 0.4, 0.5], [0.1, 0.7, 0.2]]).log()
-    y = torch.tensor([2, 2]).long()
+    y_pred, y, _ = y_test_1()
     with pytest.raises(ValueError):
         loss.update((y_pred, y))

@@ -75,8 +96,7 @@ def loss_fn(y_pred, x):
 def test_kwargs_loss():
     loss = Loss(nll_loss)

-    y_pred = torch.tensor([[0.1, 0.4, 0.5], [0.1, 0.7, 0.2]]).log()
-    y = torch.tensor([2, 2]).long()
+    y_pred, y, _ = y_test_1()
     loss.update((y_pred, y, {"weight": torch.tensor([0, 0, 0], dtype=torch.float)}))
     assert_almost_equal(loss.compute(), 0)

@@ -84,8 +104,7 @@ def test_kwargs_loss():
 def test_reset():
     loss = Loss(nll_loss)

-    y_pred = torch.tensor([[0.1, 0.3, 0.6], [0.6, 0.2, 0.2]]).log()
-    y = torch.tensor([2, 0]).long()
+    y_pred, y = y_test_3()
     loss.update((y_pred, y))
     loss.compute()
     loss.reset()
@@ -93,13 +112,12 @@ def test_reset():
         loss.compute()


-def _test_distrib_compute_on_criterion(device, tol=None):
-    def _test(metric_device):
+def _test_distrib_compute_on_criterion(device, y_test_1, y_test_2, tol=None):
+    def _test(metric_device, y_test_1, y_test_2):
         criterion = nn.NLLLoss().to(device)
         loss = Loss(criterion, device=metric_device)

-        y_pred = torch.tensor([[0.1, 0.4, 0.5], [0.1, 0.7, 0.2]], device=device).log()
-        y = torch.tensor([2, 2], device=device).long()
+        y_pred, y, _ = y_test_1
         loss.update((y_pred, y))
         n = loss._num_examples
         assert n == len(y)
@@ -112,8 +130,7 @@ def _test(metric_device):
         assert_almost_equal(res, true_loss_value.item())

         loss.reset()
-        y_pred = torch.tensor([[0.1, 0.3, 0.6], [0.6, 0.2, 0.2], [0.2, 0.7, 0.1]], device=device).log()
-        y = torch.tensor([2, 0, 2], device=device).long()
+        y_pred, y, _ = y_test_2
         loss.update((y_pred, y))
         n = loss._num_examples
         res = loss.compute()
@@ -127,12 +144,12 @@ def _test(metric_device):
         else:
             assert pytest.approx(res, rel=tol) == true_loss_value.item()

-    _test("cpu")
+    _test("cpu", y_test_1, y_test_2)
     if device.type != "xla":
-        _test(idist.device())
+        _test(idist.device(), y_test_1, y_test_2)


-def _test_distrib_accumulator_device(device):
+def _test_distrib_accumulator_device(device, y_test_1):

     metric_devices = [torch.device("cpu")]
     if device.type != "xla":
@@ -144,8 +161,7 @@ def _test_distrib_accumulator_device(device):
             loss._sum.device == metric_device
         ), f"{type(loss._sum.device)}:{loss._sum.device} vs {type(metric_device)}:{metric_device}"

-        y_pred = torch.tensor([[0.1, 0.4, 0.5], [0.1, 0.7, 0.2]]).log()
-        y = torch.tensor([2, 2]).long()
+        y_pred, y, _ = y_test_1
         loss.update((y_pred, y))

         assert (
@@ -156,8 +172,7 @@ def _test_distrib_accumulator_device(device):
 def test_sum_detached():
     loss = Loss(nll_loss)

-    y_pred = torch.tensor([[0.1, 0.4, 0.5], [0.1, 0.7, 0.2]], requires_grad=True).log()
-    y = torch.tensor([2, 2]).long()
+    y_pred, y, _ = y_test_1(requires_grad=True)
     loss.update((y_pred, y))

     assert not loss._sum.requires_grad
@@ -169,8 +184,8 @@ def test_sum_detached():
 def test_distrib_gpu(local_rank, distributed_context_single_node_nccl):

     device = torch.device(f"cuda:{local_rank}")
-    _test_distrib_compute_on_criterion(device)
-    _test_distrib_accumulator_device(device)
+    _test_distrib_compute_on_criterion(device, y_test_1(), y_test_2())
+    _test_distrib_accumulator_device(device, y_test_1())


 @pytest.mark.distributed
@@ -178,8 +193,8 @@ def test_distrib_gpu(local_rank, distributed_context_single_node_nccl):
 def test_distrib_cpu(distributed_context_single_node_gloo):

     device = torch.device("cpu")
-    _test_distrib_compute_on_criterion(device)
-    _test_distrib_accumulator_device(device)
+    _test_distrib_compute_on_criterion(device, y_test_1(), y_test_2())
+    _test_distrib_accumulator_device(device, y_test_1())


 @pytest.mark.distributed
@@ -190,8 +205,8 @@ def test_distrib_hvd(gloo_hvd_executor):
     device = torch.device("cpu" if not torch.cuda.is_available() else "cuda")
     nproc = 4 if not torch.cuda.is_available() else torch.cuda.device_count()

-    gloo_hvd_executor(_test_distrib_compute_on_criterion, (device,), np=nproc, do_init=True)
-    gloo_hvd_executor(_test_distrib_accumulator_device, (device,), np=nproc, do_init=True)
+    gloo_hvd_executor(_test_distrib_compute_on_criterion, (device, y_test_1(), y_test_2()), np=nproc, do_init=True)
+    gloo_hvd_executor(_test_distrib_accumulator_device, (device, y_test_1()), np=nproc, do_init=True)


 @pytest.mark.multinode_distributed
@@ -199,8 +214,8 @@ def test_distrib_hvd(gloo_hvd_executor):
 @pytest.mark.skipif("MULTINODE_DISTRIB" not in os.environ, reason="Skip if not multi-node distributed")
 def test_multinode_distrib_cpu(distributed_context_multi_node_gloo):
     device = torch.device("cpu")
-    _test_distrib_compute_on_criterion(device, tol=1e-6)
-    _test_distrib_accumulator_device(device)
+    _test_distrib_compute_on_criterion(device, y_test_1(), y_test_2(), tol=1e-6)
+    _test_distrib_accumulator_device(device, y_test_1())


 @pytest.mark.multinode_distributed
@@ -208,8 +223,8 @@ def test_multinode_distrib_cpu(distributed_context_multi_node_gloo):
 @pytest.mark.skipif("GPU_MULTINODE_DISTRIB" not in os.environ, reason="Skip if not multi-node distributed")
 def test_multinode_distrib_gpu(distributed_context_multi_node_nccl):
     device = torch.device(f"cuda:{distributed_context_multi_node_nccl['local_rank']}")
-    _test_distrib_compute_on_criterion(device)
-    _test_distrib_accumulator_device(device)
+    _test_distrib_compute_on_criterion(device, y_test_1(), y_test_2())
+    _test_distrib_accumulator_device(device, y_test_1())


 @pytest.mark.tpu
@@ -217,14 +232,14 @@ def test_multinode_distrib_gpu(distributed_context_multi_node_nccl):
 @pytest.mark.skipif(not idist.has_xla_support, reason="Skip if no PyTorch XLA package")
 def test_distrib_single_device_xla():
     device = idist.device()
-    _test_distrib_compute_on_criterion(device)
-    _test_distrib_accumulator_device(device)
+    _test_distrib_compute_on_criterion(device, y_test_1(), y_test_2())
+    _test_distrib_accumulator_device(device, y_test_1())


 def _test_distrib_xla_nprocs(index):
     device = idist.device()
-    _test_distrib_compute_on_criterion(device)
-    _test_distrib_accumulator_device(device)
+    _test_distrib_compute_on_criterion(device, y_test_1(), y_test_2())
+    _test_distrib_accumulator_device(device, y_test_1())


 @pytest.mark.tpu
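Note (not part of the patch above): each y_test_* helper builds its tensors on every call and, for y_test_1 and y_test_2, also returns the reference mean NLL, so callers unpack one tuple instead of re-declaring the fixtures inline. A minimal standalone sketch of that contract, assuming the helpers are importable from this test module:

    from numpy.testing import assert_almost_equal
    from torch.nn.functional import nll_loss

    # y_test_1() returns (log-probabilities, targets, expected mean NLL);
    # the tensors are constructed fresh on every call.
    y_pred, y, expected_loss = y_test_1()

    # nll_loss over log([[0.1, 0.4, 0.5], [0.1, 0.7, 0.2]]) with targets [2, 2]
    # averages -log(0.5) and -log(0.2), i.e. roughly 1.1512925625.
    assert_almost_equal(nll_loss(y_pred, y).item(), expected_loss)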