diff --git a/ignite/distributed/auto.py b/ignite/distributed/auto.py
index 1d6a585b0e63..e92bec43a764 100644
--- a/ignite/distributed/auto.py
+++ b/ignite/distributed/auto.py
@@ -188,23 +188,23 @@ def auto_model(model: nn.Module, sync_bn: bool = False, **kwargs: Any) -> nn.Mod
     # distributed data parallel model
     if idist.get_world_size() > 1:
         bnd = idist.backend()
-        if idist.has_native_dist_support and bnd == idist_native.NCCL:
+        if idist.has_native_dist_support and bnd in (idist_native.NCCL, idist_native.GLOO, idist_native.MPI):
             if sync_bn:
                 logger.info("Convert batch norm to sync batch norm")
                 model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
 
-            if "device_ids" in kwargs:
-                raise ValueError(f"Argument kwargs should not contain 'device_ids', but got {kwargs}")
+            if torch.cuda.is_available():
+                if "device_ids" in kwargs:
+                    raise ValueError(f"Argument kwargs should not contain 'device_ids', but got {kwargs}")
 
-            lrank = idist.get_local_rank()
-            logger.info(f"Apply torch DistributedDataParallel on model, device id: {lrank}")
-            model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[lrank,], **kwargs)
-        elif idist.has_native_dist_support and bnd == idist_native.GLOO:
-            if sync_bn:
-                logger.info("Convert batch norm to sync batch norm")
-                model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
+                lrank = idist.get_local_rank()
+                logger.info(f"Apply torch DistributedDataParallel on model, device id: {lrank}")
+                kwargs["device_ids"] = [
+                    lrank,
+                ]
+            else:
+                logger.info("Apply torch DistributedDataParallel on model")
 
-            logger.info("Apply torch DistributedDataParallel on model")
             model = torch.nn.parallel.DistributedDataParallel(model, **kwargs)
     elif idist.has_hvd_support and bnd == idist_hvd.HOROVOD:
         import horovod.torch as hvd
diff --git a/ignite/distributed/comp_models/native.py b/ignite/distributed/comp_models/native.py
index c723f4184347..d5bde07329cf 100644
--- a/ignite/distributed/comp_models/native.py
+++ b/ignite/distributed/comp_models/native.py
@@ -31,7 +31,7 @@ class _NativeDistModel(ComputationModel):
     In this implementation we assume the following mapping between backend and devices:
 
     - NCCL <-> GPU
-    - GLOO <-> CPU
+    - GLOO <-> CPU or GPU
     - MPI <-> CPU
 
@@ -127,7 +127,7 @@ def _create_from_backend(
         # https://github.com/facebookresearch/maskrcnn-benchmark/issues/172
         dist.barrier()
 
-        if backend == dist.Backend.NCCL:
+        if torch.cuda.is_available():
            torch.cuda.set_device(self._local_rank)
 
         self._setup_attrs()
@@ -140,7 +140,7 @@ def _init_from_context(self) -> None:
     def _compute_nproc_per_node(self) -> int:
         local_rank = self.get_local_rank()
         device = torch.device("cpu")
-        if self.backend() == dist.Backend.NCCL:
+        if torch.cuda.is_available():
             # we manually set cuda device to local rank in order to avoid a hang on all_reduce
             device = torch.device(f"cuda:{local_rank}")
         tensor = torch.tensor([self.get_local_rank() + 1]).to(device)
@@ -151,7 +151,7 @@ def _get_all_hostnames(self) -> List[Tuple[str, ...]]:
         import socket
 
         device = "cpu"
-        if self.backend() == dist.Backend.NCCL:
+        if torch.cuda.is_available():
             index = torch.cuda.current_device()
             device = f"cuda:{index}"
         hostname = socket.gethostname()
@@ -281,7 +281,7 @@ def get_node_rank(self) -> int:
         return cast(int, self._node)
 
     def device(self) -> torch.device:
-        if self.backend() == dist.Backend.NCCL:
+        if torch.cuda.is_available():
             index = torch.cuda.current_device()
             if index < self.get_local_rank():
                 warnings.warn(
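Note on the two hunks above: `auto_model` now treats NCCL, GLOO and MPI uniformly and chooses between a CUDA and a CPU `DistributedDataParallel` purely from `torch.cuda.is_available()`, and `_NativeDistModel` applies the same hardware-based test everywhere it previously checked for the NCCL backend. A minimal usage sketch of what this enables (gloo on GPU), assuming it runs inside an initialized process group, e.g. via `idist.Parallel`; the two-layer model is illustrative only:

```python
import torch.nn as nn
import ignite.distributed as idist

def training(local_rank, config):
    # Same call for gloo-on-CPU, gloo-on-GPU and nccl: auto_model injects
    # device_ids=[local_rank] into the DDP kwargs only when CUDA is available.
    model = idist.auto_model(nn.Sequential(nn.Linear(10, 10), nn.Linear(10, 2)))

with idist.Parallel(backend="gloo", nproc_per_node=2) as parallel:
    parallel.run(training, {})
```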
diff --git a/tests/ignite/conftest.py b/tests/ignite/conftest.py
index a909d1c695e6..ce52495c32c5 100644
--- a/tests/ignite/conftest.py
+++ b/tests/ignite/conftest.py
@@ -94,7 +94,7 @@ def _create_dist_context(dist_info, lrank):
     dist.init_process_group(**dist_info)
     dist.barrier()
 
-    if dist_info["backend"] == "nccl":
+    if torch.cuda.is_available():
         torch.cuda.set_device(lrank)
 
     return {"local_rank": lrank, "world_size": dist_info["world_size"], "rank": dist_info["rank"]}
@@ -150,8 +150,6 @@ def distributed_context_single_node_nccl(local_rank, world_size):
 
     free_port = _setup_free_port(local_rank)
 
-    print(local_rank, "Port:", free_port)
-
     dist_info = {
         "backend": "nccl",
         "world_size": world_size,
@@ -174,7 +172,6 @@ def distributed_context_single_node_gloo(local_rank, world_size):
         init_method = f'file:///{temp_file.name.replace(backslash, "/")}'
     else:
         free_port = _setup_free_port(local_rank)
-        print(local_rank, "Port:", free_port)
         init_method = f"tcp://localhost:{free_port}"
         temp_file = None
@@ -213,7 +210,7 @@ def _create_mnodes_dist_context(dist_info, mnodes_conf):
     dist.init_process_group(**dist_info)
     dist.barrier()
 
-    if dist_info["backend"] == "nccl":
+    if torch.cuda.is_available():
         torch.cuda.device(mnodes_conf["local_rank"])
 
     return mnodes_conf
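The fixture change above encodes the rule the rest of the patch relies on: pin each worker to its own GPU whenever CUDA is present, instead of keying on `backend == "nccl"`. A minimal standalone sketch of the same initialization order (hypothetical `init_worker` helper; assumes `MASTER_ADDR`/`MASTER_PORT` are set in the environment):

```python
import torch
import torch.distributed as dist

def init_worker(backend: str, local_rank: int, rank: int, world_size: int) -> None:
    # Works for "gloo" on CPU, "gloo" on GPU and "nccl" alike.
    dist.init_process_group(backend, world_size=world_size, rank=rank)
    dist.barrier()
    # Hardware-based, not backend-based: without this, every gloo-on-GPU
    # worker would default to allocating on cuda:0.
    if torch.cuda.is_available():
        torch.cuda.set_device(local_rank)
```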
distributed_context_multi_node_gloo["rank"] _test_setup_common_training_handlers(dirname, device, rank=rank) test_add_early_stopping_by_val_score() @@ -616,9 +619,10 @@ def test_multinode_distrib_cpu(dirname, distributed_context_multi_node_gloo): @pytest.mark.multinode_distributed @pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support") @pytest.mark.skipif("GPU_MULTINODE_DISTRIB" not in os.environ, reason="Skip if not multi-node distributed") -def test_multinode_distrib_gpu(dirname, distributed_context_multi_node_nccl): +def test_multinode_distrib_nccl_gpu(dirname, distributed_context_multi_node_nccl): + local_rank = distributed_context_multi_node_nccl["local_rank"] rank = distributed_context_multi_node_nccl["rank"] - device = f"cuda:{local_rank}" + device = idist.device() _test_setup_common_training_handlers(dirname, device, rank=rank, local_rank=local_rank, distributed=True) test_add_early_stopping_by_val_score() diff --git a/tests/ignite/contrib/handlers/test_clearml_logger.py b/tests/ignite/contrib/handlers/test_clearml_logger.py index 0d2f9e7c8fcd..9ac30f7cda56 100644 --- a/tests/ignite/contrib/handlers/test_clearml_logger.py +++ b/tests/ignite/contrib/handlers/test_clearml_logger.py @@ -888,18 +888,21 @@ def update_fn(engine, batch): @pytest.mark.distributed @pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support") -def test_distrib_cpu(distributed_context_single_node_gloo): - _test_save_model_optimizer_lr_scheduler_with_state_dict("cpu") - _test_save_model_optimizer_lr_scheduler_with_state_dict("cpu", on_zero_rank=True) +def test_distrib_gloo_cpu_or_gpu(distributed_context_single_node_gloo): + + device = idist.device() + _test_save_model_optimizer_lr_scheduler_with_state_dict(device) + _test_save_model_optimizer_lr_scheduler_with_state_dict(device, on_zero_rank=True) @pytest.mark.distributed @pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support") @pytest.mark.skipif(torch.cuda.device_count() < 1, reason="Skip if no GPU") -def test_distrib_gpu(distributed_context_single_node_nccl): +def test_distrib_nccl_gpu(distributed_context_single_node_nccl): + device = idist.device() _test_save_model_optimizer_lr_scheduler_with_state_dict(device) - _test_save_model_optimizer_lr_scheduler_with_state_dict("cpu", on_zero_rank=True) + _test_save_model_optimizer_lr_scheduler_with_state_dict(device, on_zero_rank=True) @pytest.mark.tpu diff --git a/tests/ignite/contrib/handlers/test_lr_finder.py b/tests/ignite/contrib/handlers/test_lr_finder.py index 8ffd2536901d..c0ac998bb4af 100644 --- a/tests/ignite/contrib/handlers/test_lr_finder.py +++ b/tests/ignite/contrib/handlers/test_lr_finder.py @@ -539,8 +539,9 @@ def forward(self, x): @pytest.mark.distributed @pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support") -def test_distrib_cpu(distributed_context_single_node_gloo): - device = torch.device("cpu") +def test_distrib_gloo_cpu_or_gpu(distributed_context_single_node_gloo): + + device = idist.device() _test_distrib_log_lr_and_loss(device) _test_distrib_integration_mnist(device) @@ -548,8 +549,9 @@ def test_distrib_cpu(distributed_context_single_node_gloo): @pytest.mark.distributed @pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support") @pytest.mark.skipif(torch.cuda.device_count() < 1, reason="Skip if no GPU") -def test_distrib_gpu(distributed_context_single_node_nccl): - device = 
torch.device(f"cuda:{distributed_context_single_node_nccl['local_rank']}") +def test_distrib_nccl_gpu(distributed_context_single_node_nccl): + + device = idist.device() _test_distrib_log_lr_and_loss(device) _test_distrib_integration_mnist(device) diff --git a/tests/ignite/contrib/handlers/test_neptune_logger.py b/tests/ignite/contrib/handlers/test_neptune_logger.py index b26e1e8546ca..c50ac64bcfda 100644 --- a/tests/ignite/contrib/handlers/test_neptune_logger.py +++ b/tests/ignite/contrib/handlers/test_neptune_logger.py @@ -516,13 +516,16 @@ def test_no_neptune_client(no_site_packages): @pytest.mark.distributed @pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support") -def test_distrib_cpu(distributed_context_single_node_gloo): - _test_neptune_saver_integration("cpu") +def test_distrib_gloo_cpu_or_gpu(distributed_context_single_node_gloo): + + device = idist.device() + _test_neptune_saver_integration(device) @pytest.mark.distributed @pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support") @pytest.mark.skipif(torch.cuda.device_count() < 1, reason="Skip if no GPU") -def test_distrib_gpu(distributed_context_single_node_nccl): +def test_distrib_nccl_gpu(distributed_context_single_node_nccl): + device = idist.device() _test_neptune_saver_integration(device) diff --git a/tests/ignite/contrib/metrics/regression/test_canberra_metric.py b/tests/ignite/contrib/metrics/regression/test_canberra_metric.py index 677e1c274313..b30a9c402e68 100644 --- a/tests/ignite/contrib/metrics/regression/test_canberra_metric.py +++ b/tests/ignite/contrib/metrics/regression/test_canberra_metric.py @@ -184,17 +184,18 @@ def update(engine, i): @pytest.mark.distributed @pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support") @pytest.mark.skipif(torch.cuda.device_count() < 1, reason="Skip if no GPU") -def test_distrib_gpu(distributed_context_single_node_nccl): - device = torch.device(f"cuda:{distributed_context_single_node_nccl['local_rank']}") +def test_distrib_nccl_gpu(distributed_context_single_node_nccl): + + device = idist.device() _test_distrib_compute(device) _test_distrib_integration(device) @pytest.mark.distributed @pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support") -def test_distrib_cpu(distributed_context_single_node_gloo): +def test_distrib_gloo_cpu_or_gpu(distributed_context_single_node_gloo): - device = torch.device("cpu") + device = idist.device() _test_distrib_compute(device) _test_distrib_integration(device) @@ -214,8 +215,9 @@ def test_distrib_hvd(gloo_hvd_executor): @pytest.mark.multinode_distributed @pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support") @pytest.mark.skipif("MULTINODE_DISTRIB" not in os.environ, reason="Skip if not multi-node distributed") -def test_multinode_distrib_cpu(distributed_context_multi_node_gloo): - device = torch.device("cpu") +def test_multinode_distrib_gloo_cpu_or_gpu(distributed_context_multi_node_gloo): + + device = idist.device() _test_distrib_compute(device) _test_distrib_integration(device) @@ -223,8 +225,9 @@ def test_multinode_distrib_cpu(distributed_context_multi_node_gloo): @pytest.mark.multinode_distributed @pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support") @pytest.mark.skipif("GPU_MULTINODE_DISTRIB" not in os.environ, reason="Skip if not multi-node distributed") -def 
diff --git a/tests/ignite/contrib/handlers/test_lr_finder.py b/tests/ignite/contrib/handlers/test_lr_finder.py
index 8ffd2536901d..c0ac998bb4af 100644
--- a/tests/ignite/contrib/handlers/test_lr_finder.py
+++ b/tests/ignite/contrib/handlers/test_lr_finder.py
@@ -539,8 +539,9 @@ def forward(self, x):
 @pytest.mark.distributed
 @pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support")
-def test_distrib_cpu(distributed_context_single_node_gloo):
-    device = torch.device("cpu")
+def test_distrib_gloo_cpu_or_gpu(distributed_context_single_node_gloo):
+
+    device = idist.device()
     _test_distrib_log_lr_and_loss(device)
     _test_distrib_integration_mnist(device)
 
@@ -548,8 +549,9 @@ def test_distrib_cpu(distributed_context_single_node_gloo):
 @pytest.mark.distributed
 @pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support")
 @pytest.mark.skipif(torch.cuda.device_count() < 1, reason="Skip if no GPU")
-def test_distrib_gpu(distributed_context_single_node_nccl):
-    device = torch.device(f"cuda:{distributed_context_single_node_nccl['local_rank']}")
+def test_distrib_nccl_gpu(distributed_context_single_node_nccl):
+
+    device = idist.device()
     _test_distrib_log_lr_and_loss(device)
     _test_distrib_integration_mnist(device)
diff --git a/tests/ignite/contrib/handlers/test_neptune_logger.py b/tests/ignite/contrib/handlers/test_neptune_logger.py
index b26e1e8546ca..c50ac64bcfda 100644
--- a/tests/ignite/contrib/handlers/test_neptune_logger.py
+++ b/tests/ignite/contrib/handlers/test_neptune_logger.py
@@ -516,13 +516,16 @@ def test_no_neptune_client(no_site_packages):
 @pytest.mark.distributed
 @pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support")
-def test_distrib_cpu(distributed_context_single_node_gloo):
-    _test_neptune_saver_integration("cpu")
+def test_distrib_gloo_cpu_or_gpu(distributed_context_single_node_gloo):
+
+    device = idist.device()
+    _test_neptune_saver_integration(device)
 
 
 @pytest.mark.distributed
 @pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support")
 @pytest.mark.skipif(torch.cuda.device_count() < 1, reason="Skip if no GPU")
-def test_distrib_gpu(distributed_context_single_node_nccl):
+def test_distrib_nccl_gpu(distributed_context_single_node_nccl):
+
     device = idist.device()
     _test_neptune_saver_integration(device)
diff --git a/tests/ignite/contrib/metrics/regression/test_canberra_metric.py b/tests/ignite/contrib/metrics/regression/test_canberra_metric.py
index 677e1c274313..b30a9c402e68 100644
--- a/tests/ignite/contrib/metrics/regression/test_canberra_metric.py
+++ b/tests/ignite/contrib/metrics/regression/test_canberra_metric.py
@@ -184,17 +184,18 @@ def update(engine, i):
 @pytest.mark.distributed
 @pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support")
 @pytest.mark.skipif(torch.cuda.device_count() < 1, reason="Skip if no GPU")
-def test_distrib_gpu(distributed_context_single_node_nccl):
-    device = torch.device(f"cuda:{distributed_context_single_node_nccl['local_rank']}")
+def test_distrib_nccl_gpu(distributed_context_single_node_nccl):
+
+    device = idist.device()
     _test_distrib_compute(device)
     _test_distrib_integration(device)
 
 
 @pytest.mark.distributed
 @pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support")
-def test_distrib_cpu(distributed_context_single_node_gloo):
+def test_distrib_gloo_cpu_or_gpu(distributed_context_single_node_gloo):
 
-    device = torch.device("cpu")
+    device = idist.device()
     _test_distrib_compute(device)
     _test_distrib_integration(device)
 
@@ -214,8 +215,9 @@ def test_distrib_hvd(gloo_hvd_executor):
 @pytest.mark.multinode_distributed
 @pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support")
 @pytest.mark.skipif("MULTINODE_DISTRIB" not in os.environ, reason="Skip if not multi-node distributed")
-def test_multinode_distrib_cpu(distributed_context_multi_node_gloo):
-    device = torch.device("cpu")
+def test_multinode_distrib_gloo_cpu_or_gpu(distributed_context_multi_node_gloo):
+
+    device = idist.device()
     _test_distrib_compute(device)
     _test_distrib_integration(device)
 
@@ -223,8 +225,9 @@ def test_multinode_distrib_cpu(distributed_context_multi_node_gloo):
 @pytest.mark.multinode_distributed
 @pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support")
 @pytest.mark.skipif("GPU_MULTINODE_DISTRIB" not in os.environ, reason="Skip if not multi-node distributed")
-def test_multinode_distrib_gpu(distributed_context_multi_node_nccl):
-    device = torch.device(f"cuda:{distributed_context_multi_node_nccl['local_rank']}")
+def test_multinode_distrib_nccl_gpu(distributed_context_multi_node_nccl):
+
+    device = idist.device()
     _test_distrib_compute(device)
     _test_distrib_integration(device)
torch.device("cpu" if not torch.cuda.is_available() else "cuda") _test_distrib_compute(device) _test_distrib_integration(device) @@ -233,8 +234,9 @@ def test_multinode_distrib_cpu(distributed_context_multi_node_gloo): @pytest.mark.multinode_distributed @pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support") @pytest.mark.skipif("GPU_MULTINODE_DISTRIB" not in os.environ, reason="Skip if not multi-node distributed") -def test_multinode_distrib_gpu(distributed_context_multi_node_nccl): - device = torch.device(f"cuda:{distributed_context_multi_node_nccl['local_rank']}") +def test_multinode_distrib_nccl_gpu(distributed_context_multi_node_nccl): + + device = idist.device() _test_distrib_compute(device) _test_distrib_integration(device) diff --git a/tests/ignite/contrib/metrics/regression/test_manhattan_distance.py b/tests/ignite/contrib/metrics/regression/test_manhattan_distance.py index ce079eaaae4c..2ed6726db4a6 100644 --- a/tests/ignite/contrib/metrics/regression/test_manhattan_distance.py +++ b/tests/ignite/contrib/metrics/regression/test_manhattan_distance.py @@ -185,17 +185,18 @@ def update(engine, i): @pytest.mark.distributed @pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support") @pytest.mark.skipif(torch.cuda.device_count() < 1, reason="Skip if no GPU") -def test_distrib_gpu(distributed_context_single_node_nccl): - device = torch.device(f"cuda:{distributed_context_single_node_nccl['local_rank']}") +def test_distrib_nccl_gpu(distributed_context_single_node_nccl): + + device = idist.device() _test_distrib_compute(device) _test_distrib_integration(device) @pytest.mark.distributed @pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support") -def test_distrib_cpu(distributed_context_single_node_gloo): +def test_distrib_gloo_cpu_or_gpu(distributed_context_single_node_gloo): - device = torch.device("cpu") + device = idist.device() _test_distrib_compute(device) _test_distrib_integration(device) @@ -215,8 +216,9 @@ def test_distrib_hvd(gloo_hvd_executor): @pytest.mark.multinode_distributed @pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support") @pytest.mark.skipif("MULTINODE_DISTRIB" not in os.environ, reason="Skip if not multi-node distributed") -def test_multinode_distrib_cpu(distributed_context_multi_node_gloo): - device = torch.device("cpu") +def test_multinode_distrib_gloo_cpu_or_gpu(distributed_context_multi_node_gloo): + + device = idist.device() _test_distrib_compute(device) _test_distrib_integration(device) @@ -224,8 +226,9 @@ def test_multinode_distrib_cpu(distributed_context_multi_node_gloo): @pytest.mark.multinode_distributed @pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support") @pytest.mark.skipif("GPU_MULTINODE_DISTRIB" not in os.environ, reason="Skip if not multi-node distributed") -def test_multinode_distrib_gpu(distributed_context_multi_node_nccl): - device = torch.device(f"cuda:{distributed_context_multi_node_nccl['local_rank']}") +def test_multinode_distrib_nccl_gpu(distributed_context_multi_node_nccl): + + device = idist.device() _test_distrib_compute(device) _test_distrib_integration(device) diff --git a/tests/ignite/contrib/metrics/regression/test_maximum_absolute_error.py b/tests/ignite/contrib/metrics/regression/test_maximum_absolute_error.py index 20c44d862148..828a9dcc1ee4 100644 --- a/tests/ignite/contrib/metrics/regression/test_maximum_absolute_error.py +++ 
diff --git a/tests/ignite/contrib/metrics/regression/test_fractional_bias.py b/tests/ignite/contrib/metrics/regression/test_fractional_bias.py
index b3b32c504a32..7079b2baa438 100644
--- a/tests/ignite/contrib/metrics/regression/test_fractional_bias.py
+++ b/tests/ignite/contrib/metrics/regression/test_fractional_bias.py
@@ -197,17 +197,18 @@ def update(engine, i):
 @pytest.mark.distributed
 @pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support")
 @pytest.mark.skipif(torch.cuda.device_count() < 1, reason="Skip if no GPU")
-def test_distrib_gpu(distributed_context_single_node_nccl):
-    device = torch.device(f"cuda:{distributed_context_single_node_nccl['local_rank']}")
+def test_distrib_nccl_gpu(distributed_context_single_node_nccl):
+
+    device = idist.device()
     _test_distrib_compute(device)
     _test_distrib_integration(device)
 
 
 @pytest.mark.distributed
 @pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support")
-def test_distrib_cpu(distributed_context_single_node_gloo):
+def test_distrib_gloo_cpu_or_gpu(distributed_context_single_node_gloo):
 
-    device = torch.device("cpu")
+    device = idist.device()
     _test_distrib_compute(device)
     _test_distrib_integration(device)
 
@@ -227,8 +228,9 @@ def test_distrib_hvd(gloo_hvd_executor):
 @pytest.mark.multinode_distributed
 @pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support")
 @pytest.mark.skipif("MULTINODE_DISTRIB" not in os.environ, reason="Skip if not multi-node distributed")
-def test_multinode_distrib_cpu(distributed_context_multi_node_gloo):
-    device = torch.device("cpu")
+def test_multinode_distrib_gloo_cpu_or_gpu(distributed_context_multi_node_gloo):
+
+    device = idist.device()
     _test_distrib_compute(device)
     _test_distrib_integration(device)
 
@@ -236,8 +238,9 @@ def test_multinode_distrib_cpu(distributed_context_multi_node_gloo):
 @pytest.mark.multinode_distributed
 @pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support")
 @pytest.mark.skipif("GPU_MULTINODE_DISTRIB" not in os.environ, reason="Skip if not multi-node distributed")
-def test_multinode_distrib_gpu(distributed_context_multi_node_nccl):
-    device = torch.device(f"cuda:{distributed_context_multi_node_nccl['local_rank']}")
+def test_multinode_distrib_nccl_gpu(distributed_context_multi_node_nccl):
+
+    device = idist.device()
     _test_distrib_compute(device)
     _test_distrib_integration(device)
diff --git a/tests/ignite/contrib/metrics/regression/test_geometric_mean_absolute_error.py b/tests/ignite/contrib/metrics/regression/test_geometric_mean_absolute_error.py
index 299098697df3..eae47c7fa71a 100644
--- a/tests/ignite/contrib/metrics/regression/test_geometric_mean_absolute_error.py
+++ b/tests/ignite/contrib/metrics/regression/test_geometric_mean_absolute_error.py
@@ -194,17 +194,18 @@ def update(engine, i):
 @pytest.mark.distributed
 @pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support")
 @pytest.mark.skipif(torch.cuda.device_count() < 1, reason="Skip if no GPU")
-def test_distrib_gpu(distributed_context_single_node_nccl):
-    device = torch.device(f"cuda:{distributed_context_single_node_nccl['local_rank']}")
+def test_distrib_nccl_gpu(distributed_context_single_node_nccl):
+
+    device = idist.device()
     _test_distrib_compute(device)
     _test_distrib_integration(device)
 
 
 @pytest.mark.distributed
 @pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support")
-def test_distrib_cpu(distributed_context_single_node_gloo):
+def test_distrib_gloo_cpu_or_gpu(distributed_context_single_node_gloo):
 
-    device = torch.device("cpu")
+    device = idist.device()
     _test_distrib_compute(device)
     _test_distrib_integration(device)
 
@@ -224,8 +225,8 @@ def test_distrib_hvd(gloo_hvd_executor):
 @pytest.mark.multinode_distributed
 @pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support")
 @pytest.mark.skipif("MULTINODE_DISTRIB" not in os.environ, reason="Skip if not multi-node distributed")
-def test_multinode_distrib_cpu(distributed_context_multi_node_gloo):
-    device = torch.device("cpu")
+def test_multinode_distrib_gloo_cpu_or_gpu(distributed_context_multi_node_gloo):
+    device = torch.device("cpu" if not torch.cuda.is_available() else "cuda")
     _test_distrib_compute(device)
     _test_distrib_integration(device)
 
@@ -233,8 +234,9 @@ def test_multinode_distrib_cpu(distributed_context_multi_node_gloo):
 @pytest.mark.multinode_distributed
 @pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support")
 @pytest.mark.skipif("GPU_MULTINODE_DISTRIB" not in os.environ, reason="Skip if not multi-node distributed")
-def test_multinode_distrib_gpu(distributed_context_multi_node_nccl):
-    device = torch.device(f"cuda:{distributed_context_multi_node_nccl['local_rank']}")
+def test_multinode_distrib_nccl_gpu(distributed_context_multi_node_nccl):
+
+    device = idist.device()
     _test_distrib_compute(device)
     _test_distrib_integration(device)
diff --git a/tests/ignite/contrib/metrics/regression/test_manhattan_distance.py b/tests/ignite/contrib/metrics/regression/test_manhattan_distance.py
index ce079eaaae4c..2ed6726db4a6 100644
--- a/tests/ignite/contrib/metrics/regression/test_manhattan_distance.py
+++ b/tests/ignite/contrib/metrics/regression/test_manhattan_distance.py
@@ -185,17 +185,18 @@ def update(engine, i):
 @pytest.mark.distributed
 @pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support")
 @pytest.mark.skipif(torch.cuda.device_count() < 1, reason="Skip if no GPU")
-def test_distrib_gpu(distributed_context_single_node_nccl):
-    device = torch.device(f"cuda:{distributed_context_single_node_nccl['local_rank']}")
+def test_distrib_nccl_gpu(distributed_context_single_node_nccl):
+
+    device = idist.device()
     _test_distrib_compute(device)
     _test_distrib_integration(device)
 
 
 @pytest.mark.distributed
 @pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support")
-def test_distrib_cpu(distributed_context_single_node_gloo):
+def test_distrib_gloo_cpu_or_gpu(distributed_context_single_node_gloo):
 
-    device = torch.device("cpu")
+    device = idist.device()
     _test_distrib_compute(device)
     _test_distrib_integration(device)
 
@@ -215,8 +216,9 @@ def test_distrib_hvd(gloo_hvd_executor):
 @pytest.mark.multinode_distributed
 @pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support")
 @pytest.mark.skipif("MULTINODE_DISTRIB" not in os.environ, reason="Skip if not multi-node distributed")
-def test_multinode_distrib_cpu(distributed_context_multi_node_gloo):
-    device = torch.device("cpu")
+def test_multinode_distrib_gloo_cpu_or_gpu(distributed_context_multi_node_gloo):
+
+    device = idist.device()
     _test_distrib_compute(device)
     _test_distrib_integration(device)
 
@@ -224,8 +226,9 @@ def test_multinode_distrib_cpu(distributed_context_multi_node_gloo):
 @pytest.mark.multinode_distributed
 @pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support")
 @pytest.mark.skipif("GPU_MULTINODE_DISTRIB" not in os.environ, reason="Skip if not multi-node distributed")
-def test_multinode_distrib_gpu(distributed_context_multi_node_nccl):
-    device = torch.device(f"cuda:{distributed_context_multi_node_nccl['local_rank']}")
+def test_multinode_distrib_nccl_gpu(distributed_context_multi_node_nccl):
+
+    device = idist.device()
     _test_distrib_compute(device)
     _test_distrib_integration(device)
diff --git a/tests/ignite/contrib/metrics/regression/test_maximum_absolute_error.py b/tests/ignite/contrib/metrics/regression/test_maximum_absolute_error.py
index 20c44d862148..828a9dcc1ee4 100644
--- a/tests/ignite/contrib/metrics/regression/test_maximum_absolute_error.py
+++ b/tests/ignite/contrib/metrics/regression/test_maximum_absolute_error.py
@@ -181,17 +181,18 @@ def update(engine, i):
 @pytest.mark.distributed
 @pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support")
 @pytest.mark.skipif(torch.cuda.device_count() < 1, reason="Skip if no GPU")
-def test_distrib_gpu(distributed_context_single_node_nccl):
-    device = torch.device(f"cuda:{distributed_context_single_node_nccl['local_rank']}")
+def test_distrib_nccl_gpu(distributed_context_single_node_nccl):
+
+    device = idist.device()
     _test_distrib_compute(device)
     _test_distrib_integration(device)
 
 
 @pytest.mark.distributed
 @pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support")
-def test_distrib_cpu(distributed_context_single_node_gloo):
+def test_distrib_gloo_cpu_or_gpu(distributed_context_single_node_gloo):
 
-    device = torch.device("cpu")
+    device = idist.device()
     _test_distrib_compute(device)
     _test_distrib_integration(device)
 
@@ -211,8 +212,9 @@ def test_distrib_hvd(gloo_hvd_executor):
 @pytest.mark.multinode_distributed
 @pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support")
 @pytest.mark.skipif("MULTINODE_DISTRIB" not in os.environ, reason="Skip if not multi-node distributed")
-def test_multinode_distrib_cpu(distributed_context_multi_node_gloo):
-    device = torch.device("cpu")
+def test_multinode_distrib_gloo_cpu_or_gpu(distributed_context_multi_node_gloo):
+
+    device = idist.device()
     _test_distrib_compute(device)
     _test_distrib_integration(device)
 
@@ -220,8 +222,9 @@ def test_multinode_distrib_cpu(distributed_context_multi_node_gloo):
 @pytest.mark.multinode_distributed
 @pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support")
 @pytest.mark.skipif("GPU_MULTINODE_DISTRIB" not in os.environ, reason="Skip if not multi-node distributed")
-def test_multinode_distrib_gpu(distributed_context_multi_node_nccl):
-    device = torch.device(f"cuda:{distributed_context_multi_node_nccl['local_rank']}")
+def test_multinode_distrib_nccl_gpu(distributed_context_multi_node_nccl):
+
+    device = idist.device()
     _test_distrib_compute(device)
     _test_distrib_integration(device)
diff --git a/tests/ignite/contrib/metrics/regression/test_mean_absolute_relative_error.py b/tests/ignite/contrib/metrics/regression/test_mean_absolute_relative_error.py
index 4841139e712f..90e8baceb497 100644
--- a/tests/ignite/contrib/metrics/regression/test_mean_absolute_relative_error.py
+++ b/tests/ignite/contrib/metrics/regression/test_mean_absolute_relative_error.py
@@ -205,17 +205,18 @@ def update(engine, i):
 @pytest.mark.distributed
 @pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support")
 @pytest.mark.skipif(torch.cuda.device_count() < 1, reason="Skip if no GPU")
-def test_distrib_gpu(distributed_context_single_node_nccl):
-    device = torch.device(f"cuda:{distributed_context_single_node_nccl['local_rank']}")
+def test_distrib_nccl_gpu(distributed_context_single_node_nccl):
+
+    device = idist.device()
     _test_distrib_compute(device)
     _test_distrib_integration(device)
 
 
 @pytest.mark.distributed
 @pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support")
-def test_distrib_cpu(distributed_context_single_node_gloo):
+def test_distrib_gloo_cpu_or_gpu(distributed_context_single_node_gloo):
 
-    device = torch.device("cpu")
+    device = idist.device()
     _test_distrib_compute(device)
     _test_distrib_integration(device)
 
@@ -235,8 +236,9 @@ def test_distrib_hvd(gloo_hvd_executor):
 @pytest.mark.multinode_distributed
 @pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support")
 @pytest.mark.skipif("MULTINODE_DISTRIB" not in os.environ, reason="Skip if not multi-node distributed")
-def test_multinode_distrib_cpu(distributed_context_multi_node_gloo):
-    device = torch.device("cpu")
+def test_multinode_distrib_gloo_cpu_or_gpu(distributed_context_multi_node_gloo):
+
+    device = idist.device()
     _test_distrib_compute(device)
     _test_distrib_integration(device)
 
@@ -244,8 +246,9 @@ def test_multinode_distrib_cpu(distributed_context_multi_node_gloo):
 @pytest.mark.multinode_distributed
 @pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support")
 @pytest.mark.skipif("GPU_MULTINODE_DISTRIB" not in os.environ, reason="Skip if not multi-node distributed")
-def test_multinode_distrib_gpu(distributed_context_multi_node_nccl):
-    device = torch.device(f"cuda:{distributed_context_multi_node_nccl['local_rank']}")
+def test_multinode_distrib_nccl_gpu(distributed_context_multi_node_nccl):
+
+    device = idist.device()
     _test_distrib_compute(device)
     _test_distrib_integration(device)
diff --git a/tests/ignite/contrib/metrics/regression/test_mean_normalized_bias.py b/tests/ignite/contrib/metrics/regression/test_mean_normalized_bias.py
index 41d7a46c7725..b9036287ff1c 100644
--- a/tests/ignite/contrib/metrics/regression/test_mean_normalized_bias.py
+++ b/tests/ignite/contrib/metrics/regression/test_mean_normalized_bias.py
@@ -199,17 +199,18 @@ def update(engine, i):
 @pytest.mark.distributed
 @pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support")
 @pytest.mark.skipif(torch.cuda.device_count() < 1, reason="Skip if no GPU")
-def test_distrib_gpu(distributed_context_single_node_nccl):
-    device = torch.device(f"cuda:{distributed_context_single_node_nccl['local_rank']}")
+def test_distrib_nccl_gpu(distributed_context_single_node_nccl):
+
+    device = idist.device()
     _test_distrib_compute(device)
     _test_distrib_integration(device)
 
 
 @pytest.mark.distributed
 @pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support")
-def test_distrib_cpu(distributed_context_single_node_gloo):
+def test_distrib_gloo_cpu_or_gpu(distributed_context_single_node_gloo):
 
-    device = torch.device("cpu")
+    device = idist.device()
     _test_distrib_compute(device)
     _test_distrib_integration(device)
 
@@ -229,8 +230,9 @@ def test_distrib_hvd(gloo_hvd_executor):
 @pytest.mark.multinode_distributed
 @pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support")
 @pytest.mark.skipif("MULTINODE_DISTRIB" not in os.environ, reason="Skip if not multi-node distributed")
-def test_multinode_distrib_cpu(distributed_context_multi_node_gloo):
-    device = torch.device("cpu")
+def test_multinode_distrib_gloo_cpu_or_gpu(distributed_context_multi_node_gloo):
+
+    device = idist.device()
     _test_distrib_compute(device)
     _test_distrib_integration(device)
 
@@ -238,8 +240,9 @@ def test_multinode_distrib_cpu(distributed_context_multi_node_gloo):
 @pytest.mark.multinode_distributed
 @pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support")
 @pytest.mark.skipif("GPU_MULTINODE_DISTRIB" not in os.environ, reason="Skip if not multi-node distributed")
-def test_multinode_distrib_gpu(distributed_context_multi_node_nccl):
-    device = torch.device(f"cuda:{distributed_context_multi_node_nccl['local_rank']}")
+def test_multinode_distrib_nccl_gpu(distributed_context_multi_node_nccl):
+
+    device = idist.device()
     _test_distrib_compute(device)
     _test_distrib_integration(device)
diff --git a/tests/ignite/contrib/metrics/regression/test_median_absolute_error.py b/tests/ignite/contrib/metrics/regression/test_median_absolute_error.py
index a77c1de06bdd..ebe063293626 100644
--- a/tests/ignite/contrib/metrics/regression/test_median_absolute_error.py
+++ b/tests/ignite/contrib/metrics/regression/test_median_absolute_error.py
@@ -199,17 +199,18 @@ def update(engine, i):
 @pytest.mark.distributed
 @pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support")
 @pytest.mark.skipif(torch.cuda.device_count() < 1, reason="Skip if no GPU")
-def test_distrib_gpu(distributed_context_single_node_nccl):
-    device = torch.device(f"cuda:{distributed_context_single_node_nccl['local_rank']}")
+def test_distrib_nccl_gpu(distributed_context_single_node_nccl):
+
+    device = idist.device()
     _test_distrib_compute(device)
     _test_distrib_integration(device)
 
 
 @pytest.mark.distributed
 @pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support")
-def test_distrib_cpu(distributed_context_single_node_gloo):
+def test_distrib_gloo_cpu_or_gpu(distributed_context_single_node_gloo):
 
-    device = torch.device("cpu")
+    device = idist.device()
     _test_distrib_compute(device)
     _test_distrib_integration(device)
 
@@ -229,8 +230,9 @@ def test_distrib_hvd(gloo_hvd_executor):
 @pytest.mark.multinode_distributed
 @pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support")
 @pytest.mark.skipif("MULTINODE_DISTRIB" not in os.environ, reason="Skip if not multi-node distributed")
-def test_multinode_distrib_cpu(distributed_context_multi_node_gloo):
-    device = torch.device("cpu")
+def test_multinode_distrib_gloo_cpu_or_gpu(distributed_context_multi_node_gloo):
+
+    device = idist.device()
     _test_distrib_compute(device)
     _test_distrib_integration(device)
 
@@ -238,8 +240,9 @@ def test_multinode_distrib_cpu(distributed_context_multi_node_gloo):
 @pytest.mark.multinode_distributed
 @pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support")
 @pytest.mark.skipif("GPU_MULTINODE_DISTRIB" not in os.environ, reason="Skip if not multi-node distributed")
-def test_multinode_distrib_gpu(distributed_context_multi_node_nccl):
-    device = torch.device(f"cuda:{distributed_context_multi_node_nccl['local_rank']}")
+def test_multinode_distrib_nccl_gpu(distributed_context_multi_node_nccl):
+
+    device = idist.device()
     _test_distrib_compute(device)
     _test_distrib_integration(device)
if no native dist support") @pytest.mark.skipif(torch.cuda.device_count() < 1, reason="Skip if no GPU") -def test_distrib_gpu(distributed_context_single_node_nccl): - device = torch.device(f"cuda:{distributed_context_single_node_nccl['local_rank']}") +def test_distrib_nccl_gpu(distributed_context_single_node_nccl): + + device = idist.device() _test_distrib_compute(device) _test_distrib_integration(device) @pytest.mark.distributed @pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support") -def test_distrib_cpu(distributed_context_single_node_gloo): +def test_distrib_gloo_cpu_or_gpu(distributed_context_single_node_gloo): - device = torch.device("cpu") + device = idist.device() _test_distrib_compute(device) _test_distrib_integration(device) @@ -196,8 +197,9 @@ def test_distrib_hvd(gloo_hvd_executor): @pytest.mark.multinode_distributed @pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support") @pytest.mark.skipif("MULTINODE_DISTRIB" not in os.environ, reason="Skip if not multi-node distributed") -def test_multinode_distrib_cpu(distributed_context_multi_node_gloo): - device = torch.device("cpu") +def test_multinode_distrib_gloo_cpu_or_gpu(distributed_context_multi_node_gloo): + + device = idist.device() _test_distrib_compute(device) _test_distrib_integration(device) @@ -205,8 +207,9 @@ def test_multinode_distrib_cpu(distributed_context_multi_node_gloo): @pytest.mark.multinode_distributed @pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support") @pytest.mark.skipif("GPU_MULTINODE_DISTRIB" not in os.environ, reason="Skip if not multi-node distributed") -def test_multinode_distrib_gpu(distributed_context_multi_node_nccl): - device = torch.device(f"cuda:{distributed_context_multi_node_nccl['local_rank']}") +def test_multinode_distrib_nccl_gpu(distributed_context_multi_node_nccl): + + device = idist.device() _test_distrib_compute(device) _test_distrib_integration(device) diff --git a/tests/ignite/contrib/metrics/test_average_precision.py b/tests/ignite/contrib/metrics/test_average_precision.py index c1f2fcab1c55..7b7f55aaca0e 100644 --- a/tests/ignite/contrib/metrics/test_average_precision.py +++ b/tests/ignite/contrib/metrics/test_average_precision.py @@ -278,18 +278,18 @@ def update_fn(engine, i): @pytest.mark.distributed @pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support") @pytest.mark.skipif(torch.cuda.device_count() < 1, reason="Skip if no GPU") -def test_distrib_gpu(distributed_context_single_node_nccl): +def test_distrib_nccl_gpu(distributed_context_single_node_nccl): - device = torch.device(f"cuda:{distributed_context_single_node_nccl['local_rank']}") + device = idist.device() _test_distrib_binary_and_multilabel_inputs(device) _test_distrib_integration_binary_input(device) @pytest.mark.distributed @pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support") -def test_distrib_cpu(distributed_context_single_node_gloo): +def test_distrib_gloo_cpu_or_gpu(distributed_context_single_node_gloo): - device = torch.device("cpu") + device = idist.device() _test_distrib_binary_and_multilabel_inputs(device) _test_distrib_integration_binary_input(device) @@ -309,9 +309,9 @@ def test_distrib_hvd(gloo_hvd_executor): @pytest.mark.multinode_distributed @pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support") @pytest.mark.skipif("MULTINODE_DISTRIB" not in os.environ, reason="Skip if 
not multi-node distributed") -def test_multinode_distrib_cpu(distributed_context_multi_node_gloo): +def test_multinode_distrib_gloo_cpu_or_gpu(distributed_context_multi_node_gloo): - device = torch.device("cpu") + device = idist.device() _test_distrib_binary_and_multilabel_inputs(device) _test_distrib_integration_binary_input(device) @@ -319,9 +319,9 @@ def test_multinode_distrib_cpu(distributed_context_multi_node_gloo): @pytest.mark.multinode_distributed @pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support") @pytest.mark.skipif("GPU_MULTINODE_DISTRIB" not in os.environ, reason="Skip if not multi-node distributed") -def test_multinode_distrib_gpu(distributed_context_multi_node_nccl): +def test_multinode_distrib_nccl_gpu(distributed_context_multi_node_nccl): - device = torch.device(f"cuda:{distributed_context_multi_node_nccl['local_rank']}") + device = idist.device() _test_distrib_binary_and_multilabel_inputs(device) _test_distrib_integration_binary_input(device) diff --git a/tests/ignite/contrib/metrics/test_cohen_kappa.py b/tests/ignite/contrib/metrics/test_cohen_kappa.py index e237794f25fc..32f9bbf9a1f3 100644 --- a/tests/ignite/contrib/metrics/test_cohen_kappa.py +++ b/tests/ignite/contrib/metrics/test_cohen_kappa.py @@ -273,18 +273,18 @@ def update(engine, i): @pytest.mark.distributed @pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support") @pytest.mark.skipif(torch.cuda.device_count() < 1, reason="Skip if no GPU") -def test_distrib_gpu(distributed_context_single_node_nccl): +def test_distrib_nccl_gpu(distributed_context_single_node_nccl): - device = torch.device(f"cuda:{distributed_context_single_node_nccl['local_rank']}") + device = idist.device() _test_distrib_binary_input(device) _test_distrib_integration_binary_input(device) @pytest.mark.distributed @pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support") -def test_distrib_cpu(distributed_context_single_node_gloo): +def test_distrib_gloo_cpu_or_gpu(distributed_context_single_node_gloo): - device = torch.device("cpu") + device = idist.device() _test_distrib_binary_input(device) _test_distrib_integration_binary_input(device) @@ -308,9 +308,9 @@ def test_distrib_hvd(gloo_hvd_executor): @pytest.mark.multinode_distributed @pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support") @pytest.mark.skipif("MULTINODE_DISTRIB" not in os.environ, reason="Skip if not multi-node distributed") -def test_multinode_distrib_cpu(distributed_context_multi_node_gloo): +def test_multinode_distrib_gloo_cpu_or_gpu(distributed_context_multi_node_gloo): - device = torch.device("cpu") + device = idist.device() _test_distrib_binary_input(device) _test_distrib_integration_binary_input(device) @@ -318,9 +318,9 @@ def test_multinode_distrib_cpu(distributed_context_multi_node_gloo): @pytest.mark.multinode_distributed @pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support") @pytest.mark.skipif("GPU_MULTINODE_DISTRIB" not in os.environ, reason="Skip if not multi-node distributed") -def test_multinode_distrib_gpu(distributed_context_multi_node_nccl): +def test_multinode_distrib_nccl_gpu(distributed_context_multi_node_nccl): - device = torch.device(f"cuda:{distributed_context_multi_node_nccl['local_rank']}") + device = idist.device() _test_distrib_binary_input(device) _test_distrib_integration_binary_input(device) diff --git a/tests/ignite/contrib/metrics/test_roc_auc.py 
diff --git a/tests/ignite/contrib/metrics/test_average_precision.py b/tests/ignite/contrib/metrics/test_average_precision.py
index c1f2fcab1c55..7b7f55aaca0e 100644
--- a/tests/ignite/contrib/metrics/test_average_precision.py
+++ b/tests/ignite/contrib/metrics/test_average_precision.py
@@ -278,18 +278,18 @@ def update_fn(engine, i):
 @pytest.mark.distributed
 @pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support")
 @pytest.mark.skipif(torch.cuda.device_count() < 1, reason="Skip if no GPU")
-def test_distrib_gpu(distributed_context_single_node_nccl):
+def test_distrib_nccl_gpu(distributed_context_single_node_nccl):
 
-    device = torch.device(f"cuda:{distributed_context_single_node_nccl['local_rank']}")
+    device = idist.device()
     _test_distrib_binary_and_multilabel_inputs(device)
     _test_distrib_integration_binary_input(device)
 
 
 @pytest.mark.distributed
 @pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support")
-def test_distrib_cpu(distributed_context_single_node_gloo):
+def test_distrib_gloo_cpu_or_gpu(distributed_context_single_node_gloo):
 
-    device = torch.device("cpu")
+    device = idist.device()
     _test_distrib_binary_and_multilabel_inputs(device)
     _test_distrib_integration_binary_input(device)
 
@@ -309,9 +309,9 @@ def test_distrib_hvd(gloo_hvd_executor):
 @pytest.mark.multinode_distributed
 @pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support")
 @pytest.mark.skipif("MULTINODE_DISTRIB" not in os.environ, reason="Skip if not multi-node distributed")
-def test_multinode_distrib_cpu(distributed_context_multi_node_gloo):
+def test_multinode_distrib_gloo_cpu_or_gpu(distributed_context_multi_node_gloo):
 
-    device = torch.device("cpu")
+    device = idist.device()
     _test_distrib_binary_and_multilabel_inputs(device)
     _test_distrib_integration_binary_input(device)
 
@@ -319,9 +319,9 @@ def test_multinode_distrib_cpu(distributed_context_multi_node_gloo):
 @pytest.mark.multinode_distributed
 @pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support")
 @pytest.mark.skipif("GPU_MULTINODE_DISTRIB" not in os.environ, reason="Skip if not multi-node distributed")
-def test_multinode_distrib_gpu(distributed_context_multi_node_nccl):
+def test_multinode_distrib_nccl_gpu(distributed_context_multi_node_nccl):
 
-    device = torch.device(f"cuda:{distributed_context_multi_node_nccl['local_rank']}")
+    device = idist.device()
     _test_distrib_binary_and_multilabel_inputs(device)
     _test_distrib_integration_binary_input(device)
diff --git a/tests/ignite/contrib/metrics/test_cohen_kappa.py b/tests/ignite/contrib/metrics/test_cohen_kappa.py
index e237794f25fc..32f9bbf9a1f3 100644
--- a/tests/ignite/contrib/metrics/test_cohen_kappa.py
+++ b/tests/ignite/contrib/metrics/test_cohen_kappa.py
@@ -273,18 +273,18 @@ def update(engine, i):
 @pytest.mark.distributed
 @pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support")
 @pytest.mark.skipif(torch.cuda.device_count() < 1, reason="Skip if no GPU")
-def test_distrib_gpu(distributed_context_single_node_nccl):
+def test_distrib_nccl_gpu(distributed_context_single_node_nccl):
 
-    device = torch.device(f"cuda:{distributed_context_single_node_nccl['local_rank']}")
+    device = idist.device()
     _test_distrib_binary_input(device)
     _test_distrib_integration_binary_input(device)
 
 
 @pytest.mark.distributed
 @pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support")
-def test_distrib_cpu(distributed_context_single_node_gloo):
+def test_distrib_gloo_cpu_or_gpu(distributed_context_single_node_gloo):
 
-    device = torch.device("cpu")
+    device = idist.device()
     _test_distrib_binary_input(device)
     _test_distrib_integration_binary_input(device)
 
@@ -308,9 +308,9 @@ def test_distrib_hvd(gloo_hvd_executor):
 @pytest.mark.multinode_distributed
 @pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support")
 @pytest.mark.skipif("MULTINODE_DISTRIB" not in os.environ, reason="Skip if not multi-node distributed")
-def test_multinode_distrib_cpu(distributed_context_multi_node_gloo):
+def test_multinode_distrib_gloo_cpu_or_gpu(distributed_context_multi_node_gloo):
 
-    device = torch.device("cpu")
+    device = idist.device()
     _test_distrib_binary_input(device)
     _test_distrib_integration_binary_input(device)
 
@@ -318,9 +318,9 @@ def test_multinode_distrib_cpu(distributed_context_multi_node_gloo):
 @pytest.mark.multinode_distributed
 @pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support")
 @pytest.mark.skipif("GPU_MULTINODE_DISTRIB" not in os.environ, reason="Skip if not multi-node distributed")
-def test_multinode_distrib_gpu(distributed_context_multi_node_nccl):
+def test_multinode_distrib_nccl_gpu(distributed_context_multi_node_nccl):
 
-    device = torch.device(f"cuda:{distributed_context_multi_node_nccl['local_rank']}")
+    device = idist.device()
     _test_distrib_binary_input(device)
     _test_distrib_integration_binary_input(device)
diff --git a/tests/ignite/contrib/metrics/test_roc_auc.py b/tests/ignite/contrib/metrics/test_roc_auc.py
index 957a97bb6b7c..aa34089cdbc2 100644
--- a/tests/ignite/contrib/metrics/test_roc_auc.py
+++ b/tests/ignite/contrib/metrics/test_roc_auc.py
@@ -291,18 +291,18 @@ def update_fn(engine, i):
 @pytest.mark.distributed
 @pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support")
 @pytest.mark.skipif(torch.cuda.device_count() < 1, reason="Skip if no GPU")
-def test_distrib_gpu(distributed_context_single_node_nccl):
+def test_distrib_nccl_gpu(distributed_context_single_node_nccl):
 
-    device = torch.device(f"cuda:{distributed_context_single_node_nccl['local_rank']}")
+    device = idist.device()
     _test_distrib_binary_and_multilabel_inputs(device)
     _test_distrib_integration_binary_input(device)
 
 
 @pytest.mark.distributed
 @pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support")
-def test_distrib_cpu(distributed_context_single_node_gloo):
+def test_distrib_gloo_cpu_or_gpu(distributed_context_single_node_gloo):
 
-    device = torch.device("cpu")
+    device = idist.device()
     _test_distrib_binary_and_multilabel_inputs(device)
     _test_distrib_integration_binary_input(device)
 
@@ -322,9 +322,9 @@ def test_distrib_hvd(gloo_hvd_executor):
 @pytest.mark.multinode_distributed
 @pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support")
 @pytest.mark.skipif("MULTINODE_DISTRIB" not in os.environ, reason="Skip if not multi-node distributed")
-def test_multinode_distrib_cpu(distributed_context_multi_node_gloo):
+def test_multinode_distrib_gloo_cpu_or_gpu(distributed_context_multi_node_gloo):
 
-    device = torch.device("cpu")
+    device = idist.device()
     _test_distrib_binary_and_multilabel_inputs(device)
     _test_distrib_integration_binary_input(device)
 
@@ -332,9 +332,9 @@ def test_multinode_distrib_cpu(distributed_context_multi_node_gloo):
 @pytest.mark.multinode_distributed
 @pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support")
 @pytest.mark.skipif("GPU_MULTINODE_DISTRIB" not in os.environ, reason="Skip if not multi-node distributed")
-def test_multinode_distrib_gpu(distributed_context_multi_node_nccl):
+def test_multinode_distrib_nccl_gpu(distributed_context_multi_node_nccl):
 
-    device = torch.device(f"cuda:{distributed_context_multi_node_nccl['local_rank']}")
+    device = idist.device()
     _test_distrib_binary_and_multilabel_inputs(device)
     _test_distrib_integration_binary_input(device)
diff --git a/tests/ignite/distributed/comp_models/test_base.py b/tests/ignite/distributed/comp_models/test_base.py
index b944ea61cf9f..6a7ca20d35dd 100644
--- a/tests/ignite/distributed/comp_models/test_base.py
+++ b/tests/ignite/distributed/comp_models/test_base.py
@@ -34,7 +34,7 @@ def test_serial_model():
 
 
 def test__encode_str__decode_str():
-    device = torch.device("cpu")
+    device = torch.device("cpu" if not torch.cuda.is_available() else "cuda")
     s = "test-abcedfg"
 
     encoded_s = ComputationModel._encode_str(s, device, 1024)
diff --git a/tests/ignite/distributed/comp_models/test_native.py b/tests/ignite/distributed/comp_models/test_native.py
index 61499f35c0d6..80cb480f5ab4 100644
--- a/tests/ignite/distributed/comp_models/test_native.py
+++ b/tests/ignite/distributed/comp_models/test_native.py
@@ -279,16 +279,18 @@ def _test__native_dist_model_create_from_context_dist(local_rank, rank, world_si
 @pytest.mark.distributed
 @pytest.mark.skipif("WORLD_SIZE" in os.environ, reason="Should be no-dist config")
 def test__native_dist_model_create_no_dist_gloo(clean_env):
-    _test__native_dist_model_create_from_backend_no_dist("gloo", "cpu")
-    _test__native_dist_model_create_from_context_no_dist("gloo", "cpu")
+    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
+    _test__native_dist_model_create_from_backend_no_dist("gloo", device)
+    _test__native_dist_model_create_from_context_no_dist("gloo", device)
 
 
 @pytest.mark.distributed
 @pytest.mark.skipif("WORLD_SIZE" in os.environ, reason="Should be no-dist config")
 @pytest.mark.skipif(torch.cuda.device_count() < 1, reason="Skip if no GPU")
 def test__native_dist_model_create_no_dist_nccl(clean_env):
-    _test__native_dist_model_create_from_backend_no_dist("nccl", "cuda:0")
-    _test__native_dist_model_create_from_context_no_dist("nccl", "cuda:0")
+    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
+    _test__native_dist_model_create_from_backend_no_dist("nccl", device)
+    _test__native_dist_model_create_from_context_no_dist("nccl", device)
 
 
 @pytest.mark.distributed
@@ -297,12 +299,15 @@ def test__native_dist_model_create_dist_gloo_1(init_method, get_fixed_dirname, l
     if init_method == "FILE":
         init_method = f"file://{get_fixed_dirname('native_dist_model_create_dist_gloo_1')}/shared"
 
-    _test__native_dist_model_create_from_backend_dist(init_method, local_rank, local_rank, world_size, "gloo", "cpu")
+    device = torch.device(f"cuda:{local_rank}" if torch.cuda.is_available() else "cpu")
+    _test__native_dist_model_create_from_backend_dist(init_method, local_rank, local_rank, world_size, "gloo", device)
 
 
 @pytest.mark.distributed
 def test__native_dist_model_create_dist_gloo_2(local_rank, world_size):
-    _test__native_dist_model_create_from_context_dist(local_rank, local_rank, world_size, "gloo", "cpu")
+
+    device = torch.device(f"cuda:{local_rank}" if torch.cuda.is_available() else "cpu")
+    _test__native_dist_model_create_from_context_dist(local_rank, local_rank, world_size, "gloo", device)
 
 
 @pytest.mark.distributed
@@ -354,10 +359,7 @@ def _test_dist_spawn_fn(local_rank, backend, world_size, device):
     assert _model.get_local_rank() == local_rank
     assert _model.get_world_size() == world_size
 
-    if backend == "nccl":
-        assert _model.device() == torch.device(f"{device}:{local_rank}")
-    elif backend == "gloo":
-        assert _model.device() == torch.device(device)
+    assert _model.device().type == torch.device(device).type
 
 
 def _test__native_dist_model_spawn(backend, num_workers_per_machine, device, init_method=None, **spawn_kwargs):
@@ -379,10 +381,13 @@ def test__native_dist_model_spawn_gloo(init_method, dirname):
     if init_method == "FILE":
         init_method = f"file://{dirname}/shared"
 
-    _test__native_dist_model_spawn("gloo", num_workers_per_machine=4, device="cpu", init_method=init_method)
-    _test__native_dist_model_spawn(
-        "gloo", num_workers_per_machine=4, device="cpu", start_method="fork", init_method=init_method
-    )
+    nproc = torch.cuda.device_count() if torch.cuda.is_available() else 4
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    _test__native_dist_model_spawn("gloo", num_workers_per_machine=nproc, device=device, init_method=init_method)
+    if device.type == "cpu":
+        _test__native_dist_model_spawn(
+            "gloo", num_workers_per_machine=nproc, device=device, start_method="fork", init_method=init_method
+        )
 
 
 @pytest.mark.distributed
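A recurring idiom in the rewritten assertions above (and in `test_auto.py` below) is normalizing `str` and `torch.device` inputs through `torch.device(x).type`, so that `"cuda"`, `"cuda:1"` and `torch.device("cuda:0")` all compare equal at the device-type level. A tiny self-contained illustration (no GPU needed just to construct the device objects):

```python
import torch

# torch.device() accepts both strings and existing device objects;
# .type drops the index, which is exactly what the patched asserts compare.
for spec in ("cpu", "cuda", "cuda:1", torch.device("cuda:0")):
    print(repr(spec), "->", torch.device(spec).type)
# 'cpu' -> cpu, 'cuda' -> cuda, 'cuda:1' -> cuda, device(type='cuda', index=0) -> cuda
```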
diff --git a/tests/ignite/distributed/test_auto.py b/tests/ignite/distributed/test_auto.py
index 0a979c1de835..e77ea3ac6dc3 100644
--- a/tests/ignite/distributed/test_auto.py
+++ b/tests/ignite/distributed/test_auto.py
@@ -78,7 +78,7 @@ def _test_auto_dataloader(ws, nproc, batch_size, num_workers=1, sampler_name=Non
 def _test_auto_model(model, ws, device, sync_bn=False, **kwargs):
     model = auto_model(model, sync_bn=sync_bn, **kwargs)
     bnd = idist.backend()
-    if ws > 1 and device in ("cuda", "cpu"):
+    if ws > 1 and torch.device(device).type in ("cuda", "cpu"):
         if idist.has_native_dist_support and bnd in ("nccl", "gloo"):
             assert isinstance(model, nn.parallel.DistributedDataParallel)
             if sync_bn:
@@ -93,8 +93,8 @@ def _test_auto_model(model, ws, device, sync_bn=False, **kwargs):
         assert isinstance(model, nn.Module)
 
     assert all(
-        [p.device.type == device for p in model.parameters()]
-    ), f"{[p.device.type for p in model.parameters()]} vs {device}"
+        [p.device.type == torch.device(device).type for p in model.parameters()]
+    ), f"{[p.device.type for p in model.parameters()]} vs {torch.device(device).type}"
 
 
 def _test_auto_model_optimizer(ws, device):
@@ -103,7 +103,7 @@ def _test_auto_model_optimizer(ws, device):
     _test_auto_model(model, ws, device)
 
     model = nn.Sequential(nn.Linear(20, 100), nn.BatchNorm1d(100))
-    _test_auto_model(model, ws, device, sync_bn="cuda" in device)
+    _test_auto_model(model, ws, device, sync_bn="cuda" in torch.device(device).type)
     if ws > 1:
         _test_auto_model(model, ws, device, find_unused_parameters=True)
         _test_auto_model(model, ws, device, find_unused_parameters=False)
@@ -138,9 +138,10 @@ def test_auto_methods_gloo(distributed_context_single_node_gloo):
     _test_auto_dataloader(ws=ws, nproc=ws, batch_size=10, num_workers=2)
     _test_auto_dataloader(ws=ws, nproc=ws, batch_size=10, sampler_name="WeightedRandomSampler")
 
-    _test_auto_model_optimizer(ws, "cpu")
+    device = idist.device()
+    _test_auto_model_optimizer(ws, device)
 
-    if ws > 1:
+    if ws > 1 and device.type == "cpu":
         with pytest.raises(AssertionError, match=r"SyncBatchNorm layers only work with GPU modules"):
             model = nn.Sequential(nn.Linear(20, 100), nn.BatchNorm1d(100))
             auto_model(model, sync_bn=True)
@@ -156,7 +157,8 @@ def test_auto_methods_nccl(distributed_context_single_node_nccl):
     _test_auto_dataloader(ws=ws, nproc=ws, batch_size=10, num_workers=10)
     _test_auto_dataloader(ws=ws, nproc=ws, batch_size=1, sampler_name="WeightedRandomSampler")
 
-    _test_auto_model_optimizer(ws, "cuda")
+    device = idist.device()
+    _test_auto_model_optimizer(ws, device)
 
     if ws > 1:
         with pytest.raises(ValueError, match=r"Argument kwargs should not contain 'device_ids'"):
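`test_auto_methods_gloo` now expects the SyncBatchNorm failure only on CPU, because after this patch gloo-on-GPU is a legitimate DDP configuration. A hedged sketch of the behaviour the test encodes (runs standalone; outside a process group `auto_model` simply moves the model to `idist.device()`):

```python
import torch
import torch.nn as nn
import ignite.distributed as idist

model = nn.Sequential(nn.Linear(20, 100), nn.BatchNorm1d(100))
# sync_bn=True is only valid for GPU modules; on a CPU-only gloo group the
# test expects the "SyncBatchNorm layers only work with GPU modules" assertion.
wrapped = idist.auto_model(model, sync_bn=torch.cuda.is_available())
```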
_test_check_idist_parallel_spawn(exec_filepath, "gloo", np) @pytest.mark.distributed @@ -182,7 +184,7 @@ def _test_func(index, ws, device, backend, true_init_method): assert 0 <= index < ws assert index == idist.get_local_rank() assert ws == idist.get_world_size() - assert device in idist.device().type + assert torch.device(device).type == idist.device().type assert backend == idist.backend() if idist.model_name() == "native-dist": @@ -203,8 +205,8 @@ def test_idist_parallel_spawn_n_procs_native(init_method, backend, dirname): if init_method == "FILE": init_method = f"file://{dirname}/shared" - nproc_per_node = 4 if "gloo" == backend else torch.cuda.device_count() - device = "cpu" if "gloo" == backend else "cuda" + nproc_per_node = torch.cuda.device_count() if torch.cuda.is_available() else 4 + device = torch.device("cuda" if torch.cuda.is_available() else "cpu") with idist.Parallel(backend=backend, nproc_per_node=nproc_per_node, init_method=init_method) as parallel: parallel.run(_test_func, ws=nproc_per_node, device=device, backend=backend, true_init_method=init_method) @@ -222,14 +224,14 @@ def test_idist_parallel_n_procs_native(init_method, backend, get_fixed_dirname, init_method = f"file://{get_fixed_dirname('idist_parallel_n_procs_native')}/shared" os.environ["RANK"] = str(local_rank) - device = "cuda" if "nccl" in backend else "cpu" + device = torch.device("cuda" if torch.cuda.is_available() else "cpu") with idist.Parallel(backend=backend, init_method=init_method) as parallel: parallel.run(_test_func, ws=world_size, device=device, backend=backend, true_init_method=init_method) @pytest.mark.skipif("WORLD_SIZE" in os.environ, reason="Skip if launched as multiproc") def test_idist_parallel_no_dist(): - device = "cuda" if torch.cuda.is_available() else "cpu" + device = torch.device("cuda" if torch.cuda.is_available() else "cpu") with idist.Parallel(backend=None) as parallel: parallel.run(_test_func, ws=1, device=device, backend=None, true_init_method=None) diff --git a/tests/ignite/distributed/utils/__init__.py b/tests/ignite/distributed/utils/__init__.py index 7af11ac620bd..91f15958431b 100644 --- a/tests/ignite/distributed/utils/__init__.py +++ b/tests/ignite/distributed/utils/__init__.py @@ -21,11 +21,10 @@ def _test_distrib_config(local_rank, backend, ws, true_device, rank=None, true_i this_device = idist.device() assert isinstance(this_device, torch.device) - if backend in ("nccl", "horovod") and "cuda" in this_device.type: - true_device = torch.device(f"{true_device}:{local_rank}") - assert this_device == true_device, f"{this_device} vs {true_device}" + if backend in ("nccl", "gloo", "horovod") and "cuda" in this_device.type: + assert this_device.type == torch.device(true_device).type, f"{this_device} vs {true_device}" elif backend in ("gloo", "horovod"): - assert this_device == torch.device(true_device) + assert this_device.type == torch.device(true_device).type elif backend == "xla-tpu": assert true_device in this_device.type diff --git a/tests/ignite/distributed/utils/test_native.py b/tests/ignite/distributed/utils/test_native.py index 89a93ce9f08d..55ce5ebb7647 100644 --- a/tests/ignite/distributed/utils/test_native.py +++ b/tests/ignite/distributed/utils/test_native.py @@ -42,8 +42,9 @@ def test_native_distrib_single_node_launch_tool_gloo(init_method, get_fixed_dirn if init_method == "FILE": init_method = f"file://{get_fixed_dirname('native_distrib_single_node_launch_tool_gloo')}/shared" + device = torch.device(f"cuda:{local_rank}" if torch.cuda.is_available() else "cpu") 
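# The recurring pattern in the hunks above: the test device now follows CUDA
# availability instead of the backend name, so the same "gloo" test runs on
# CPU or on one GPU per process. A minimal standalone sketch of that selection
# (plain torch; the helper name is illustrative, not an ignite API):
import torch

def resolve_test_device(local_rank: int) -> torch.device:
    # One CUDA device per process when GPUs are visible, else CPU.
    return torch.device(f"cuda:{local_rank}" if torch.cuda.is_available() else "cpu")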
_test_native_distrib_single_node_launch_tool( - "gloo", "cpu", local_rank, world_size, timeout=timeout, init_method=init_method + "gloo", device, local_rank, world_size, timeout=timeout, init_method=init_method ) @@ -56,11 +57,12 @@ def test_native_distrib_single_node_launch_tool_nccl(init_method, get_fixed_dirn if init_method == "FILE": init_method = f"file://{get_fixed_dirname('native_distrib_single_node_launch_tool_nccl')}/shared" - _test_native_distrib_single_node_launch_tool("nccl", "cuda", local_rank, world_size, init_method=init_method) + device = torch.device(f"cuda:{local_rank}") + _test_native_distrib_single_node_launch_tool("nccl", device, local_rank, world_size, init_method=init_method) def _test_native_distrib_single_node_spawn(init_method, backend, device, **kwargs): - world_size = 4 if device == "cpu" else torch.cuda.device_count() + world_size = 4 if torch.device(device).type == "cpu" else torch.cuda.device_count() idist.spawn( backend, _test_distrib_config, @@ -84,7 +86,8 @@ def test_native_distrib_single_node_spawn_gloo(init_method, dirname): if init_method == "FILE": init_method = f"file://{dirname}/shared" - _test_native_distrib_single_node_spawn(init_method, "gloo", "cpu", timeout=timeout) + device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + _test_native_distrib_single_node_spawn(init_method, "gloo", device, timeout=timeout) @pytest.mark.distributed @@ -96,7 +99,8 @@ def test_native_distrib_single_node_spawn_nccl(init_method, dirname): if init_method == "FILE": init_method = f"file://{dirname}/shared" - _test_native_distrib_single_node_spawn(init_method, "nccl", "cuda") + device = torch.device("cuda") + _test_native_distrib_single_node_spawn(init_method, "nccl", device) @pytest.mark.distributed @@ -132,7 +136,8 @@ def _test_idist_methods_in_native_context(backend, device, local_rank): @pytest.mark.skipif(not has_native_dist_support, reason="Skip if no native dist support") def test_idist_methods_in_native_gloo_context(distributed_context_single_node_gloo): local_rank = distributed_context_single_node_gloo["local_rank"] - _test_idist_methods_in_native_context("gloo", "cpu", local_rank) + device = torch.device(f"cuda:{local_rank}" if torch.cuda.is_available() else "cpu") + _test_idist_methods_in_native_context("gloo", device, local_rank) @pytest.mark.distributed @@ -140,7 +145,8 @@ def test_idist_methods_in_native_gloo_context(distributed_context_single_node_gl @pytest.mark.skipif(torch.cuda.device_count() < 1, reason="Skip if no GPU") def test_idist_methods_in_native_nccl_context(distributed_context_single_node_nccl): local_rank = distributed_context_single_node_nccl["local_rank"] - _test_idist_methods_in_native_context("nccl", "cuda", local_rank) + device = torch.device(f"cuda:{local_rank}") + _test_idist_methods_in_native_context("nccl", device, local_rank) def _test_idist_methods_in_native_context_set_local_rank(backend, device, local_rank): @@ -166,8 +172,10 @@ def _test_idist_methods_in_native_context_set_local_rank(backend, device, local_ @pytest.mark.distributed @pytest.mark.skipif(not has_native_dist_support, reason="Skip if no native dist support") def test_idist_methods_in_native_gloo_context_set_local_rank(distributed_context_single_node_gloo): + local_rank = distributed_context_single_node_gloo["local_rank"] - _test_idist_methods_in_native_context_set_local_rank("gloo", "cpu", local_rank) + device = idist.device() + _test_idist_methods_in_native_context_set_local_rank("gloo", device, local_rank) @pytest.mark.distributed @@ -175,7 +183,8 
@@ def test_idist_methods_in_native_gloo_context_set_local_rank(distributed_context @pytest.mark.skipif(torch.cuda.device_count() < 1, reason="Skip if no GPU") def test_idist_methods_in_native_nccl_context_set_local_rank(distributed_context_single_node_nccl): local_rank = distributed_context_single_node_nccl["local_rank"] - _test_idist_methods_in_native_context_set_local_rank("nccl", "cuda", local_rank) + device = idist.device() + _test_idist_methods_in_native_context_set_local_rank("nccl", device, local_rank) @pytest.mark.distributed @@ -183,7 +192,7 @@ def test_idist_methods_in_native_nccl_context_set_local_rank(distributed_context @pytest.mark.skipif(torch.cuda.device_count() < 1, reason="Skip if no GPU") def test_idist__model_methods_nccl(distributed_context_single_node_nccl): - device = f"cuda:{distributed_context_single_node_nccl['local_rank']}" + device = idist.device() _test_distrib__get_max_length(device) @@ -191,7 +200,7 @@ def test_idist__model_methods_nccl(distributed_context_single_node_nccl): @pytest.mark.skipif(not has_native_dist_support, reason="Skip if no native dist support") def test_idist__model_methods_gloo(distributed_context_single_node_gloo): - device = "cpu" + device = idist.device() _test_distrib__get_max_length(device) @@ -200,7 +209,7 @@ def test_idist__model_methods_gloo(distributed_context_single_node_gloo): @pytest.mark.skipif(torch.cuda.device_count() < 1, reason="Skip if no GPU") def test_idist_all_reduce_nccl(distributed_context_single_node_nccl): - device = f"cuda:{distributed_context_single_node_nccl['local_rank']}" + device = idist.device() _test_distrib_all_reduce(device) @@ -208,7 +217,7 @@ def test_idist_all_reduce_nccl(distributed_context_single_node_nccl): @pytest.mark.skipif(not has_native_dist_support, reason="Skip if no native dist support") def test_idist_all_reduce_gloo(distributed_context_single_node_gloo): - device = "cpu" + device = idist.device() _test_distrib_all_reduce(device) @@ -217,7 +226,7 @@ def test_idist_all_reduce_gloo(distributed_context_single_node_gloo): @pytest.mark.skipif(torch.cuda.device_count() < 1, reason="Skip if no GPU") def test_idist_all_gather_nccl(distributed_context_single_node_nccl): - device = f"cuda:{distributed_context_single_node_nccl['local_rank']}" + device = idist.device() _test_distrib_all_gather(device) @@ -225,7 +234,7 @@ def test_idist_all_gather_nccl(distributed_context_single_node_nccl): @pytest.mark.skipif(not has_native_dist_support, reason="Skip if no native dist support") def test_idist_all_gather_gloo(distributed_context_single_node_gloo): - device = "cpu" + device = idist.device() _test_distrib_all_gather(device) @@ -234,7 +243,7 @@ def test_idist_all_gather_gloo(distributed_context_single_node_gloo): @pytest.mark.skipif(torch.cuda.device_count() < 1, reason="Skip if no GPU") def test_idist_broadcast_nccl(distributed_context_single_node_nccl): - device = f"cuda:{distributed_context_single_node_nccl['local_rank']}" + device = idist.device() _test_distrib_broadcast(device) @@ -242,7 +251,7 @@ def test_idist_broadcast_nccl(distributed_context_single_node_nccl): @pytest.mark.skipif(not has_native_dist_support, reason="Skip if no native dist support") def test_idist_broadcast_gloo(distributed_context_single_node_gloo): - device = "cpu" + device = idist.device() _test_distrib_broadcast(device) @@ -251,7 +260,7 @@ def test_idist_broadcast_gloo(distributed_context_single_node_gloo): @pytest.mark.skipif(torch.cuda.device_count() < 1, reason="Skip if no GPU") def 
test_idist_barrier_nccl(distributed_context_single_node_nccl): - device = f"cuda:{distributed_context_single_node_nccl['local_rank']}" + device = idist.device() _test_distrib_barrier(device) @@ -259,7 +268,7 @@ def test_idist_barrier_nccl(distributed_context_single_node_nccl): @pytest.mark.skipif(not has_native_dist_support, reason="Skip if no native dist support") def test_idist_barrier_gloo(distributed_context_single_node_gloo): - device = "cpu" + device = idist.device() _test_distrib_barrier(device) @@ -325,7 +334,8 @@ def test_idist_methods_overhead_nccl(distributed_context_single_node_nccl): @pytest.mark.distributed @pytest.mark.skipif(not has_native_dist_support, reason="Skip if no native dist support") def test_idist_one_rank_only_gloo(distributed_context_single_node_gloo): - device = "cpu" + + device = idist.device() _test_distrib_one_rank_only(device=device) _test_distrib_one_rank_only_with_engine(device=device) @@ -334,6 +344,7 @@ def test_idist_one_rank_only_gloo(distributed_context_single_node_gloo): @pytest.mark.skipif(not has_native_dist_support, reason="Skip if no native dist support") @pytest.mark.skipif(torch.cuda.device_count() < 1, reason="Skip if no GPU") def test_idist_one_rank_only_nccl(local_rank, distributed_context_single_node_nccl): - device = f"cuda:{local_rank}" + + device = idist.device() _test_distrib_one_rank_only(device=device) _test_distrib_one_rank_only_with_engine(device=device) diff --git a/tests/ignite/engine/test_custom_events.py b/tests/ignite/engine/test_custom_events.py index 2a09e116b1ca..6c9bf230dce5 100644 --- a/tests/ignite/engine/test_custom_events.py +++ b/tests/ignite/engine/test_custom_events.py @@ -455,7 +455,7 @@ def _test(num_workers): data, batch_size=batch_size, num_workers=num_workers, - pin_memory="cuda" in device, + pin_memory="cuda" in torch.device(device).type, drop_last=True, shuffle=True, ) @@ -489,16 +489,19 @@ def test_every_event_filter_with_engine_with_dataloader(): @pytest.mark.distributed @pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support") -def test_distrib_cpu(distributed_context_single_node_gloo): - _test_every_event_filter_with_engine() - _test_every_event_filter_with_engine_with_dataloader("cpu") +def test_distrib_gloo_cpu_or_gpu(distributed_context_single_node_gloo): + + device = idist.device() + _test_every_event_filter_with_engine(device) + _test_every_event_filter_with_engine_with_dataloader(device) @pytest.mark.distributed @pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support") @pytest.mark.skipif(torch.cuda.device_count() < 1, reason="Skip if no GPU") -def test_distrib_gpu(distributed_context_single_node_nccl): - device = f"cuda:{distributed_context_single_node_nccl['local_rank']}" +def test_distrib_nccl_gpu(distributed_context_single_node_nccl): + + device = idist.device() _test_every_event_filter_with_engine(device) _test_every_event_filter_with_engine_with_dataloader(device) diff --git a/tests/ignite/engine/test_deterministic.py b/tests/ignite/engine/test_deterministic.py index c1cd8e90cb46..f7a581ba4343 100644 --- a/tests/ignite/engine/test_deterministic.py +++ b/tests/ignite/engine/test_deterministic.py @@ -262,7 +262,7 @@ def _test(epoch_length=None): data, batch_size=batch_size, num_workers=num_workers, - pin_memory="cuda" in device, + pin_memory="cuda" in torch.device(device).type, sampler=sampler, drop_last=True, shuffle=sampler is None, @@ -294,7 +294,7 @@ def _(engine): data, batch_size=batch_size, 
num_workers=num_workers, - pin_memory="cuda" in device, + pin_memory="cuda" in torch.device(device).type, sampler=sampler, drop_last=True, shuffle=sampler is None, @@ -370,7 +370,7 @@ def _test(epoch_length=None): data, batch_size=batch_size, num_workers=num_workers, - pin_memory="cuda" in device, + pin_memory="cuda" in torch.device(device).type, sampler=sampler, drop_last=True, shuffle=sampler is None, @@ -401,7 +401,7 @@ def _(engine): data, batch_size=batch_size, num_workers=num_workers, - pin_memory="cuda" in device, + pin_memory="cuda" in torch.device(device).type, sampler=sampler, drop_last=True, shuffle=sampler is None, @@ -563,16 +563,18 @@ def test_resume_random_data_iterator_from_iter(): @pytest.mark.distributed @pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support") @pytest.mark.skipif(torch.cuda.device_count() < 1, reason="Skip if no GPU") -def test_distrib_gpu(distributed_context_single_node_nccl): - device = f"cuda:{distributed_context_single_node_nccl['local_rank']}" +def test_distrib_nccl_gpu(distributed_context_single_node_nccl): + + device = idist.device() _test_resume_random_dataloader_from_iter(device, setup_sampler, sampler_type="distributed") _test_resume_random_dataloader_from_epoch(device, setup_sampler, sampler_type="distributed") @pytest.mark.distributed @pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support") -def test_distrib_cpu(distributed_context_single_node_gloo): - device = "cpu" +def test_distrib_gloo_cpu_or_gpu(distributed_context_single_node_gloo): + + device = idist.device() _test_resume_random_dataloader_from_iter(device, setup_sampler, sampler_type="distributed") _test_resume_random_dataloader_from_epoch(device, setup_sampler, sampler_type="distributed") @@ -581,8 +583,9 @@ def test_distrib_cpu(distributed_context_single_node_gloo): @pytest.mark.multinode_distributed @pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support") @pytest.mark.skipif("MULTINODE_DISTRIB" not in os.environ, reason="Skip if not multi-node distributed") -def test_multinode_distrib_cpu(distributed_context_multi_node_gloo): - device = "cpu" +def test_multinode_distrib_gloo_cpu_or_gpu(distributed_context_multi_node_gloo): + + device = idist.device() _test_resume_random_dataloader_from_iter(device, setup_sampler, sampler_type="distributed") _test_resume_random_dataloader_from_epoch(device, setup_sampler, sampler_type="distributed") @@ -590,8 +593,9 @@ def test_multinode_distrib_cpu(distributed_context_multi_node_gloo): @pytest.mark.multinode_distributed @pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support") @pytest.mark.skipif("GPU_MULTINODE_DISTRIB" not in os.environ, reason="Skip if not multi-node distributed") -def test_multinode_distrib_gpu(distributed_context_multi_node_nccl): - device = f"cuda:{distributed_context_multi_node_nccl['local_rank']}" +def test_multinode_distrib_nccl_gpu(distributed_context_multi_node_nccl): + + device = idist.device() _test_resume_random_dataloader_from_iter(device, setup_sampler, sampler_type="distributed") _test_resume_random_dataloader_from_epoch(device, setup_sampler, sampler_type="distributed") diff --git a/tests/ignite/engine/test_engine.py b/tests/ignite/engine/test_engine.py index 3500ad4328a1..ebbdf74c5c5e 100644 --- a/tests/ignite/engine/test_engine.py +++ b/tests/ignite/engine/test_engine.py @@ -500,14 +500,14 @@ def test_run_check_triggered_events_on_iterator(): 
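# The pin_memory hunks above replace `"cuda" in device` with
# `"cuda" in torch.device(device).type` so the check accepts both strings and
# torch.device objects; `"cuda" in device` raises TypeError when `device` is a
# torch.device. A small self-contained illustration (hypothetical helper name):
import torch

def wants_pin_memory(device) -> bool:
    # Normalize first: .type is "cuda" or "cpu", with any ":<index>" dropped.
    return "cuda" in torch.device(device).type

assert wants_pin_memory("cuda:1")
assert not wants_pin_memory(torch.device("cpu"))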
@pytest.mark.distributed @pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support") @pytest.mark.skipif(torch.cuda.device_count() < 1, reason="Skip if no GPU") -def test_distrib_gpu(distributed_context_single_node_nccl): +def test_distrib_nccl_gpu(distributed_context_single_node_nccl): _test_run_check_triggered_events_on_iterator() _test_run_check_triggered_events() @pytest.mark.distributed @pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support") -def test_distrib_cpu(distributed_context_single_node_gloo): +def test_distrib_gloo_cpu_or_gpu(distributed_context_single_node_gloo): _test_run_check_triggered_events_on_iterator() _test_run_check_triggered_events() @@ -515,7 +515,7 @@ def test_distrib_cpu(distributed_context_single_node_gloo): @pytest.mark.multinode_distributed @pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support") @pytest.mark.skipif("MULTINODE_DISTRIB" not in os.environ, reason="Skip if not multi-node distributed") -def test_multinode_distrib_cpu(distributed_context_multi_node_gloo): +def test_multinode_distrib_gloo_cpu_or_gpu(distributed_context_multi_node_gloo): _test_run_check_triggered_events_on_iterator() _test_run_check_triggered_events() @@ -523,7 +523,7 @@ def test_multinode_distrib_cpu(distributed_context_multi_node_gloo): @pytest.mark.multinode_distributed @pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support") @pytest.mark.skipif("GPU_MULTINODE_DISTRIB" not in os.environ, reason="Skip if not multi-node distributed") -def test_multinode_distrib_gpu(distributed_context_multi_node_nccl): +def test_multinode_distrib_nccl_gpu(distributed_context_multi_node_nccl): _test_run_check_triggered_events_on_iterator() _test_run_check_triggered_events() diff --git a/tests/ignite/handlers/test_checkpoint.py b/tests/ignite/handlers/test_checkpoint.py index 2319be61f92b..de35a1eb4247 100644 --- a/tests/ignite/handlers/test_checkpoint.py +++ b/tests/ignite/handlers/test_checkpoint.py @@ -1188,8 +1188,9 @@ def _test_checkpoint_load_objects_ddp(device): @pytest.mark.distributed @pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support") -def test_distrib_cpu(distributed_context_single_node_gloo, get_rank_zero_dirname): - device = torch.device("cpu") +def test_distrib_gloo_cpu_or_gpu(distributed_context_single_node_gloo, get_rank_zero_dirname): + + device = idist.device() dirname = get_rank_zero_dirname() _test_save_model_optimizer_lr_scheduler_with_state_dict(device, os.path.join(dirname, "1")) _test_save_model_optimizer_lr_scheduler_with_state_dict(device, os.path.join(dirname, "2"), on_zero_rank=True) @@ -1200,7 +1201,8 @@ def test_distrib_cpu(distributed_context_single_node_gloo, get_rank_zero_dirname @pytest.mark.distributed @pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support") @pytest.mark.skipif(torch.cuda.device_count() < 1, reason="Skip if no GPU") -def test_distrib_gpu(distributed_context_single_node_nccl, get_rank_zero_dirname): +def test_distrib_nccl_gpu(distributed_context_single_node_nccl, get_rank_zero_dirname): + device = idist.device() dirname = get_rank_zero_dirname() _test_save_model_optimizer_lr_scheduler_with_state_dict(device, os.path.join(dirname, "1")) diff --git a/tests/ignite/handlers/test_early_stopping.py b/tests/ignite/handlers/test_early_stopping.py index 712338e11f45..66b96f757042 100644 --- 
a/tests/ignite/handlers/test_early_stopping.py +++ b/tests/ignite/handlers/test_early_stopping.py @@ -336,16 +336,18 @@ def evaluation(engine): @pytest.mark.distributed @pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support") @pytest.mark.skipif(torch.cuda.device_count() < 1, reason="Skip if no GPU") -def test_distrib_gpu(local_rank, distributed_context_single_node_nccl): - device = f"cuda:{local_rank}" +def test_distrib_nccl_gpu(distributed_context_single_node_nccl): + + device = idist.device() _test_distrib_with_engine_early_stopping(device) _test_distrib_integration_engine_early_stopping(device) @pytest.mark.distributed @pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support") -def test_distrib_cpu(local_rank, distributed_context_single_node_gloo): - device = "cpu" +def test_distrib_gloo_cpu_or_gpu(distributed_context_single_node_gloo): + + device = idist.device() _test_distrib_with_engine_early_stopping(device) _test_distrib_integration_engine_early_stopping(device) @@ -353,8 +355,9 @@ def test_distrib_cpu(local_rank, distributed_context_single_node_gloo): @pytest.mark.multinode_distributed @pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support") @pytest.mark.skipif("MULTINODE_DISTRIB" not in os.environ, reason="Skip if not multi-node distributed") -def test_multinode_distrib_cpu(distributed_context_multi_node_gloo): - device = "cpu" +def test_multinode_distrib_gloo_cpu_or_gpu(distributed_context_multi_node_gloo): + + device = idist.device() _test_distrib_with_engine_early_stopping(device) _test_distrib_integration_engine_early_stopping(device) @@ -362,7 +365,8 @@ def test_multinode_distrib_cpu(distributed_context_multi_node_gloo): @pytest.mark.multinode_distributed @pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support") @pytest.mark.skipif("GPU_MULTINODE_DISTRIB" not in os.environ, reason="Skip if not multi-node distributed") -def test_multinode_distrib_gpu(distributed_context_multi_node_nccl): - device = f"cuda:{distributed_context_multi_node_nccl['local_rank']}" +def test_multinode_distrib_nccl_gpu(distributed_context_multi_node_nccl): + + device = idist.device() _test_distrib_with_engine_early_stopping(device) _test_distrib_integration_engine_early_stopping(device) diff --git a/tests/ignite/metrics/nlp/test_bleu.py b/tests/ignite/metrics/nlp/test_bleu.py index c98143cf98d2..745678449830 100644 --- a/tests/ignite/metrics/nlp/test_bleu.py +++ b/tests/ignite/metrics/nlp/test_bleu.py @@ -184,15 +184,17 @@ def _test(metric_device): @pytest.mark.distributed @pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support") @pytest.mark.skipif(torch.cuda.device_count() < 1, reason="Skip if no GPU") -def test_distrib_gpu(local_rank, distributed_context_single_node_nccl): - device = torch.device(f"cuda:{local_rank}") +def test_distrib_nccl_gpu(distributed_context_single_node_nccl): + + device = idist.device() _test_distrib_integration(device) @pytest.mark.distributed @pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support") -def test_distrib_cpu(distributed_context_single_node_gloo): - device = torch.device("cpu") +def test_distrib_gloo_cpu_or_gpu(distributed_context_single_node_gloo): + + device = idist.device() _test_distrib_integration(device) @@ -210,16 +212,18 @@ def test_distrib_hvd(gloo_hvd_executor): @pytest.mark.multinode_distributed @pytest.mark.skipif(not 
idist.has_native_dist_support, reason="Skip if no native dist support") @pytest.mark.skipif("MULTINODE_DISTRIB" not in os.environ, reason="Skip if not multi-node distributed") -def test_multinode_distrib_cpu(distributed_context_multi_node_gloo): - device = torch.device("cpu") +def test_multinode_distrib_gloo_cpu_or_gpu(distributed_context_multi_node_gloo): + + device = idist.device() _test_distrib_integration(device) @pytest.mark.multinode_distributed @pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support") @pytest.mark.skipif("GPU_MULTINODE_DISTRIB" not in os.environ, reason="Skip if not multi-node distributed") -def test_multinode_distrib_gpu(distributed_context_multi_node_nccl): - device = torch.device(f"cuda:{distributed_context_multi_node_nccl['local_rank']}") +def test_multinode_distrib_nccl_gpu(distributed_context_multi_node_nccl): + + device = idist.device() _test_distrib_integration(device) diff --git a/tests/ignite/metrics/nlp/test_rouge.py b/tests/ignite/metrics/nlp/test_rouge.py index 40aafae189c2..7102a6f98f12 100644 --- a/tests/ignite/metrics/nlp/test_rouge.py +++ b/tests/ignite/metrics/nlp/test_rouge.py @@ -176,15 +176,17 @@ def _test(metric_device): @pytest.mark.distributed @pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support") @pytest.mark.skipif(torch.cuda.device_count() < 1, reason="Skip if no GPU") -def test_distrib_gpu(local_rank, distributed_context_single_node_nccl): - device = torch.device(f"cuda:{local_rank}") +def test_distrib_nccl_gpu(distributed_context_single_node_nccl): + + device = idist.device() _test_distrib_integration(device) @pytest.mark.distributed @pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support") -def test_distrib_cpu(distributed_context_single_node_gloo): - device = torch.device("cpu") +def test_distrib_gloo_cpu_or_gpu(distributed_context_single_node_gloo): + + device = idist.device() _test_distrib_integration(device) @@ -202,16 +204,18 @@ def test_distrib_hvd(gloo_hvd_executor): @pytest.mark.multinode_distributed @pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support") @pytest.mark.skipif("MULTINODE_DISTRIB" not in os.environ, reason="Skip if not multi-node distributed") -def test_multinode_distrib_cpu(distributed_context_multi_node_gloo): - device = torch.device("cpu") +def test_multinode_distrib_gloo_cpu_or_gpu(distributed_context_multi_node_gloo): + + device = idist.device() _test_distrib_integration(device) @pytest.mark.multinode_distributed @pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support") @pytest.mark.skipif("GPU_MULTINODE_DISTRIB" not in os.environ, reason="Skip if not multi-node distributed") -def test_multinode_distrib_gpu(distributed_context_multi_node_nccl): - device = torch.device(f"cuda:{distributed_context_multi_node_nccl['local_rank']}") +def test_multinode_distrib_nccl_gpu(distributed_context_multi_node_nccl): + + device = idist.device() _test_distrib_integration(device) diff --git a/tests/ignite/metrics/test_accumulation.py b/tests/ignite/metrics/test_accumulation.py index e47a9273e583..1034c3cfd21c 100644 --- a/tests/ignite/metrics/test_accumulation.py +++ b/tests/ignite/metrics/test_accumulation.py @@ -425,9 +425,9 @@ def _test_apex_average(device, amp_mode, opt_level): @pytest.mark.distributed @pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support") 
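# The spawn and Horovod executor tests in these files choose the process count
# from the visible GPUs and fall back to a fixed CPU pool. A minimal sketch of
# that selection (the function name is illustrative, not part of the suite):
import torch

def pick_spawn_config(cpu_workers: int = 4):
    # One worker per GPU when CUDA is visible, otherwise `cpu_workers` CPU processes.
    if torch.cuda.is_available():
        return torch.cuda.device_count(), torch.device("cuda")
    return cpu_workers, torch.device("cpu")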
@pytest.mark.skipif(torch.cuda.device_count() < 1, reason="Skip if no GPU") -def test_distrib_gpu(distributed_context_single_node_nccl): +def test_distrib_nccl_gpu(distributed_context_single_node_nccl): - device = torch.device(f"cuda:{distributed_context_single_node_nccl['local_rank']}") + device = idist.device() _test_distrib_variable_accumulation(device) _test_distrib_average(device) _test_distrib_geom_average(device) @@ -437,21 +437,9 @@ def test_distrib_gpu(distributed_context_single_node_nccl): @pytest.mark.distributed @pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support") -def test_distrib_cpu(distributed_context_single_node_gloo): - - device = torch.device("cpu") - _test_distrib_variable_accumulation(device) - _test_distrib_average(device) - _test_distrib_geom_average(device) - _test_distrib_integration(device) - _test_distrib_accumulator_device(device) - +def test_distrib_gloo_cpu_or_gpu(distributed_context_single_node_gloo): -@pytest.mark.multinode_distributed -@pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support") -@pytest.mark.skipif("MULTINODE_DISTRIB" not in os.environ, reason="Skip if not multi-node distributed") -def test_multinode_distrib_cpu(distributed_context_multi_node_gloo): - device = torch.device("cpu") + device = idist.device() _test_distrib_variable_accumulation(device) _test_distrib_average(device) _test_distrib_geom_average(device) @@ -464,7 +452,7 @@ def test_multinode_distrib_cpu(distributed_context_multi_node_gloo): @pytest.mark.skipif("WORLD_SIZE" in os.environ, reason="Skip if launched as multiproc") def test_distrib_hvd(gloo_hvd_executor): - device = torch.device("cpu" if not torch.cuda.is_available() else "cuda") + device = idist.device() nproc = 4 if not torch.cuda.is_available() else torch.cuda.device_count() gloo_hvd_executor(_test_distrib_variable_accumulation, (device,), np=nproc, do_init=True) @@ -474,22 +462,11 @@ def test_distrib_hvd(gloo_hvd_executor): gloo_hvd_executor(_test_distrib_accumulator_device, (device,), np=nproc, do_init=True) -@pytest.mark.multinode_distributed -@pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support") -@pytest.mark.skipif("GPU_MULTINODE_DISTRIB" not in os.environ, reason="Skip if not multi-node distributed") -def test_multinode_distrib_gpu(distributed_context_multi_node_nccl): - device = torch.device(f"cuda:{distributed_context_multi_node_nccl['local_rank']}") - _test_distrib_variable_accumulation(device) - _test_distrib_average(device) - _test_distrib_geom_average(device) - _test_distrib_integration(device) - _test_distrib_accumulator_device(device) - - @pytest.mark.tpu @pytest.mark.skipif("NUM_TPU_WORKERS" in os.environ, reason="Skip if NUM_TPU_WORKERS is in env vars") @pytest.mark.skipif(not idist.has_xla_support, reason="Skip if no PyTorch XLA package") def test_distrib_single_device_xla(): + device = idist.device() _test_distrib_variable_accumulation(device) _test_distrib_average(device) @@ -523,3 +500,29 @@ def test_apex_average_on_cuda(): _test_apex_average(device, amp_mode="apex", opt_level="O1") _test_apex_average(device, amp_mode="apex", opt_level="O2") _test_apex_average(device, amp_mode="apex", opt_level="O3") + + +@pytest.mark.multinode_distributed +@pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support") +@pytest.mark.skipif("MULTINODE_DISTRIB" not in os.environ, reason="Skip if not multi-node distributed") +def 
test_multinode_distrib_gloo_cpu_or_gpu(distributed_context_multi_node_gloo): + + device = idist.device() + _test_distrib_variable_accumulation(device) + _test_distrib_average(device) + _test_distrib_geom_average(device) + _test_distrib_integration(device) + _test_distrib_accumulator_device(device) + + +@pytest.mark.multinode_distributed +@pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support") +@pytest.mark.skipif("GPU_MULTINODE_DISTRIB" not in os.environ, reason="Skip if not multi-node distributed") +def test_multinode_distrib_nccl_gpu(distributed_context_multi_node_nccl): + + device = idist.device() + _test_distrib_variable_accumulation(device) + _test_distrib_average(device) + _test_distrib_geom_average(device) + _test_distrib_integration(device) + _test_distrib_accumulator_device(device) diff --git a/tests/ignite/metrics/test_accuracy.py b/tests/ignite/metrics/test_accuracy.py index 6f84735cc3ee..c997d2f62a0a 100644 --- a/tests/ignite/metrics/test_accuracy.py +++ b/tests/ignite/metrics/test_accuracy.py @@ -503,8 +503,9 @@ def _test_distrib_accumulator_device(device): @pytest.mark.distributed @pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support") @pytest.mark.skipif(torch.cuda.device_count() < 1, reason="Skip if no GPU") -def test_distrib_gpu(distributed_context_single_node_nccl): - device = torch.device(f"cuda:{distributed_context_single_node_nccl['local_rank']}") +def test_distrib_nccl_gpu(distributed_context_single_node_nccl): + + device = idist.device() _test_distrib_multilabel_input_NHW(device) _test_distrib_integration_multiclass(device) _test_distrib_integration_multilabel(device) @@ -513,9 +514,9 @@ def test_distrib_gpu(distributed_context_single_node_nccl): @pytest.mark.distributed @pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support") -def test_distrib_cpu(distributed_context_single_node_gloo): +def test_distrib_gloo_cpu_or_gpu(distributed_context_single_node_gloo): - device = torch.device("cpu") + device = idist.device() _test_distrib_multilabel_input_NHW(device) _test_distrib_integration_multiclass(device) _test_distrib_integration_multilabel(device) @@ -536,22 +537,19 @@ def test_distrib_hvd(gloo_hvd_executor): gloo_hvd_executor(_test_distrib_accumulator_device, (device,), np=nproc, do_init=True) -@pytest.mark.multinode_distributed -@pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support") -@pytest.mark.skipif("MULTINODE_DISTRIB" not in os.environ, reason="Skip if not multi-node distributed") -def test_multinode_distrib_cpu(distributed_context_multi_node_gloo): - device = torch.device("cpu") +@pytest.mark.tpu +@pytest.mark.skipif("NUM_TPU_WORKERS" in os.environ, reason="Skip if NUM_TPU_WORKERS is in env vars") +@pytest.mark.skipif(not idist.has_xla_support, reason="Skip if no PyTorch XLA package") +def test_distrib_single_device_xla(): + device = idist.device() _test_distrib_multilabel_input_NHW(device) _test_distrib_integration_multiclass(device) _test_distrib_integration_multilabel(device) _test_distrib_accumulator_device(device) -@pytest.mark.multinode_distributed -@pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support") -@pytest.mark.skipif("GPU_MULTINODE_DISTRIB" not in os.environ, reason="Skip if not multi-node distributed") -def test_multinode_distrib_gpu(distributed_context_multi_node_nccl): - device = 
torch.device(f"cuda:{distributed_context_multi_node_nccl['local_rank']}") +def _test_distrib_xla_nprocs(index): + device = idist.device() _test_distrib_multilabel_input_NHW(device) _test_distrib_integration_multiclass(device) _test_distrib_integration_multilabel(device) @@ -559,9 +557,18 @@ def test_multinode_distrib_gpu(distributed_context_multi_node_nccl): @pytest.mark.tpu -@pytest.mark.skipif("NUM_TPU_WORKERS" in os.environ, reason="Skip if NUM_TPU_WORKERS is in env vars") +@pytest.mark.skipif("NUM_TPU_WORKERS" not in os.environ, reason="Skip if no NUM_TPU_WORKERS in env vars") @pytest.mark.skipif(not idist.has_xla_support, reason="Skip if no PyTorch XLA package") -def test_distrib_single_device_xla(): +def test_distrib_xla_nprocs(xmp_executor): + n = int(os.environ["NUM_TPU_WORKERS"]) + xmp_executor(_test_distrib_xla_nprocs, args=(), nprocs=n) + + +@pytest.mark.multinode_distributed +@pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support") +@pytest.mark.skipif("MULTINODE_DISTRIB" not in os.environ, reason="Skip if not multi-node distributed") +def test_multinode_distrib_gloo_cpu_or_gpu(distributed_context_multi_node_gloo): + device = idist.device() _test_distrib_multilabel_input_NHW(device) _test_distrib_integration_multiclass(device) @@ -569,17 +576,13 @@ def test_distrib_single_device_xla(): _test_distrib_accumulator_device(device) -def _test_distrib_xla_nprocs(index): +@pytest.mark.multinode_distributed +@pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support") +@pytest.mark.skipif("GPU_MULTINODE_DISTRIB" not in os.environ, reason="Skip if not multi-node distributed") +def test_multinode_distrib_nccl_gpu(distributed_context_multi_node_nccl): + device = idist.device() _test_distrib_multilabel_input_NHW(device) _test_distrib_integration_multiclass(device) _test_distrib_integration_multilabel(device) _test_distrib_accumulator_device(device) - - -@pytest.mark.tpu -@pytest.mark.skipif("NUM_TPU_WORKERS" not in os.environ, reason="Skip if no NUM_TPU_WORKERS in env vars") -@pytest.mark.skipif(not idist.has_xla_support, reason="Skip if no PyTorch XLA package") -def test_distrib_xla_nprocs(xmp_executor): - n = int(os.environ["NUM_TPU_WORKERS"]) - xmp_executor(_test_distrib_xla_nprocs, args=(), nprocs=n) diff --git a/tests/ignite/metrics/test_classification_report.py b/tests/ignite/metrics/test_classification_report.py index 3a15987a6a04..57b80cfcf364 100644 --- a/tests/ignite/metrics/test_classification_report.py +++ b/tests/ignite/metrics/test_classification_report.py @@ -141,23 +141,23 @@ def update(engine, i): _test(metric_device, 2, ["0", "1", "2", "3", "4", "5", "6"]) -@pytest.mark.multinode_distributed +@pytest.mark.distributed @pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support") -@pytest.mark.skipif("GPU_MULTINODE_DISTRIB" not in os.environ, reason="Skip if not multi-node distributed") -def test_multinode_distrib_gpu(distributed_context_multi_node_nccl): +@pytest.mark.skipif(torch.cuda.device_count() < 1, reason="Skip if no GPU") +def test_distrib_nccl_gpu(distributed_context_single_node_nccl): - device = torch.device(f"cuda:{distributed_context_multi_node_nccl['local_rank']}") + device = idist.device() _test_integration_multiclass(device, True) _test_integration_multiclass(device, False) _test_integration_multilabel(device, True) _test_integration_multilabel(device, False) -@pytest.mark.multinode_distributed +@pytest.mark.distributed @pytest.mark.skipif(not 
idist.has_native_dist_support, reason="Skip if no native dist support") -@pytest.mark.skipif("MULTINODE_DISTRIB" not in os.environ, reason="Skip if not multi-node distributed") -def test_multinode_distrib_cpu(distributed_context_multi_node_gloo): - device = torch.device("cpu") +def test_distrib_gloo_cpu_or_gpu(local_rank, distributed_context_single_node_gloo): + + device = idist.device() _test_integration_multiclass(device, True) _test_integration_multiclass(device, False) _test_integration_multilabel(device, True) @@ -165,13 +165,17 @@ def test_multinode_distrib_cpu(distributed_context_multi_node_gloo): @pytest.mark.distributed -@pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support") -def test_distrib_cpu(local_rank, distributed_context_single_node_gloo): - device = torch.device("cpu") - _test_integration_multiclass(device, True) - _test_integration_multiclass(device, False) - _test_integration_multilabel(device, True) - _test_integration_multilabel(device, False) +@pytest.mark.skipif(not idist.has_hvd_support, reason="Skip if no Horovod dist support") +@pytest.mark.skipif("WORLD_SIZE" in os.environ, reason="Skip if launched as multiproc") +def test_distrib_hvd(gloo_hvd_executor): + + device = torch.device("cpu" if not torch.cuda.is_available() else "cuda") + nproc = 4 if not torch.cuda.is_available() else torch.cuda.device_count() + + gloo_hvd_executor(_test_integration_multiclass, (device, True), np=nproc, do_init=True) + gloo_hvd_executor(_test_integration_multiclass, (device, False), np=nproc, do_init=True) + gloo_hvd_executor(_test_integration_multilabel, (device, True), np=nproc, do_init=True) + gloo_hvd_executor(_test_integration_multilabel, (device, False), np=nproc, do_init=True) def _test_distrib_xla_nprocs(index): @@ -197,3 +201,27 @@ def to_numpy_multilabel(y): num_classes = y.shape[0] y = y.reshape((num_classes, -1)).transpose(1, 0) return y + + +@pytest.mark.multinode_distributed +@pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support") +@pytest.mark.skipif("MULTINODE_DISTRIB" not in os.environ, reason="Skip if not multi-node distributed") +def test_multinode_distrib_gloo_cpu_or_gpu(distributed_context_multi_node_gloo): + + device = idist.device() + _test_integration_multiclass(device, True) + _test_integration_multiclass(device, False) + _test_integration_multilabel(device, True) + _test_integration_multilabel(device, False) + + +@pytest.mark.multinode_distributed +@pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support") +@pytest.mark.skipif("GPU_MULTINODE_DISTRIB" not in os.environ, reason="Skip if not multi-node distributed") +def test_multinode_distrib_nccl_gpu(distributed_context_multi_node_nccl): + + device = idist.device() + _test_integration_multiclass(device, True) + _test_integration_multiclass(device, False) + _test_integration_multilabel(device, True) + _test_integration_multilabel(device, False) diff --git a/tests/ignite/metrics/test_confusion_matrix.py b/tests/ignite/metrics/test_confusion_matrix.py index 1745b532408f..af85e060d9b3 100644 --- a/tests/ignite/metrics/test_confusion_matrix.py +++ b/tests/ignite/metrics/test_confusion_matrix.py @@ -589,18 +589,18 @@ def _test(average=None): @pytest.mark.distributed @pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support") @pytest.mark.skipif(torch.cuda.device_count() < 1, reason="Skip if no GPU") -def test_distrib_gpu(local_rank, 
distributed_context_single_node_nccl): +def test_distrib_nccl_gpu(distributed_context_single_node_nccl): - device = torch.device(f"cuda:{local_rank}") + device = idist.device() _test_distrib_multiclass_images(device) _test_distrib_accumulator_device(device) @pytest.mark.distributed @pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support") -def test_distrib_cpu(distributed_context_single_node_gloo): +def test_distrib_gloo_cpu_or_gpu(distributed_context_single_node_gloo): - device = torch.device("cpu") + device = idist.device() _test_distrib_multiclass_images(device) _test_distrib_accumulator_device(device) @@ -617,24 +617,6 @@ def test_distrib_hvd(gloo_hvd_executor): gloo_hvd_executor(_test_distrib_accumulator_device, (device,), np=nproc, do_init=True) -@pytest.mark.multinode_distributed -@pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support") -@pytest.mark.skipif("MULTINODE_DISTRIB" not in os.environ, reason="Skip if not multi-node distributed") -def test_multinode_distrib_cpu(distributed_context_multi_node_gloo): - device = torch.device("cpu") - _test_distrib_multiclass_images(device) - _test_distrib_accumulator_device(device) - - -@pytest.mark.multinode_distributed -@pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support") -@pytest.mark.skipif("GPU_MULTINODE_DISTRIB" not in os.environ, reason="Skip if not multi-node distributed") -def test_multinode_distrib_gpu(distributed_context_multi_node_nccl): - device = torch.device(f"cuda:{distributed_context_multi_node_nccl['local_rank']}") - _test_distrib_multiclass_images(device) - _test_distrib_accumulator_device(device) - - @pytest.mark.tpu @pytest.mark.skipif("NUM_TPU_WORKERS" in os.environ, reason="Skip if NUM_TPU_WORKERS is in env vars") @pytest.mark.skipif(not idist.has_xla_support, reason="Skip if no PyTorch XLA package") @@ -656,3 +638,23 @@ def _test_distrib_xla_nprocs(index): def test_distrib_xla_nprocs(xmp_executor): n = int(os.environ["NUM_TPU_WORKERS"]) xmp_executor(_test_distrib_xla_nprocs, args=(), nprocs=n) + + +@pytest.mark.multinode_distributed +@pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support") +@pytest.mark.skipif("MULTINODE_DISTRIB" not in os.environ, reason="Skip if not multi-node distributed") +def test_multinode_distrib_gloo_cpu_or_gpu(distributed_context_multi_node_gloo): + + device = idist.device() + _test_distrib_multiclass_images(device) + _test_distrib_accumulator_device(device) + + +@pytest.mark.multinode_distributed +@pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support") +@pytest.mark.skipif("GPU_MULTINODE_DISTRIB" not in os.environ, reason="Skip if not multi-node distributed") +def test_multinode_distrib_nccl_gpu(distributed_context_multi_node_nccl): + + device = idist.device() + _test_distrib_multiclass_images(device) + _test_distrib_accumulator_device(device) diff --git a/tests/ignite/metrics/test_epoch_metric.py b/tests/ignite/metrics/test_epoch_metric.py index ef660a16be41..70296d484180 100644 --- a/tests/ignite/metrics/test_epoch_metric.py +++ b/tests/ignite/metrics/test_epoch_metric.py @@ -193,14 +193,18 @@ def assert_data_fn(all_preds, all_targets): @pytest.mark.distributed @pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support") @pytest.mark.skipif(torch.cuda.device_count() < 1, reason="Skip if no GPU") -def test_distrib_gpu(distributed_context_single_node_nccl): - 
_test_distrib_integration(device="cuda") +def test_distrib_nccl_gpu(distributed_context_single_node_nccl): + + device = idist.device() + _test_distrib_integration(device) @pytest.mark.distributed @pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support") -def test_distrib_cpu(distributed_context_single_node_gloo): - _test_distrib_integration(device="cpu") +def test_distrib_gloo_cpu_or_gpu(distributed_context_single_node_gloo): + + device = idist.device() + _test_distrib_integration(device) @pytest.mark.tpu diff --git a/tests/ignite/metrics/test_fbeta.py b/tests/ignite/metrics/test_fbeta.py index 8e38d516140c..27eb28905189 100644 --- a/tests/ignite/metrics/test_fbeta.py +++ b/tests/ignite/metrics/test_fbeta.py @@ -146,15 +146,17 @@ def update(engine, i): @pytest.mark.distributed @pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support") @pytest.mark.skipif(torch.cuda.device_count() < 1, reason="Skip if no GPU") -def test_distrib_gpu(local_rank, distributed_context_single_node_nccl): - device = torch.device(f"cuda:{local_rank}") +def test_distrib_nccl_gpu(distributed_context_single_node_nccl): + + device = idist.device() _test_distrib_integration(device) @pytest.mark.distributed @pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support") -def test_distrib_cpu(distributed_context_single_node_gloo): - device = torch.device("cpu") +def test_distrib_gloo_cpu_or_gpu(distributed_context_single_node_gloo): + + device = idist.device() _test_distrib_integration(device) @@ -169,22 +171,6 @@ def test_distrib_hvd(gloo_hvd_executor): gloo_hvd_executor(_test_distrib_integration, (device,), np=nproc, do_init=True) -@pytest.mark.multinode_distributed -@pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support") -@pytest.mark.skipif("MULTINODE_DISTRIB" not in os.environ, reason="Skip if not multi-node distributed") -def test_multinode_distrib_cpu(distributed_context_multi_node_gloo): - device = torch.device("cpu") - _test_distrib_integration(device) - - -@pytest.mark.multinode_distributed -@pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support") -@pytest.mark.skipif("GPU_MULTINODE_DISTRIB" not in os.environ, reason="Skip if not multi-node distributed") -def test_multinode_distrib_gpu(distributed_context_multi_node_nccl): - device = torch.device(f"cuda:{distributed_context_multi_node_nccl['local_rank']}") - _test_distrib_integration(device) - - @pytest.mark.tpu @pytest.mark.skipif("NUM_TPU_WORKERS" in os.environ, reason="Skip if NUM_TPU_WORKERS is in env vars") @pytest.mark.skipif(not idist.has_xla_support, reason="Skip if no PyTorch XLA package") @@ -204,3 +190,21 @@ def _test_distrib_xla_nprocs(index): def test_distrib_xla_nprocs(xmp_executor): n = int(os.environ["NUM_TPU_WORKERS"]) xmp_executor(_test_distrib_xla_nprocs, args=(), nprocs=n) + + +@pytest.mark.multinode_distributed +@pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support") +@pytest.mark.skipif("MULTINODE_DISTRIB" not in os.environ, reason="Skip if not multi-node distributed") +def test_multinode_distrib_gloo_cpu_or_gpu(distributed_context_multi_node_gloo): + + device = idist.device() + _test_distrib_integration(device) + + +@pytest.mark.multinode_distributed +@pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support") +@pytest.mark.skipif("GPU_MULTINODE_DISTRIB" not in os.environ, 
reason="Skip if not multi-node distributed") +def test_multinode_distrib_nccl_gpu(distributed_context_multi_node_nccl): + + device = idist.device() + _test_distrib_integration(device) diff --git a/tests/ignite/metrics/test_loss.py b/tests/ignite/metrics/test_loss.py index 353933b0ee04..23749b960072 100644 --- a/tests/ignite/metrics/test_loss.py +++ b/tests/ignite/metrics/test_loss.py @@ -181,18 +181,18 @@ def test_sum_detached(): @pytest.mark.distributed @pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support") @pytest.mark.skipif(torch.cuda.device_count() < 1, reason="Skip if no GPU") -def test_distrib_gpu(local_rank, distributed_context_single_node_nccl): +def test_distrib_nccl_gpu(distributed_context_single_node_nccl): - device = torch.device(f"cuda:{local_rank}") + device = idist.device() _test_distrib_compute_on_criterion(device, y_test_1(), y_test_2()) _test_distrib_accumulator_device(device, y_test_1()) @pytest.mark.distributed @pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support") -def test_distrib_cpu(distributed_context_single_node_gloo): +def test_distrib_gloo_cpu_or_gpu(distributed_context_single_node_gloo): - device = torch.device("cpu") + device = idist.device() _test_distrib_compute_on_criterion(device, y_test_1(), y_test_2()) _test_distrib_accumulator_device(device, y_test_1()) @@ -209,34 +209,18 @@ def test_distrib_hvd(gloo_hvd_executor): gloo_hvd_executor(_test_distrib_accumulator_device, (device, y_test_1()), np=nproc, do_init=True) -@pytest.mark.multinode_distributed -@pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support") -@pytest.mark.skipif("MULTINODE_DISTRIB" not in os.environ, reason="Skip if not multi-node distributed") -def test_multinode_distrib_cpu(distributed_context_multi_node_gloo): - device = torch.device("cpu") - _test_distrib_compute_on_criterion(device, y_test_1(), y_test_2(), tol=1e-6) - _test_distrib_accumulator_device(device, y_test_1()) - - -@pytest.mark.multinode_distributed -@pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support") -@pytest.mark.skipif("GPU_MULTINODE_DISTRIB" not in os.environ, reason="Skip if not multi-node distributed") -def test_multinode_distrib_gpu(distributed_context_multi_node_nccl): - device = torch.device(f"cuda:{distributed_context_multi_node_nccl['local_rank']}") - _test_distrib_compute_on_criterion(device, y_test_1(), y_test_2()) - _test_distrib_accumulator_device(device, y_test_1()) - - @pytest.mark.tpu @pytest.mark.skipif("NUM_TPU_WORKERS" in os.environ, reason="Skip if NUM_TPU_WORKERS is in env vars") @pytest.mark.skipif(not idist.has_xla_support, reason="Skip if no PyTorch XLA package") def test_distrib_single_device_xla(): + device = idist.device() _test_distrib_compute_on_criterion(device, y_test_1(), y_test_2()) _test_distrib_accumulator_device(device, y_test_1()) def _test_distrib_xla_nprocs(index): + device = idist.device() _test_distrib_compute_on_criterion(device, y_test_1(), y_test_2()) _test_distrib_accumulator_device(device, y_test_1()) @@ -248,3 +232,23 @@ def _test_distrib_xla_nprocs(index): def test_distrib_xla_nprocs(xmp_executor): n = int(os.environ["NUM_TPU_WORKERS"]) xmp_executor(_test_distrib_xla_nprocs, args=(), nprocs=n) + + +@pytest.mark.multinode_distributed +@pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support") +@pytest.mark.skipif("MULTINODE_DISTRIB" not in os.environ, reason="Skip if not 
multi-node distributed") +def test_multinode_distrib_gloo_cpu_or_gpu(distributed_context_multi_node_gloo): + + device = idist.device() + _test_distrib_compute_on_criterion(device, y_test_1(), y_test_2(), tol=1e-6) + _test_distrib_accumulator_device(device, y_test_1()) + + +@pytest.mark.multinode_distributed +@pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support") +@pytest.mark.skipif("GPU_MULTINODE_DISTRIB" not in os.environ, reason="Skip if not multi-node distributed") +def test_multinode_distrib_nccl_gpu(distributed_context_multi_node_nccl): + + device = idist.device() + _test_distrib_compute_on_criterion(device, y_test_1(), y_test_2()) + _test_distrib_accumulator_device(device, y_test_1()) diff --git a/tests/ignite/metrics/test_mean_absolute_error.py b/tests/ignite/metrics/test_mean_absolute_error.py index cfd662dedddf..0e9e2a75140f 100644 --- a/tests/ignite/metrics/test_mean_absolute_error.py +++ b/tests/ignite/metrics/test_mean_absolute_error.py @@ -129,16 +129,18 @@ def test_accumulator_detached(): @pytest.mark.distributed @pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support") @pytest.mark.skipif(torch.cuda.device_count() < 1, reason="Skip if no GPU") -def test_distrib_gpu(local_rank, distributed_context_single_node_nccl): - device = torch.device(f"cuda:{local_rank}") +def test_distrib_nccl_gpu(distributed_context_single_node_nccl): + + device = idist.device() _test_distrib_integration(device) _test_distrib_accumulator_device(device) @pytest.mark.distributed @pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support") -def test_distrib_cpu(distributed_context_single_node_gloo): - device = torch.device("cpu") +def test_distrib_gloo_cpu_or_gpu(distributed_context_single_node_gloo): + + device = idist.device() _test_distrib_integration(device) _test_distrib_accumulator_device(device) @@ -158,8 +160,9 @@ def test_distrib_hvd(gloo_hvd_executor): @pytest.mark.multinode_distributed @pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support") @pytest.mark.skipif("MULTINODE_DISTRIB" not in os.environ, reason="Skip if not multi-node distributed") -def test_multinode_distrib_cpu(distributed_context_multi_node_gloo): - device = torch.device("cpu") +def test_multinode_distrib_gloo_cpu_or_gpu(distributed_context_multi_node_gloo): + + device = idist.device() _test_distrib_integration(device) _test_distrib_accumulator_device(device) @@ -167,8 +170,9 @@ def test_multinode_distrib_cpu(distributed_context_multi_node_gloo): @pytest.mark.multinode_distributed @pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support") @pytest.mark.skipif("GPU_MULTINODE_DISTRIB" not in os.environ, reason="Skip if not multi-node distributed") -def test_multinode_distrib_gpu(distributed_context_multi_node_nccl): - device = torch.device(f"cuda:{distributed_context_multi_node_nccl['local_rank']}") +def test_multinode_distrib_nccl_gpu(distributed_context_multi_node_nccl): + + device = idist.device() _test_distrib_integration(device) _test_distrib_accumulator_device(device) @@ -177,6 +181,7 @@ def test_multinode_distrib_gpu(distributed_context_multi_node_nccl): @pytest.mark.skipif("NUM_TPU_WORKERS" in os.environ, reason="Skip if NUM_TPU_WORKERS is in env vars") @pytest.mark.skipif(not idist.has_xla_support, reason="Skip if no PyTorch XLA package") def test_distrib_single_device_xla(): + device = idist.device() _test_distrib_integration(device) 
_test_distrib_accumulator_device(device) diff --git a/tests/ignite/metrics/test_mean_pairwise_distance.py b/tests/ignite/metrics/test_mean_pairwise_distance.py index 2e042fd4703d..4ba5bdf4ec00 100644 --- a/tests/ignite/metrics/test_mean_pairwise_distance.py +++ b/tests/ignite/metrics/test_mean_pairwise_distance.py @@ -138,16 +138,18 @@ def test_accumulator_detached(): @pytest.mark.distributed @pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support") @pytest.mark.skipif(torch.cuda.device_count() < 1, reason="Skip if no GPU") -def test_distrib_gpu(local_rank, distributed_context_single_node_nccl): - device = torch.device(f"cuda:{local_rank}") +def test_distrib_nccl_gpu(distributed_context_single_node_nccl): + + device = idist.device() _test_distrib_integration(device) _test_distrib_accumulator_device(device) @pytest.mark.distributed @pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support") -def test_distrib_cpu(distributed_context_single_node_gloo): - device = torch.device("cpu") +def test_distrib_gloo_cpu_or_gpu(distributed_context_single_node_gloo): + + device = idist.device() _test_distrib_integration(device) _test_distrib_accumulator_device(device) @@ -167,8 +169,9 @@ def test_distrib_hvd(gloo_hvd_executor): @pytest.mark.multinode_distributed @pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support") @pytest.mark.skipif("MULTINODE_DISTRIB" not in os.environ, reason="Skip if not multi-node distributed") -def test_multinode_distrib_cpu(distributed_context_multi_node_gloo): - device = torch.device("cpu") +def test_multinode_distrib_gloo_cpu_or_gpu(distributed_context_multi_node_gloo): + + device = idist.device() _test_distrib_integration(device) _test_distrib_accumulator_device(device) @@ -176,8 +179,9 @@ def test_multinode_distrib_cpu(distributed_context_multi_node_gloo): @pytest.mark.multinode_distributed @pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support") @pytest.mark.skipif("GPU_MULTINODE_DISTRIB" not in os.environ, reason="Skip if not multi-node distributed") -def test_multinode_distrib_gpu(distributed_context_multi_node_nccl): - device = torch.device(f"cuda:{distributed_context_multi_node_nccl['local_rank']}") +def test_multinode_distrib_nccl_gpu(distributed_context_multi_node_nccl): + + device = idist.device() _test_distrib_integration(device) _test_distrib_accumulator_device(device) diff --git a/tests/ignite/metrics/test_mean_squared_error.py b/tests/ignite/metrics/test_mean_squared_error.py index 5f5a169d2770..a1df3fb3a5cf 100644 --- a/tests/ignite/metrics/test_mean_squared_error.py +++ b/tests/ignite/metrics/test_mean_squared_error.py @@ -131,17 +131,18 @@ def test_accumulator_detached(): @pytest.mark.distributed @pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support") @pytest.mark.skipif(torch.cuda.device_count() < 1, reason="Skip if no GPU") -def test_distrib_gpu(local_rank, distributed_context_single_node_nccl): +def test_distrib_nccl_gpu(distributed_context_single_node_nccl): - device = torch.device(f"cuda:{local_rank}") + device = idist.device() _test_distrib_integration(device) _test_distrib_accumulator_device(device) @pytest.mark.distributed @pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support") -def test_distrib_cpu(distributed_context_single_node_gloo): - device = torch.device("cpu") +def 
diff --git a/tests/ignite/metrics/test_metric.py b/tests/ignite/metrics/test_metric.py
index 06997139281b..8664d6c230aa 100644
--- a/tests/ignite/metrics/test_metric.py
+++ b/tests/ignite/metrics/test_metric.py
@@ -668,18 +668,18 @@ def _test_creating_on_xla_fails(device):
 @pytest.mark.distributed
 @pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support")
 @pytest.mark.skipif(torch.cuda.device_count() < 1, reason="Skip if no GPU")
-def test_distrib_gpu(distributed_context_single_node_nccl):
+def test_distrib_nccl_gpu(distributed_context_single_node_nccl):

-    device = f"cuda:{distributed_context_single_node_nccl['local_rank']}"
+    device = idist.device()
     _test_distrib_sync_all_reduce_decorator(device)
     _test_invalid_sync_all_reduce(device)


 @pytest.mark.distributed
 @pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support")
-def test_distrib_cpu(distributed_context_single_node_gloo):
+def test_distrib_gloo_cpu_or_gpu(distributed_context_single_node_gloo):

-    device = "cpu"
+    device = idist.device()
     _test_distrib_sync_all_reduce_decorator(device)
     _test_invalid_sync_all_reduce(device)

@@ -699,8 +699,9 @@ def test_distrib_hvd(gloo_hvd_executor):
 @pytest.mark.multinode_distributed
 @pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support")
 @pytest.mark.skipif("MULTINODE_DISTRIB" not in os.environ, reason="Skip if not multi-node distributed")
-def test_multinode_distrib_cpu(distributed_context_multi_node_gloo):
-    device = "cpu"
+def test_multinode_distrib_gloo_cpu_or_gpu(distributed_context_multi_node_gloo):
+
+    device = idist.device()
     _test_distrib_sync_all_reduce_decorator(device)
     _test_invalid_sync_all_reduce(device)

@@ -708,8 +709,9 @@ def test_multinode_distrib_cpu(distributed_context_multi_node_gloo):
 @pytest.mark.multinode_distributed
 @pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support")
 @pytest.mark.skipif("GPU_MULTINODE_DISTRIB" not in os.environ, reason="Skip if not multi-node distributed")
-def test_multinode_distrib_gpu(distributed_context_multi_node_nccl):
-    device = f"cuda:{distributed_context_multi_node_nccl['local_rank']}"
+def test_multinode_distrib_nccl_gpu(distributed_context_multi_node_nccl):
+
+    device = idist.device()
     _test_distrib_sync_all_reduce_decorator(device)
     _test_invalid_sync_all_reduce(device)
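The _test_distrib_sync_all_reduce_decorator cases above exercise the public decorator pair that metric authors use. For reference, a self-contained toy metric built on that API (the decorators are ignite's real API; the metric itself is made up):

import torch
from ignite.metrics.metric import Metric, reinit__is_reduced, sync_all_reduce

class ToyMeanAbsolute(Metric):
    @reinit__is_reduced
    def reset(self):
        self._sum = torch.tensor(0.0, device=self._device)
        self._num_examples = 0

    @reinit__is_reduced
    def update(self, output):
        y_pred, y = output
        self._sum += torch.sum(torch.abs(y_pred - y)).to(self._device)
        self._num_examples += y.shape[0]

    @sync_all_reduce("_sum", "_num_examples")
    def compute(self):
        # By the time this body runs, _sum and _num_examples hold the
        # values summed across every process in the group.
        return self._sum.item() / self._num_examples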
diff --git a/tests/ignite/metrics/test_metrics_lambda.py b/tests/ignite/metrics/test_metrics_lambda.py
index 618cb35fd965..1376f414fc9b 100644
--- a/tests/ignite/metrics/test_metrics_lambda.py
+++ b/tests/ignite/metrics/test_metrics_lambda.py
@@ -402,18 +402,18 @@ def update(engine, i):
 @pytest.mark.distributed
 @pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support")
 @pytest.mark.skipif(torch.cuda.device_count() < 1, reason="Skip if no GPU")
-def test_distrib_gpu(local_rank, distributed_context_single_node_nccl):
+def test_distrib_nccl_gpu(distributed_context_single_node_nccl):

-    device = torch.device(f"cuda:{local_rank}")
+    device = idist.device()
     _test_distrib_integration(device)
     _test_distrib_metrics_on_diff_devices(device)


 @pytest.mark.distributed
 @pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support")
-def test_distrib_cpu(local_rank, distributed_context_single_node_gloo):
+def test_distrib_gloo_cpu_or_gpu(distributed_context_single_node_gloo):

-    device = torch.device("cpu")
+    device = idist.device()
     _test_distrib_integration(device)

@@ -432,16 +432,18 @@ def test_distrib_hvd(gloo_hvd_executor):
 @pytest.mark.multinode_distributed
 @pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support")
 @pytest.mark.skipif("MULTINODE_DISTRIB" not in os.environ, reason="Skip if not multi-node distributed")
-def test_multinode_distrib_cpu(distributed_context_multi_node_gloo):
-    device = torch.device("cpu")
+def test_multinode_distrib_gloo_cpu_or_gpu(distributed_context_multi_node_gloo):
+
+    device = idist.device()
     _test_distrib_integration(device)


 @pytest.mark.multinode_distributed
 @pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support")
 @pytest.mark.skipif("GPU_MULTINODE_DISTRIB" not in os.environ, reason="Skip if not multi-node distributed")
-def test_multinode_distrib_gpu(distributed_context_multi_node_nccl):
-    device = torch.device(f"cuda:{distributed_context_multi_node_nccl['local_rank']}")
+def test_multinode_distrib_nccl_gpu(distributed_context_multi_node_nccl):
+
+    device = idist.device()
     _test_distrib_integration(device)
     _test_distrib_metrics_on_diff_devices(device)
diff --git a/tests/ignite/metrics/test_multilabel_confusion_matrix.py b/tests/ignite/metrics/test_multilabel_confusion_matrix.py
index c4ea2bc2987a..01c959332fb8 100644
--- a/tests/ignite/metrics/test_multilabel_confusion_matrix.py
+++ b/tests/ignite/metrics/test_multilabel_confusion_matrix.py
@@ -359,18 +359,18 @@ def test_simple_batched():
 # @pytest.mark.distributed
 # @pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support")
 # @pytest.mark.skipif(torch.cuda.device_count() < 1, reason="Skip if no GPU")
-# def test_distrib_gpu(local_rank, distributed_context_single_node_nccl):
+# def test_distrib_nccl_gpu(distributed_context_single_node_nccl):

-#     device = torch.device(f"cuda:{local_rank}")
+#     device = idist.device()
 #     _test_distrib_multiclass_images(device)
 #     _test_distrib_accumulator_device(device)


 # @pytest.mark.distributed
 # @pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support")
-# def test_distrib_cpu(distributed_context_single_node_gloo):
+# def test_distrib_gloo_cpu_or_gpu(distributed_context_single_node_gloo):

-#     device = torch.device("cpu")
+#     device = idist.device()
 #     _test_distrib_multiclass_images(device)
 #     _test_distrib_accumulator_device(device)

@@ -390,8 +390,9 @@ def test_simple_batched():
 # @pytest.mark.multinode_distributed
 # @pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support")
 # @pytest.mark.skipif("MULTINODE_DISTRIB" not in os.environ, reason="Skip if not multi-node distributed")
-# def test_multinode_distrib_cpu(distributed_context_multi_node_gloo):
-#     device = torch.device("cpu")
+# def test_multinode_distrib_gloo_cpu_or_gpu(distributed_context_multi_node_gloo):
+#
+#     device = idist.device()
 #     _test_distrib_multiclass_images(device)
 #     _test_distrib_accumulator_device(device)

@@ -399,8 +400,9 @@ def test_simple_batched():
 # @pytest.mark.multinode_distributed
 # @pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support")
 # @pytest.mark.skipif("GPU_MULTINODE_DISTRIB" not in os.environ, reason="Skip if not multi-node distributed")
-# def test_multinode_distrib_gpu(distributed_context_multi_node_nccl):
-#     device = torch.device(f"cuda:{distributed_context_multi_node_nccl['local_rank']}")
+# def test_multinode_distrib_nccl_gpu(distributed_context_multi_node_nccl):
+#
+#     device = idist.device()
 #     _test_distrib_multiclass_images(device)
 #     _test_distrib_accumulator_device(device)
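The _test_distrib_accumulator_device helper called throughout (including in the commented-out block above) pins down where a metric stores its accumulators. Roughly, and with the caveat that the repository helper inspects the private accumulator tensors directly rather than this simplified attribute:

import torch
import ignite.distributed as idist
from ignite.metrics import MeanAbsoluteError

def _sketch_accumulator_device_check():
    device = idist.device()
    m = MeanAbsoluteError(device=device)
    m.update((torch.rand(4, 2), torch.rand(4, 2)))
    # The metric should keep its state on the device it was created with;
    # the real helper additionally checks the accumulator tensor itself.
    assert m._device == torch.device(device)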
diff --git a/tests/ignite/metrics/test_precision.py b/tests/ignite/metrics/test_precision.py
index bff02cc65c27..be2f6a909fd9 100644
--- a/tests/ignite/metrics/test_precision.py
+++ b/tests/ignite/metrics/test_precision.py
@@ -518,8 +518,9 @@ def _test(average, metric_device):
 @pytest.mark.distributed
 @pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support")
 @pytest.mark.skipif(torch.cuda.device_count() < 1, reason="Skip if no GPU")
-def test_distrib_gpu(local_rank, distributed_context_single_node_nccl):
-    device = torch.device(f"cuda:{local_rank}")
+def test_distrib_nccl_gpu(distributed_context_single_node_nccl):
+
+    device = idist.device()
     _test_distrib_integration_multiclass(device)
     _test_distrib_integration_multilabel(device)
     _test_distrib_accumulator_device(device)

@@ -528,8 +529,9 @@ def test_distrib_gpu(local_rank, distributed_context_single_node_nccl):
 @pytest.mark.distributed
 @pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support")
-def test_distrib_cpu(local_rank, distributed_context_single_node_gloo):
-    device = torch.device("cpu")
+def test_distrib_gloo_cpu_or_gpu(distributed_context_single_node_gloo):
+
+    device = idist.device()
     _test_distrib_integration_multiclass(device)
     _test_distrib_integration_multilabel(device)
     _test_distrib_accumulator_device(device)

@@ -553,8 +555,9 @@ def test_distrib_hvd(gloo_hvd_executor):
 @pytest.mark.multinode_distributed
 @pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support")
 @pytest.mark.skipif("MULTINODE_DISTRIB" not in os.environ, reason="Skip if not multi-node distributed")
-def test_multinode_distrib_cpu(distributed_context_multi_node_gloo):
-    device = torch.device("cpu")
+def test_multinode_distrib_gloo_cpu_or_gpu(distributed_context_multi_node_gloo):
+
+    device = idist.device()
     _test_distrib_integration_multiclass(device)
     _test_distrib_integration_multilabel(device)
     _test_distrib_accumulator_device(device)

@@ -564,8 +567,9 @@ def test_multinode_distrib_cpu(distributed_context_multi_node_gloo):
 @pytest.mark.multinode_distributed
 @pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support")
 @pytest.mark.skipif("GPU_MULTINODE_DISTRIB" not in os.environ, reason="Skip if not multi-node distributed")
-def test_multinode_distrib_gpu(distributed_context_multi_node_nccl):
-    device = torch.device(f"cuda:{distributed_context_multi_node_nccl['local_rank']}")
+def test_multinode_distrib_nccl_gpu(distributed_context_multi_node_nccl):
+
+    device = idist.device()
     _test_distrib_integration_multiclass(device)
     _test_distrib_integration_multilabel(device)
     _test_distrib_accumulator_device(device)
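All of these tests lean on conftest fixtures to create the process group. Outside pytest, the same single-node setup can be driven with idist.spawn; a minimal runnable example, where the backend and process count are arbitrary choices:

import torch
import ignite.distributed as idist

def _worker(local_rank):
    device = idist.device()  # resolved per backend, as in the tests above
    t = torch.tensor([float(idist.get_rank() + 1)], device=device)
    t = idist.all_reduce(t)  # sum across the group
    ws = idist.get_world_size()
    assert t.item() == ws * (ws + 1) / 2

if __name__ == "__main__":
    # Two CPU processes over gloo; "nccl" would be the GPU analogue.
    idist.spawn("gloo", _worker, args=(), nproc_per_node=2)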
diff --git a/tests/ignite/metrics/test_psnr.py b/tests/ignite/metrics/test_psnr.py
index e1abf0f1fff7..f39fbd46e763 100644
--- a/tests/ignite/metrics/test_psnr.py
+++ b/tests/ignite/metrics/test_psnr.py
@@ -239,8 +239,9 @@ def _test_distrib_accumulator_device(device):
 @pytest.mark.distributed
 @pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support")
-def test_distrib_cpu(distributed_context_single_node_gloo):
-    device = "cpu"
+def test_distrib_gloo_cpu_or_gpu(distributed_context_single_node_gloo):
+
+    device = idist.device()
     _test_distrib_integration(device)
     _test_distrib_accumulator_device(device)

@@ -248,8 +249,9 @@ def test_distrib_cpu(distributed_context_single_node_gloo):
 @pytest.mark.distributed
 @pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support")
 @pytest.mark.skipif(torch.cuda.device_count() < 1, reason="Skip if no GPU")
-def test_distrib_gpu(local_rank, distributed_context_single_node_nccl):
-    device = f"cuda:{local_rank}"
+def test_distrib_nccl_gpu(distributed_context_single_node_nccl):
+
+    device = idist.device()
     _test_distrib_integration(device)
     _test_distrib_accumulator_device(device)

@@ -257,8 +259,9 @@ def test_distrib_gpu(local_rank, distributed_context_single_node_nccl):
 @pytest.mark.multinode_distributed
 @pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support")
 @pytest.mark.skipif("MULTINODE_DISTRIB" not in os.environ, reason="Skip if not multi-node distributed")
-def test_multinode_distrib_cpu(distributed_context_multi_node_gloo):
-    device = "cpu"
+def test_multinode_distrib_gloo_cpu_or_gpu(distributed_context_multi_node_gloo):
+
+    device = idist.device()
     _test_distrib_integration(device)
     _test_distrib_accumulator_device(device)

@@ -266,8 +269,9 @@ def test_multinode_distrib_cpu(distributed_context_multi_node_gloo):
 @pytest.mark.multinode_distributed
 @pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support")
 @pytest.mark.skipif("GPU_MULTINODE_DISTRIB" not in os.environ, reason="Skip if not multi-node distributed")
-def test_multinode_distrib_gpu(distributed_context_multi_node_nccl):
-    device = f"cuda:{distributed_context_multi_node_nccl['local_rank']}"
+def test_multinode_distrib_nccl_gpu(distributed_context_multi_node_nccl):
+
+    device = idist.device()
     _test_distrib_integration(device)
     _test_distrib_accumulator_device(device)

@@ -276,6 +280,7 @@ def test_multinode_distrib_gpu(distributed_context_multi_node_nccl):
 @pytest.mark.skipif("NUM_TPU_WORKERS" in os.environ, reason="Skip if NUM_TPU_WORKERS is in env vars")
 @pytest.mark.skipif(not idist.has_xla_support, reason="Skip if no PyTorch XLA package")
 def test_distrib_single_device_xla():
+
     device = idist.device()
     _test_distrib_integration(device)
     _test_distrib_accumulator_device(device)
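For orientation on the metric touched above: PSNR is 10 * log10(data_range^2 / MSE), computed per image and averaged over the batch, to my understanding of ignite's implementation. A small sanity check against that textbook definition (toy tensors, not repository code):

import torch
from ignite.metrics import PSNR

y_pred = torch.tensor([[0.00, 0.25], [0.50, 0.75]])
y = torch.tensor([[0.00, 0.50], [0.50, 1.00]])

mse_per_image = torch.mean((y_pred - y) ** 2, dim=1)
expected = (10.0 * torch.log10(1.0 ** 2 / mse_per_image)).mean().item()

metric = PSNR(data_range=1.0)
metric.update((y_pred, y))
assert abs(metric.compute() - expected) < 1e-5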
diff --git a/tests/ignite/metrics/test_recall.py b/tests/ignite/metrics/test_recall.py
index fe9b14e93dc8..8f3cdb9c67e4 100644
--- a/tests/ignite/metrics/test_recall.py
+++ b/tests/ignite/metrics/test_recall.py
@@ -519,8 +519,9 @@ def _test(average, metric_device):
 @pytest.mark.distributed
 @pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support")
 @pytest.mark.skipif(torch.cuda.device_count() < 1, reason="Skip if no GPU")
-def test_distrib_gpu(local_rank, distributed_context_single_node_nccl):
-    device = torch.device(f"cuda:{local_rank}")
+def test_distrib_nccl_gpu(distributed_context_single_node_nccl):
+
+    device = idist.device()
     _test_distrib_integration_multiclass(device)
     _test_distrib_integration_multilabel(device)
     _test_distrib_accumulator_device(device)

@@ -529,8 +530,9 @@ def test_distrib_gpu(local_rank, distributed_context_single_node_nccl):
 @pytest.mark.distributed
 @pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support")
-def test_distrib_cpu(distributed_context_single_node_gloo):
-    device = torch.device("cpu")
+def test_distrib_gloo_cpu_or_gpu(distributed_context_single_node_gloo):
+
+    device = idist.device()
     _test_distrib_integration_multiclass(device)
     _test_distrib_integration_multilabel(device)
     _test_distrib_accumulator_device(device)

@@ -554,8 +556,9 @@ def test_distrib_hvd(gloo_hvd_executor):
 @pytest.mark.multinode_distributed
 @pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support")
 @pytest.mark.skipif("MULTINODE_DISTRIB" not in os.environ, reason="Skip if not multi-node distributed")
-def test_multinode_distrib_cpu(distributed_context_multi_node_gloo):
-    device = torch.device("cpu")
+def test_multinode_distrib_gloo_cpu_or_gpu(distributed_context_multi_node_gloo):
+
+    device = idist.device()
     _test_distrib_integration_multiclass(device)
     _test_distrib_integration_multilabel(device)
     _test_distrib_accumulator_device(device)

@@ -565,8 +568,9 @@ def test_multinode_distrib_cpu(distributed_context_multi_node_gloo):
 @pytest.mark.multinode_distributed
 @pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support")
 @pytest.mark.skipif("GPU_MULTINODE_DISTRIB" not in os.environ, reason="Skip if not multi-node distributed")
-def test_multinode_distrib_gpu(distributed_context_multi_node_nccl):
-    device = torch.device(f"cuda:{distributed_context_multi_node_nccl['local_rank']}")
+def test_multinode_distrib_nccl_gpu(distributed_context_multi_node_nccl):
+
+    device = idist.device()
     _test_distrib_integration_multiclass(device)
     _test_distrib_integration_multilabel(device)
     _test_distrib_accumulator_device(device)
diff --git a/tests/ignite/metrics/test_root_mean_squared_error.py b/tests/ignite/metrics/test_root_mean_squared_error.py
index e5c66616456f..7c0ccee4d60b 100644
--- a/tests/ignite/metrics/test_root_mean_squared_error.py
+++ b/tests/ignite/metrics/test_root_mean_squared_error.py
@@ -103,17 +103,17 @@ def _test(metric_device):
 @pytest.mark.distributed
 @pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support")
 @pytest.mark.skipif(torch.cuda.device_count() < 1, reason="Skip if no GPU")
-def test_distrib_gpu(local_rank, distributed_context_single_node_nccl):
+def test_distrib_nccl_gpu(distributed_context_single_node_nccl):

-    device = torch.device(f"cuda:{local_rank}")
+    device = idist.device()
     _test_distrib_integration(device)


 @pytest.mark.distributed
 @pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support")
-def test_distrib_cpu(local_rank, distributed_context_single_node_gloo):
+def test_distrib_gloo_cpu_or_gpu(distributed_context_single_node_gloo):

-    device = torch.device("cpu")
+    device = idist.device()
     _test_distrib_integration(device)

@@ -131,16 +131,18 @@ def test_distrib_hvd(gloo_hvd_executor):
 @pytest.mark.multinode_distributed
 @pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support")
 @pytest.mark.skipif("MULTINODE_DISTRIB" not in os.environ, reason="Skip if not multi-node distributed")
-def test_multinode_distrib_cpu(distributed_context_multi_node_gloo):
-    device = torch.device("cpu")
+def test_multinode_distrib_gloo_cpu_or_gpu(distributed_context_multi_node_gloo):
+
+    device = idist.device()
     _test_distrib_integration(device)


 @pytest.mark.multinode_distributed
 @pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support")
 @pytest.mark.skipif("GPU_MULTINODE_DISTRIB" not in os.environ, reason="Skip if not multi-node distributed")
-def test_multinode_distrib_gpu(distributed_context_multi_node_nccl):
-    device = torch.device(f"cuda:{distributed_context_multi_node_nccl['local_rank']}")
+def test_multinode_distrib_nccl_gpu(distributed_context_multi_node_nccl):
+
+    device = idist.device()
     _test_distrib_integration(device)
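Worth recalling why the RMSE file needs no separate expected values: RootMeanSquaredError is simply the square root of the MSE accumulation, e.g.:

import torch
from ignite.metrics import MeanSquaredError, RootMeanSquaredError

y_pred, y = torch.rand(8, 3), torch.rand(8, 3)
mse, rmse = MeanSquaredError(), RootMeanSquaredError()
mse.update((y_pred, y))
rmse.update((y_pred, y))
assert abs(rmse.compute() - mse.compute() ** 0.5) < 1e-6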
reason="Skip if no native dist support") -def test_distrib_cpu(local_rank, distributed_context_single_node_gloo): +def test_distrib_gloo_cpu_or_gpu(distributed_context_single_node_gloo): - device = torch.device("cpu") + device = idist.device() _test_distrib_integration(device) @@ -131,16 +131,18 @@ def test_distrib_hvd(gloo_hvd_executor): @pytest.mark.multinode_distributed @pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support") @pytest.mark.skipif("MULTINODE_DISTRIB" not in os.environ, reason="Skip if not multi-node distributed") -def test_multinode_distrib_cpu(distributed_context_multi_node_gloo): - device = torch.device("cpu") +def test_multinode_distrib_gloo_cpu_or_gpu(distributed_context_multi_node_gloo): + + device = idist.device() _test_distrib_integration(device) @pytest.mark.multinode_distributed @pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support") @pytest.mark.skipif("GPU_MULTINODE_DISTRIB" not in os.environ, reason="Skip if not multi-node distributed") -def test_multinode_distrib_gpu(distributed_context_multi_node_nccl): - device = torch.device(f"cuda:{distributed_context_multi_node_nccl['local_rank']}") +def test_multinode_distrib_nccl_gpu(distributed_context_multi_node_nccl): + + device = idist.device() _test_distrib_integration(device) diff --git a/tests/ignite/metrics/test_running_average.py b/tests/ignite/metrics/test_running_average.py index 2e06ad589ecf..857e929c8509 100644 --- a/tests/ignite/metrics/test_running_average.py +++ b/tests/ignite/metrics/test_running_average.py @@ -392,9 +392,9 @@ def _test_distrib_accumulator_device(device): @pytest.mark.distributed @pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support") @pytest.mark.skipif(torch.cuda.device_count() < 1, reason="Skip if no GPU") -def test_distrib_gpu(local_rank, distributed_context_single_node_nccl): +def test_distrib_nccl_gpu(distributed_context_single_node_nccl): - device = torch.device(f"cuda:{local_rank}") + device = idist.device() _test_distrib_on_output(device) _test_distrib_on_metric(device) _test_distrib_accumulator_device(device) @@ -402,9 +402,9 @@ def test_distrib_gpu(local_rank, distributed_context_single_node_nccl): @pytest.mark.distributed @pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support") -def test_distrib_cpu(distributed_context_single_node_gloo): +def test_distrib_gloo_cpu_or_gpu(distributed_context_single_node_gloo): - device = torch.device("cpu") + device = idist.device() _test_distrib_on_output(device) _test_distrib_on_metric(device) _test_distrib_accumulator_device(device) @@ -426,8 +426,9 @@ def test_distrib_hvd(gloo_hvd_executor): @pytest.mark.multinode_distributed @pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support") @pytest.mark.skipif("MULTINODE_DISTRIB" not in os.environ, reason="Skip if not multi-node distributed") -def test_multinode_distrib_cpu(distributed_context_multi_node_gloo): - device = torch.device("cpu") +def test_multinode_distrib_gloo_cpu_or_gpu(distributed_context_multi_node_gloo): + + device = idist.device() _test_distrib_on_output(device) _test_distrib_on_metric(device) _test_distrib_accumulator_device(device) @@ -436,8 +437,9 @@ def test_multinode_distrib_cpu(distributed_context_multi_node_gloo): @pytest.mark.multinode_distributed @pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support") 
@pytest.mark.skipif("GPU_MULTINODE_DISTRIB" not in os.environ, reason="Skip if not multi-node distributed") -def test_multinode_distrib_gpu(distributed_context_multi_node_nccl): - device = torch.device(f"cuda:{distributed_context_multi_node_nccl['local_rank']}") +def test_multinode_distrib_nccl_gpu(distributed_context_multi_node_nccl): + + device = idist.device() _test_distrib_on_output(device) _test_distrib_on_metric(device) _test_distrib_accumulator_device(device) diff --git a/tests/ignite/metrics/test_ssim.py b/tests/ignite/metrics/test_ssim.py index d612bd521920..ae62768eda45 100644 --- a/tests/ignite/metrics/test_ssim.py +++ b/tests/ignite/metrics/test_ssim.py @@ -193,17 +193,18 @@ def _test_distrib_accumulator_device(device): @pytest.mark.distributed @pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support") @pytest.mark.skipif(torch.cuda.device_count() < 1, reason="Skip if no GPU") -def test_distrib_gpu(local_rank, distributed_context_single_node_nccl): +def test_distrib_nccl_gpu(distributed_context_single_node_nccl): - device = f"cuda:{local_rank}" + device = idist.device() _test_distrib_integration(device) _test_distrib_accumulator_device(device) @pytest.mark.distributed @pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support") -def test_distrib_cpu(distributed_context_single_node_gloo): - device = "cpu" +def test_distrib_gloo_cpu_or_gpu(distributed_context_single_node_gloo): + + device = idist.device() _test_distrib_integration(device) _test_distrib_accumulator_device(device) @@ -211,8 +212,9 @@ def test_distrib_cpu(distributed_context_single_node_gloo): @pytest.mark.multinode_distributed @pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support") @pytest.mark.skipif("MULTINODE_DISTRIB" not in os.environ, reason="Skip if not multi-node distributed") -def test_multinode_distrib_cpu(distributed_context_multi_node_gloo): - device = "cpu" +def test_multinode_distrib_gloo_cpu_or_gpu(distributed_context_multi_node_gloo): + + device = idist.device() _test_distrib_integration(device) _test_distrib_accumulator_device(device) @@ -220,8 +222,9 @@ def test_multinode_distrib_cpu(distributed_context_multi_node_gloo): @pytest.mark.multinode_distributed @pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support") @pytest.mark.skipif("GPU_MULTINODE_DISTRIB" not in os.environ, reason="Skip if not multi-node distributed") -def test_multinode_distrib_gpu(distributed_context_multi_node_nccl): - device = f"cuda:{distributed_context_multi_node_nccl['local_rank']}" +def test_multinode_distrib_nccl_gpu(distributed_context_multi_node_nccl): + + device = idist.device() _test_distrib_integration(device) _test_distrib_accumulator_device(device) diff --git a/tests/ignite/metrics/test_top_k_categorical_accuracy.py b/tests/ignite/metrics/test_top_k_categorical_accuracy.py index e2e4bb39ad85..2282248a96a1 100644 --- a/tests/ignite/metrics/test_top_k_categorical_accuracy.py +++ b/tests/ignite/metrics/test_top_k_categorical_accuracy.py @@ -128,16 +128,18 @@ def _test_distrib_accumulator_device(device): @pytest.mark.distributed @pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support") @pytest.mark.skipif(torch.cuda.device_count() < 1, reason="Skip if no GPU") -def test_distrib_gpu(local_rank, distributed_context_single_node_nccl): - device = torch.device(f"cuda:{local_rank}") +def 
diff --git a/tests/ignite/metrics/test_top_k_categorical_accuracy.py b/tests/ignite/metrics/test_top_k_categorical_accuracy.py
index e2e4bb39ad85..2282248a96a1 100644
--- a/tests/ignite/metrics/test_top_k_categorical_accuracy.py
+++ b/tests/ignite/metrics/test_top_k_categorical_accuracy.py
@@ -128,16 +128,18 @@ def _test_distrib_accumulator_device(device):
 @pytest.mark.distributed
 @pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support")
 @pytest.mark.skipif(torch.cuda.device_count() < 1, reason="Skip if no GPU")
-def test_distrib_gpu(local_rank, distributed_context_single_node_nccl):
-    device = torch.device(f"cuda:{local_rank}")
+def test_distrib_nccl_gpu(distributed_context_single_node_nccl):
+
+    device = idist.device()
     _test_distrib_integration(device)
     _test_distrib_accumulator_device(device)


 @pytest.mark.distributed
 @pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support")
-def test_distrib_cpu(local_rank, distributed_context_single_node_gloo):
-    device = torch.device("cpu")
+def test_distrib_gloo_cpu_or_gpu(distributed_context_single_node_gloo):
+
+    device = idist.device()
     _test_distrib_integration(device)
     _test_distrib_accumulator_device(device)

@@ -157,8 +159,9 @@ def test_distrib_hvd(gloo_hvd_executor):
 @pytest.mark.multinode_distributed
 @pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support")
 @pytest.mark.skipif("MULTINODE_DISTRIB" not in os.environ, reason="Skip if not multi-node distributed")
-def test_multinode_distrib_cpu(distributed_context_multi_node_gloo):
-    device = torch.device("cpu")
+def test_multinode_distrib_gloo_cpu_or_gpu(distributed_context_multi_node_gloo):
+
+    device = idist.device()
     _test_distrib_integration(device)
     _test_distrib_accumulator_device(device)

@@ -166,8 +169,9 @@ def test_multinode_distrib_cpu(distributed_context_multi_node_gloo):
 @pytest.mark.multinode_distributed
 @pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support")
 @pytest.mark.skipif("GPU_MULTINODE_DISTRIB" not in os.environ, reason="Skip if not multi-node distributed")
-def test_multinode_distrib_gpu(distributed_context_multi_node_nccl):
-    device = torch.device(f"cuda:{distributed_context_multi_node_nccl['local_rank']}")
+def test_multinode_distrib_nccl_gpu(distributed_context_multi_node_nccl):
+
+    device = idist.device()
     _test_distrib_integration(device)
     _test_distrib_accumulator_device(device)

@@ -176,12 +180,14 @@ def test_multinode_distrib_gpu(distributed_context_multi_node_nccl):
 @pytest.mark.skipif("NUM_TPU_WORKERS" in os.environ, reason="Skip if NUM_TPU_WORKERS is in env vars")
 @pytest.mark.skipif(not idist.has_xla_support, reason="Skip if no PyTorch XLA package")
 def test_distrib_single_device_xla():
+
     device = idist.device()
     _test_distrib_integration(device)
     _test_distrib_accumulator_device(device)


 def _test_distrib_xla_nprocs(index):
+
     device = idist.device()
     _test_distrib_integration(device)
     _test_distrib_accumulator_device(device)
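Finally, the _test_distrib_xla_nprocs(index) signature above is shaped for torch_xla's multiprocessing entry point. A sketch of the kind of driver that runs it, assuming NUM_TPU_WORKERS is set as the skipif implies (xmp.spawn is the real torch_xla API; the test body here is elided):

import os

import torch_xla.distributed.xla_multiprocessing as xmp

def _test_distrib_xla_nprocs(index):
    # body as in the diff: device = idist.device(), then the distrib checks
    pass

if __name__ == "__main__":
    nprocs = int(os.environ["NUM_TPU_WORKERS"])
    # Runs the function once per XLA process, passing each process its index.
    xmp.spawn(_test_distrib_xla_nprocs, args=(), nprocs=nprocs)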