From d9666fb4ef9f83b35eb084b3de964ee352cf4bd6 Mon Sep 17 00:00:00 2001
From: Nikita Shulga
Date: Tue, 26 Jan 2021 20:07:19 -0800
Subject: [PATCH 1/5] torch.cuda.memory_allocated to return `{}` if not
 initialized

Fixes #49952
---
 torch/cuda/memory.py | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/torch/cuda/memory.py b/torch/cuda/memory.py
index c0bde95de741..102ca1cb2e9f 100644
--- a/torch/cuda/memory.py
+++ b/torch/cuda/memory.py
@@ -193,6 +193,8 @@ def _recurse_add_to_result(prefix, obj):
 
 def memory_stats_as_nested_dict(device: Union[Device, int] = None) -> Dict[str, Any]:
     r"""Returns the result of :func:`~torch.cuda.memory_stats` as a nested dictionary."""
+    if not is_initialized():
+        return {}
     device = _get_device_index(device, optional=True)
     return torch._C._cuda_memoryStats(device)
 
@@ -303,7 +305,7 @@ def memory_allocated(device: Union[Device, int] = None) -> int:
         needs to be created on GPU. See :ref:`cuda-memory-management` for more
         details about GPU memory management.
     """
-    return memory_stats(device=device)["allocated_bytes.all.current"]
+    return memory_stats(device=device).get("allocated_bytes.all.current", 0)
 
 
 def max_memory_allocated(device: Union[Device, int] = None) -> int:
@@ -325,7 +327,7 @@ def max_memory_allocated(device: Union[Device, int] = None) -> int:
         See :ref:`cuda-memory-management` for more details about GPU memory
         management.
     """
-    return memory_stats(device=device)["allocated_bytes.all.peak"]
+    return memory_stats(device=device).get("allocated_bytes.all.peak", 0)
 
 
 def memory_reserved(device: Union[Device, int] = None) -> int:
@@ -341,7 +343,7 @@ def memory_reserved(device: Union[Device, int] = None) -> int:
         See :ref:`cuda-memory-management` for more details about GPU memory
         management.
     """
-    return memory_stats(device=device)["reserved_bytes.all.current"]
+    return memory_stats(device=device).get("reserved_bytes.all.current", 0)
 
 
 def max_memory_reserved(device: Union[Device, int] = None) -> int:
@@ -363,7 +365,7 @@ def max_memory_reserved(device: Union[Device, int] = None) -> int:
         See :ref:`cuda-memory-management` for more details about GPU memory
         management.
     """
-    return memory_stats(device=device)["reserved_bytes.all.peak"]
+    return memory_stats(device=device).get("reserved_bytes.all.peak", 0)
 
 
 def memory_cached(device: Union[Device, int] = None) -> int:

From 7e0d81891a08cf10aed283bfe4e53fd20d00d175 Mon Sep 17 00:00:00 2001
From: Nikita Shulga
Date: Wed, 27 Jan 2021 10:11:22 -0800
Subject: [PATCH 2/5] Add handling for torch.cuda.device as an argument

---
 torch/cuda/_utils.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/torch/cuda/_utils.py b/torch/cuda/_utils.py
index 8f4105623a98..7f6b70a037bb 100644
--- a/torch/cuda/_utils.py
+++ b/torch/cuda/_utils.py
@@ -29,6 +29,8 @@ def _get_device_index(device: Union[Device, str, int, None], optional: bool = Fa
                 raise ValueError('Expected a cuda or cpu device, but got: {}'.format(device))
         elif device.type != 'cuda':
             raise ValueError('Expected a cuda device, but got: {}'.format(device))
+    if isinstance(device, torch.cuda.device):
+        return device.idx
     return _torch_get_device_index(device, optional, allow_cpu)
 
 

From 8f3a3b063a3fc9f09a33c717e83f8c0f63109ecb Mon Sep 17 00:00:00 2001
From: Nikita Shulga
Date: Wed, 27 Jan 2021 14:17:33 -0800
Subject: [PATCH 3/5] Added test that verifies torch.cuda.memory_allocated
 works with torch.cuda.device

---
 test/test_cuda.py | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/test/test_cuda.py b/test/test_cuda.py
index 3d52c99df856..e847f372d5c4 100644
--- a/test/test_cuda.py
+++ b/test/test_cuda.py
@@ -3496,6 +3496,15 @@ class TestNamedTupleInput_1(NamedTuple):
         self.assertEqual(expected_a, x.a)
         self.assertEqual(expected_b, x.b)
 
+    @unittest.skipIf(not TEST_MULTIGPU, "Test needs multiple GPUs")
+    def test_cuda_device_memory_allocated(self):
+        from torch.cuda import memory_allocated
+        device_count = torch.cuda.device_count()
+        current_alloc = [memory_allocated(idx) for idx in range(device_count)]
+        x = torch.ones(10, device="cuda:0")
+        self.assertTrue(torch.cuda.memory_allocated(0) > current_alloc[0])
+        self.assertTrue(all(memory_allocated(torch.cuda.device(idx)) == current_alloc[idx] for idx in range(1, device_count)))
+
 
 if __name__ == '__main__':
     run_tests()

From 63bc8ecbc65bc923ee9029eb499e1594f5673064 Mon Sep 17 00:00:00 2001
From: Nikita Shulga
Date: Wed, 27 Jan 2021 17:05:04 -0800
Subject: [PATCH 4/5] Update test

---
 test/test_cuda.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/test_cuda.py b/test/test_cuda.py
index e847f372d5c4..a49ffa5e1bec 100644
--- a/test/test_cuda.py
+++ b/test/test_cuda.py
@@ -3502,7 +3502,7 @@ def test_cuda_device_memory_allocated(self):
         device_count = torch.cuda.device_count()
         current_alloc = [memory_allocated(idx) for idx in range(device_count)]
         x = torch.ones(10, device="cuda:0")
-        self.assertTrue(torch.cuda.memory_allocated(0) > current_alloc[0])
+        self.assertTrue(memory_allocated(0) > current_alloc[0])
         self.assertTrue(all(memory_allocated(torch.cuda.device(idx)) == current_alloc[idx] for idx in range(1, device_count)))
 
 

From 776ed5c371344fedcd780cb8ab599e831ef2f704 Mon Sep 17 00:00:00 2001
From: Nikita Shulga
Date: Wed, 27 Jan 2021 17:10:36 -0800
Subject: [PATCH 5/5] Move the test to TestCuda vs TestCudaComm class

---
 test/test_cuda.py | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/test/test_cuda.py b/test/test_cuda.py
index a49ffa5e1bec..ed1d82a91112 100644
--- a/test/test_cuda.py
+++ b/test/test_cuda.py
@@ -3113,6 +3113,16 @@ def test_batch_norm_gather_stats(self):
         self.assertEqual(mean, torch.ones(3, device='cuda'))
         self.assertEqual(invstd, torch.ones(3, device='cuda'))
 
+    @unittest.skipIf(not TEST_MULTIGPU, "Test needs multiple GPUs")
+    def test_cuda_device_memory_allocated(self):
+        from torch.cuda import memory_allocated
+        device_count = torch.cuda.device_count()
+        current_alloc = [memory_allocated(idx) for idx in range(device_count)]
+        x = torch.ones(10, device="cuda:0")
+        self.assertTrue(memory_allocated(0) > current_alloc[0])
+        self.assertTrue(all(memory_allocated(torch.cuda.device(idx)) == current_alloc[idx] for idx in range(1, device_count)))
+
+
 class TestCudaComm(TestCase):
     def _test_broadcast(self, input):
         if not TEST_MULTIGPU:
@@ -3496,15 +3506,6 @@ class TestNamedTupleInput_1(NamedTuple):
         self.assertEqual(expected_a, x.a)
         self.assertEqual(expected_b, x.b)
 
-    @unittest.skipIf(not TEST_MULTIGPU, "Test needs multiple GPUs")
-    def test_cuda_device_memory_allocated(self):
-        from torch.cuda import memory_allocated
-        device_count = torch.cuda.device_count()
-        current_alloc = [memory_allocated(idx) for idx in range(device_count)]
-        x = torch.ones(10, device="cuda:0")
-        self.assertTrue(memory_allocated(0) > current_alloc[0])
-        self.assertTrue(all(memory_allocated(torch.cuda.device(idx)) == current_alloc[idx] for idx in range(1, device_count)))
-
 if __name__ == '__main__':
     run_tests()
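
Taken together, patch 1 makes the memory introspection helpers safe to call before any
CUDA context exists: memory_stats_as_nested_dict() short-circuits to {}, so the
flattened stats dict is empty and the byte counters fall back to 0 through
dict.get(key, 0) instead of raising. A minimal sketch of the resulting behavior,
assuming a PyTorch build that includes these patches and a process that has not yet
touched the GPU:

    import torch

    # No CUDA context has been created yet, so the stats dict is empty
    # and every byte counter reports 0 rather than raising an error.
    if not torch.cuda.is_initialized():
        print(dict(torch.cuda.memory_stats()))    # {}
        print(torch.cuda.memory_allocated())      # 0
        print(torch.cuda.max_memory_reserved())   # 0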
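
Patch 2 additionally lets a torch.cuda.device context-manager object stand in for a
plain device index, resolved through its .idx attribute; this is the path the new
test exercises. A short usage sketch under the same assumption:

    import torch

    if torch.cuda.is_available():
        x = torch.ones(10, device="cuda:0")   # initializes CUDA, allocates on GPU 0
        dev = torch.cuda.device(0)            # context-manager object; dev.idx == 0
        # An int index and a torch.cuda.device now report the same counter.
        assert torch.cuda.memory_allocated(dev) == torch.cuda.memory_allocated(0)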