From d9666fb4ef9f83b35eb084b3de964ee352cf4bd6 Mon Sep 17 00:00:00 2001
From: Nikita Shulga
Date: Tue, 26 Jan 2021 20:07:19 -0800
Subject: [PATCH 1/5] torch.cuda.memory_allocated to return `{}` if not
 initialized

Fixes #49952
---
 torch/cuda/memory.py | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/torch/cuda/memory.py b/torch/cuda/memory.py
index c0bde95de741..102ca1cb2e9f 100644
--- a/torch/cuda/memory.py
+++ b/torch/cuda/memory.py
@@ -193,6 +193,8 @@ def _recurse_add_to_result(prefix, obj):
 
 def memory_stats_as_nested_dict(device: Union[Device, int] = None) -> Dict[str, Any]:
     r"""Returns the result of :func:`~torch.cuda.memory_stats` as a nested dictionary."""
+    if not is_initialized():
+        return {}
     device = _get_device_index(device, optional=True)
     return torch._C._cuda_memoryStats(device)
 
@@ -303,7 +305,7 @@ def memory_allocated(device: Union[Device, int] = None) -> int:
         needs to be created on GPU. See :ref:`cuda-memory-management` for more
         details about GPU memory management.
     """
-    return memory_stats(device=device)["allocated_bytes.all.current"]
+    return memory_stats(device=device).get("allocated_bytes.all.current", 0)
 
 
 def max_memory_allocated(device: Union[Device, int] = None) -> int:
@@ -325,7 +327,7 @@ def max_memory_allocated(device: Union[Device, int] = None) -> int:
         See :ref:`cuda-memory-management` for more details about GPU memory
         management.
     """
-    return memory_stats(device=device)["allocated_bytes.all.peak"]
+    return memory_stats(device=device).get("allocated_bytes.all.peak", 0)
 
 
 def memory_reserved(device: Union[Device, int] = None) -> int:
@@ -341,7 +343,7 @@ def memory_reserved(device: Union[Device, int] = None) -> int:
         See :ref:`cuda-memory-management` for more details about GPU memory
         management.
     """
-    return memory_stats(device=device)["reserved_bytes.all.current"]
+    return memory_stats(device=device).get("reserved_bytes.all.current", 0)
 
 
 def max_memory_reserved(device: Union[Device, int] = None) -> int:
@@ -363,7 +365,7 @@ def max_memory_reserved(device: Union[Device, int] = None) -> int:
         See :ref:`cuda-memory-management` for more details about GPU memory
         management.
     """
-    return memory_stats(device=device)["reserved_bytes.all.peak"]
+    return memory_stats(device=device).get("reserved_bytes.all.peak", 0)
 
 
 def memory_cached(device: Union[Device, int] = None) -> int:

From 7e0d81891a08cf10aed283bfe4e53fd20d00d175 Mon Sep 17 00:00:00 2001
From: Nikita Shulga
Date: Wed, 27 Jan 2021 10:11:22 -0800
Subject: [PATCH 2/5] Add handling for torch.cuda.device as an argument

---
 torch/cuda/_utils.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/torch/cuda/_utils.py b/torch/cuda/_utils.py
index 8f4105623a98..7f6b70a037bb 100644
--- a/torch/cuda/_utils.py
+++ b/torch/cuda/_utils.py
@@ -29,6 +29,8 @@ def _get_device_index(device: Union[Device, str, int, None], optional: bool = Fa
                 raise ValueError('Expected a cuda or cpu device, but got: {}'.format(device))
         elif device.type != 'cuda':
             raise ValueError('Expected a cuda device, but got: {}'.format(device))
+    if isinstance(device, torch.cuda.device):
+        return device.idx
     return _torch_get_device_index(device, optional, allow_cpu)
 
 

From 8f3a3b063a3fc9f09a33c717e83f8c0f63109ecb Mon Sep 17 00:00:00 2001
From: Nikita Shulga
Date: Wed, 27 Jan 2021 14:17:33 -0800
Subject: [PATCH 3/5] Added test that verifies torch.cuda.memory_allocated
 works with torch.cuda.device

---
 test/test_cuda.py | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/test/test_cuda.py b/test/test_cuda.py
index 3d52c99df856..e847f372d5c4 100644
--- a/test/test_cuda.py
+++ b/test/test_cuda.py
@@ -3496,6 +3496,15 @@ class TestNamedTupleInput_1(NamedTuple):
         self.assertEqual(expected_a, x.a)
         self.assertEqual(expected_b, x.b)
 
+    @unittest.skipIf(not TEST_MULTIGPU, "Test needs multiple GPUs")
+    def test_cuda_device_memory_allocated(self):
+        from torch.cuda import memory_allocated
+        device_count = torch.cuda.device_count()
+        current_alloc = [memory_allocated(idx) for idx in range(device_count)]
+        x = torch.ones(10, device="cuda:0")
+        self.assertTrue(torch.cuda.memory_allocated(0) > current_alloc[0])
+        self.assertTrue(all(memory_allocated(torch.cuda.device(idx)) == current_alloc[idx] for idx in range(1, device_count)))
+
 
 if __name__ == '__main__':
     run_tests()

From 63bc8ecbc65bc923ee9029eb499e1594f5673064 Mon Sep 17 00:00:00 2001
From: Nikita Shulga
Date: Wed, 27 Jan 2021 17:05:04 -0800
Subject: [PATCH 4/5] Update test

---
 test/test_cuda.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/test_cuda.py b/test/test_cuda.py
index e847f372d5c4..a49ffa5e1bec 100644
--- a/test/test_cuda.py
+++ b/test/test_cuda.py
@@ -3502,7 +3502,7 @@ def test_cuda_device_memory_allocated(self):
         device_count = torch.cuda.device_count()
         current_alloc = [memory_allocated(idx) for idx in range(device_count)]
         x = torch.ones(10, device="cuda:0")
-        self.assertTrue(torch.cuda.memory_allocated(0) > current_alloc[0])
+        self.assertTrue(memory_allocated(0) > current_alloc[0])
         self.assertTrue(all(memory_allocated(torch.cuda.device(idx)) == current_alloc[idx] for idx in range(1, device_count)))
 
 

From 776ed5c371344fedcd780cb8ab599e831ef2f704 Mon Sep 17 00:00:00 2001
From: Nikita Shulga
Date: Wed, 27 Jan 2021 17:10:36 -0800
Subject: [PATCH 5/5] Move the test to TestCuda vs TestCudaComm class

---
 test/test_cuda.py | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/test/test_cuda.py b/test/test_cuda.py
index a49ffa5e1bec..ed1d82a91112 100644
--- a/test/test_cuda.py
+++ b/test/test_cuda.py
@@ -3113,6 +3113,16 @@ def test_batch_norm_gather_stats(self):
         self.assertEqual(mean, torch.ones(3, device='cuda'))
         self.assertEqual(invstd, torch.ones(3, device='cuda'))
 
+    @unittest.skipIf(not TEST_MULTIGPU, "Test needs multiple GPUs")
+    def test_cuda_device_memory_allocated(self):
+        from torch.cuda import memory_allocated
+        device_count = torch.cuda.device_count()
+        current_alloc = [memory_allocated(idx) for idx in range(device_count)]
+        x = torch.ones(10, device="cuda:0")
+        self.assertTrue(memory_allocated(0) > current_alloc[0])
+        self.assertTrue(all(memory_allocated(torch.cuda.device(idx)) == current_alloc[idx] for idx in range(1, device_count)))
+
+
 class TestCudaComm(TestCase):
     def _test_broadcast(self, input):
         if not TEST_MULTIGPU:
@@ -3496,15 +3506,6 @@ class TestNamedTupleInput_1(NamedTuple):
         self.assertEqual(expected_a, x.a)
         self.assertEqual(expected_b, x.b)
 
-    @unittest.skipIf(not TEST_MULTIGPU, "Test needs multiple GPUs")
-    def test_cuda_device_memory_allocated(self):
-        from torch.cuda import memory_allocated
-        device_count = torch.cuda.device_count()
-        current_alloc = [memory_allocated(idx) for idx in range(device_count)]
-        x = torch.ones(10, device="cuda:0")
-        self.assertTrue(memory_allocated(0) > current_alloc[0])
-        self.assertTrue(all(memory_allocated(torch.cuda.device(idx)) == current_alloc[idx] for idx in range(1, device_count)))
-
 if __name__ == '__main__':
     run_tests()
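
Taken together, patch 1 makes the memory introspection helpers safe to call before any
CUDA context exists: memory_stats_as_nested_dict() short-circuits to {}, so the
flattened stats dict is empty and the byte counters fall back to 0 through
dict.get(key, 0) instead of raising. A minimal sketch of the resulting behavior,
assuming a PyTorch build that includes these patches and a process that has not yet
touched the GPU:

    import torch

    # No CUDA context has been created yet, so the stats dict is empty
    # and every byte counter reports 0 rather than raising an error.
    if not torch.cuda.is_initialized():
        print(dict(torch.cuda.memory_stats()))    # {}
        print(torch.cuda.memory_allocated())      # 0
        print(torch.cuda.max_memory_reserved())   # 0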
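
Patch 2 additionally lets a torch.cuda.device context-manager object stand in for a
plain device index, resolved through its .idx attribute; this is the path the new
test exercises. A short usage sketch under the same assumption:

    import torch

    if torch.cuda.is_available():
        x = torch.ones(10, device="cuda:0")   # initializes CUDA, allocates on GPU 0
        dev = torch.cuda.device(0)            # context-manager object; dev.idx == 0
        # An int index and a torch.cuda.device now report the same counter.
        assert torch.cuda.memory_allocated(dev) == torch.cuda.memory_allocated(0)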