[Profiler] Include more uncategorized events in memory profile (#101200)
Summary: This PR adds handling for allocations and frees that cannot be proven to belong to Tensors (and which are therefore never assigned an ID). These events are still important for judging overall memory utilization.

Test Plan: CI and unit tests.

Differential Revision: D45458885

Pulled By: aaronenyeshi

Pull Request resolved: #101200
Approved by: https://github.com/anupambhatnagar, https://github.com/davidberard98
aaronenyeshi authored and pytorchmergebot committed Jun 8, 2023
1 parent 675f259 commit 2a4fa25
Showing 2 changed files with 89 additions and 11 deletions.
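A minimal sketch of how the change surfaces, modeled on the new unit test below. Note that `_memory_profile()` and its `timeline` property are private interfaces, so this is illustrative rather than a stable API:

```python
import torch
from torch.profiler import profile

with profile() as prof:
    # `y` is allocated and freed inside the profiled region but never used
    # by an op, so the profiler cannot prove the storage belongs to a Tensor
    # and assigns it no ID.
    y = torch.empty((64,))
    del y

# Each timeline entry is (time_ns, Action, (key, version), size_in_bytes).
# After this patch, the events for `y` appear with a plain `Key` instead of
# being dropped from the timeline.
for _, action, _, size in prof._memory_profile().timeline:
    print(action, size)
```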
48 changes: 48 additions & 0 deletions test/profiler/test_memory_profiler.py
@@ -1552,6 +1552,54 @@ def id_for_testing(key):
             destroy GRADIENT 17(v0) 2 kB
             destroy GRADIENT 13(v0) 1024 kB""")
 
+    def test_memory_timeline_no_id(self) -> None:
+        # On CPU the default behavior is to simply forward to malloc. That
+        # means that when we free `x` the allocator doesn't actually know how
+        # many bytes are in the allocation, and thus there's no point to
+        # calling `c10::reportMemoryUsageToProfiler`. So in order to test that
+        # memory profiler processes this case correctly we need to use CUDA
+        # where we do always keep a record.
+        x = torch.ones((1024,), device="cuda" if torch.cuda.is_available() else "cpu")
+
+        with profile() as prof:
+            # We never see `x` used so we don't know the storage is for a
+            # Tensor, but we do still see the free event.
+            del x
+
+            # For empty we see the allocation and free, but not any use.
+            # So this also cannot be identified as a Tensor.
+            y = torch.empty((64,))
+            del y
+
+            z = torch.empty((256,))
+            z.view_as(z)  # Show `z` to the profiler
+            del z
+
+        memory_profile = prof._memory_profile()
+
+        expected = [
+            # x
+            (_memory_profiler.Action.PREEXISTING, 4096),
+            (_memory_profiler.Action.DESTROY, 4096),
+            #
+            # y
+            (_memory_profiler.Action.CREATE, 256),
+            (_memory_profiler.Action.DESTROY, 256),
+            #
+            # z
+            (_memory_profiler.Action.CREATE, 1024),
+            (_memory_profiler.Action.DESTROY, 1024),
+        ]
+
+        # See above.
+        if not torch.cuda.is_available():
+            expected = expected[2:]
+
+        self.assertEqual(
+            [(action, size) for _, action, _, size in memory_profile.timeline],
+            expected,
+        )
+
 
 if __name__ == "__main__":
     run_tests()
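The expected sizes follow from the default float32 dtype at 4 bytes per element: 1024 × 4 = 4096 bytes for `x`, 64 × 4 = 256 bytes for `y`, and 256 × 4 = 1024 bytes for `z`. `x` appears as `PREEXISTING` rather than `CREATE` because it was allocated before profiling started, so only its free event falls inside the profiled region.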
52 changes: 41 additions & 11 deletions torch/profiler/_memory_profiler.py
@@ -30,6 +30,9 @@
 from torch._utils import _element_size
 from torch.profiler import _utils
 
+from typing_extensions import Literal
+
+KeyAndID = Tuple["Key", int]
 TensorAndID = Tuple["TensorKey", int]
 
 log = logging.getLogger(__name__)
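`Literal` is imported for the `live_unknown` map added to `timeline` below: a dict whose values are always `True`, used only for membership tracking (`in` and `pop`) of storages that were allocated but never tied to a Tensor.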
@@ -63,6 +66,11 @@ class Action(enum.Enum):
     DESTROY = enum.auto()
 
 
+@dataclasses.dataclass(eq=True, unsafe_hash=False, frozen=True)
+class Key:
+    device: torch.device
+
+
 @dataclasses.dataclass
 class _Storage:
     """Bundle storage pointer and id.
@@ -85,7 +93,7 @@ def __hash__(self) -> int:
 
 
 @dataclasses.dataclass(eq=True, unsafe_hash=True, frozen=True)
-class TensorKey:
+class TensorKey(Key):
     """Hashable identifier for a storage which has been asigned an ID.
 
     A detailed description of Tensor IDs and why they are needed is given in
@@ -97,7 +105,6 @@ class TensorKey:
 
     id: int
     storage: _Storage
-    device: torch.device
 
     def __repr__(self) -> str:
         return f"id={self.id}: {repr(self.storage):<24} ({self.device})"
@@ -117,7 +124,7 @@ def _make(
             and storage_ptr is not None
             and allocation_id is not None
         ):
-            return TensorKey(tensor_id, _Storage(storage_ptr, allocation_id), device)
+            return TensorKey(device, tensor_id, _Storage(storage_ptr, allocation_id))
        return None

    @classmethod
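Moving `device` into the new `Key` base class is why the `TensorKey` constructor's argument order changes above: dataclass inheritance places base-class fields before subclass fields in the generated `__init__`. A standalone sketch of the pattern (the names here are illustrative, not from the patch):

```python
import dataclasses

@dataclasses.dataclass(frozen=True)
class Base:
    device: str

@dataclasses.dataclass(frozen=True)
class Derived(Base):
    ident: int

# Base-class fields come first in the generated __init__, so the call is
# Derived(<base fields>, <subclass fields>).
d = Derived("cpu", 42)
assert (d.device, d.ident) == ("cpu", 42)
```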
@@ -633,7 +640,9 @@ def setdefault_by_version(
     ) -> None:
         self._values[key.id].by_version.setdefault((key, version), category)
 
-    def get(self, key: TensorKey, version: int) -> Optional[Category]:
+    def get(self, key: Key, version: int) -> Optional[Category]:
+        if isinstance(key, Key) and not isinstance(key, TensorKey):
+            return None
         element = self._values[key.id]
         return (
             element.by_id
@@ -658,15 +667,34 @@ def __init__(self, result: _ProfilerResult) -> None:
         self._set_autograd_detail()
 
     @property
-    def timeline(self) -> Tuple[Tuple[int, Action, TensorAndID, int], ...]:
+    def timeline(self) -> Tuple[Tuple[int, Action, KeyAndID, int], ...]:
+        output: List[Tuple[int, Action, KeyAndID, int]] = []
         allocation_times: Dict[Tuple[TensorKey, bool], int] = {}
+        live_unknown: Dict[Tuple[int, torch.device], Literal[True]] = {}
         for event in self._op_tree.dfs():
             if event.typed[0] == _EventType.Allocation:
                 alloc_fields = event.typed[1]
-                key = TensorKey.from_allocation(alloc_fields)
-                if key is not None:
-                    is_allocation = alloc_fields.alloc_size > 0
-                    allocation_times[(key, is_allocation)] = event.start_time_ns
+                alloc_size = alloc_fields.alloc_size
+                is_allocation = alloc_size > 0
+                t = event.start_time_ns
+
+                tkey = TensorKey.from_allocation(alloc_fields)
+                if tkey is not None:
+                    allocation_times[(tkey, is_allocation)] = t
+
+                else:
+                    key = Key(alloc_fields.device)
+                    ptr_and_device = (alloc_fields.ptr, key.device)
+                    if is_allocation:
+                        if ptr_and_device in live_unknown:
+                            output.append((t, Action.INCREMENT_VERSION, (key, 0), alloc_size))
+                        else:
+                            live_unknown[ptr_and_device] = True
+                            output.append((t, Action.CREATE, (key, 0), alloc_size))
+                    else:
+                        output.append((t, Action.DESTROY, (key, 0), -alloc_size))
+                        if not live_unknown.pop(ptr_and_device, False):
+                            output.append((-1, Action.PREEXISTING, (key, 0), -alloc_size))
 
         snapshot = self._category_snapshot()
         last_version = dict(sorted(snapshot.keys()))
@@ -693,12 +721,14 @@ def timeline(self) -> Tuple[Tuple[int, Action, TensorAndID, int], ...]:
                 t = allocation_times[(key, False)]
                 events.append((t, Action.DESTROY, (key, last_version[key])))
 
-        events.sort(key=lambda x: (x[0], x[1].value))
-        return tuple(
+        output.extend(
             (time, action, (key, version), self._size_map[key])
             for time, action, (key, version) in events
         )
 
+        output.sort(key=lambda x: (x[0], x[1].value))
+        return tuple(output)
 
     def _is_gradient(self, *args, **kwargs) -> bool:
         return self._categories.get(*args, **kwargs) == Category.GRADIENT
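The bookkeeping for unidentified storages can be read in isolation. A simplified sketch of the same matching logic, assuming synthetic `(time, ptr, signed_size)` event tuples rather than the profiler's real event types:

```python
from enum import Enum, auto
from typing import Dict, List, Tuple

class Action(Enum):
    PREEXISTING = auto()
    CREATE = auto()
    INCREMENT_VERSION = auto()
    DESTROY = auto()

def classify(raw_events: List[Tuple[int, int, int]]) -> List[Tuple[int, Action, int]]:
    """Positive sizes are allocations; negative sizes are frees."""
    out: List[Tuple[int, Action, int]] = []
    live: Dict[int, bool] = {}  # ptr -> True while an unidentified block is live
    for t, ptr, size in raw_events:
        if size > 0:
            # An allocation at an already-live pointer is treated as a new version.
            action = Action.INCREMENT_VERSION if ptr in live else Action.CREATE
            live[ptr] = True
            out.append((t, action, size))
        else:
            out.append((t, Action.DESTROY, -size))
            # A free with no matching allocation means the block predates
            # profiling; report it at time -1 so it sorts to the front.
            if not live.pop(ptr, False):
                out.append((-1, Action.PREEXISTING, -size))
    out.sort(key=lambda x: (x[0], x[1].value))
    return out

# A bare free, like `x` in the new test: PREEXISTING (t=-1), then DESTROY (t=10).
print(classify([(10, 0xA0, -4096)]))
```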
