Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Profiler] iterate frontend function events for profiler post processing #124596

Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
27 changes: 19 additions & 8 deletions torch/autograd/profiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -467,8 +467,13 @@ def _device_memory_usage(mem_record):
else 0
)

# Create and return FunctionEvent list
function_events = []
# Create and return FunctionEvent list, which contains all function events
# Here 2 lists of function events are created:
# all_function_events contains all events associated with each kineto event from result
all_function_events = []
# frontend_function_events contains the events at the aten or torch frontend level,
# whose correlation id is 0
frontend_function_events = []
device_corr_map: Dict[int, List[FunctionEvent]] = {}
max_evt_id = 0
for kineto_event in result.events():
Expand Down Expand Up @@ -532,15 +537,21 @@ def _device_memory_usage(mem_record):
if cuda_time > 0:
fe.append_kernel(fe.name, fe.device_index, cuda_time)
fe.is_legacy = True
function_events.append(fe)
all_function_events.append(fe)
corr_id = kineto_event.linked_correlation_id()
if corr_id > 0:
if corr_id not in device_corr_map:
device_corr_map[corr_id] = []
device_corr_map[corr_id].append(fe)
elif corr_id == 0:
frontend_function_events.append(fe)
else:
raise RuntimeError(
f"Got negative correlation id {corr_id} in profiler post processing"
)

# associate device kernels and device runtime (CPU) with CPU events
for fe in function_events:
for fe in frontend_function_events:
if (
fe.device_type == DeviceType.CPU
and not fe.is_async
Expand Down Expand Up @@ -587,17 +598,17 @@ def createFunctionEventForMemoryEvents(evt):
if not mem_record[1]:
max_evt_id += 1
fe = createFunctionEventForMemoryEvents(mem_record[0])
function_events.append(fe)
all_function_events.append(fe)

for oom_record in oom_records:
max_evt_id += 1
fe = createFunctionEventForMemoryEvents(oom_record)
function_events.append(fe)
all_function_events.append(fe)

function_events.sort(
all_function_events.sort(
key=lambda evt: [evt.time_range.start, -evt.time_range.end]
)
return function_events
return all_function_events


class record_function(_ContextDecorator):
Expand Down