diff --git a/test/test_profiler.py b/test/test_profiler.py index 797ad0995913..43b0b1639931 100644 --- a/test/test_profiler.py +++ b/test/test_profiler.py @@ -9,6 +9,8 @@ from torch.autograd.profiler import profile from torch.autograd import kineto_available +import torch.profiler + try: import psutil HAS_PSUTIL = True @@ -129,5 +131,36 @@ def test_kineto(self): self.assertTrue(found_memcpy) # p.export_chrome_trace("/tmp/test_trace.json") + + @unittest.skipIf(not kineto_available(), "Kineto is required") + @unittest.skipIf(not torch.cuda.is_available(), "CUDA is required") + def test_profiler_kineto_api(self): + called_num = [0] + def test_output_fn(p): + print(p.key_averages().table( + sort_by="self_cuda_time_total", row_limit=-1)) + # p.export_chrome_trace("/tmp/test_trace_" + str(called_num[0]) + ".json") + called_num[0] += 1 + + with profile(use_cuda=True, use_kineto=True): + self.payload() + + with torch.profiler.profile( + activities=[ + torch.profiler.ProfilerActivity.CPU, + torch.profiler.ProfilerActivity.CUDA], + enable_pred=torch.profiler.EnablePred( + wait=1, + warmup=1, + active=2, + output_fn=test_output_fn) + ) as p: + for idx in range(8): + self.payload() + p.next_step() + + self.assertEqual(called_num[0], 2) + + if __name__ == '__main__': run_tests() diff --git a/torch/autograd/profiler.py b/torch/autograd/profiler.py index f416ce3d2f05..ffc937ed4c15 100644 --- a/torch/autograd/profiler.py +++ b/torch/autograd/profiler.py @@ -444,6 +444,15 @@ def __enter__(self): torch.autograd._enable_profiler_legacy(self.config) return self + def _prepare_kineto_trace(self): + assert self.kineto_activities + self.entered = True + torch.autograd._prepare_profiler(self.config, self.kineto_activities) + + def _start_kineto_trace(self): + assert self.kineto_activities + torch.autograd._enable_profiler(self.config, self.kineto_activities) + def __exit__(self, exc_type, exc_val, exc_tb): if not self.enabled: return diff --git a/torch/profiler/__init__.py b/torch/profiler/__init__.py new file mode 100644 index 000000000000..d56213c7fba0 --- /dev/null +++ b/torch/profiler/__init__.py @@ -0,0 +1,6 @@ +# type: ignore +r''' +PyTorch Profiler API +''' + +from .profiler import profile, EnablePred, ProfilerActivity diff --git a/torch/profiler/profiler.py b/torch/profiler/profiler.py new file mode 100644 index 000000000000..f2e7c156eff6 --- /dev/null +++ b/torch/profiler/profiler.py @@ -0,0 +1,188 @@ +import torch.autograd.profiler as prof +from torch.autograd import ProfilerActivity + +from typing import Callable, Iterable, Optional + +class EnablePred(object): + """ + EnablePred describes on which steps profiler is active: + - profiler starts in inactive state and stays in inactive state for the first 'wait' steps + - profiler then enters a warmup state and stays in this state for the next 'warmup' steps + - profiler then starts actively tracing/collecting stats for the next 'active' steps + - after this, profiler returns to the inactive state and cycle repeats + + In case output_fn is specified, it is called every time the trace is ready + """ + class Action(object): + START_WARMUP = 0 + START_TRACE = 1 + STOP_TRACE = 2 + + class State(object): + INACTIVE = 0 + WARMUP = 1 + ACTIVE = 2 + + def __init__(self, wait: int, warmup: int, active: int, output_fn: Optional[Callable[[prof.profile], None]]): + assert wait >= 0 and warmup >= 0 and active > 0 + if warmup == 0: + print("Warning: profiler won't be using a warmup, which can skew profiler results") + self.wait = wait + self.warmup = warmup + self.active = active + self.output_fn = output_fn + def active_active_fn(step): + if self._mod_step(step) == 1: + return [EnablePred.Action.STOP_TRACE, EnablePred.Action.START_WARMUP, EnablePred.Action.START_TRACE] + else: + return [] + def inactive_warmup_fn(_): + assert False, "incorrect state sequence" + + self.actions_map = { + EnablePred.State.ACTIVE: { + EnablePred.State.ACTIVE: active_active_fn, + EnablePred.State.WARMUP: [EnablePred.Action.START_TRACE], + EnablePred.State.INACTIVE: [EnablePred.Action.START_WARMUP, EnablePred.Action.START_TRACE], + }, + EnablePred.State.WARMUP: { + EnablePred.State.ACTIVE: [EnablePred.Action.STOP_TRACE, EnablePred.Action.START_WARMUP], + EnablePred.State.WARMUP: [], + EnablePred.State.INACTIVE: [EnablePred.Action.START_WARMUP], + }, + EnablePred.State.INACTIVE: { + EnablePred.State.ACTIVE: [EnablePred.Action.STOP_TRACE], + EnablePred.State.WARMUP: inactive_warmup_fn, + EnablePred.State.INACTIVE: [], + } + } + + def _mod_step(self, step:int): + sum_states = self.wait + self.warmup + self.active + r = step % sum_states + if r == 0: + r = sum_states + return r + + def _num_state(self, step:int): + mod_step = self._mod_step(step) + if mod_step <= self.wait: + return EnablePred.State.INACTIVE + elif mod_step <= self.wait + self.warmup: + return EnablePred.State.WARMUP + else: + return EnablePred.State.ACTIVE + + def actions(self, step:int): + if step == 1: + st = self._num_state(step) + if st == EnablePred.State.ACTIVE: + return [EnablePred.Action.START_WARMUP, EnablePred.Action.START_TRACE] + elif st == EnablePred.State.WARMUP: + return [EnablePred.Action.START_WARMUP] + else: + return [] + else: + st = self._num_state(step) + prev_st = self._num_state(step - 1) + acts = self.actions_map[st][prev_st] + if callable(acts): + return acts(step) + else: + return acts + + +class profile(object): + """ + PyTorch profiler context manager. + + Arguments: + activities - list of activity groups (CPU, CUDA) + enable_pred (optional) - iteration predicate function, used together with `next_step` call + + Notes: + - profiler is based on the Kineto library - system profiler library, with support for CUPTI tracing + - enable_pred is used for training loop tracing, allowing users to enable profiler on certain + iterations and account for the warmup + - when enable_pred is not set, profiler is always active + - next_step uses record_function api to add information about steps in the trace + """ + def __init__( + self, + activities:Iterable[ProfilerActivity], + enable_pred:Optional[EnablePred] = None, + record_shapes=False, + profile_memory=False, + with_stack=False): + self.activities = activities + self.enable_pred = enable_pred + self.record_shapes = record_shapes + self.profile_memory = profile_memory + self.with_stack = with_stack + self.step_num = 0 + self.profiler = None + self.step_rec_fn = None + + if not self.enable_pred: + print("Warning: using profiler without enable predicate may result in the skewed " + + "results, use enable_pred to control the warmup time") + + def __enter__(self): + self.next_step() + if not self.enable_pred: + self._run_action(EnablePred.Action.START_WARMUP) + self._run_action(EnablePred.Action.START_TRACE) + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + if self.step_rec_fn: + self.step_rec_fn.__exit__(None, None, None) + if self.profiler: + if self.enable_pred: + if self.enable_pred._num_state(self.step_num) == EnablePred.State.WARMUP: + self._run_action(EnablePred.Action.START_TRACE) + self._run_action(EnablePred.Action.STOP_TRACE, keep_profiler=True) + + def next_step(self): + if self.step_rec_fn: + self.step_rec_fn.__exit__(None, None, None) + self.step_num += 1 + if self.enable_pred: + self._run_actions(self.step_num) + + self.step_rec_fn = prof.record_function("ProfilerStep#" + str(self.step_num)) + self.step_rec_fn.__enter__() + + def export_chrome_trace(self, path:str): + assert self.profiler + return self.profiler.export_chrome_trace(path) + + def key_averages(self, group_by_input_shape:bool=False, group_by_stack_n:int=0): + assert self.profiler + return self.profiler.key_averages(group_by_input_shape, group_by_stack_n) + + def _run_actions(self, step_num): + for act in self.enable_pred.actions(self.step_num): + self._run_action(act) + + def _run_action(self, act, keep_profiler=False): + if act == EnablePred.Action.START_WARMUP: + self.profiler = prof.profile( + use_cuda=(ProfilerActivity.CUDA in self.activities), + use_cpu=(ProfilerActivity.CPU in self.activities), + record_shapes=self.record_shapes, + profile_memory=self.profile_memory, + with_stack=self.with_stack, + use_kineto=True, + ) + self.profiler._prepare_kineto_trace() + elif act == EnablePred.Action.START_TRACE: + assert self.profiler is not None + self.profiler._start_kineto_trace() + elif act == EnablePred.Action.STOP_TRACE: + assert self.profiler is not None + self.profiler.__exit__(None, None, None) + if self.enable_pred.output_fn: + self.enable_pred.output_fn(self.profiler) + if not keep_profiler: + self.profiler = None