update profiler doc strings (#51395)
Summary:
Fixes formatting for autograd.profiler doc string (was broken), slightly expands profiler.profile documentation.

Pull Request resolved: #51395

Reviewed By: ilia-cher

Differential Revision: D26162349

Pulled By: ngimel

fbshipit-source-id: ac7af8e0f3dbae2aa899ad815d2311c2758ee57c
ngimel authored and facebook-github-bot committed Jan 30, 2021
1 parent 17b5683 commit e26fccc
Showing 2 changed files with 20 additions and 17 deletions.
19 changes: 9 additions & 10 deletions torch/autograd/profiler.py
@@ -347,11 +347,9 @@ class profile(object):
Args:
enabled (bool, optional): Setting this to False makes this context manager a no-op.
Default: ``True``.
use_cuda (bool, optional): Enables timing of CUDA events as well using the cudaEvent API.
Adds approximately 4us of overhead to each tensor operation.
Default: ``False``
record_shapes (bool, optional): If shapes recording is set, information
about input dimensions will be collected. This allows one to see which
@@ -365,18 +363,18 @@ class profile(object):
collection.
 with_flops (bool, optional): If with_flops is set, the profiler will estimate
-    the FLOPS (floating pointer operations per second) value using the operator's input shape
-    and total CPU time. This allows one to estimate the hardware performance. Currently,
-    this option only works for the GEMM and CONV operator, default: ``False``
+    the FLOPS (floating pointer operations per second) value using the operator's input shape
+    and total CPU time. This allows one to estimate the hardware performance. Currently,
+    this option only works for the matrix multiplication and convolution functions.
-profile_memory (bool, optional): Whether to report memory usage, default: ``False``
+profile_memory (bool, optional): track tensor memory allocation/deallocation.
-with_stack (bool, optional): record source information (file and line number) for the ops
+with_stack (bool, optional): record source information (file and line number) for the ops.
-use_kineto (bool, default False): experimental support for Kineto profiler
+use_kineto (bool, optional): experimental, enable profiling with Kineto profiler.
-use_cpu (default True) - whether to profile CPU events; setting to False requires
-    use_kineto=True and can be used to lower the overhead for GPU-only profiling
+use_cpu (bool, optional): profile CPU events; setting to ``False`` requires
+    ``use_kineto=True`` and can be used to lower the overhead for GPU-only profiling.
.. warning:
Enabling memory profiling or source attribution incurs additional profiler
@@ -414,6 +412,7 @@ class profile(object):
 def __init__(
         self,
         enabled=True,
+        *,
         use_cuda=False,
         record_shapes=False,
         with_flops=False,
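The `*,` added to the signature makes every constructor argument after `enabled` keyword-only. A minimal sketch of that Python pattern, using a toy class (not the real `torch.autograd.profiler.profile`):

```python
class profile:
    # Everything after `*` must be passed by keyword, so a positional
    # call like profile(True, True) no longer silently binds use_cuda.
    def __init__(self, enabled=True, *, use_cuda=False,
                 record_shapes=False, with_flops=False):
        self.enabled = enabled
        self.use_cuda = use_cuda
        self.record_shapes = record_shapes
        self.with_flops = with_flops


# Keyword call works; positional call for use_cuda raises TypeError.
p = profile(True, use_cuda=True)
```

This is why adding `*` is a (deliberate) breaking change for callers who passed these flags positionally.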
18 changes: 11 additions & 7 deletions torch/profiler/profiler.py
@@ -7,15 +7,18 @@


 class ProfilerAction(Enum):
+    """
+    Profiler actions that can be taken at the specified intervals
+    """
     NONE = 0
     WARMUP = 1
     RECORD = 2
     RECORD_AND_SAVE = 3


-def schedule(*, wait: int, warmup: int, active: int):
+def schedule(*, wait: int, warmup: int, active: int) -> Callable:
 """
-Represents profiler behavior: wait for ``wait`` steps, then
+Returns a callable that can be used as profiler ``schedule`` argument. The profiler will wait for ``wait`` steps, then
 do the warmup for the next ``warmup`` steps, then
 do the active recording for the next ``active`` steps and then
 repeat the cycle starting with the next step.
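The wait/warmup/active cycle described in this docstring can be sketched in plain Python. This is an illustrative reimplementation of the documented behavior, not necessarily identical to PyTorch's actual implementation:

```python
from enum import Enum


class ProfilerAction(Enum):
    # Mirrors the actions listed in the diff above
    NONE = 0
    WARMUP = 1
    RECORD = 2
    RECORD_AND_SAVE = 3


def schedule(*, wait: int, warmup: int, active: int):
    """Return a step -> ProfilerAction callable implementing the
    wait/warmup/active cycle, repeating after each full cycle."""
    def fn(step: int) -> ProfilerAction:
        period = wait + warmup + active
        pos = step % period  # position within the current cycle
        if pos < wait:
            return ProfilerAction.NONE
        if pos < wait + warmup:
            return ProfilerAction.WARMUP
        # The last active step also saves the collected trace.
        if pos == period - 1:
            return ProfilerAction.RECORD_AND_SAVE
        return ProfilerAction.RECORD
    return fn


sched = schedule(wait=1, warmup=1, active=2)
# steps 0..4 -> NONE, WARMUP, RECORD, RECORD_AND_SAVE, NONE (cycle repeats)
```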
@@ -52,14 +55,15 @@ class profile(object):
Args:
-- ``activities`` - list of activity groups (CPU, CUDA) to use in profiling;
+- ``activities`` - list of activity groups (CPU, CUDA) to use in profiling, supported values:
+  ``torch.profiler.ProfilerActivity.CPU``, ``torch.profiler.ProfilerActivity.CUDA``
- ``schedule`` - callable that takes step (int) as a single parameter and returns
-  ``ProfilerAction`` value that specifies the profiler action on each step;
-- ``on_trace_ready`` (optional) - callable, called each time the trace is ready
+  ``ProfilerAction`` value that specifies the profiler action to perform at each step;
+- ``on_trace_ready`` - callable that is called at each step when ``schedule`` returns ``ProfilerAction.RECORD_AND_SAVE``
during the profiling;
- ``record_shapes`` - save information about operator's input shapes;
- ``profile_memory`` - track tensor memory allocation/deallocation;
-- ``with_stack`` - save stack traces;
+- ``with_stack`` - record source information (file and line number) for the ops.
- ``use_gpu`` - (deprecated, use ``activities``).
.. note::
@@ -86,7 +90,7 @@ class profile(object):
print(p.key_averages().table(
sort_by="self_cuda_time_total", row_limit=-1))
-Usimg the profiler's ``schedule``, ``on_trace_ready`` and ``step`` functions:
+Using the profiler's ``schedule``, ``on_trace_ready`` and ``step`` functions:
.. code-block:: python
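The interaction among ``schedule``, ``on_trace_ready`` and ``step`` described in this docstring can be illustrated with a pure-Python mock. ``MiniProfiler`` is a hypothetical stand-in, not the ``torch.profiler`` API:

```python
from enum import Enum


class ProfilerAction(Enum):
    NONE = 0
    WARMUP = 1
    RECORD = 2
    RECORD_AND_SAVE = 3


class MiniProfiler:
    """Hypothetical stand-in: step() consults the schedule callable and
    fires on_trace_ready whenever a cycle's trace is complete."""
    def __init__(self, schedule, on_trace_ready=None):
        self.schedule = schedule
        self.on_trace_ready = on_trace_ready
        self.step_num = 0

    def step(self):
        action = self.schedule(self.step_num)
        if action == ProfilerAction.RECORD_AND_SAVE and self.on_trace_ready:
            # In the real profiler this callback receives the finished trace.
            self.on_trace_ready(self)
        self.step_num += 1


saves = []
sched = lambda s: (ProfilerAction.RECORD_AND_SAVE if s % 3 == 2
                   else ProfilerAction.RECORD)
prof = MiniProfiler(sched, on_trace_ready=lambda p: saves.append(p.step_num))
for _ in range(6):
    prof.step()
# saves == [2, 5]: the callback fired on the save step of each cycle
```

The training loop calls ``step()`` once per iteration; the schedule, not the loop, decides when recording and saving happen.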
