[Profiler][Easy] Make timestamps in memory timelines be in microseconds (us) (pytorch#112772)

Summary: Convert the timestamps in memory timelines from ns to us.

Test Plan: CI

Differential Revision: D50937241

Pulled By: aaronenyeshi

Pull Request resolved: pytorch#112772
Approved by: https://github.com/anupambhatnagar, https://github.com/davidberard98
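
For reference, the core of the change is a per-event unit conversion: raw timestamps arrive in nanoseconds, and the -1 sentinel (marking pre-existing allocations with no recorded time) must pass through untouched. A minimal standalone sketch of that logic (the `ns_to_us` helper name is illustrative, not part of the patch):

```python
def ns_to_us(t_ns: int) -> int:
    """Convert a nanosecond timestamp to microseconds.

    Mirrors the patched loop below: -1 is a sentinel for pre-existing
    allocations and is preserved rather than divided.
    """
    if t_ns == -1:
        return t_ns
    return int(t_ns / 1000)  # truncating division, as in the patch


assert ns_to_us(-1) == -1
assert ns_to_us(2_500_999) == 2500  # 2,500,999 ns -> 2,500 us
```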
aaronenyeshi authored and pytorchmergebot committed Nov 3, 2023
1 parent 2d5fec4 commit 0d95378
Showing 1 changed file with 13 additions and 5 deletions.
diff --git a/torch/profiler/_memory_profiler.py b/torch/profiler/_memory_profiler.py
--- a/torch/profiler/_memory_profiler.py
+++ b/torch/profiler/_memory_profiler.py
@@ -1003,7 +1003,7 @@ def _coalesce_timeline(self, device_str):
         """
         device = torch.device(device_str)
         times: List[int] = []
-        sizes: List[List[float]] = []
+        sizes: List[List[int]] = []
 
         def update(key, version, delta):
             category = (
@@ -1012,21 +1012,25 @@ def update(key, version, delta):
                 else None
             )
             index = _CATEGORY_TO_INDEX[category] + 1
-            sizes[-1][index] += delta
+            sizes[-1][index] += int(delta)
 
         t_min = -1
         for t, action, (key, version), numbytes in self.timeline:
             if key.device != device:
                 continue
 
+            # Convert timestamps from ns to us, to match trace events.
+            if t != -1:
+                t = int(t / 1000)
+
             # Save the smallest timestamp to populate pre-existing allocs.
             if t_min == -1 or (t < t_min and t > 0):
                 t_min = t
 
             # Handle timestep
-            if not times:
+            if len(times) == 0:
                 times.append(t)
-                sizes.append([0.0] + [0.0 for _ in _CATEGORY_TO_INDEX])
+                sizes.append([0] + [0 for _ in _CATEGORY_TO_INDEX])
 
             elif t != times[-1]:
                 times.append(t)
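
The hunk above also switches the per-category byte counters from float to int. The commit message doesn't spell out why, but one plausible reason: allocation deltas are integral byte counts, and Python floats cannot represent integers exactly above 2**53, so a long-running float accumulator can silently drop small deltas. A quick demonstration (illustrative, not from the patch):

```python
# A 1-byte delta vanishes in float arithmetic once the total passes 2**53.
total_float = float(2**53)
assert total_float + 1.0 == total_float  # delta silently lost

total_int = 2**53
assert total_int + 1 != total_int  # exact with integer counters
```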
@@ -1150,6 +1154,9 @@ def export_memory_timeline_html(
 
         mt = self._coalesce_timeline(device)
         times, sizes = np.array(mt[0]), np.array(mt[1])
+        # For this timeline, start at 0 to match Chrome traces.
+        t_min = min(times)
+        times -= t_min
         stacked = np.cumsum(sizes, axis=1) / 1024**3
         max_memory_allocated = torch.cuda.max_memory_allocated()
         max_memory_reserved = torch.cuda.max_memory_reserved()
@@ -1163,7 +1170,8 @@
             times / 1e3, stacked[:, i], stacked[:, i + 1], color=color, alpha=0.7
         )
         fig.legend(["Unknown" if i is None else i.name for i in _CATEGORY_TO_COLORS])
-        axes.set_xlabel("Time (us)")
+        # Usually training steps are in magnitude of ms.
+        axes.set_xlabel("Time (ms)")
         axes.set_ylabel("Memory (GB)")
         title = "\n\n".join(
             ([title] if title else [])
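
To see both fixes end to end, the timeline can be generated through the public profiler API. A sketch, assuming a CUDA build; `model` and `inputs` are placeholder names for whatever workload you profile:

```python
import torch
from torch.profiler import ProfilerActivity, profile

model = torch.nn.Linear(1024, 1024).cuda()     # placeholder workload
inputs = torch.randn(64, 1024, device="cuda")

with profile(
    activities=[ProfilerActivity.CPU, ProfilerActivity.CUDA],
    profile_memory=True,   # required for the memory timeline
    record_shapes=True,
    with_stack=True,
) as prof:
    model(inputs).sum().backward()

# With this change, the HTML plot's x-axis starts at 0 and is labeled in ms,
# while the underlying timestamps are in us, matching Chrome trace events.
prof.export_memory_timeline("memory_timeline.html", device="cuda:0")
```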
