Print periodic logs in SSD TBE benchmark (#2580)

Summary: Helps check trends of SSD perf (read/write) over time.
Differential Revision: D57219709
pytorch · May 13, 2024 · 1e12a65 · 1e12a65
1 parent 1466669
commit 1e12a65
Show file tree

Hide file tree

Showing 2 changed files with 13 additions and 1 deletion.
diff --git a/fbgemm_gpu/bench/bench_utils.py b/fbgemm_gpu/bench/bench_utils.py
@@ -156,6 +156,7 @@ def benchmark_requests(
     nvtx_range: str = "",
     # Can be used to clear model's stats after warmup for example.
     callback_after_warmup: Optional[Callable[[], None]] = None,
+    periodic_logs: bool = False,
 ) -> float:
     times = []
 
@@ -178,6 +179,12 @@ def benchmark_requests(
         start_event = torch.cuda.Event(enable_timing=True)
         end_event = torch.cuda.Event(enable_timing=True)
     for it, req in enumerate(requests):
+        if periodic_logs and it % 100 == 99:
+            avg_time = sum(times) / len(times) * 1.0e6
+            last_100_avg = sum(times[-100:]) / 100 * 1.0e6
+            logging.info(
+                f"Iteration [{it}/{len(requests)}]: Last 100: {last_100_avg:.2f} us, Running avg: {avg_time:.2f} us"
+            )
         indices, offsets, weights = req.unpack_3()
         if bwd_only:
             # Run forward before profiling if does backward only

diff --git a/fbgemm_gpu/bench/ssd_table_batched_embeddings_benchmark.py b/fbgemm_gpu/bench/ssd_table_batched_embeddings_benchmark.py
@@ -337,14 +337,16 @@ def gen_split_tbe_generator(
 
     # TODO: Adjust cache sets
     cache_set = max(T * B * L, 1)
+    tempdir = tempfile.mkdtemp(prefix=ssd_prefix)
+    logging.info(f"Using SSD dir: {tempdir}")
     tbe_generators = {
         "HBM": gen_split_tbe_generator(EmbeddingLocation.DEVICE),
         "UVM": gen_split_tbe_generator(EmbeddingLocation.MANAGED),
         "UVM_CACHING": gen_split_tbe_generator(EmbeddingLocation.MANAGED_CACHING),
         "SSD": lambda: SSDTableBatchedEmbeddingBags(
             embedding_specs=[(E, d) for d in Ds],
             cache_sets=cache_set,
-            ssd_storage_directory=tempfile.mkdtemp(prefix=ssd_prefix),
+            ssd_storage_directory=tempdir,
             ssd_cache_location=EmbeddingLocation.MANAGED,
             ssd_shards=8,
             **common_args,
@@ -417,11 +419,13 @@ def gen_forward_func(
         # Forward
         test_name = f"{prefix} Forward"
         logging.info(f"Running benchmark: {test_name}")
+
         time_per_iter = benchmark_requests(
             requests,
             gen_forward_func(emb, feature_requires_grad),
             flush_gpu_cache_size_mb=flush_gpu_cache_size_mb,
             num_warmups=warmup_runs,
+            periodic_logs=True,
         )
 
         bw = f"{read_write_bytes / time_per_iter / 1.0e9: .2f}"
@@ -442,6 +446,7 @@ def gen_forward_func(
             bwd_only=True,
             grad=grad_output,
             num_warmups=warmup_runs,
+            periodic_logs=True,
         )
 
         bw = f"{2 * read_write_bytes / time_per_iter / 1.0e9: .2f}"