Skip to content

Commit

Permalink
Print periodic logs in SSD TBE benchmark (#2580)
Browse files Browse the repository at this point in the history
Summary:

Helps check trends in SSD performance (read/write) over time by logging the last-100-iteration average and the running average every 100 iterations.

Differential Revision: D57219709
  • Loading branch information
pranjalssh authored and facebook-github-bot committed May 13, 2024
1 parent 1466669 commit 1e12a65
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 1 deletion.
7 changes: 7 additions & 0 deletions fbgemm_gpu/bench/bench_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,7 @@ def benchmark_requests(
nvtx_range: str = "",
# Can be used to clear model's stats after warmup for example.
callback_after_warmup: Optional[Callable[[], None]] = None,
periodic_logs: bool = False,
) -> float:
times = []

Expand All @@ -178,6 +179,12 @@ def benchmark_requests(
start_event = torch.cuda.Event(enable_timing=True)
end_event = torch.cuda.Event(enable_timing=True)
for it, req in enumerate(requests):
if periodic_logs and it % 100 == 99:
avg_time = sum(times) / len(times) * 1.0e6
last_100_avg = sum(times[-100:]) / 100 * 1.0e6
logging.info(
f"Iteration [{it}/{len(requests)}]: Last 100: {last_100_avg:.2f} us, Running avg: {avg_time:.2f} us"
)
indices, offsets, weights = req.unpack_3()
if bwd_only:
# Run forward before profiling if does backward only
Expand Down
7 changes: 6 additions & 1 deletion fbgemm_gpu/bench/ssd_table_batched_embeddings_benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -337,14 +337,16 @@ def gen_split_tbe_generator(

# TODO: Adjust cache sets
cache_set = max(T * B * L, 1)
tempdir = tempfile.mkdtemp(prefix=ssd_prefix)
logging.info(f"Using SSD dir: {tempdir}")
tbe_generators = {
"HBM": gen_split_tbe_generator(EmbeddingLocation.DEVICE),
"UVM": gen_split_tbe_generator(EmbeddingLocation.MANAGED),
"UVM_CACHING": gen_split_tbe_generator(EmbeddingLocation.MANAGED_CACHING),
"SSD": lambda: SSDTableBatchedEmbeddingBags(
embedding_specs=[(E, d) for d in Ds],
cache_sets=cache_set,
ssd_storage_directory=tempfile.mkdtemp(prefix=ssd_prefix),
ssd_storage_directory=tempdir,
ssd_cache_location=EmbeddingLocation.MANAGED,
ssd_shards=8,
**common_args,
Expand Down Expand Up @@ -417,11 +419,13 @@ def gen_forward_func(
# Forward
test_name = f"{prefix} Forward"
logging.info(f"Running benchmark: {test_name}")

time_per_iter = benchmark_requests(
requests,
gen_forward_func(emb, feature_requires_grad),
flush_gpu_cache_size_mb=flush_gpu_cache_size_mb,
num_warmups=warmup_runs,
periodic_logs=True,
)

bw = f"{read_write_bytes / time_per_iter / 1.0e9: .2f}"
Expand All @@ -442,6 +446,7 @@ def gen_forward_func(
bwd_only=True,
grad=grad_output,
num_warmups=warmup_runs,
periodic_logs=True,
)

bw = f"{2 * read_write_bytes / time_per_iter / 1.0e9: .2f}"
Expand Down

0 comments on commit 1e12a65

Please sign in to comment.