Skip to content

Commit

Permalink
Optimize vis_heap_chunks command (#1678)
Browse files Browse the repository at this point in the history
* Optimize vis_heap_chunks command

This commit optimizes the vis_heap_chunks command by:
1) precalculating bin labels instead of computing them on demand for each chunk
2) calling pwndbg.gdblib.memory.read once instead of twice in a hot loop

For the `vis 2000` command, run while debugging a python3 shell, the first change
alone cut the execution time from almost 20s down to 5s.

Another benchmark, covering both changes 1) and 2), showed an improvement from 19.28s to 4.14s.

The benchmark done is included in this commit in
profiling/benchmark_vis_heap_chunks/ so that it can be reproduced e.g.
to optimize the function further or to reproduce my results.
  • Loading branch information
disconnect3d committed May 2, 2023
1 parent 6ff05bb commit 2f03a90
Show file tree
Hide file tree
Showing 4 changed files with 34 additions and 11 deletions.
2 changes: 2 additions & 0 deletions profiling/benchmark_vis_heap_chunks/README.md
@@ -0,0 +1,2 @@
This benchmark was used to investigate performance problems with the `vis_heap_chunks` command described in https://github.com/pwndbg/pwndbg/issues/1675

2 changes: 2 additions & 0 deletions profiling/benchmark_vis_heap_chunks/bench.sh
@@ -0,0 +1,2 @@
#!/bin/sh
# Benchmark harness for the `vis_heap_chunks` command (see pwndbg issue #1675).
# Launches a trivial python3 process under gdb, breaks at exit so the heap is
# populated, then sources gdbscript.py which profiles a `vis 2000` invocation.
# The interpreter path is quoted so the script survives paths with whitespace.
gdb --batch --ex 'break exit' --ex 'run' --ex 'source gdbscript.py' --args "$(which python3)" -c 'import sys; sys.exit(0)'
9 changes: 9 additions & 0 deletions profiling/benchmark_vis_heap_chunks/gdbscript.py
@@ -0,0 +1,9 @@
# Profile the `vis_heap_chunks` command (see pwndbg issue #1675).
# Sourced by bench.sh inside a gdb session that is stopped at `exit`,
# so the inferior's heap is fully populated when `vis 2000` runs.
import gdb

import pwndbg

pwndbg.profiling.profiler.start()
# to_string=True captures the command output instead of printing it.
result = gdb.execute("vis 2000", to_string=True)
pwndbg.profiling.profiler.stop("profile.prof")

# Save result in case user wants to inspect it
with open("result", "w") as f:
    f.write(result)
32 changes: 21 additions & 11 deletions pwndbg/commands/heap.py
@@ -1,5 +1,7 @@
import argparse
import ctypes
from typing import Dict
from typing import List

import gdb
from tabulate import tabulate
Expand Down Expand Up @@ -914,6 +916,8 @@ def vis_heap_chunks(
>> 1
)

bin_labels_map: Dict[int, List[str]] = bin_labels_mapping(bin_collections)

for c, stop in enumerate(chunk_delims):
color_func = color_funcs[c % len(color_funcs)]

Expand All @@ -940,17 +944,18 @@ def vis_heap_chunks(
if printed % 2 == 0:
out += "\n0x%x" % cursor

cell = pwndbg.gdblib.arch.unpack(pwndbg.gdblib.memory.read(cursor, ptr_size))
data = pwndbg.gdblib.memory.read(cursor, ptr_size)
cell = pwndbg.gdblib.arch.unpack(data)
cell_hex = "\t0x{:0{n}x}".format(cell, n=ptr_size * 2)

out += color_func(cell_hex)
printed += 1

labels.extend(bin_labels(cursor, bin_collections))
labels.extend(bin_labels_map.get(cursor, []))
if cursor == arena.top:
labels.append("Top chunk")

asc += bin_ascii(pwndbg.gdblib.memory.read(cursor, ptr_size))
asc += bin_ascii(data)
if printed % 2 == 0:
out += "\t" + color_func(asc) + ("\t <-- " + ", ".join(labels) if labels else "")
asc = ""
Expand All @@ -975,8 +980,14 @@ def bin_ascii(bs):
return "".join(chr(c) if c in valid_chars else "." for c in bs)


def bin_labels(addr, collections):
labels = []
def bin_labels_mapping(collections):
"""
Returns all potential bin labels for all potential addresses
We precompute all of them because doing this on demand was too slow and inefficient
See #1675 for more details
"""
labels_mapping: Dict[int, List[str]] = {}

for bins in collections:
if not bins:
continue
Expand All @@ -989,14 +1000,13 @@ def bin_labels(addr, collections):
count = "/{:d}".format(b.count) if bins_type == BinType.TCACHE else None
chunks = b.fd_chain
for chunk_addr in chunks:
if addr == chunk_addr:
labels.append(
"{:s}[{:s}][{:d}{}]".format(
bins_type, size, chunks.index(addr), count or ""
)
labels_mapping.setdefault(chunk_addr, []).append(
"{:s}[{:s}][{:d}{}]".format(
bins_type, size, chunks.index(chunk_addr), count or ""
)
)

return labels
return labels_mapping


try_free_parser = argparse.ArgumentParser(
Expand Down

0 comments on commit 2f03a90

Please sign in to comment.