Skip to content

Commit

Permalink
Optimize vis_heap_chunks command (#1678)
Browse files Browse the repository at this point in the history
* Optimize vis_heap_chunks command

This commit optimizes the vis_heap_chunks command by:
1) precalculating bin labels instead of computing them on demand for each chunk
2) calling pwndbg.gdblib.memory.read once instead of twice in a hot loop

For the `vis 2000` command, run while debugging a python3 shell, the first change
alone cut the execution time from almost 20s down to 5s.

Another benchmark, covering both changes 1) and 2), showed an improvement from 19.28s to 4.14s.

The benchmark done is included in this commit in
profiling/benchmark_vis_heap_chunks/ so that it can be reproduced e.g.
to optimize the function further or to reproduce my results.
  • Loading branch information
disconnect3d committed May 2, 2023
1 parent 6ff05bb commit 2f03a90
Show file tree
Hide file tree
Showing 4 changed files with 34 additions and 11 deletions.
2 changes: 2 additions & 0 deletions profiling/benchmark_vis_heap_chunks/README.md
@@ -0,0 +1,2 @@
This benchmark was used to investigate performance problems with the `vis_heap_chunks` command described in https://github.com/pwndbg/pwndbg/issues/1675

2 changes: 2 additions & 0 deletions profiling/benchmark_vis_heap_chunks/bench.sh
@@ -0,0 +1,2 @@
#!/bin/sh
# Benchmark harness for the `vis_heap_chunks` command (see pwndbg issue #1675).
# Launches a trivial python3 process under gdb, breaks at exit so the heap is
# populated, then sources gdbscript.py which profiles a `vis 2000` invocation.
# The interpreter path is quoted so the script survives paths with whitespace.
gdb --batch --ex 'break exit' --ex 'run' --ex 'source gdbscript.py' --args "$(which python3)" -c 'import sys; sys.exit(0)'
9 changes: 9 additions & 0 deletions profiling/benchmark_vis_heap_chunks/gdbscript.py
@@ -0,0 +1,9 @@
# Profile the `vis_heap_chunks` command (see pwndbg issue #1675).
# Sourced by bench.sh inside a gdb session that is stopped at `exit`,
# so the inferior's heap is fully populated when `vis 2000` runs.
import gdb

import pwndbg

pwndbg.profiling.profiler.start()
# to_string=True captures the command output instead of printing it.
result = gdb.execute("vis 2000", to_string=True)
pwndbg.profiling.profiler.stop("profile.prof")

# Save result in case user wants to inspect it
with open("result", "w") as f:
    f.write(result)
32 changes: 21 additions & 11 deletions pwndbg/commands/heap.py
@@ -1,5 +1,7 @@
import argparse
import ctypes
from typing import Dict
from typing import List

import gdb
from tabulate import tabulate
Expand Down Expand Up @@ -914,6 +916,8 @@ def vis_heap_chunks(
>> 1
)

bin_labels_map: Dict[int, List[str]] = bin_labels_mapping(bin_collections)

for c, stop in enumerate(chunk_delims):
color_func = color_funcs[c % len(color_funcs)]

Expand All @@ -940,17 +944,18 @@ def vis_heap_chunks(
if printed % 2 == 0:
out += "\n0x%x" % cursor

cell = pwndbg.gdblib.arch.unpack(pwndbg.gdblib.memory.read(cursor, ptr_size))
data = pwndbg.gdblib.memory.read(cursor, ptr_size)
cell = pwndbg.gdblib.arch.unpack(data)
cell_hex = "\t0x{:0{n}x}".format(cell, n=ptr_size * 2)

out += color_func(cell_hex)
printed += 1

labels.extend(bin_labels(cursor, bin_collections))
labels.extend(bin_labels_map.get(cursor, []))
if cursor == arena.top:
labels.append("Top chunk")

asc += bin_ascii(pwndbg.gdblib.memory.read(cursor, ptr_size))
asc += bin_ascii(data)
if printed % 2 == 0:
out += "\t" + color_func(asc) + ("\t <-- " + ", ".join(labels) if labels else "")
asc = ""
Expand All @@ -975,8 +980,14 @@ def bin_ascii(bs):
return "".join(chr(c) if c in valid_chars else "." for c in bs)


def bin_labels(addr, collections):
labels = []
def bin_labels_mapping(collections):
"""
Returns all potential bin labels for all potential addresses
We precompute all of them because doing this on demand was too slow and inefficient
See #1675 for more details
"""
labels_mapping: Dict[int, List[str]] = {}

for bins in collections:
if not bins:
continue
Expand All @@ -989,14 +1000,13 @@ def bin_labels(addr, collections):
count = "/{:d}".format(b.count) if bins_type == BinType.TCACHE else None
chunks = b.fd_chain
for chunk_addr in chunks:
if addr == chunk_addr:
labels.append(
"{:s}[{:s}][{:d}{}]".format(
bins_type, size, chunks.index(addr), count or ""
)
labels_mapping.setdefault(chunk_addr, []).append(
"{:s}[{:s}][{:d}{}]".format(
bins_type, size, chunks.index(chunk_addr), count or ""
)
)

return labels
return labels_mapping


try_free_parser = argparse.ArgumentParser(
Expand Down

0 comments on commit 2f03a90

Please sign in to comment.