Skip to content

Commit

Permalink
stall-analyser: add output support for flamegraph
Browse files Browse the repository at this point in the history
A typical session looks like this:

```console
$ ./stall-analyser.py --format trace -e scylla/libexec/scylla \
  reactor_stalled_231d31df_2022_2_twcs_3h.log  > out.folded
$ flamegraph.pl out.folded > /tmp/folded.svg
$ xdg-open /tmp/folded.svg
```

Signed-off-by: Kefu Chai <kefu.chai@scylladb.com>
  • Loading branch information
tchaikov committed Apr 3, 2024
1 parent d640fd3 commit 94559c7
Showing 1 changed file with 66 additions and 5 deletions.
71 changes: 66 additions & 5 deletions scripts/stall-analyser.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
import re

import addr2line
from collections import defaultdict
from itertools import chain
from typing import Self


Expand Down Expand Up @@ -45,6 +47,8 @@ def get_command_line_parser():
help='Process only stalls lasting the given time, in milliseconds, or longer')
parser.add_argument('-b', '--branch-threshold', type=float, default=0.03,
help='Drop branches responsible for less than this threshold relative to the previous level, not global. (default 3%%)')
parser.add_argument('--format', choices=['graph', 'trace'], default='graph',
help='The output format, default is %(default)s. `trace` is suitable as input for flamegraph.pl')
parser.add_argument('file', nargs='?',
help='File containing reactor stall backtraces. Read from stdin if missing.')
return parser
Expand Down Expand Up @@ -271,6 +275,58 @@ def _recursive_print_graph(n: Node, total: int = 0, count: int = 0, level: int =
_recursive_print_graph(r)


class StackCollapse:
    """Collapse stall backtraces into single-line, semicolon-joined stacks.

    Each distinct backtrace (a sequence of addresses) is accumulated together
    with the sum of the counts reported for it.  ``print_graph`` then resolves
    the addresses to function names and prints one ``<stack> <count>`` line
    per distinct backtrace, which is the "folded" input that flamegraph.pl
    consumes.
    """

    def __init__(self, resolver: "addr2line.BacktraceResolver"):
        """Store the resolver and prepare the accumulator and line matcher.

        The annotation is quoted so that defining the class does not require
        evaluating ``addr2line`` eagerly.
        """
        self.resolver = resolver
        # track every stack and its total sample count; the key is the
        # ';'-joined list of frames exactly as passed to process_trace()
        self.collapsed = defaultdict(int)
        # to match lines like
        #   (inlined by) position_in_partition::tri_compare::operator() at ././position_in_partition.hh:485
        self.pattern = re.compile(r'''(.+)\s             # function signature
                                      at\s               #
                                      [^:]+:(?:\?|\d+)   # <source>:<line number>''', re.X)

    def process_trace(self, frames: list[str], count: int) -> None:
        """Add ``count`` to the running total of the stack made of ``frames``.

        Each stall report is mapped to a line of perf samples, so the final
        output looks like::

            row_cache::update;row_cache::do_update;row_cache::upgrade_entry 42

        from the outer-most caller to the inner-most callee.  42 is the stall
        time in ms, but it is used as the count of samples.
        """
        self.collapsed[';'.join(frames)] += count

    def _annotate_func(self, line: str) -> str:
        """Extract the function signature from one resolved backtrace line.

        Sample input::

            (inlined by) position_in_partition::tri_compare::operator() at ././position_in_partition.hh:485

        Sample output::

            position_in_partition::tri_compare::operator()_[i]

        Returns an empty string for lines starting with ``??`` (unresolved).
        Raises ``ValueError`` when the line does not have the
        ``<function> at <source>:<line>`` shape.
        """
        if line.startswith("??"):
            return ""

        inlined_prefix = ' (inlined by) '
        inlined = line.startswith(inlined_prefix)
        if inlined:
            line = line[len(inlined_prefix):]

        matched = self.pattern.match(line)
        if matched is None:
            # an explicit raise (rather than ``assert``) keeps this check
            # active when Python runs with -O
            raise ValueError(f"bad line: {line}")
        func = matched.group(1)
        # annotations: mark inlined frames with the "_[i]" suffix
        if inlined:
            func += "_[i]"
        return func

    def _resolve(self, addr: str) -> list[str]:
        """Resolve ``addr`` and return one annotated name per resolved line.

        Entries are empty strings for lines that ``_annotate_func`` maps to
        "" (unresolved frames); callers are expected to drop them.
        """
        lines = self.resolver.resolve_address(addr).splitlines()
        return [self._annotate_func(line) for line in lines]

    def print_graph(self, *_) -> None:
        """Print every collapsed stack as ``<frame>;<frame>;... <count>``.

        Positional arguments are accepted and ignored.  The resolved frames
        are printed in the reverse of the order in which they were resolved.
        """
        for stack, count in self.collapsed.items():
            # filter(None, ...) drops the empty names of unresolved frames
            frames = filter(None,
                            chain.from_iterable(self._resolve(addr)
                                                for addr in stack.split(';')))
            print(';'.join(reversed(list(frames))), count)


def print_stats(tally: dict, tmin: int) -> None:
data = []
total_time = 0
Expand Down Expand Up @@ -339,7 +395,11 @@ def main():
if args.executable:
resolver = addr2line.BacktraceResolver(executable=args.executable,
concise=not args.full_function_names)
graph = Graph(resolver)
if args.format == 'graph':
render = Graph(resolver)
else:
render = StackCollapse(resolver)

for s in input:
if comment.search(s) or not pattern.search(s):
continue
Expand Down Expand Up @@ -377,12 +437,13 @@ def main():
break
tmin = args.minimum or 0
if t >= tmin:
graph.process_trace(trace, t)
render.process_trace(trace, t)

try:
print_command_line_options(args)
print_stats(tally, tmin)
graph.print_graph(args.direction, args.width, args.branch_threshold)
if args.format == 'graph':
print_command_line_options(args)
print_stats(tally, tmin)
render.print_graph(args.direction, args.width, args.branch_threshold)
except BrokenPipeError:
pass

Expand Down

0 comments on commit 94559c7

Please sign in to comment.