Skip to content

Commit

Permalink
perf lock: Look up callchain for the contended locks
Browse files Browse the repository at this point in the history
The lock contention tracepoints don't provide lock names.  All we can
do is to get stack traces and show the caller instead.  To minimize
the overhead it's limited to up to 8 stack traces and display the
first non-lock function symbol name as a caller.

  $ perf lock report -F acquired,contended,avg_wait,wait_total

                  Name   acquired  contended     avg wait    total wait

   update_blocked_a...         40         40      3.61 us     144.45 us
   kernfs_fop_open+...          5          5      3.64 us      18.18 us
    _nohz_idle_balance          3          3      2.65 us       7.95 us
   tick_do_update_j...          1          1      6.04 us       6.04 us
    ep_scan_ready_list          1          1      3.93 us       3.93 us
  ...

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Acked-by: Ian Rogers <irogers@google.com>
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Waiman Long <longman@redhat.com>
Cc: Will Deacon <will@kernel.org>
Link: https://lore.kernel.org/r/20220615163222.1275500-8-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
  • Loading branch information
namhyung authored and acmel committed Jul 12, 2022
1 parent 7cb2a53 commit 0d2997f
Showing 1 changed file with 156 additions and 4 deletions.
160 changes: 156 additions & 4 deletions tools/perf/builtin-lock.c
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#include "util/symbol.h"
#include "util/thread.h"
#include "util/header.h"
#include "util/callchain.h"

#include <subcmd/pager.h>
#include <subcmd/parse-options.h>
Expand All @@ -19,6 +20,7 @@
#include "util/tool.h"
#include "util/data.h"
#include "util/string2.h"
#include "util/map.h"

#include <sys/types.h>
#include <sys/prctl.h>
Expand All @@ -32,6 +34,7 @@
#include <linux/kernel.h>
#include <linux/zalloc.h>
#include <linux/err.h>
#include <linux/stringify.h>

static struct perf_session *session;

Expand Down Expand Up @@ -120,6 +123,24 @@ static struct rb_root thread_stats;
static bool combine_locks;
static bool show_thread_stats;

/*
* CONTENTION_STACK_DEPTH
* Number of stack trace entries to find callers
*/
#define CONTENTION_STACK_DEPTH 8

/*
* CONTENTION_STACK_SKIP
* Number of stack trace entries to skip when finding callers.
* The first few entries belong to the locking implementation itself.
*/
#define CONTENTION_STACK_SKIP 3

static u64 sched_text_start;
static u64 sched_text_end;
static u64 lock_text_start;
static u64 lock_text_end;

static struct thread_stat *thread_stat_find(u32 tid)
{
struct rb_node *node;
Expand Down Expand Up @@ -839,6 +860,116 @@ static int report_lock_release_event(struct evsel *evsel,
return 0;
}

static bool is_lock_function(u64 addr)
{
if (!sched_text_start) {
struct machine *machine = &session->machines.host;
struct map *kmap;
struct symbol *sym;

sym = machine__find_kernel_symbol_by_name(machine,
"__sched_text_start",
&kmap);
if (!sym) {
/* to avoid retry */
sched_text_start = 1;
return false;
}

sched_text_start = kmap->unmap_ip(kmap, sym->start);

/* should not fail from here */
sym = machine__find_kernel_symbol_by_name(machine,
"__sched_text_end",
&kmap);
sched_text_end = kmap->unmap_ip(kmap, sym->start);

sym = machine__find_kernel_symbol_by_name(machine,
"__lock_text_start",
&kmap);
lock_text_start = kmap->unmap_ip(kmap, sym->start);

sym = machine__find_kernel_symbol_by_name(machine,
"__lock_text_end",
&kmap);
lock_text_start = kmap->unmap_ip(kmap, sym->start);
}

/* failed to get kernel symbols */
if (sched_text_start == 1)
return false;

/* mutex and rwsem functions are in sched text section */
if (sched_text_start <= addr && addr < sched_text_end)
return true;

/* spinlock functions are in lock text section */
if (lock_text_start <= addr && addr < lock_text_end)
return true;

return false;
}

static int lock_contention_caller(struct evsel *evsel, struct perf_sample *sample,
char *buf, int size)
{
struct thread *thread;
struct callchain_cursor *cursor = &callchain_cursor;
struct symbol *sym;
int skip = 0;
int ret;

/* lock names will be replaced to task name later */
if (show_thread_stats)
return -1;

thread = machine__findnew_thread(&session->machines.host,
-1, sample->pid);
if (thread == NULL)
return -1;

/* use caller function name from the callchain */
ret = thread__resolve_callchain(thread, cursor, evsel, sample,
NULL, NULL, CONTENTION_STACK_DEPTH);
if (ret != 0) {
thread__put(thread);
return -1;
}

callchain_cursor_commit(cursor);
thread__put(thread);

while (true) {
struct callchain_cursor_node *node;

node = callchain_cursor_current(cursor);
if (node == NULL)
break;

/* skip first few entries - for lock functions */
if (++skip <= CONTENTION_STACK_SKIP)
goto next;

sym = node->ms.sym;
if (sym && !is_lock_function(node->ip)) {
struct map *map = node->ms.map;
u64 offset;

offset = map->map_ip(map, node->ip) - sym->start;

if (offset)
scnprintf(buf, size, "%s+%#lx", sym->name, offset);
else
strlcpy(buf, sym->name, size);
return 0;
}

next:
callchain_cursor_advance(cursor);
}
return -1;
}

static int report_lock_contention_begin_event(struct evsel *evsel,
struct perf_sample *sample)
{
Expand All @@ -850,9 +981,18 @@ static int report_lock_contention_begin_event(struct evsel *evsel,
if (show_thread_stats)
addr = sample->tid;

ls = lock_stat_findnew(addr, "No name");
if (!ls)
return -ENOMEM;
ls = lock_stat_find(addr);
if (!ls) {
char buf[128];
const char *caller = buf;

if (lock_contention_caller(evsel, sample, buf, sizeof(buf)) < 0)
caller = "Unknown";

ls = lock_stat_findnew(addr, caller);
if (!ls)
return -ENOMEM;
}

ts = thread_stat_findnew(sample->tid);
if (!ts)
Expand Down Expand Up @@ -1233,6 +1373,7 @@ static int __cmd_report(bool display_info)
struct perf_tool eops = {
.sample = process_sample_event,
.comm = perf_event__process_comm,
.mmap = perf_event__process_mmap,
.namespaces = perf_event__process_namespaces,
.ordered_events = true,
};
Expand All @@ -1248,6 +1389,8 @@ static int __cmd_report(bool display_info)
return PTR_ERR(session);
}

/* for lock function check */
symbol_conf.sort_by_name = true;
symbol__init(&session->header.env);

if (!perf_session__has_traces(session, "lock record"))
Expand Down Expand Up @@ -1292,8 +1435,12 @@ static int __cmd_record(int argc, const char **argv)
const char *record_args[] = {
"record", "-R", "-m", "1024", "-c", "1", "--synth", "task",
};
const char *callgraph_args[] = {
"--call-graph", "fp," __stringify(CONTENTION_STACK_DEPTH),
};
unsigned int rec_argc, i, j, ret;
unsigned int nr_tracepoints;
unsigned int nr_callgraph_args = 0;
const char **rec_argv;
bool has_lock_stat = true;

Expand All @@ -1318,8 +1465,10 @@ static int __cmd_record(int argc, const char **argv)
}
}

nr_callgraph_args = ARRAY_SIZE(callgraph_args);

setup_args:
rec_argc = ARRAY_SIZE(record_args) + argc - 1;
rec_argc = ARRAY_SIZE(record_args) + nr_callgraph_args + argc - 1;

if (has_lock_stat)
nr_tracepoints = ARRAY_SIZE(lock_tracepoints);
Expand Down Expand Up @@ -1351,6 +1500,9 @@ static int __cmd_record(int argc, const char **argv)
rec_argv[i++] = ev_name;
}

for (j = 0; j < nr_callgraph_args; j++, i++)
rec_argv[i] = callgraph_args[j];

for (j = 1; j < (unsigned int)argc; j++, i++)
rec_argv[i] = argv[j];

Expand Down

0 comments on commit 0d2997f

Please sign in to comment.