Merge branch 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull perf fixes from Thomas Gleixner:
 "A bunch of perf tooling fixes:

   - Make the Intel PT SQL viewer more robust

   - Make the Intel PT debug log more useful

   - Support weak groups in perf record so it behaves the same way as
     perf stat

   - Display the LBR stats in callchain entries properly in perf top

   - Handle different PMU names with common prefix properly in perf
     stat

   - Start syscall augmentation in perf trace, as preparation for
     architecture-independent eBPF instrumentation of syscalls

   - Fix build breakage in JVMTI perf lib

   - Fix arm64 tools build failure wrt smp_load_{acquire,release}"

* 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  perf tools: Do not zero sample_id_all for group members
  perf tools: Fix undefined symbol scnprintf in libperf-jvmti.so
  perf beauty: Use SRCARCH, ARCH=x86_64 must map to "x86" to find the headers
  perf intel-pt: Add MTC and CYC timestamps to debug log
  perf intel-pt: Add more event information to debug log
  perf scripts python: exported-sql-viewer.py: Fix table find when table re-ordered
  perf scripts python: exported-sql-viewer.py: Add help window
  perf scripts python: exported-sql-viewer.py: Add Selected branches report
  perf scripts python: exported-sql-viewer.py: Fall back to /usr/local/lib/libxed.so
  perf top: Display the LBR stats in callchain entry
  perf stat: Handle different PMU names with common prefix
  perf record: Support weak groups
  perf evlist: Move perf_evsel__reset_weak_group into evlist
  perf augmented_syscalls: Start collecting pathnames in the BPF program
  perf trace: Fix setting of augmented payload when using eBPF + raw_syscalls
  perf trace: When augmenting raw_syscalls plug raw_syscalls:sys_exit too
  perf examples bpf: Start augmenting raw_syscalls:sys_{start,exit}
  tools headers barrier: Fix arm64 tools build failure wrt smp_load_{acquire,release}
torvalds committed Nov 11, 2018
2 parents 08b5278 + 45fd808 commit 655c6b9
Showing 19 changed files with 820 additions and 121 deletions.
133 changes: 67 additions & 66 deletions tools/arch/arm64/include/asm/barrier.h
@@ -14,74 +14,75 @@
#define wmb() asm volatile("dmb ishst" ::: "memory")
#define rmb() asm volatile("dmb ishld" ::: "memory")

-#define smp_store_release(p, v) \
-do { \
-union { typeof(*p) __val; char __c[1]; } __u = \
-{ .__val = (__force typeof(*p)) (v) }; \
- \
-switch (sizeof(*p)) { \
-case 1: \
-asm volatile ("stlrb %w1, %0" \
-: "=Q" (*p) \
-: "r" (*(__u8 *)__u.__c) \
-: "memory"); \
-break; \
-case 2: \
-asm volatile ("stlrh %w1, %0" \
-: "=Q" (*p) \
-: "r" (*(__u16 *)__u.__c) \
-: "memory"); \
-break; \
-case 4: \
-asm volatile ("stlr %w1, %0" \
-: "=Q" (*p) \
-: "r" (*(__u32 *)__u.__c) \
-: "memory"); \
-break; \
-case 8: \
-asm volatile ("stlr %1, %0" \
-: "=Q" (*p) \
-: "r" (*(__u64 *)__u.__c) \
-: "memory"); \
-break; \
-default: \
-/* Only to shut up gcc ... */ \
-mb(); \
-break; \
-} \
+#define smp_store_release(p, v) \
+do { \
+union { typeof(*p) __val; char __c[1]; } __u = \
+{ .__val = (v) }; \
+ \
+switch (sizeof(*p)) { \
+case 1: \
+asm volatile ("stlrb %w1, %0" \
+: "=Q" (*p) \
+: "r" (*(__u8_alias_t *)__u.__c) \
+: "memory"); \
+break; \
+case 2: \
+asm volatile ("stlrh %w1, %0" \
+: "=Q" (*p) \
+: "r" (*(__u16_alias_t *)__u.__c) \
+: "memory"); \
+break; \
+case 4: \
+asm volatile ("stlr %w1, %0" \
+: "=Q" (*p) \
+: "r" (*(__u32_alias_t *)__u.__c) \
+: "memory"); \
+break; \
+case 8: \
+asm volatile ("stlr %1, %0" \
+: "=Q" (*p) \
+: "r" (*(__u64_alias_t *)__u.__c) \
+: "memory"); \
+break; \
+default: \
+/* Only to shut up gcc ... */ \
+mb(); \
+break; \
+} \
} while (0)

-#define smp_load_acquire(p) \
-({ \
-union { typeof(*p) __val; char __c[1]; } __u; \
- \
-switch (sizeof(*p)) { \
-case 1: \
-asm volatile ("ldarb %w0, %1" \
-: "=r" (*(__u8 *)__u.__c) \
-: "Q" (*p) : "memory"); \
-break; \
-case 2: \
-asm volatile ("ldarh %w0, %1" \
-: "=r" (*(__u16 *)__u.__c) \
-: "Q" (*p) : "memory"); \
-break; \
-case 4: \
-asm volatile ("ldar %w0, %1" \
-: "=r" (*(__u32 *)__u.__c) \
-: "Q" (*p) : "memory"); \
-break; \
-case 8: \
-asm volatile ("ldar %0, %1" \
-: "=r" (*(__u64 *)__u.__c) \
-: "Q" (*p) : "memory"); \
-break; \
-default: \
-/* Only to shut up gcc ... */ \
-mb(); \
-break; \
-} \
-__u.__val; \
+#define smp_load_acquire(p) \
+({ \
+union { typeof(*p) __val; char __c[1]; } __u = \
+{ .__c = { 0 } }; \
+ \
+switch (sizeof(*p)) { \
+case 1: \
+asm volatile ("ldarb %w0, %1" \
+: "=r" (*(__u8_alias_t *)__u.__c) \
+: "Q" (*p) : "memory"); \
+break; \
+case 2: \
+asm volatile ("ldarh %w0, %1" \
+: "=r" (*(__u16_alias_t *)__u.__c) \
+: "Q" (*p) : "memory"); \
+break; \
+case 4: \
+asm volatile ("ldar %w0, %1" \
+: "=r" (*(__u32_alias_t *)__u.__c) \
+: "Q" (*p) : "memory"); \
+break; \
+case 8: \
+asm volatile ("ldar %0, %1" \
+: "=r" (*(__u64_alias_t *)__u.__c) \
+: "Q" (*p) : "memory"); \
+break; \
+default: \
+/* Only to shut up gcc ... */ \
+mb(); \
+break; \
+} \
+__u.__val; \
})

#endif /* _TOOLS_LINUX_ASM_AARCH64_BARRIER_H */
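
For reference, the __u8_alias_t/__u16_alias_t/__u32_alias_t/__u64_alias_t types used above appear to be the may_alias typedefs from tools/include/linux/compiler.h; a minimal sketch of the assumed definitions:

    /* Assumed shape, per tools/include/linux/compiler.h: may_alias
     * typedefs keep the type-punning through __u.__c legal under
     * strict aliasing, while avoiding the open-coded __u8/__u16/
     * __u32/__u64 casts that broke the arm64 tools build. */
    typedef __u8  __attribute__((__may_alias__))  __u8_alias_t;
    typedef __u16 __attribute__((__may_alias__)) __u16_alias_t;
    typedef __u32 __attribute__((__may_alias__)) __u32_alias_t;
    typedef __u64 __attribute__((__may_alias__)) __u64_alias_t;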
1 change: 0 additions & 1 deletion tools/perf/Documentation/perf-list.txt
@@ -55,7 +55,6 @@ counted. The following modifiers exist:
S - read sample value (PERF_SAMPLE_READ)
D - pin the event to the PMU
W - group is weak and will fallback to non-group if not schedulable,
-only supported in 'perf stat' for now.

The 'p' modifier can be used for specifying how precise the instruction
address should be. The 'p' modifier can be specified multiple times:
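With this series the weak-group modifier documented above works in perf record too; an illustrative invocation (hypothetical event names):

    perf record -e '{cycles,cache-misses,cache-references}:W' -a -- sleep 1

If the group cannot be scheduled as a unit, the events fall back to being opened without grouping rather than failing outright.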
2 changes: 1 addition & 1 deletion tools/perf/Makefile.perf
@@ -387,7 +387,7 @@ SHELL = $(SHELL_PATH)

linux_uapi_dir := $(srctree)/tools/include/uapi/linux
asm_generic_uapi_dir := $(srctree)/tools/include/uapi/asm-generic
-arch_asm_uapi_dir := $(srctree)/tools/arch/$(ARCH)/include/uapi/asm/
+arch_asm_uapi_dir := $(srctree)/tools/arch/$(SRCARCH)/include/uapi/asm/

beauty_outdir := $(OUTPUT)trace/beauty/generated
beauty_ioctl_outdir := $(beauty_outdir)/ioctl
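The ARCH to SRCARCH fold itself is assumed to come from tools/scripts/Makefile.arch; a minimal sketch of the mapping this fix relies on:

    # Sketch, assuming tools/scripts/Makefile.arch: SRCARCH folds the
    # ABI-qualified ARCH value into the source directory name, so that
    # ARCH=x86_64 finds headers under tools/arch/x86/, which exists,
    # rather than tools/arch/x86_64/, which does not.
    ifeq ($(ARCH),x86_64)
      SRCARCH := x86
    endif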
7 changes: 6 additions & 1 deletion tools/perf/builtin-record.c
@@ -391,7 +391,12 @@ static int record__open(struct record *rec)
ui__warning("%s\n", msg);
goto try_again;
}
-
+if ((errno == EINVAL || errno == EBADF) &&
+pos->leader != pos &&
+pos->weak_group) {
+pos = perf_evlist__reset_weak_group(evlist, pos);
+goto try_again;
+}
rc = -errno;
perf_evsel__open_strerror(pos, &opts->target,
errno, msg, sizeof(msg));
28 changes: 1 addition & 27 deletions tools/perf/builtin-stat.c
@@ -383,32 +383,6 @@ static bool perf_evsel__should_store_id(struct perf_evsel *counter)
return STAT_RECORD || counter->attr.read_format & PERF_FORMAT_ID;
}

-static struct perf_evsel *perf_evsel__reset_weak_group(struct perf_evsel *evsel)
-{
-struct perf_evsel *c2, *leader;
-bool is_open = true;
-
-leader = evsel->leader;
-pr_debug("Weak group for %s/%d failed\n",
-leader->name, leader->nr_members);
-
-/*
- * for_each_group_member doesn't work here because it doesn't
- * include the first entry.
- */
-evlist__for_each_entry(evsel_list, c2) {
-if (c2 == evsel)
-is_open = false;
-if (c2->leader == leader) {
-if (is_open)
-perf_evsel__close(c2);
-c2->leader = c2;
-c2->nr_members = 0;
-}
-}
-return leader;
-}
-
static bool is_target_alive(struct target *_target,
struct thread_map *threads)
{
@@ -477,7 +451,7 @@ static int __run_perf_stat(int argc, const char **argv, int run_idx)
if ((errno == EINVAL || errno == EBADF) &&
counter->leader != counter &&
counter->weak_group) {
-counter = perf_evsel__reset_weak_group(counter);
+counter = perf_evlist__reset_weak_group(evsel_list, counter);
goto try_again;
}

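For reference, the signature of the moved helper as implied by the two call sites in this series (a sketch; the body is the function removed above, now taking the evlist as a parameter instead of relying on builtin-stat.c's file-local evsel_list):

    /* sketch, assumed to live in tools/perf/util/evlist.c after this series */
    struct perf_evsel *perf_evlist__reset_weak_group(struct perf_evlist *evlist,
                                                     struct perf_evsel *evsel);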
3 changes: 3 additions & 0 deletions tools/perf/builtin-top.c
@@ -1429,6 +1429,9 @@ int cmd_top(int argc, const char **argv)
}
}

+if (opts->branch_stack && callchain_param.enabled)
+symbol_conf.show_branchflag_count = true;
+
sort__mode = SORT_MODE__TOP;
/* display thread wants entries to be collapsed in a different tree */
perf_hpp_list.need_collapse = 1;
34 changes: 29 additions & 5 deletions tools/perf/builtin-trace.c
@@ -108,6 +108,7 @@ struct trace {
} stats;
unsigned int max_stack;
unsigned int min_stack;
+bool raw_augmented_syscalls;
bool not_ev_qualifier;
bool live;
bool full_time;
@@ -1724,13 +1725,28 @@ static int trace__fprintf_sample(struct trace *trace, struct perf_evsel *evsel,
return printed;
}

-static void *syscall__augmented_args(struct syscall *sc, struct perf_sample *sample, int *augmented_args_size)
+static void *syscall__augmented_args(struct syscall *sc, struct perf_sample *sample, int *augmented_args_size, bool raw_augmented)
{
void *augmented_args = NULL;
+/*
+ * For now with BPF raw_augmented we hook into raw_syscalls:sys_enter
+ * and there we get all 6 syscall args plus the tracepoint common
+ * fields (sizeof(long)) and the syscall_nr (another long). So we check
+ * if that is the case and if so don't look after the sc->args_size,
+ * but always after the full raw_syscalls:sys_enter payload, which is
+ * fixed.
+ *
+ * We'll revisit this later to pass s->args_size to the BPF augmenter
+ * (now tools/perf/examples/bpf/augmented_raw_syscalls.c, so that it
+ * copies only what we need for each syscall, like what happens when we
+ * use syscalls:sys_enter_NAME, so that we reduce the kernel/userspace
+ * traffic to just what is needed for each syscall.
+ */
+int args_size = raw_augmented ? (8 * (int)sizeof(long)) : sc->args_size;

-*augmented_args_size = sample->raw_size - sc->args_size;
+*augmented_args_size = sample->raw_size - args_size;
if (*augmented_args_size > 0)
-augmented_args = sample->raw_data + sc->args_size;
+augmented_args = sample->raw_data + args_size;

return augmented_args;
}
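
A sketch of the fixed raw_syscalls:sys_enter payload layout that the 8 * sizeof(long) computation above assumes (illustrative struct, not from the tree):

    struct raw_sys_enter_payload {  /* hypothetical, for illustration */
            long common;      /* tracepoint common fields, one long's worth */
            long syscall_nr;  /* the syscall id, another long */
            long args[6];     /* all six syscall arguments */
            /* augmented data from the BPF program (e.g. copied pathnames)
             * starts here, at offset 8 * sizeof(long) == args_size */
    };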
@@ -1780,7 +1796,7 @@ static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
* here and avoid using augmented syscalls when the evsel is the raw_syscalls one.
*/
if (evsel != trace->syscalls.events.sys_enter)
-augmented_args = syscall__augmented_args(sc, sample, &augmented_args_size);
+augmented_args = syscall__augmented_args(sc, sample, &augmented_args_size, trace->raw_augmented_syscalls);
ttrace->entry_time = sample->time;
msg = ttrace->entry_str;
printed += scnprintf(msg + printed, trace__entry_str_size - printed, "%s(", sc->name);
@@ -1833,7 +1849,7 @@ static int trace__fprintf_sys_enter(struct trace *trace, struct perf_evsel *evse
goto out_put;

args = perf_evsel__sc_tp_ptr(evsel, args, sample);
-augmented_args = syscall__augmented_args(sc, sample, &augmented_args_size);
+augmented_args = syscall__augmented_args(sc, sample, &augmented_args_size, trace->raw_augmented_syscalls);
syscall__scnprintf_args(sc, msg, sizeof(msg), args, augmented_args, augmented_args_size, trace, thread);
fprintf(trace->output, "%s", msg);
err = 0;
@@ -3501,7 +3517,15 @@ int cmd_trace(int argc, const char **argv)
evsel->handler = trace__sys_enter;

evlist__for_each_entry(trace.evlist, evsel) {
+bool raw_syscalls_sys_exit = strcmp(perf_evsel__name(evsel), "raw_syscalls:sys_exit") == 0;
+
+if (raw_syscalls_sys_exit) {
+trace.raw_augmented_syscalls = true;
+goto init_augmented_syscall_tp;
+}
+
if (strstarts(perf_evsel__name(evsel), "syscalls:sys_exit_")) {
+init_augmented_syscall_tp:
perf_evsel__init_augmented_syscall_tp(evsel);
perf_evsel__init_augmented_syscall_tp_ret(evsel);
evsel->handler = trace__sys_exit;
