Skip to content

Commit

Permalink
Merge branch 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Browse files Browse the repository at this point in the history

Pull perf fixes from Thomas Gleixner:
 - fix for do_div() abuse on x86
 - locking fix in perf core
 - a pile of (build) fixes and cleanups in perf tools

* 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (27 commits)
  perf/x86: Fix incorrect use of do_div() in NMI warning
  perf: Fix perf_lock_task_context() vs RCU
  perf: Remove WARN_ON_ONCE() check in __perf_event_enable() for valid scenario
  perf: Clone child context from parent context pmu
  perf script: Fix broken include in Context.xs
  perf tools: Fix -ldw/-lelf link test when static linking
  perf tools: Revert regression in configuration of Python support
  perf tools: Fix perf version generation
  perf stat: Fix per-socket output bug for uncore events
  perf symbols: Fix vdso list searching
  perf evsel: Fix missing increment in sample parsing
  perf tools: Update symbol_conf.nr_events when processing attribute events
  perf tools: Fix new_term() missing free on error path
  perf tools: Fix parse_events_terms() segfault on error path
  perf evsel: Fix count parameter to read call in event_format__new
  perf tools: fix a typo of a Power7 event name
  perf tools: Fix -x/--exclude-other option for report command
  perf evlist: Enhance perf_evlist__start_workload()
  perf record: Remove -f/--force option
  perf record: Remove -A/--append option
  ...
  • Loading branch information
torvalds committed Jul 13, 2013
2 parents 4fa109b + baf64b8 commit 560ae37
Show file tree
Hide file tree
Showing 34 changed files with 154 additions and 163 deletions.
Expand Up @@ -29,7 +29,7 @@ Description: Generic performance monitoring events

What: /sys/devices/cpu/events/PM_1PLUS_PPC_CMPL
/sys/devices/cpu/events/PM_BRU_FIN
/sys/devices/cpu/events/PM_BRU_MPRED
/sys/devices/cpu/events/PM_BR_MPRED
/sys/devices/cpu/events/PM_CMPLU_STALL
/sys/devices/cpu/events/PM_CMPLU_STALL_BRU
/sys/devices/cpu/events/PM_CMPLU_STALL_DCACHE_MISS
Expand Down
12 changes: 6 additions & 6 deletions arch/powerpc/perf/power7-pmu.c
Expand Up @@ -60,7 +60,7 @@
#define PME_PM_LD_REF_L1 0xc880
#define PME_PM_LD_MISS_L1 0x400f0
#define PME_PM_BRU_FIN 0x10068
#define PME_PM_BRU_MPRED 0x400f6
#define PME_PM_BR_MPRED 0x400f6

#define PME_PM_CMPLU_STALL_FXU 0x20014
#define PME_PM_CMPLU_STALL_DIV 0x40014
Expand Down Expand Up @@ -349,7 +349,7 @@ static int power7_generic_events[] = {
[PERF_COUNT_HW_CACHE_REFERENCES] = PME_PM_LD_REF_L1,
[PERF_COUNT_HW_CACHE_MISSES] = PME_PM_LD_MISS_L1,
[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = PME_PM_BRU_FIN,
[PERF_COUNT_HW_BRANCH_MISSES] = PME_PM_BRU_MPRED,
[PERF_COUNT_HW_BRANCH_MISSES] = PME_PM_BR_MPRED,
};

#define C(x) PERF_COUNT_HW_CACHE_##x
Expand Down Expand Up @@ -405,7 +405,7 @@ GENERIC_EVENT_ATTR(instructions, INST_CMPL);
GENERIC_EVENT_ATTR(cache-references, LD_REF_L1);
GENERIC_EVENT_ATTR(cache-misses, LD_MISS_L1);
GENERIC_EVENT_ATTR(branch-instructions, BRU_FIN);
GENERIC_EVENT_ATTR(branch-misses, BRU_MPRED);
GENERIC_EVENT_ATTR(branch-misses, BR_MPRED);

POWER_EVENT_ATTR(CYC, CYC);
POWER_EVENT_ATTR(GCT_NOSLOT_CYC, GCT_NOSLOT_CYC);
Expand All @@ -414,7 +414,7 @@ POWER_EVENT_ATTR(INST_CMPL, INST_CMPL);
POWER_EVENT_ATTR(LD_REF_L1, LD_REF_L1);
POWER_EVENT_ATTR(LD_MISS_L1, LD_MISS_L1);
POWER_EVENT_ATTR(BRU_FIN, BRU_FIN)
POWER_EVENT_ATTR(BRU_MPRED, BRU_MPRED);
POWER_EVENT_ATTR(BR_MPRED, BR_MPRED);

POWER_EVENT_ATTR(CMPLU_STALL_FXU, CMPLU_STALL_FXU);
POWER_EVENT_ATTR(CMPLU_STALL_DIV, CMPLU_STALL_DIV);
Expand Down Expand Up @@ -449,7 +449,7 @@ static struct attribute *power7_events_attr[] = {
GENERIC_EVENT_PTR(LD_REF_L1),
GENERIC_EVENT_PTR(LD_MISS_L1),
GENERIC_EVENT_PTR(BRU_FIN),
GENERIC_EVENT_PTR(BRU_MPRED),
GENERIC_EVENT_PTR(BR_MPRED),

POWER_EVENT_PTR(CYC),
POWER_EVENT_PTR(GCT_NOSLOT_CYC),
Expand All @@ -458,7 +458,7 @@ static struct attribute *power7_events_attr[] = {
POWER_EVENT_PTR(LD_REF_L1),
POWER_EVENT_PTR(LD_MISS_L1),
POWER_EVENT_PTR(BRU_FIN),
POWER_EVENT_PTR(BRU_MPRED),
POWER_EVENT_PTR(BR_MPRED),

POWER_EVENT_PTR(CMPLU_STALL_FXU),
POWER_EVENT_PTR(CMPLU_STALL_DIV),
Expand Down
7 changes: 4 additions & 3 deletions arch/x86/kernel/nmi.c
Expand Up @@ -111,7 +111,7 @@ static int __kprobes nmi_handle(unsigned int type, struct pt_regs *regs, bool b2
*/
list_for_each_entry_rcu(a, &desc->head, list) {
u64 before, delta, whole_msecs;
int decimal_msecs, thishandled;
int remainder_ns, decimal_msecs, thishandled;

before = local_clock();
thishandled = a->handler(type, regs);
Expand All @@ -123,8 +123,9 @@ static int __kprobes nmi_handle(unsigned int type, struct pt_regs *regs, bool b2
continue;

nmi_longest_ns = delta;
whole_msecs = do_div(delta, (1000 * 1000));
decimal_msecs = do_div(delta, 1000) % 1000;
whole_msecs = delta;
remainder_ns = do_div(whole_msecs, (1000 * 1000));
decimal_msecs = remainder_ns / 1000;
printk_ratelimited(KERN_INFO
"INFO: NMI handler (%ps) took too long to run: "
"%lld.%03d msecs\n", a->handler, whole_msecs,
Expand Down
28 changes: 25 additions & 3 deletions kernel/events/core.c
Expand Up @@ -947,8 +947,18 @@ perf_lock_task_context(struct task_struct *task, int ctxn, unsigned long *flags)
{
struct perf_event_context *ctx;

rcu_read_lock();
retry:
/*
* One of the few rules of preemptible RCU is that one cannot do
* rcu_read_unlock() while holding a scheduler (or nested) lock when
* part of the read side critical section was preemptible -- see
* rcu_read_unlock_special().
*
* Since ctx->lock nests under rq->lock we must ensure the entire read
* side critical section is non-preemptible.
*/
preempt_disable();
rcu_read_lock();
ctx = rcu_dereference(task->perf_event_ctxp[ctxn]);
if (ctx) {
/*
Expand All @@ -964,6 +974,8 @@ perf_lock_task_context(struct task_struct *task, int ctxn, unsigned long *flags)
raw_spin_lock_irqsave(&ctx->lock, *flags);
if (ctx != rcu_dereference(task->perf_event_ctxp[ctxn])) {
raw_spin_unlock_irqrestore(&ctx->lock, *flags);
rcu_read_unlock();
preempt_enable();
goto retry;
}

Expand All @@ -973,6 +985,7 @@ perf_lock_task_context(struct task_struct *task, int ctxn, unsigned long *flags)
}
}
rcu_read_unlock();
preempt_enable();
return ctx;
}

Expand Down Expand Up @@ -1950,7 +1963,16 @@ static int __perf_event_enable(void *info)
struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
int err;

if (WARN_ON_ONCE(!ctx->is_active))
/*
* There's a time window between 'ctx->is_active' check
* in perf_event_enable function and this place having:
* - IRQs on
* - ctx->lock unlocked
*
* where the task could be killed and 'ctx' deactivated
* by perf_event_exit_task.
*/
if (!ctx->is_active)
return -EINVAL;

raw_spin_lock(&ctx->lock);
Expand Down Expand Up @@ -7465,7 +7487,7 @@ inherit_task_group(struct perf_event *event, struct task_struct *parent,
* child.
*/

child_ctx = alloc_perf_context(event->pmu, child);
child_ctx = alloc_perf_context(parent_ctx->pmu, child);
if (!child_ctx)
return -ENOMEM;

Expand Down
17 changes: 16 additions & 1 deletion tools/lib/lk/Makefile
Expand Up @@ -3,6 +3,21 @@ include ../../scripts/Makefile.include
CC = $(CROSS_COMPILE)gcc
AR = $(CROSS_COMPILE)ar

# Makefiles suck: This macro sets a default value of $(2) for the
# variable named by $(1), unless the variable has been set by
# environment or command line. This is necessary for CC and AR
# because make sets default values, so the simpler ?= approach
# won't work as expected.
define allow-override
$(if $(or $(findstring environment,$(origin $(1))),\
$(findstring command line,$(origin $(1)))),,\
$(eval $(1) = $(2)))
endef

# Allow setting CC and AR, or setting CROSS_COMPILE as a prefix.
$(call allow-override,CC,$(CROSS_COMPILE)gcc)
$(call allow-override,AR,$(CROSS_COMPILE)ar)

# guard against environment variables
LIB_H=
LIB_OBJS=
Expand All @@ -14,7 +29,7 @@ LIB_OBJS += $(OUTPUT)debugfs.o
LIBFILE = liblk.a

CFLAGS = -ggdb3 -Wall -Wextra -std=gnu99 -Werror -O6 -D_FORTIFY_SOURCE=2 $(EXTRA_WARNINGS) $(EXTRA_CFLAGS) -fPIC
EXTLIBS = -lpthread -lrt -lelf -lm
EXTLIBS = -lelf -lpthread -lrt -lm
ALL_CFLAGS = $(CFLAGS) $(BASIC_CFLAGS) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64
ALL_LDFLAGS = $(LDFLAGS)

Expand Down
12 changes: 3 additions & 9 deletions tools/perf/Documentation/Makefile
@@ -1,12 +1,6 @@
include ../../scripts/Makefile.include
include ../config/utilities.mak

OUTPUT := ./
ifeq ("$(origin O)", "command line")
ifneq ($(O),)
OUTPUT := $(O)/
endif
endif

MAN1_TXT= \
$(filter-out $(addsuffix .txt, $(ARTICLES) $(SP_ARTICLES)), \
$(wildcard perf-*.txt)) \
Expand Down Expand Up @@ -150,7 +144,7 @@ NO_SUBDIR = :
endif

ifneq ($(findstring $(MAKEFLAGS),s),s)
ifndef V
ifneq ($(V),1)
QUIET_ASCIIDOC = @echo ' ' ASCIIDOC $@;
QUIET_XMLTO = @echo ' ' XMLTO $@;
QUIET_DB2TEXI = @echo ' ' DB2TEXI $@;
Expand Down Expand Up @@ -277,7 +271,7 @@ $(MAN_HTML): $(OUTPUT)%.html : %.txt

$(OUTPUT)%.1 $(OUTPUT)%.5 $(OUTPUT)%.7 : $(OUTPUT)%.xml
$(QUIET_XMLTO)$(RM) $@ && \
$(XMLTO) -o $(OUTPUT) -m $(MANPAGE_XSL) $(XMLTO_EXTRA) man $<
$(XMLTO) -o $(OUTPUT). -m $(MANPAGE_XSL) $(XMLTO_EXTRA) man $<

$(OUTPUT)%.xml : %.txt
$(QUIET_ASCIIDOC)$(RM) $@+ $@ && \
Expand Down
4 changes: 2 additions & 2 deletions tools/perf/Documentation/examples.txt
Expand Up @@ -66,7 +66,7 @@ Furthermore, these tracepoints can be used to sample the workload as
well. For example the page allocations done by a 'git gc' can be
captured the following way:

titan:~/git> perf record -f -e kmem:mm_page_alloc -c 1 ./git gc
titan:~/git> perf record -e kmem:mm_page_alloc -c 1 ./git gc
Counting objects: 1148, done.
Delta compression using up to 2 threads.
Compressing objects: 100% (450/450), done.
Expand Down Expand Up @@ -120,7 +120,7 @@ Furthermore, call-graph sampling can be done too, of page
allocations - to see precisely what kind of page allocations there
are:

titan:~/git> perf record -f -g -e kmem:mm_page_alloc -c 1 ./git gc
titan:~/git> perf record -g -e kmem:mm_page_alloc -c 1 ./git gc
Counting objects: 1148, done.
Delta compression using up to 2 threads.
Compressing objects: 100% (450/450), done.
Expand Down
8 changes: 1 addition & 7 deletions tools/perf/Documentation/perf-record.txt
Expand Up @@ -65,16 +65,10 @@ OPTIONS
-r::
--realtime=::
Collect data with this RT SCHED_FIFO priority.

-D::
--no-delay::
Collect data without buffering.
-A::
--append::
Append to the output file to do incremental profiling.

-f::
--force::
Overwrite existing data file. (deprecated)

-c::
--count=::
Expand Down
5 changes: 2 additions & 3 deletions tools/perf/Makefile
Expand Up @@ -121,17 +121,16 @@ SCRIPT_SH += perf-archive.sh
grep-libs = $(filter -l%,$(1))
strip-libs = $(filter-out -l%,$(1))

LK_PATH=$(LK_DIR)

ifneq ($(OUTPUT),)
TE_PATH=$(OUTPUT)
ifneq ($(subdir),)
LK_PATH=$(OUTPUT)$(LK_DIR)
LK_PATH=$(objtree)/lib/lk/
else
LK_PATH=$(OUTPUT)
endif
else
TE_PATH=$(TRACE_EVENT_DIR)
LK_PATH=$(LK_DIR)
endif

LIBTRACEEVENT = $(TE_PATH)libtraceevent.a
Expand Down
4 changes: 2 additions & 2 deletions tools/perf/bench/mem-memcpy.c
Expand Up @@ -111,11 +111,11 @@ static double timeval2double(struct timeval *ts)
static void alloc_mem(void **dst, void **src, size_t length)
{
*dst = zalloc(length);
if (!dst)
if (!*dst)
die("memory allocation failed - maybe length is too large?\n");

*src = zalloc(length);
if (!src)
if (!*src)
die("memory allocation failed - maybe length is too large?\n");
}

Expand Down
2 changes: 1 addition & 1 deletion tools/perf/bench/mem-memset.c
Expand Up @@ -111,7 +111,7 @@ static double timeval2double(struct timeval *ts)
static void alloc_mem(void **dst, size_t length)
{
*dst = zalloc(length);
if (!dst)
if (!*dst)
die("memory allocation failed - maybe length is too large?\n");
}

Expand Down
1 change: 0 additions & 1 deletion tools/perf/builtin-diff.c
Expand Up @@ -607,7 +607,6 @@ int cmd_diff(int argc, const char **argv, const char *prefix __maybe_unused)
input_new = "perf.data.guest";
}

symbol_conf.exclude_other = false;
if (symbol__init() < 0)
return -1;

Expand Down
2 changes: 1 addition & 1 deletion tools/perf/builtin-kmem.c
Expand Up @@ -708,7 +708,7 @@ static int parse_line_opt(const struct option *opt __maybe_unused,
static int __cmd_record(int argc, const char **argv)
{
const char * const record_args[] = {
"record", "-a", "-R", "-f", "-c", "1",
"record", "-a", "-R", "-c", "1",
"-e", "kmem:kmalloc",
"-e", "kmem:kmalloc_node",
"-e", "kmem:kfree",
Expand Down
2 changes: 1 addition & 1 deletion tools/perf/builtin-lock.c
Expand Up @@ -878,7 +878,7 @@ static int __cmd_report(void)
static int __cmd_record(int argc, const char **argv)
{
const char *record_args[] = {
"record", "-R", "-f", "-m", "1024", "-c", "1",
"record", "-R", "-m", "1024", "-c", "1",
};
unsigned int rec_argc, i, j;
const char **rec_argv;
Expand Down

0 comments on commit 560ae37

Please sign in to comment.