perf_counter: Full task tracing
In order to distinguish between no samples due to inactivity and
no samples because the task has exited, Arjan asked for
PERF_EVENT_EXIT events. This is useful for the boot delay
instrumentation (bootchart) app.

This patch changes the PERF_EVENT_FORK to be emitted on every
clone, and adds PERF_EVENT_EXIT to be emitted on task exit,
after the task's counters have been closed.

This task tracing is controlled through attr.comm || attr.mmap and
through the new attr.task field.

Suggested-by: Arjan van de Ven <arjan@linux.intel.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Anton Blanchard <anton@samba.org>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
[ cleaned up perf_counter.h a bit ]
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Peter Zijlstra authored and Ingo Molnar committed Aug 2, 2009
1 parent e53c099 commit 9f498cc
Showing 3 changed files with 71 additions and 33 deletions.
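
As background for the diff below, here is a minimal, hypothetical sketch of how a
userspace ring-buffer consumer might decode the new task-tracking records. It is not
part of this commit: the task_record and handle_task_record names are invented for
illustration, and it assumes the kernel's perf_counter.h header (with struct
perf_event_header and the PERF_EVENT_* enum) is visible to userspace. The record
layout simply mirrors the comment blocks added to enum perf_event_type below.

#include <stdio.h>
#include <linux/types.h>
#include <linux/perf_counter.h>   /* struct perf_event_header, PERF_EVENT_FORK/EXIT */

/* Hypothetical decoded form of the new records: a perf_event_header
 * followed by pid/ppid and tid/ptid, as documented in the header below. */
struct task_record {
        struct perf_event_header header;
        __u32 pid, ppid;
        __u32 tid, ptid;
};

/* Handle one record read out of a counter's mmap()'ed ring buffer. */
static void handle_task_record(const struct perf_event_header *hdr)
{
        const struct task_record *rec = (const void *)hdr;

        switch (hdr->type) {
        case PERF_EVENT_FORK:
                /* After this commit, emitted on every clone(), threads included. */
                printf("fork: pid=%u ppid=%u tid=%u ptid=%u\n",
                       rec->pid, rec->ppid, rec->tid, rec->ptid);
                break;
        case PERF_EVENT_EXIT:
                /* Emitted on task exit, after the task's counters have been closed. */
                printf("exit: pid=%u ppid=%u tid=%u ptid=%u\n",
                       rec->pid, rec->ppid, rec->tid, rec->ptid);
                break;
        default:
                break;
        }
}

Per the commit message, these records are delivered to any counter whose attr.comm,
attr.mmap, or attr.task bit is set.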
13 changes: 12 additions & 1 deletion include/linux/perf_counter.h
@@ -181,8 +181,9 @@ struct perf_counter_attr {
freq : 1, /* use freq, not period */
inherit_stat : 1, /* per task counts */
enable_on_exec : 1, /* next exec enables */
task : 1, /* trace fork/exit */

__reserved_1 : 51;
__reserved_1 : 50;

__u32 wakeup_events; /* wakeup every n events */
__u32 __reserved_2;
@@ -308,6 +309,15 @@ enum perf_event_type {
*/
PERF_EVENT_COMM = 3,

/*
* struct {
* struct perf_event_header header;
* u32 pid, ppid;
* u32 tid, ptid;
* };
*/
PERF_EVENT_EXIT = 4,

/*
* struct {
* struct perf_event_header header;
@@ -323,6 +333,7 @@ enum perf_event_type {
* struct {
* struct perf_event_header header;
* u32 pid, ppid;
* u32 tid, ptid;
* };
*/
PERF_EVENT_FORK = 7,
4 changes: 1 addition & 3 deletions kernel/fork.c
@@ -1269,6 +1269,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
write_unlock_irq(&tasklist_lock);
proc_fork_connector(p);
cgroup_post_fork(p);
perf_counter_fork(p);
return p;

bad_fork_free_pid:
@@ -1410,9 +1411,6 @@ long do_fork(unsigned long clone_flags,
init_completion(&vfork);
}

if (!(clone_flags & CLONE_THREAD))
perf_counter_fork(p);

audit_finish_fork(p);
tracehook_report_clone(regs, clone_flags, nr, p);

87 changes: 58 additions & 29 deletions kernel/perf_counter.c
@@ -42,6 +42,7 @@ static int perf_overcommit __read_mostly = 1;
static atomic_t nr_counters __read_mostly;
static atomic_t nr_mmap_counters __read_mostly;
static atomic_t nr_comm_counters __read_mostly;
static atomic_t nr_task_counters __read_mostly;

/*
* perf counter paranoia level:
@@ -1654,6 +1655,8 @@ static void free_counter(struct perf_counter *counter)
atomic_dec(&nr_mmap_counters);
if (counter->attr.comm)
atomic_dec(&nr_comm_counters);
if (counter->attr.task)
atomic_dec(&nr_task_counters);
}

if (counter->destroy)
@@ -2831,48 +2834,55 @@ perf_counter_read_event(struct perf_counter *counter,
}

/*
* fork tracking
* task tracking -- fork/exit
*
* enabled by: attr.comm | attr.mmap | attr.task
*/

struct perf_fork_event {
struct perf_task_event {
struct task_struct *task;

struct {
struct perf_event_header header;

u32 pid;
u32 ppid;
u32 tid;
u32 ptid;
} event;
};

static void perf_counter_fork_output(struct perf_counter *counter,
struct perf_fork_event *fork_event)
static void perf_counter_task_output(struct perf_counter *counter,
struct perf_task_event *task_event)
{
struct perf_output_handle handle;
int size = fork_event->event.header.size;
struct task_struct *task = fork_event->task;
int size = task_event->event.header.size;
struct task_struct *task = task_event->task;
int ret = perf_output_begin(&handle, counter, size, 0, 0);

if (ret)
return;

fork_event->event.pid = perf_counter_pid(counter, task);
fork_event->event.ppid = perf_counter_pid(counter, task->real_parent);
task_event->event.pid = perf_counter_pid(counter, task);
task_event->event.ppid = perf_counter_pid(counter, task->real_parent);

perf_output_put(&handle, fork_event->event);
task_event->event.tid = perf_counter_tid(counter, task);
task_event->event.ptid = perf_counter_tid(counter, task->real_parent);

perf_output_put(&handle, task_event->event);
perf_output_end(&handle);
}

static int perf_counter_fork_match(struct perf_counter *counter)
static int perf_counter_task_match(struct perf_counter *counter)
{
if (counter->attr.comm || counter->attr.mmap)
if (counter->attr.comm || counter->attr.mmap || counter->attr.task)
return 1;

return 0;
}

static void perf_counter_fork_ctx(struct perf_counter_context *ctx,
struct perf_fork_event *fork_event)
static void perf_counter_task_ctx(struct perf_counter_context *ctx,
struct perf_task_event *task_event)
{
struct perf_counter *counter;

@@ -2881,19 +2891,19 @@ static void perf_counter_fork_ctx(struct perf_counter_context *ctx,

rcu_read_lock();
list_for_each_entry_rcu(counter, &ctx->event_list, event_entry) {
if (perf_counter_fork_match(counter))
perf_counter_fork_output(counter, fork_event);
if (perf_counter_task_match(counter))
perf_counter_task_output(counter, task_event);
}
rcu_read_unlock();
}

static void perf_counter_fork_event(struct perf_fork_event *fork_event)
static void perf_counter_task_event(struct perf_task_event *task_event)
{
struct perf_cpu_context *cpuctx;
struct perf_counter_context *ctx;

cpuctx = &get_cpu_var(perf_cpu_context);
perf_counter_fork_ctx(&cpuctx->ctx, fork_event);
perf_counter_task_ctx(&cpuctx->ctx, task_event);
put_cpu_var(perf_cpu_context);

rcu_read_lock();
@@ -2903,32 +2913,40 @@ static void perf_counter_fork_event(struct perf_fork_event *fork_event)
*/
ctx = rcu_dereference(current->perf_counter_ctxp);
if (ctx)
perf_counter_fork_ctx(ctx, fork_event);
perf_counter_task_ctx(ctx, task_event);
rcu_read_unlock();
}

void perf_counter_fork(struct task_struct *task)
static void perf_counter_task(struct task_struct *task, int new)
{
struct perf_fork_event fork_event;
struct perf_task_event task_event;

if (!atomic_read(&nr_comm_counters) &&
!atomic_read(&nr_mmap_counters))
!atomic_read(&nr_mmap_counters) &&
!atomic_read(&nr_task_counters))
return;

fork_event = (struct perf_fork_event){
task_event = (struct perf_task_event){
.task = task,
.event = {
.header = {
.type = PERF_EVENT_FORK,
.type = new ? PERF_EVENT_FORK : PERF_EVENT_EXIT,
.misc = 0,
.size = sizeof(fork_event.event),
.size = sizeof(task_event.event),
},
/* .pid */
/* .ppid */
/* .tid */
/* .ptid */
},
};

perf_counter_fork_event(&fork_event);
perf_counter_task_event(&task_event);
}

void perf_counter_fork(struct task_struct *task)
{
perf_counter_task(task, 1);
}

/*
@@ -3887,6 +3905,8 @@ perf_counter_alloc(struct perf_counter_attr *attr,
atomic_inc(&nr_mmap_counters);
if (counter->attr.comm)
atomic_inc(&nr_comm_counters);
if (counter->attr.task)
atomic_inc(&nr_task_counters);
}

return counter;
@@ -4248,8 +4268,10 @@ void perf_counter_exit_task(struct task_struct *child)
struct perf_counter_context *child_ctx;
unsigned long flags;

if (likely(!child->perf_counter_ctxp))
if (likely(!child->perf_counter_ctxp)) {
perf_counter_task(child, 0);
return;
}

local_irq_save(flags);
/*
@@ -4267,15 +4289,22 @@ void perf_counter_exit_task(struct task_struct *child)
* incremented the context's refcount before we do put_ctx below.
*/
spin_lock(&child_ctx->lock);
child->perf_counter_ctxp = NULL;
/*
* If this context is a clone; unclone it so it can't get
* swapped to another process while we're removing all
* the counters from it.
*/
unclone_ctx(child_ctx);
spin_unlock(&child_ctx->lock);
local_irq_restore(flags);
spin_unlock_irqrestore(&child_ctx->lock, flags);

/*
* Report the task dead after unscheduling the counters so that we
* won't get any samples after PERF_EVENT_EXIT. We can however still
* get a few PERF_EVENT_READ events.
*/
perf_counter_task(child, 0);

child->perf_counter_ctxp = NULL;

/*
* We can recurse on the same lock type through:
