Permalink
Browse files

Merge branch 'perf-uprobes-for-linus' of git://git.kernel.org/pub/scm…

…/linux/kernel/git/tip/tip

Pull user-space probe instrumentation from Ingo Molnar:
 "The uprobes code originates from SystemTap and has been used for years
  in Fedora and RHEL kernels.  This version is much rewritten, reviews
  from PeterZ, Oleg and myself shaped the end result.

  This tree includes uprobes support in 'perf probe' - but SystemTap
  (and other tools) can take advantage of user probe points as well.

  Sample usage of uprobes via perf, for example to profile malloc()
  calls without modifying user-space binaries.

  First boot a new kernel with CONFIG_UPROBE_EVENT=y enabled.

  If you don't know which function you want to probe you can pick one
  from 'perf top' or can get a list all functions that can be probed
  within libc (binaries can be specified as well):

	$ perf probe -F -x /lib/libc.so.6

  To probe libc's malloc():

	$ perf probe -x /lib64/libc.so.6 malloc
	Added new event:
	probe_libc:malloc    (on 0x7eac0)

  You can now use it in all perf tools, such as:

	perf record -e probe_libc:malloc -aR sleep 1

  Make use of it to create a call graph (as the flat profile is going to
  look very boring):

	$ perf record -e probe_libc:malloc -gR make
	[ perf record: Woken up 173 times to write data ]
	[ perf record: Captured and wrote 44.190 MB perf.data (~1930712

	$ perf report | less

	  32.03%            git  libc-2.15.so   [.] malloc
	                    |
	                    --- malloc

	  29.49%            cc1  libc-2.15.so   [.] malloc
	                    |
	                    --- malloc
	                       |
	                       |--0.95%-- 0x208eb1000000000
	                       |
	                       |--0.63%-- htab_traverse_noresize

	  11.04%             as  libc-2.15.so   [.] malloc
	                     |
	                     --- malloc
	                        |

	   7.15%             ld  libc-2.15.so   [.] malloc
	                     |
	                     --- malloc
	                        |

	   5.07%             sh  libc-2.15.so   [.] malloc
	                     |
	                     --- malloc
	                        |
	   4.99%  python-config  libc-2.15.so   [.] malloc
	          |
	          --- malloc
	             |
	   4.54%           make  libc-2.15.so   [.] malloc
	                   |
	                   --- malloc
	                      |
	                      |--7.34%-- glob
	                      |          |
	                      |          |--93.18%-- 0x41588f
	                      |          |
	                      |           --6.82%-- glob
	                      |                     0x41588f

	   ...

  Or:

	$ perf report -g flat | less

	# Overhead        Command  Shared Object      Symbol
	# ........  .............  .............  ..........
	#
	  32.03%            git  libc-2.15.so   [.] malloc
	          27.19%
	              malloc

	  29.49%            cc1  libc-2.15.so   [.] malloc
	          24.77%
	              malloc

	  11.04%             as  libc-2.15.so   [.] malloc
	          11.02%
	              malloc

	   7.15%             ld  libc-2.15.so   [.] malloc
	           6.57%
	              malloc

	 ...

  The core uprobes design is fairly straightforward: uprobes probe
  points register themselves at (inode:offset) addresses of
  libraries/binaries, after which all existing (or new) vmas that map
  that address will have a software breakpoint injected at that address.
  vmas are COW-ed to preserve original content.  The probe points are
  kept in an rbtree.

  If user-space executes the probed inode:offset instruction address
  then an event is generated which can be recovered from the regular
  perf event channels and mmap-ed ring-buffer.

  Multiple probes at the same address are supported, they create a
  dynamic callback list of event consumers.

  The basic model is further complicated by the XOL speedup: the
  original instruction that is probed is copied (in an architecture
  specific fashion) and executed out of line when the probe triggers.
  The XOL area is a single vma per process, with a fixed number of
  entries (which limits probe execution parallelism).

  The API: uprobes are installed/removed via
  /sys/kernel/debug/tracing/uprobe_events, the API is integrated to
  align with the kprobes interface as much as possible, but is separate
  to it.

  Injecting a probe point is privileged operation, which can be relaxed
  by setting perf_paranoid to -1.

  You can use multiple probes as well and mix them with kprobes and
  regular PMU events or tracepoints, when instrumenting a task."

Fix up trivial conflicts in mm/memory.c due to previous cleanup of
unmap_single_vma().

* 'perf-uprobes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (21 commits)
  perf probe: Detect probe target when m/x options are absent
  perf probe: Provide perf interface for uprobes
  tracing: Fix kconfig warning due to a typo
  tracing: Provide trace events interface for uprobes
  tracing: Extract out common code for kprobes/uprobes trace events
  tracing: Modify is_delete, is_return from int to bool
  uprobes/core: Decrement uprobe count before the pages are unmapped
  uprobes/core: Make background page replacement logic account for rss_stat counters
  uprobes/core: Optimize probe hits with the help of a counter
  uprobes/core: Allocate XOL slots for uprobes use
  uprobes/core: Handle breakpoint and singlestep exceptions
  uprobes/core: Rename bkpt to swbp
  uprobes/core: Make order of function parameters consistent across functions
  uprobes/core: Make macro names consistent
  uprobes: Update copyright notices
  uprobes/core: Move insn to arch specific structure
  uprobes/core: Remove uprobe_opcode_sz
  uprobes/core: Make instruction tables volatile
  uprobes: Move to kernel/events/
  uprobes/core: Clean up, refactor and improve the code
  ...
  • Loading branch information...
2 parents 2c01e7b + 9cba26e commit 654443e20dfc0617231f28a07c96a979ee1a0239 @torvalds committed May 24, 2012
@@ -0,0 +1,113 @@
+ Uprobe-tracer: Uprobe-based Event Tracing
+ =========================================
+ Documentation written by Srikar Dronamraju
+
+Overview
+--------
+Uprobe based trace events are similar to kprobe based trace events.
+To enable this feature, build your kernel with CONFIG_UPROBE_EVENT=y.
+
+Similar to the kprobe-event tracer, this doesn't need to be activated via
+current_tracer. Instead of that, add probe points via
+/sys/kernel/debug/tracing/uprobe_events, and enable it via
+/sys/kernel/debug/tracing/events/uprobes/<EVENT>/enabled.
+
+However unlike kprobe-event tracer, the uprobe event interface expects the
+user to calculate the offset of the probepoint in the object
+
+Synopsis of uprobe_tracer
+-------------------------
+ p[:[GRP/]EVENT] PATH:SYMBOL[+offs] [FETCHARGS] : Set a probe
+
+ GRP : Group name. If omitted, use "uprobes" for it.
+ EVENT : Event name. If omitted, the event name is generated
+ based on SYMBOL+offs.
+ PATH : path to an executable or a library.
+ SYMBOL[+offs] : Symbol+offset where the probe is inserted.
+
+ FETCHARGS : Arguments. Each probe can have up to 128 args.
+ %REG : Fetch register REG
+
+Event Profiling
+---------------
+ You can check the total number of probe hits and probe miss-hits via
+/sys/kernel/debug/tracing/uprobe_profile.
+ The first column is event name, the second is the number of probe hits,
+the third is the number of probe miss-hits.
+
+Usage examples
+--------------
+To add a probe as a new event, write a new definition to uprobe_events
+as below.
+
+ echo 'p: /bin/bash:0x4245c0' > /sys/kernel/debug/tracing/uprobe_events
+
+ This sets a uprobe at an offset of 0x4245c0 in the executable /bin/bash
+
+ echo > /sys/kernel/debug/tracing/uprobe_events
+
+ This clears all probe points.
+
+The following example shows how to dump the instruction pointer and %ax
+a register at the probed text address. Here we are trying to probe
+function zfree in /bin/zsh
+
+ # cd /sys/kernel/debug/tracing/
+ # cat /proc/`pgrep zsh`/maps | grep /bin/zsh | grep r-xp
+ 00400000-0048a000 r-xp 00000000 08:03 130904 /bin/zsh
+ # objdump -T /bin/zsh | grep -w zfree
+ 0000000000446420 g DF .text 0000000000000012 Base zfree
+
+0x46420 is the offset of zfree in object /bin/zsh that is loaded at
+0x00400000. Hence the command to probe would be :
+
+ # echo 'p /bin/zsh:0x46420 %ip %ax' > uprobe_events
+
+Please note: User has to explicitly calculate the offset of the probepoint
+in the object. We can see the events that are registered by looking at the
+uprobe_events file.
+
+ # cat uprobe_events
+ p:uprobes/p_zsh_0x46420 /bin/zsh:0x00046420 arg1=%ip arg2=%ax
+
+The format of events can be seen by viewing the file events/uprobes/p_zsh_0x46420/format
+
+ # cat events/uprobes/p_zsh_0x46420/format
+ name: p_zsh_0x46420
+ ID: 922
+ format:
+ field:unsigned short common_type; offset:0; size:2; signed:0;
+ field:unsigned char common_flags; offset:2; size:1; signed:0;
+ field:unsigned char common_preempt_count; offset:3; size:1; signed:0;
+ field:int common_pid; offset:4; size:4; signed:1;
+ field:int common_padding; offset:8; size:4; signed:1;
+
+ field:unsigned long __probe_ip; offset:12; size:4; signed:0;
+ field:u32 arg1; offset:16; size:4; signed:0;
+ field:u32 arg2; offset:20; size:4; signed:0;
+
+ print fmt: "(%lx) arg1=%lx arg2=%lx", REC->__probe_ip, REC->arg1, REC->arg2
+
+Right after definition, each event is disabled by default. For tracing these
+events, you need to enable it by:
+
+ # echo 1 > events/uprobes/enable
+
+Lets disable the event after sleeping for some time.
+ # sleep 20
+ # echo 0 > events/uprobes/enable
+
+And you can see the traced information via /sys/kernel/debug/tracing/trace.
+
+ # cat trace
+ # tracer: nop
+ #
+ # TASK-PID CPU# TIMESTAMP FUNCTION
+ # | | | | |
+ zsh-24842 [006] 258544.995456: p_zsh_0x46420: (0x446420) arg1=446421 arg2=79
+ zsh-24842 [007] 258545.000270: p_zsh_0x46420: (0x446420) arg1=446421 arg2=79
+ zsh-24842 [002] 258545.043929: p_zsh_0x46420: (0x446420) arg1=446421 arg2=79
+ zsh-24842 [004] 258547.046129: p_zsh_0x46420: (0x446420) arg1=446421 arg2=79
+
+Each line shows us probes were triggered for a pid 24842 with ip being
+0x446421 and contents of ax register being 79.
View
@@ -76,6 +76,23 @@ config OPTPROBES
depends on KPROBES && HAVE_OPTPROBES
depends on !PREEMPT
+config UPROBES
+ bool "Transparent user-space probes (EXPERIMENTAL)"
+ depends on UPROBE_EVENT && PERF_EVENTS
+ default n
+ help
+ Uprobes is the user-space counterpart to kprobes: they
+ enable instrumentation applications (such as 'perf probe')
+ to establish unintrusive probes in user-space binaries and
+ libraries, by executing handler functions when the probes
+ are hit by user-space applications.
+
+ ( These probes come in the form of single-byte breakpoints,
+ managed by the kernel and kept transparent to the probed
+ application. )
+
+ If in doubt, say "N".
+
config HAVE_EFFICIENT_UNALIGNED_ACCESS
bool
help
View
@@ -87,7 +87,7 @@ config X86
select BUILDTIME_EXTABLE_SORT
config INSTRUCTION_DECODER
- def_bool (KPROBES || PERF_EVENTS)
+ def_bool (KPROBES || PERF_EVENTS || UPROBES)
config OUTPUT_FORMAT
string
@@ -243,6 +243,9 @@ config ARCH_CPU_PROBE_RELEASE
def_bool y
depends on HOTPLUG_CPU
+config ARCH_SUPPORTS_UPROBES
+ def_bool y
+
source "init/Kconfig"
source "kernel/Kconfig.freezer"
@@ -85,6 +85,7 @@ struct thread_info {
#define TIF_SECCOMP 8 /* secure computing */
#define TIF_MCE_NOTIFY 10 /* notify userspace of an MCE */
#define TIF_USER_RETURN_NOTIFY 11 /* notify kernel of userspace return */
+#define TIF_UPROBE 12 /* breakpointed or singlestepping */
#define TIF_NOTSC 16 /* TSC is not accessible in userland */
#define TIF_IA32 17 /* IA32 compatibility process */
#define TIF_FORK 18 /* ret_from_fork */
@@ -109,6 +110,7 @@ struct thread_info {
#define _TIF_SECCOMP (1 << TIF_SECCOMP)
#define _TIF_MCE_NOTIFY (1 << TIF_MCE_NOTIFY)
#define _TIF_USER_RETURN_NOTIFY (1 << TIF_USER_RETURN_NOTIFY)
+#define _TIF_UPROBE (1 << TIF_UPROBE)
#define _TIF_NOTSC (1 << TIF_NOTSC)
#define _TIF_IA32 (1 << TIF_IA32)
#define _TIF_FORK (1 << TIF_FORK)
@@ -0,0 +1,57 @@
+#ifndef _ASM_UPROBES_H
+#define _ASM_UPROBES_H
+/*
+ * User-space Probes (UProbes) for x86
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2008-2011
+ * Authors:
+ * Srikar Dronamraju
+ * Jim Keniston
+ */
+
+#include <linux/notifier.h>
+
+typedef u8 uprobe_opcode_t;
+
+#define MAX_UINSN_BYTES 16
+#define UPROBE_XOL_SLOT_BYTES 128 /* to keep it cache aligned */
+
+#define UPROBE_SWBP_INSN 0xcc
+#define UPROBE_SWBP_INSN_SIZE 1
+
+struct arch_uprobe {
+ u16 fixups;
+ u8 insn[MAX_UINSN_BYTES];
+#ifdef CONFIG_X86_64
+ unsigned long rip_rela_target_address;
+#endif
+};
+
+struct arch_uprobe_task {
+ unsigned long saved_trap_nr;
+#ifdef CONFIG_X86_64
+ unsigned long saved_scratch_register;
+#endif
+};
+
+extern int arch_uprobe_analyze_insn(struct arch_uprobe *aup, struct mm_struct *mm);
+extern int arch_uprobe_pre_xol(struct arch_uprobe *aup, struct pt_regs *regs);
+extern int arch_uprobe_post_xol(struct arch_uprobe *aup, struct pt_regs *regs);
+extern bool arch_uprobe_xol_was_trapped(struct task_struct *tsk);
+extern int arch_uprobe_exception_notify(struct notifier_block *self, unsigned long val, void *data);
+extern void arch_uprobe_abort_xol(struct arch_uprobe *aup, struct pt_regs *regs);
+#endif /* _ASM_UPROBES_H */
View
@@ -100,6 +100,7 @@ obj-$(CONFIG_X86_CHECK_BIOS_CORRUPTION) += check.o
obj-$(CONFIG_SWIOTLB) += pci-swiotlb.o
obj-$(CONFIG_OF) += devicetree.o
+obj-$(CONFIG_UPROBES) += uprobes.o
###
# 64 bit specific files
View
@@ -18,6 +18,7 @@
#include <linux/personality.h>
#include <linux/uaccess.h>
#include <linux/user-return-notifier.h>
+#include <linux/uprobes.h>
#include <asm/processor.h>
#include <asm/ucontext.h>
@@ -814,6 +815,11 @@ do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
mce_notify_process();
#endif /* CONFIG_X86_64 && CONFIG_X86_MCE */
+ if (thread_info_flags & _TIF_UPROBE) {
+ clear_thread_flag(TIF_UPROBE);
+ uprobe_notify_resume(regs);
+ }
+
/* deal with pending signal delivery */
if (thread_info_flags & _TIF_SIGPENDING)
do_signal(regs);
Oops, something went wrong.

0 comments on commit 654443e

Please sign in to comment.