Skip to content

Commit

Permalink
arm64: implement ftrace with regs
Browse files Browse the repository at this point in the history
This patch implements FTRACE_WITH_REGS for arm64, which allows a traced
function's arguments (and some other registers) to be captured into a
struct pt_regs, allowing these to be inspected and/or modified. This is
a building block for live-patching, where a function's arguments may be
forwarded to another function. This is also necessary to enable ftrace
and in-kernel pointer authentication at the same time, as it allows the
LR value to be captured and adjusted prior to signing.

Using GCC's -fpatchable-function-entry=N option, we can have the
compiler insert a configurable number of NOPs between the function entry
point and the usual prologue. This also ensures functions are AAPCS
compliant (e.g. disabling inter-procedural register allocation).

For example, with -fpatchable-function-entry=2, GCC 8.1.0 compiles the
following:

| unsigned long bar(void);
|
| unsigned long foo(void)
| {
|         return bar() + 1;
| }

... to:

| <foo>:
|         nop
|         nop
|         stp     x29, x30, [sp, #-16]!
|         mov     x29, sp
|         bl      0 <bar>
|         add     x0, x0, #0x1
|         ldp     x29, x30, [sp], #16
|         ret

This patch builds the kernel with -fpatchable-function-entry=2,
prefixing each function with two NOPs. To trace a function, we replace
these NOPs with a sequence that saves the LR into a GPR, then calls an
ftrace entry assembly function which saves this and other relevant
registers:

| mov	x9, x30
| bl	<ftrace-entry>

Since patchable functions are AAPCS compliant (and the kernel does not
use x18 as a platform register), x9-x18 can be safely clobbered in the
patched sequence and the ftrace entry code.

There are now two ftrace entry functions, ftrace_regs_entry (which saves
all GPRs), and ftrace_entry (which saves the bare minimum). A PLT is
allocated for each within modules.

Signed-off-by: Torsten Duwe <duwe@suse.de>
[Mark: rework asm, comments, PLTs, initialization, commit message]
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Reviewed-by: Amit Daniel Kachhap <amit.kachhap@arm.com>
Reviewed-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Reviewed-by: Torsten Duwe <duwe@suse.de>
Tested-by: Amit Daniel Kachhap <amit.kachhap@arm.com>
Tested-by: Torsten Duwe <duwe@suse.de>
Cc: AKASHI Takahiro <takahiro.akashi@linaro.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Julien Thierry <jthierry@redhat.com>
Cc: Will Deacon <will@kernel.org>
  • Loading branch information
Torsten Duwe authored and Mark Rutland committed Nov 6, 2019
1 parent 1f377e0 commit 3b23e49
Show file tree
Hide file tree
Showing 8 changed files with 252 additions and 25 deletions.
2 changes: 2 additions & 0 deletions arch/arm64/Kconfig
Expand Up @@ -143,6 +143,8 @@ config ARM64
select HAVE_DEBUG_KMEMLEAK
select HAVE_DMA_CONTIGUOUS
select HAVE_DYNAMIC_FTRACE
select HAVE_DYNAMIC_FTRACE_WITH_REGS \
if $(cc-option,-fpatchable-function-entry=2)
select HAVE_EFFICIENT_UNALIGNED_ACCESS
select HAVE_FAST_GUP
select HAVE_FTRACE_MCOUNT_RECORD
Expand Down
5 changes: 5 additions & 0 deletions arch/arm64/Makefile
Expand Up @@ -95,6 +95,11 @@ ifeq ($(CONFIG_ARM64_MODULE_PLTS),y)
KBUILD_LDS_MODULE += $(srctree)/arch/arm64/kernel/module.lds
endif

ifeq ($(CONFIG_DYNAMIC_FTRACE_WITH_REGS),y)
KBUILD_CPPFLAGS += -DCC_USING_PATCHABLE_FUNCTION_ENTRY
CC_FLAGS_FTRACE := -fpatchable-function-entry=2
endif

# Default value
head-y := arch/arm64/kernel/head.o

Expand Down
23 changes: 23 additions & 0 deletions arch/arm64/include/asm/ftrace.h
Expand Up @@ -11,9 +11,20 @@
#include <asm/insn.h>

#define HAVE_FUNCTION_GRAPH_FP_TEST

#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
#define ARCH_SUPPORTS_FTRACE_OPS 1
#else
#define MCOUNT_ADDR ((unsigned long)_mcount)
#endif

/* The BL at the callsite's adjusted rec->ip */
#define MCOUNT_INSN_SIZE AARCH64_INSN_SIZE

#define FTRACE_PLT_IDX 0
#define FTRACE_REGS_PLT_IDX 1
#define NR_FTRACE_PLTS 2

/*
* Currently, gcc tends to save the link register after the local variables
* on the stack. This causes the max stack tracer to report the function
Expand Down Expand Up @@ -43,13 +54,25 @@ extern void return_to_handler(void);

static inline unsigned long ftrace_call_adjust(unsigned long addr)
{
/*
* Adjust addr to point at the BL in the callsite.
* See ftrace_init_nop() for the callsite sequence.
*/
if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS))
return addr + AARCH64_INSN_SIZE;
/*
* addr is the address of the mcount call instruction.
* recordmcount does the necessary offset calculation.
*/
return addr;
}

#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
struct dyn_ftrace;
int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec);
#define ftrace_init_nop ftrace_init_nop
#endif

#define ftrace_return_address(n) return_address(n)

/*
Expand Down
2 changes: 1 addition & 1 deletion arch/arm64/include/asm/module.h
Expand Up @@ -21,7 +21,7 @@ struct mod_arch_specific {
struct mod_plt_sec init;

/* for CONFIG_DYNAMIC_FTRACE */
struct plt_entry *ftrace_trampoline;
struct plt_entry *ftrace_trampolines;
};
#endif

Expand Down
140 changes: 135 additions & 5 deletions arch/arm64/kernel/entry-ftrace.S
Expand Up @@ -7,10 +7,137 @@
*/

#include <linux/linkage.h>
#include <asm/asm-offsets.h>
#include <asm/assembler.h>
#include <asm/ftrace.h>
#include <asm/insn.h>

#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
/*
* Due to -fpatchable-function-entry=2, the compiler has placed two NOPs before
* the regular function prologue. For an enabled callsite, ftrace_init_nop() and
* ftrace_make_call() have patched those NOPs to:
*
* MOV X9, LR
* BL <entry>
*
* ... where <entry> is either ftrace_caller or ftrace_regs_caller.
*
* Each instrumented function follows the AAPCS, so here x0-x8 and x19-x30 are
* live, and x9-x18 are safe to clobber.
*
* We save the callsite's context into a pt_regs before invoking any ftrace
* callbacks. So that we can get a sensible backtrace, we create a stack record
* for the callsite and the ftrace entry assembly. This is not sufficient for
* reliable stacktrace: until we create the callsite stack record, its caller
* is missing from the LR and existing chain of frame records.
*/
.macro ftrace_regs_entry, allregs=0
/* Make room for pt_regs, plus a callee frame */
sub sp, sp, #(S_FRAME_SIZE + 16)

/* Save function arguments (and x9 for simplicity) */
stp x0, x1, [sp, #S_X0]
stp x2, x3, [sp, #S_X2]
stp x4, x5, [sp, #S_X4]
stp x6, x7, [sp, #S_X6]
stp x8, x9, [sp, #S_X8]

/* Optionally save the callee-saved registers, always save the FP */
.if \allregs == 1
stp x10, x11, [sp, #S_X10]
stp x12, x13, [sp, #S_X12]
stp x14, x15, [sp, #S_X14]
stp x16, x17, [sp, #S_X16]
stp x18, x19, [sp, #S_X18]
stp x20, x21, [sp, #S_X20]
stp x22, x23, [sp, #S_X22]
stp x24, x25, [sp, #S_X24]
stp x26, x27, [sp, #S_X26]
stp x28, x29, [sp, #S_X28]
.else
str x29, [sp, #S_FP]
.endif

/* Save the callsite's SP and LR */
add x10, sp, #(S_FRAME_SIZE + 16)
stp x9, x10, [sp, #S_LR]

/* Save the PC after the ftrace callsite */
str x30, [sp, #S_PC]

/* Create a frame record for the callsite above pt_regs */
stp x29, x9, [sp, #S_FRAME_SIZE]
add x29, sp, #S_FRAME_SIZE

/* Create our frame record within pt_regs. */
stp x29, x30, [sp, #S_STACKFRAME]
add x29, sp, #S_STACKFRAME
.endm

ENTRY(ftrace_regs_caller)
ftrace_regs_entry 1
b ftrace_common
ENDPROC(ftrace_regs_caller)

ENTRY(ftrace_caller)
ftrace_regs_entry 0
b ftrace_common
ENDPROC(ftrace_caller)

ENTRY(ftrace_common)
sub x0, x30, #AARCH64_INSN_SIZE // ip (callsite's BL insn)
mov x1, x9 // parent_ip (callsite's LR)
ldr_l x2, function_trace_op // op
mov x3, sp // regs

GLOBAL(ftrace_call)
bl ftrace_stub

#ifdef CONFIG_FUNCTION_GRAPH_TRACER
GLOBAL(ftrace_graph_call) // ftrace_graph_caller();
nop // If enabled, this will be replaced
// "b ftrace_graph_caller"
#endif

/*
* At the callsite x0-x8 and x19-x30 were live. Any C code will have preserved
* x19-x29 per the AAPCS, and we created frame records upon entry, so we need
* to restore x0-x8, x29, and x30.
*/
ftrace_common_return:
/* Restore function arguments */
ldp x0, x1, [sp]
ldp x2, x3, [sp, #S_X2]
ldp x4, x5, [sp, #S_X4]
ldp x6, x7, [sp, #S_X6]
ldr x8, [sp, #S_X8]

/* Restore the callsite's FP, LR, PC */
ldr x29, [sp, #S_FP]
ldr x30, [sp, #S_LR]
ldr x9, [sp, #S_PC]

/* Restore the callsite's SP */
add sp, sp, #S_FRAME_SIZE + 16

ret x9
ENDPROC(ftrace_common)

#ifdef CONFIG_FUNCTION_GRAPH_TRACER
ENTRY(ftrace_graph_caller)
ldr x0, [sp, #S_PC]
sub x0, x0, #AARCH64_INSN_SIZE // ip (callsite's BL insn)
add x1, sp, #S_LR // parent_ip (callsite's LR)
ldr x2, [sp, #S_FRAME_SIZE] // parent fp (callsite's FP)
bl prepare_ftrace_return
b ftrace_common_return
ENDPROC(ftrace_graph_caller)
#else
#endif

#else /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */

/*
* Gcc with -pg will put the following code in the beginning of each function:
* mov x0, x30
Expand Down Expand Up @@ -160,11 +287,6 @@ GLOBAL(ftrace_graph_call) // ftrace_graph_caller();

mcount_exit
ENDPROC(ftrace_caller)
#endif /* CONFIG_DYNAMIC_FTRACE */

ENTRY(ftrace_stub)
ret
ENDPROC(ftrace_stub)

#ifdef CONFIG_FUNCTION_GRAPH_TRACER
/*
Expand All @@ -184,7 +306,15 @@ ENTRY(ftrace_graph_caller)

mcount_exit
ENDPROC(ftrace_graph_caller)
#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
#endif /* CONFIG_DYNAMIC_FTRACE */
#endif /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */

ENTRY(ftrace_stub)
ret
ENDPROC(ftrace_stub)

#ifdef CONFIG_FUNCTION_GRAPH_TRACER
/*
* void return_to_handler(void)
*
Expand Down
84 changes: 70 additions & 14 deletions arch/arm64/kernel/ftrace.c
Expand Up @@ -62,6 +62,19 @@ int ftrace_update_ftrace_func(ftrace_func_t func)
return ftrace_modify_code(pc, 0, new, false);
}

#ifdef CONFIG_ARM64_MODULE_PLTS
static struct plt_entry *get_ftrace_plt(struct module *mod, unsigned long addr)
{
struct plt_entry *plt = mod->arch.ftrace_trampolines;

if (addr == FTRACE_ADDR)
return &plt[FTRACE_PLT_IDX];
if (addr == FTRACE_REGS_ADDR && IS_ENABLED(CONFIG_FTRACE_WITH_REGS))
return &plt[FTRACE_REGS_PLT_IDX];
return NULL;
}
#endif

/*
* Turn on the call to ftrace_caller() in instrumented function
*/
Expand All @@ -74,19 +87,7 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
if (offset < -SZ_128M || offset >= SZ_128M) {
#ifdef CONFIG_ARM64_MODULE_PLTS
struct module *mod;

/*
* There is only one ftrace trampoline per module. For now,
* this is not a problem since on arm64, all dynamic ftrace
* invocations are routed via ftrace_caller(). This will need
* to be revisited if support for multiple ftrace entry points
* is added in the future, but for now, the pr_err() below
* deals with a theoretical issue only.
*/
if (addr != FTRACE_ADDR) {
pr_err("ftrace: far branches to multiple entry points unsupported inside a single module\n");
return -EINVAL;
}
struct plt_entry *plt;

/*
* On kernels that support module PLTs, the offset between the
Expand All @@ -105,7 +106,13 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
if (WARN_ON(!mod))
return -EINVAL;

addr = (unsigned long)mod->arch.ftrace_trampoline;
plt = get_ftrace_plt(mod, addr);
if (!plt) {
pr_err("ftrace: no module PLT for %ps\n", (void *)addr);
return -EINVAL;
}

addr = (unsigned long)plt;
#else /* CONFIG_ARM64_MODULE_PLTS */
return -EINVAL;
#endif /* CONFIG_ARM64_MODULE_PLTS */
Expand All @@ -117,6 +124,55 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
return ftrace_modify_code(pc, old, new, true);
}

#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
unsigned long addr)
{
unsigned long pc = rec->ip;
u32 old, new;

old = aarch64_insn_gen_branch_imm(pc, old_addr,
AARCH64_INSN_BRANCH_LINK);
new = aarch64_insn_gen_branch_imm(pc, addr, AARCH64_INSN_BRANCH_LINK);

return ftrace_modify_code(pc, old, new, true);
}

/*
* The compiler has inserted two NOPs before the regular function prologue.
* All instrumented functions follow the AAPCS, so x0-x8 and x19-x30 are live,
* and x9-x18 are free for our use.
*
* At runtime we want to be able to swing a single NOP <-> BL to enable or
* disable the ftrace call. The BL requires us to save the original LR value,
* so here we insert a <MOV X9, LR> over the first NOP so the instructions
* before the regular prologue are:
*
* | Compiled | Disabled | Enabled |
* +----------+------------+------------+
* | NOP | MOV X9, LR | MOV X9, LR |
* | NOP | NOP | BL <entry> |
*
* The LR value will be recovered by ftrace_regs_entry, and restored into LR
* before returning to the regular function prologue. When a function is not
* being traced, the MOV is not harmful given x9 is not live per the AAPCS.
*
* Note: ftrace_process_locs() has pre-adjusted rec->ip to be the address of
* the BL.
*/
int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec)
{
unsigned long pc = rec->ip - AARCH64_INSN_SIZE;
u32 old, new;

old = aarch64_insn_gen_nop();
new = aarch64_insn_gen_move_reg(AARCH64_INSN_REG_9,
AARCH64_INSN_REG_LR,
AARCH64_INSN_VARIANT_64BIT);
return ftrace_modify_code(pc, old, new, true);
}
#endif

/*
* Turn off the call to ftrace_caller() in instrumented function
*/
Expand Down
3 changes: 2 additions & 1 deletion arch/arm64/kernel/module-plts.c
Expand Up @@ -4,6 +4,7 @@
*/

#include <linux/elf.h>
#include <linux/ftrace.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/sort.h>
Expand Down Expand Up @@ -330,7 +331,7 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
tramp->sh_type = SHT_NOBITS;
tramp->sh_flags = SHF_EXECINSTR | SHF_ALLOC;
tramp->sh_addralign = __alignof__(struct plt_entry);
tramp->sh_size = sizeof(struct plt_entry);
tramp->sh_size = NR_FTRACE_PLTS * sizeof(struct plt_entry);
}

return 0;
Expand Down

0 comments on commit 3b23e49

Please sign in to comment.