Skip to content

Commit

Permalink
x86_64: Fix "bt" command on kernels with random_kstack_offset=on
Browse files Browse the repository at this point in the history
On kernels configured with CONFIG_RANDOMIZE_KSTACK_OFFSET=y and
random_kstack_offset=on, a random offset is added to the stack with
__kstack_alloca() at the beginning of do_syscall_64() and other syscall
entry functions.  This eventually executes the following instruction:

  <do_syscall_64+32>:  sub    %rax,%rsp

On the other hand, crash uses only part of the ORC unwinder data to
unwind stacks, and if an ip value doesn't have usable ORC data, it
calculates the frame size by parsing the assembly of the function.

However, crash cannot calculate the frame size correctly with the
instruction above, and prints stale return addresses like this:

  crash> bt 1
  PID: 1        TASK: ffff9c250023b880  CPU: 0    COMMAND: "systemd"
    #0 [ffffb7e5c001fc80] __schedule at ffffffff91ae2b16
    #1 [ffffb7e5c001fd00] schedule at ffffffff91ae2ed3
    #2 [ffffb7e5c001fd18] schedule_hrtimeout_range_clock at ffffffff91ae7ed8
    #3 [ffffb7e5c001fda8] ep_poll at ffffffff913ef828
    #4 [ffffb7e5c001fe48] do_epoll_wait at ffffffff913ef943
    #5 [ffffb7e5c001fe80] __x64_sys_epoll_wait at ffffffff913f0130
    #6 [ffffb7e5c001fed0] do_syscall_64 at ffffffff91ad7169
    #7 [ffffb7e5c001fef0] do_syscall_64 at ffffffff91ad7179             <<
    #8 [ffffb7e5c001ff10] syscall_exit_to_user_mode at ffffffff91adaab2 << stale entries
    #9 [ffffb7e5c001ff20] do_syscall_64 at ffffffff91ad7179             <<
   #10 [ffffb7e5c001ff50] entry_SYSCALL_64_after_hwframe at ffffffff91c0009b
       RIP: 00007f258d9427ae  RSP: 00007fffda631d60  RFLAGS: 00000293
       ...

To fix this, make fuller use of the ORC data.  The ORC unwinder often uses
the %rbp value, so save it from exception frames and inactive task stacks.

Signed-off-by: Kazuhito Hagio <k-hagio-ab@nec.com>
  • Loading branch information
k-hagio committed Feb 20, 2023
1 parent e0e6e4a commit 21ad048
Show file tree
Hide file tree
Showing 3 changed files with 83 additions and 34 deletions.
1 change: 1 addition & 0 deletions defs.h
Original file line number Diff line number Diff line change
Expand Up @@ -2207,6 +2207,7 @@ struct offset_table { /* stash of commonly-used offsets */
long sock_sk_common;
long sock_common_skc_v6_daddr;
long sock_common_skc_v6_rcv_saddr;
long inactive_task_frame_bp;
};

struct size_table { /* stash of commonly-used sizes */
Expand Down
1 change: 1 addition & 0 deletions symbols.c
Original file line number Diff line number Diff line change
Expand Up @@ -8822,6 +8822,7 @@ dump_offset_table(char *spec, ulong makestruct)
OFFSET(task_struct_tss_ksp));
fprintf(fp, " task_struct_thread_eip: %ld\n",
OFFSET(task_struct_thread_eip));
fprintf(fp, " inactive_task_frame_bp: %ld\n", OFFSET(inactive_task_frame_bp));
fprintf(fp, " inactive_task_frame_ret_addr: %ld\n",
OFFSET(inactive_task_frame_ret_addr));
fprintf(fp, " task_struct_thread_esp: %ld\n",
Expand Down
115 changes: 81 additions & 34 deletions x86_64.c
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ static int x86_64_do_not_cache_framesize(struct syment *, ulong);
static int x86_64_framesize_cache_func(int, ulong, int *, int, struct syment *);
static ulong x86_64_get_framepointer(struct bt_info *, ulong);
int search_for_eframe_target_caller(struct bt_info *, ulong, int *);
static int x86_64_get_framesize(struct bt_info *, ulong, ulong);
static int x86_64_get_framesize(struct bt_info *, ulong, ulong, char *);
static void x86_64_framesize_debug(struct bt_info *);
static void x86_64_get_active_set(void);
static int x86_64_get_kvaddr_ranges(struct vaddr_range *);
Expand Down Expand Up @@ -3642,7 +3642,7 @@ x86_64_low_budget_back_trace_cmd(struct bt_info *bt_in)
bt, ofp);
rsp += SIZE(pt_regs); /* guaranteed kernel mode */
if (bt->eframe_ip && ((framesize = x86_64_get_framesize(bt,
bt->eframe_ip, rsp)) >= 0))
bt->eframe_ip, rsp, NULL)) >= 0))
rsp += framesize;
level++;
irq_eframe = 0;
Expand Down Expand Up @@ -3674,7 +3674,7 @@ x86_64_low_budget_back_trace_cmd(struct bt_info *bt_in)
case BACKTRACE_ENTRY_DISPLAYED:
level++;
if ((framesize = x86_64_get_framesize(bt,
bt->eframe_ip ? bt->eframe_ip : *up, rsp)) >= 0) {
bt->eframe_ip ? bt->eframe_ip : *up, rsp, NULL)) >= 0) {
rsp += framesize;
i += framesize/sizeof(ulong);
}
Expand Down Expand Up @@ -3747,7 +3747,7 @@ x86_64_low_budget_back_trace_cmd(struct bt_info *bt_in)
}

level++;
if ((framesize = x86_64_get_framesize(bt, bt->instptr, rsp)) >= 0)
if ((framesize = x86_64_get_framesize(bt, bt->instptr, rsp, NULL)) >= 0)
rsp += framesize;
}
}
Expand Down Expand Up @@ -3799,7 +3799,7 @@ x86_64_low_budget_back_trace_cmd(struct bt_info *bt_in)
case BACKTRACE_ENTRY_DISPLAYED:
level++;
if ((framesize = x86_64_get_framesize(bt,
bt->eframe_ip ? bt->eframe_ip : *up, rsp)) >= 0) {
bt->eframe_ip ? bt->eframe_ip : *up, rsp, NULL)) >= 0) {
rsp += framesize;
i += framesize/sizeof(ulong);
}
Expand Down Expand Up @@ -3909,24 +3909,34 @@ x86_64_low_budget_back_trace_cmd(struct bt_info *bt_in)
(STREQ(rip_symbol, "thread_return") ||
STREQ(rip_symbol, "schedule") ||
STREQ(rip_symbol, "__schedule"))) {
if (STREQ(rip_symbol, "__schedule")) {
i = (rsp - bt->stackbase)/sizeof(ulong);
x86_64_print_stack_entry(bt, ofp, level,
i, bt->instptr);
level++;
rsp = __schedule_frame_adjust(rsp, bt);
if (STREQ(closest_symbol(bt->instptr), "schedule"))
if ((machdep->flags & ORC) && VALID_MEMBER(inactive_task_frame_ret_addr)) {
/*
* %rsp should have the address of inactive_task_frame, so
* skip the registers before ret_addr to adjust rsp.
*/
if (CRASHDEBUG(1))
fprintf(fp, "rsp: %lx rbp: %lx\n", rsp, bt->bptr);
rsp += OFFSET(inactive_task_frame_ret_addr);
} else {
if (STREQ(rip_symbol, "__schedule")) {
i = (rsp - bt->stackbase)/sizeof(ulong);
x86_64_print_stack_entry(bt, ofp, level,
i, bt->instptr);
level++;
rsp = __schedule_frame_adjust(rsp, bt);
if (STREQ(closest_symbol(bt->instptr), "schedule"))
bt->flags |= BT_SCHEDULE;
} else
bt->flags |= BT_SCHEDULE;
} else
bt->flags |= BT_SCHEDULE;

if (bt->flags & BT_SCHEDULE) {
i = (rsp - bt->stackbase)/sizeof(ulong);
x86_64_print_stack_entry(bt, ofp, level,
i, bt->instptr);
bt->flags &= ~(ulonglong)BT_SCHEDULE;
rsp += sizeof(ulong);
level++;

if (bt->flags & BT_SCHEDULE) {
i = (rsp - bt->stackbase)/sizeof(ulong);
x86_64_print_stack_entry(bt, ofp, level,
i, bt->instptr);
bt->flags &= ~(ulonglong)BT_SCHEDULE;
rsp += sizeof(ulong);
level++;
}
}
}

Expand Down Expand Up @@ -3957,7 +3967,7 @@ x86_64_low_budget_back_trace_cmd(struct bt_info *bt_in)
irq_eframe = 0;
bt->flags |= BT_EFRAME_TARGET;
if (bt->eframe_ip && ((framesize = x86_64_get_framesize(bt,
bt->eframe_ip, rsp)) >= 0))
bt->eframe_ip, rsp, NULL)) >= 0))
rsp += framesize;
bt->flags &= ~BT_EFRAME_TARGET;
}
Expand Down Expand Up @@ -4044,7 +4054,7 @@ x86_64_low_budget_back_trace_cmd(struct bt_info *bt_in)
case BACKTRACE_ENTRY_DISPLAYED:
level++;
if ((framesize = x86_64_get_framesize(bt,
bt->eframe_ip ? bt->eframe_ip : *up, rsp)) >= 0) {
bt->eframe_ip ? bt->eframe_ip : *up, rsp, (char *)up)) >= 0) {
rsp += framesize;
i += framesize/sizeof(ulong);
}
Expand Down Expand Up @@ -4755,7 +4765,8 @@ x86_64_exception_frame(ulong flags, ulong kvaddr, char *local,
bt->instptr = rip;
bt->stkptr = rsp;
bt->bptr = rbp;
}
} else if (machdep->flags & ORC)
bt->bptr = rbp;

if (kvaddr)
FREEBUF(pt_regs_buf);
Expand Down Expand Up @@ -5315,6 +5326,10 @@ x86_64_get_sp(struct bt_info *bt)
OFFSET(thread_struct_rsp), KVADDR,
&rsp, sizeof(void *),
"thread_struct rsp", FAULT_ON_ERROR);
if ((machdep->flags & ORC) && VALID_MEMBER(inactive_task_frame_bp)) {
readmem(rsp + OFFSET(inactive_task_frame_bp), KVADDR, &bt->bptr,
sizeof(void *), "inactive_task_frame.bp", FAULT_ON_ERROR);
}
return rsp;
}

Expand Down Expand Up @@ -6421,6 +6436,9 @@ x86_64_ORC_init(void)
orc->__stop_orc_unwind = symbol_value("__stop_orc_unwind");
orc->orc_lookup = symbol_value("orc_lookup");

MEMBER_OFFSET_INIT(inactive_task_frame_bp, "inactive_task_frame", "bp");
MEMBER_OFFSET_INIT(inactive_task_frame_ret_addr, "inactive_task_frame", "ret_addr");

machdep->flags |= ORC;
}

Expand Down Expand Up @@ -8489,7 +8507,7 @@ search_for_eframe_target_caller(struct bt_info *bt, ulong stkptr, int *framesize
(BT_OLD_BACK_TRACE|BT_TEXT_SYMBOLS|BT_TEXT_SYMBOLS_ALL|BT_FRAMESIZE_DISABLE)

static int
x86_64_get_framesize(struct bt_info *bt, ulong textaddr, ulong rsp)
x86_64_get_framesize(struct bt_info *bt, ulong textaddr, ulong rsp, char *stack_ptr)
{
int c, framesize, instr, arg, max;
struct syment *sp;
Expand Down Expand Up @@ -8590,19 +8608,48 @@ x86_64_get_framesize(struct bt_info *bt, ulong textaddr, ulong rsp)
if ((machdep->flags & ORC) && (korc = orc_find(textaddr))) {
if (CRASHDEBUG(1)) {
fprintf(fp,
"rsp: %lx textaddr: %lx framesize: %d -> spo: %d bpo: %d spr: %d bpr: %d type: %d %s",
"rsp: %lx textaddr: %lx framesize: %d -> spo: %d bpo: %d spr: %d bpr: %d type: %d",
rsp, textaddr, framesize, korc->sp_offset, korc->bp_offset,
korc->sp_reg, korc->bp_reg, korc->type,
(korc->type == ORC_TYPE_CALL) && (korc->sp_reg == ORC_REG_SP) ? "" : "(UNUSED)");
korc->sp_reg, korc->bp_reg, korc->type);
if (MEMBER_EXISTS("orc_entry", "end"))
fprintf(fp, " end: %d", korc->end);
fprintf(fp, "\n");
}

if ((korc->type == ORC_TYPE_CALL) && (korc->sp_reg == ORC_REG_SP)) {
framesize = (korc->sp_offset - 8);
return (x86_64_framesize_cache_func(FRAMESIZE_ENTER, textaddr,
&framesize, exception, NULL));
if (korc->type == ORC_TYPE_CALL) {
ulong prev_sp = 0, prev_bp = 0;
framesize = -1;

if (korc->sp_reg == ORC_REG_SP) {
framesize = (korc->sp_offset - 8);

/* rsp points to a return address, so +8 to use sp_offset */
prev_sp = (rsp + 8) + korc->sp_offset;
if (CRASHDEBUG(1))
fprintf(fp, "rsp: %lx prev_sp: %lx\n", rsp, prev_sp);
} else if ((korc->sp_reg == ORC_REG_BP) && bt->bptr) {
prev_sp = bt->bptr + korc->sp_offset;
framesize = (prev_sp - (rsp + 8) - 8);
if (CRASHDEBUG(1))
fprintf(fp, "rsp: %lx rbp: %lx prev_sp: %lx framesize: %d\n",
rsp, bt->bptr, prev_sp, framesize);
}

if ((korc->bp_reg == ORC_REG_PREV_SP) && prev_sp) {
prev_bp = prev_sp + korc->bp_offset;
if (stack_ptr && INSTACK(prev_bp, bt)) {
bt->bptr = ULONG(stack_ptr + (prev_bp - rsp));
if (CRASHDEBUG(1))
fprintf(fp, "rsp: %lx prev_sp: %lx prev_bp: %lx -> %lx\n",
rsp, prev_sp, prev_bp, bt->bptr);
} else
bt->bptr = 0;
} else if ((korc->bp_reg != ORC_REG_UNDEFINED))
bt->bptr = 0;

if (framesize >= 0)
/* Do not cache this, possibly it may be variable. */
return framesize;
}
}

Expand Down Expand Up @@ -8758,7 +8805,7 @@ x86_64_framesize_debug(struct bt_info *bt)
if (!bt->hp->eip)
error(INFO, "x86_64_framesize_debug: ignoring command\n");
else
x86_64_get_framesize(bt, bt->hp->eip, 0);
x86_64_get_framesize(bt, bt->hp->eip, 0, NULL);
break;

case -3:
Expand Down

0 comments on commit 21ad048

Please sign in to comment.