Skip to content

Commit

Permalink
x86_64: Fix "bt" command printing stale entries on Linux 6.4 and later
Browse files Browse the repository at this point in the history
Kernel commit fb799447ae29 ("x86,objtool: Split UNWIND_HINT_EMPTY in
two"), which is contained in Linux 6.4 and later kernels, changed the
ORC_TYPE_CALL macro from 0 to 2.  As a result, the "bt" command cannot
use the ORC entries and displays stale entries in a call trace.

  crash> bt 1
  PID: 1        TASK: ffff93cd06294180  CPU: 51   COMMAND: "systemd"
   #0 [ffffb72bc00cbc98] __schedule at ffffffff86e52aae
   #1 [ffffb72bc00cbd00] schedule at ffffffff86e52f6a
   #2 [ffffb72bc00cbd18] schedule_hrtimeout_range_clock at ffffffff86e58ef5
   #3 [ffffb72bc00cbd88] ep_poll at ffffffff8669624d
   #4 [ffffb72bc00cbe28] do_epoll_wait at ffffffff86696371
   #5 [ffffb72bc00cbe30] do_timerfd_settime at ffffffff8669902b        <<
   #6 [ffffb72bc00cbe60] __x64_sys_epoll_wait at ffffffff86696bf0
   #7 [ffffb72bc00cbeb0] do_syscall_64 at ffffffff86e3feb9
   #8 [ffffb72bc00cbee0] __task_pid_nr_ns at ffffffff863330d7          <<
   #9 [ffffb72bc00cbf08] syscall_exit_to_user_mode at ffffffff86e466b2 << stale entries
  #10 [ffffb72bc00cbf18] do_syscall_64 at ffffffff86e3fec9             <<
  #11 [ffffb72bc00cbf50] entry_SYSCALL_64_after_hwframe at ffffffff870000aa

Also, the kernel's struct orc_entry has changed, so debugging information
for the ORC unwinder can be displayed incorrectly.

To fix these,
(1) introduce a "kernel_orc_entry_6_4" structure corresponding to 6.4 and
    an abstract structure "orc_entry" in crash,
(2) set ORC_TYPE_CALL to 2 or 0 depending on the kernel's orc_entry structure.

Signed-off-by: Kazuhito Hagio <k-hagio-ab@nec.com>
  • Loading branch information
k-hagio committed May 17, 2023
1 parent 2f28f8e commit 63a0913
Show file tree
Hide file tree
Showing 2 changed files with 102 additions and 26 deletions.
24 changes: 22 additions & 2 deletions defs.h
Original file line number Diff line number Diff line change
Expand Up @@ -6402,6 +6402,25 @@ typedef struct __attribute__((__packed__)) {
unsigned int end:1;
} kernel_orc_entry;

typedef struct __attribute__((__packed__)) {
signed short sp_offset;
signed short bp_offset;
unsigned int sp_reg:4;
unsigned int bp_reg:4;
unsigned int type:3;
unsigned int signal:1;
} kernel_orc_entry_6_4;

/*
 * Layout-independent copy of an ORC entry used inside crash.
 * orc_get_entry() fills it from either kernel_orc_entry or
 * kernel_orc_entry_6_4, widening the bit-fields to plain integers.
 */
typedef struct orc_entry {
signed short sp_offset;
signed short bp_offset;
unsigned int sp_reg;
unsigned int bp_reg;
unsigned int type;
/* copied only when the kernel_orc_entry layout is in use */
unsigned int end;
/* copied only when the kernel_orc_entry_6_4 layout is in use */
unsigned int signal;
} orc_entry;

struct ORC_data {
int module_ORC;
uint lookup_num_blocks;
Expand All @@ -6412,10 +6431,10 @@ struct ORC_data {
ulong orc_lookup;
ulong ip_entry;
ulong orc_entry;
kernel_orc_entry kernel_orc_entry;
orc_entry orc_entry_data;
};

#define ORC_TYPE_CALL 0
#define ORC_TYPE_CALL ((machdep->flags & ORC_6_4) ? 2 : 0)
#define ORC_TYPE_REGS 1
#define ORC_TYPE_REGS_IRET 2
#define UNWIND_HINT_TYPE_SAVE 3
Expand Down Expand Up @@ -6492,6 +6511,7 @@ struct machine_specific {
#define ORC (0x4000)
#define KPTI (0x8000)
#define L1TF (0x10000)
#define ORC_6_4 (0x20000)

#define VM_FLAGS (VM_ORIG|VM_2_6_11|VM_XEN|VM_XEN_RHEL4|VM_5LEVEL)

Expand Down
104 changes: 80 additions & 24 deletions x86_64.c
Original file line number Diff line number Diff line change
Expand Up @@ -132,9 +132,9 @@ static void GART_init(void);
static void x86_64_exception_stacks_init(void);
static int in_START_KERNEL_map(ulong);
static ulong orc_ip(ulong);
static kernel_orc_entry *__orc_find(ulong, ulong, uint, ulong);
static kernel_orc_entry *orc_find(ulong);
static kernel_orc_entry *orc_module_find(ulong);
static orc_entry *__orc_find(ulong, ulong, uint, ulong);
static orc_entry *orc_find(ulong);
static orc_entry *orc_module_find(ulong);
static ulong ip_table_to_vaddr(ulong);
static void orc_dump(ulong);

Expand Down Expand Up @@ -806,6 +806,8 @@ x86_64_dump_machdep_table(ulong arg)
fprintf(fp, "%sFRAMESIZE_DEBUG", others++ ? "|" : "");
if (machdep->flags & ORC)
fprintf(fp, "%sORC", others++ ? "|" : "");
if (machdep->flags & ORC_6_4)
fprintf(fp, "%sORC_6_4", others++ ? "|" : "");
if (machdep->flags & FRAMEPOINTER)
fprintf(fp, "%sFRAMEPOINTER", others++ ? "|" : "");
if (machdep->flags & GART_REGION)
Expand Down Expand Up @@ -988,16 +990,20 @@ x86_64_dump_machdep_table(ulong arg)
fprintf(fp, " orc_lookup: %lx\n", ms->orc.orc_lookup);
fprintf(fp, " ip_entry: %lx\n", ms->orc.ip_entry);
fprintf(fp, " orc_entry: %lx\n", ms->orc.orc_entry);
fprintf(fp, " kernel_orc_entry:\n");
fprintf(fp, " sp_offset: %d\n", ms->orc.kernel_orc_entry.sp_offset);
fprintf(fp, " bp_offset: %d\n", ms->orc.kernel_orc_entry.bp_offset);
fprintf(fp, " sp_reg: %d\n", ms->orc.kernel_orc_entry.sp_reg);
fprintf(fp, " bp_reg: %d\n", ms->orc.kernel_orc_entry.bp_reg);
fprintf(fp, " type: %d\n", ms->orc.kernel_orc_entry.type);
fprintf(fp, " orc_entry_data:\n");
fprintf(fp, " sp_offset: %d\n", ms->orc.orc_entry_data.sp_offset);
fprintf(fp, " bp_offset: %d\n", ms->orc.orc_entry_data.bp_offset);
fprintf(fp, " sp_reg: %d\n", ms->orc.orc_entry_data.sp_reg);
fprintf(fp, " bp_reg: %d\n", ms->orc.orc_entry_data.bp_reg);
fprintf(fp, " type: %d\n", ms->orc.orc_entry_data.type);
if (MEMBER_EXISTS("orc_entry", "end"))
fprintf(fp, " end: %d\n", ms->orc.kernel_orc_entry.end);
fprintf(fp, " end: %d\n", ms->orc.orc_entry_data.end);
else
fprintf(fp, " end: (n/a)\n");
if (MEMBER_EXISTS("orc_entry", "signal"))
fprintf(fp, " signal: %d\n", ms->orc.orc_entry_data.signal);
else
fprintf(fp, " signal: (n/a)\n");
}
fprintf(fp, " pto: %s",
machdep->flags & PT_REGS_INIT ? "\n" : "(uninitialized)\n");
Expand Down Expand Up @@ -6391,7 +6397,8 @@ x86_64_ORC_init(void)
!MEMBER_EXISTS("orc_entry", "sp_reg") ||
!MEMBER_EXISTS("orc_entry", "bp_reg") ||
!MEMBER_EXISTS("orc_entry", "type") ||
SIZE(orc_entry) != sizeof(kernel_orc_entry)) {
(SIZE(orc_entry) != sizeof(kernel_orc_entry) &&
SIZE(orc_entry) != sizeof(kernel_orc_entry_6_4))) {
error(WARNING, "ORC unwinder: orc_entry structure has changed\n");
return;
}
Expand Down Expand Up @@ -6439,6 +6446,10 @@ x86_64_ORC_init(void)
MEMBER_OFFSET_INIT(inactive_task_frame_bp, "inactive_task_frame", "bp");
MEMBER_OFFSET_INIT(inactive_task_frame_ret_addr, "inactive_task_frame", "ret_addr");

if (MEMBER_EXISTS("orc_entry", "signal") && /* added at 6.3 */
!MEMBER_EXISTS("orc_entry", "end")) /* removed at 6.4 with type change */
machdep->flags |= ORC_6_4;

machdep->flags |= ORC;
}

Expand Down Expand Up @@ -8521,7 +8532,7 @@ x86_64_get_framesize(struct bt_info *bt, ulong textaddr, ulong rsp, char *stack_
int reterror;
int arg_exists;
int exception;
kernel_orc_entry *korc;
orc_entry *korc;

if (!(bt->flags & BT_FRAMESIZE_DEBUG)) {
if ((bt->flags & BT_FRAMESIZE_IGNORE_MASK) ||
Expand Down Expand Up @@ -8613,6 +8624,8 @@ x86_64_get_framesize(struct bt_info *bt, ulong textaddr, ulong rsp, char *stack_
korc->sp_reg, korc->bp_reg, korc->type);
if (MEMBER_EXISTS("orc_entry", "end"))
fprintf(fp, " end: %d", korc->end);
if (MEMBER_EXISTS("orc_entry", "signal"))
fprintf(fp, " signal: %d", korc->signal);
fprintf(fp, "\n");
}

Expand Down Expand Up @@ -9117,7 +9130,43 @@ orc_ip(ulong ip)
return (ip + ip_entry);
}

static kernel_orc_entry *
/*
 * Read the kernel orc_entry located at orc->orc_entry and unpack it into
 * the layout-independent orc->orc_entry_data.
 *
 * The on-disk layout is selected by the ORC_6_4 machdep flag:
 *   - set:   kernel_orc_entry_6_4 is read, and "signal" is copied;
 *   - clear: kernel_orc_entry is read, and "end" is copied.
 * The member that the selected layout does not provide is left untouched.
 *
 * Returns a pointer to orc->orc_entry_data, or NULL if the kernel memory
 * could not be read (orc->orc_entry_data is not modified in that case).
 */
static orc_entry *
orc_get_entry(struct ORC_data *orc)
{
	orc_entry *dst = &orc->orc_entry_data;
	kernel_orc_entry_6_4 raw_6_4;
	kernel_orc_entry raw;

	if (!(machdep->flags & ORC_6_4)) {
		/* pre-6.4 layout */
		if (!readmem(orc->orc_entry, KVADDR, &raw, sizeof(raw),
		    "kernel orc_entry", RETURN_ON_ERROR|QUIET))
			return NULL;

		dst->sp_offset = raw.sp_offset;
		dst->bp_offset = raw.bp_offset;
		dst->sp_reg = raw.sp_reg;
		dst->bp_reg = raw.bp_reg;
		dst->type = raw.type;
		dst->end = raw.end;

		return dst;
	}

	/* 6.4 and later layout */
	if (!readmem(orc->orc_entry, KVADDR, &raw_6_4, sizeof(raw_6_4),
	    "kernel orc_entry", RETURN_ON_ERROR|QUIET))
		return NULL;

	dst->sp_offset = raw_6_4.sp_offset;
	dst->bp_offset = raw_6_4.bp_offset;
	dst->sp_reg = raw_6_4.sp_reg;
	dst->bp_reg = raw_6_4.bp_reg;
	dst->type = raw_6_4.type;
	dst->signal = raw_6_4.signal;

	return dst;
}

static orc_entry *
__orc_find(ulong ip_table_ptr, ulong u_table_ptr, uint num_entries, ulong ip)
{
int index;
Expand All @@ -9127,7 +9176,7 @@ __orc_find(ulong ip_table_ptr, ulong u_table_ptr, uint num_entries, ulong ip)
int *ip_table = (int *)ip_table_ptr;
struct ORC_data *orc = &machdep->machspec->orc;
ulong vaddr;
kernel_orc_entry *korc;
orc_entry *korc;

if (CRASHDEBUG(2)) {
int i, ip_entry;
Expand Down Expand Up @@ -9171,11 +9220,11 @@ __orc_find(ulong ip_table_ptr, ulong u_table_ptr, uint num_entries, ulong ip)

orc->ip_entry = (ulong)found;
orc->orc_entry = u_table_ptr + (index * SIZE(orc_entry));
if (!readmem(orc->orc_entry, KVADDR, &orc->kernel_orc_entry,
sizeof(kernel_orc_entry), "kernel orc_entry", RETURN_ON_ERROR|QUIET))

if (!orc_get_entry(orc))
return NULL;

korc = &orc->kernel_orc_entry;
korc = &orc->orc_entry_data;

if (CRASHDEBUG(2)) {
fprintf(fp, " found: %lx index: %d\n", (ulong)found, index);
Expand All @@ -9184,6 +9233,8 @@ __orc_find(ulong ip_table_ptr, ulong u_table_ptr, uint num_entries, ulong ip)
orc->orc_entry, korc->sp_offset, korc->bp_offset, korc->sp_reg, korc->bp_reg, korc->type);
if (MEMBER_EXISTS("orc_entry", "end"))
fprintf(fp, " end: %d", korc->end);
if (MEMBER_EXISTS("orc_entry", "signal"))
fprintf(fp, " signal: %d", korc->signal);
fprintf(fp, "\n");
}

Expand All @@ -9195,7 +9246,7 @@ __orc_find(ulong ip_table_ptr, ulong u_table_ptr, uint num_entries, ulong ip)
#define LOOKUP_START_IP (unsigned long)kt->stext
#define LOOKUP_STOP_IP (unsigned long)kt->etext

static kernel_orc_entry *
static orc_entry *
orc_find(ulong ip)
{
unsigned int idx, start, stop;
Expand Down Expand Up @@ -9265,7 +9316,7 @@ orc_find(ulong ip)
orc->__start_orc_unwind + (start * SIZE(orc_entry)), stop - start, ip);
}

static kernel_orc_entry *
static orc_entry *
orc_module_find(ulong ip)
{
struct load_module *lm;
Expand Down Expand Up @@ -9312,7 +9363,7 @@ static void
orc_dump(ulong ip)
{
struct ORC_data *orc = &machdep->machspec->orc;
kernel_orc_entry *korc;
orc_entry *korc;
ulong vaddr, offset;
struct syment *sp, *orig;

Expand All @@ -9335,18 +9386,23 @@ orc_dump(ulong ip)
fprintf(fp, "%s+%ld -> ", sp->name, offset);
else
fprintf(fp, "(unresolved) -> ");
if (!readmem(orc->orc_entry, KVADDR, &orc->kernel_orc_entry, sizeof(kernel_orc_entry),
"kernel orc_entry", RETURN_ON_ERROR))

if (!orc_get_entry(orc))
error(FATAL, "cannot read orc_entry\n");
korc = &orc->kernel_orc_entry;
korc = &orc->orc_entry_data;
fprintf(fp, "orc: %lx spo: %d bpo: %d spr: %d bpr: %d type: %d",
orc->orc_entry, korc->sp_offset, korc->bp_offset, korc->sp_reg, korc->bp_reg, korc->type);
if (MEMBER_EXISTS("orc_entry", "end"))
fprintf(fp, " end: %d", korc->end);
if (MEMBER_EXISTS("orc_entry", "signal"))
fprintf(fp, " signal: %d", korc->signal);
fprintf(fp, "\n");

orc->ip_entry += sizeof(int);
orc->orc_entry += sizeof(kernel_orc_entry);
if (machdep->flags & ORC_6_4)
orc->orc_entry += sizeof(kernel_orc_entry_6_4);
else
orc->orc_entry += sizeof(kernel_orc_entry);
vaddr = ip_table_to_vaddr(orc->ip_entry);
if ((sp = value_search(vaddr, &offset)))
if (sp == orig)
Expand Down

0 comments on commit 63a0913

Please sign in to comment.