Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[llvm-objdump] Handle multiple syms at same addr in disassembly.
The main disassembly loop in llvm-objdump works by iterating through the symbols in a code section, and for each one, dumping the range of the section from that symbol to the next. If there's another symbol defined at the same location, then that range will have length 0, and llvm-objdump will skip over the symbol entirely. As a result, llvm-objdump will only show the last of the symbols defined at that address. Not only that, but the other symbols won't even be checked against the `--disassemble-symbol` list. So if you have two symbols `foo` and `bar` defined in the same place, then one of `--disassemble-symbol=foo` and `--disassemble-symbol=bar` will generate an error message and no disassembly. I think a better approach in that situation is to prioritise display of the symbol the user actually asked for. Also, if the user specifically asks for disassembly of //both// of two symbols defined at the same address, the best response I can think of is to disassemble the code once, preceded by both symbol names. This involves teaching llvm-objdump to be able to display more than one symbol name at the head of a disassembled section, which also makes it possible to implement a `--show-all-symbols` option to display //every// symbol defined in the code, not just the most preferred one at each address. This change also turns out to fix a bug in which `--disassemble-all` on a mixed Arm/Thumb ELF file would fail to switch disassembly states between Arm and Thumb functions, because the mapping symbols were accidentally ignored. Reviewed By: jhenderson Differential Revision: https://reviews.llvm.org/D131589
- Loading branch information
1 parent
5259528
commit 8e29f3f
Showing
6 changed files
with
409 additions
and
57 deletions.
There are no files selected for viewing
32 changes: 32 additions & 0 deletions
32
llvm/test/tools/llvm-objdump/ELF/ARM/disassemble-all-mapping-symbols.s
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
// Regression test for a bug in which --disassemble-all had the side effect | ||
// of stopping mapping symbols from being checked in code sections, so that | ||
// mixed Arm/Thumb code would not all be correctly disassembled. | ||
|
||
@ RUN: llvm-mc -triple arm-unknown-linux -filetype=obj %s -o %t.o | ||
@ RUN: llvm-objdump -d %t.o | FileCheck %s | ||
@ RUN: llvm-objdump -d --disassemble-all %t.o | FileCheck %s | ||
|
||
@ CHECK: 00000000 <armfunc>: | ||
@ CHECK: 0: e2800001 add r0, r0, #1 | ||
@ CHECK: 4: e12fff1e bx lr | ||
@ | ||
@ CHECK: 00000008 <thmfunc>: | ||
@ CHECK: 8: f100 0001 add.w r0, r0, #1 | ||
@ CHECK: c: 4770 bx lr | ||
|
||
// Test input: one Arm-state function followed by one Thumb-state function | ||
// in the same .text section, so the disassembler has to change instruction | ||
// set part-way through the section. | ||
.arch armv8a | ||
.text | ||
|
||
// Arm-state function. The CHECK lines expect two 4-byte encodings at | ||
// offsets 0 and 4. | ||
.arm | ||
.global armfunc | ||
.type armfunc, %function | ||
armfunc: | ||
add r0, r0, #1 | ||
bx lr | ||
|
||
// Thumb-state function. The same source text "add r0, r0, #1" is expected | ||
// to be disassembled as the Thumb encoding "add.w" at offset 8, followed by | ||
// a 2-byte "bx lr" at offset c. | ||
.thumb | ||
.global thmfunc | ||
.type thmfunc, %function | ||
thmfunc: | ||
add r0, r0, #1 | ||
bx lr |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,66 @@ | ||
@ REQUIRES: arm-registered-target | ||
|
||
// Test that code symbols take priority over data symbols if both are | ||
// defined at the same address during disassembly. | ||
// | ||
// In the past, llvm-objdump would select the alphabetically last | ||
// symbol at each address. To demonstrate that it's now choosing by | ||
// symbol type, we define pairs of code and data symbols at the same | ||
// address in such a way that the code symbol and data symbol each | ||
// have a chance to appear alphabetically last. Also, we test that | ||
// both STT_FUNC and STT_NOTYPE are regarded as code symbols. | ||
|
||
@ RUN: llvm-mc -triple armv8a-unknown-linux -filetype=obj %s -o %t.o | ||
@ RUN: llvm-objdump --triple armv8a -d %t.o | FileCheck %s | ||
|
||
// Ensure that all four instructions in the section are disassembled | ||
// rather than dumped as data, and that in each case, the code symbol | ||
// is displayed before the disassembly, and not the data symbol at the | ||
// same address. | ||
|
||
@ CHECK: Disassembly of section .text: | ||
@ CHECK-EMPTY: | ||
@ CHECK-NEXT: <A1function>: | ||
@ CHECK-NEXT: movw r0, #1 | ||
@ CHECK-EMPTY: | ||
@ CHECK-NEXT: <B2function>: | ||
@ CHECK-NEXT: movw r0, #2 | ||
@ CHECK-EMPTY: | ||
@ CHECK-NEXT: <A3notype>: | ||
@ CHECK-NEXT: movw r0, #3 | ||
@ CHECK-EMPTY: | ||
@ CHECK-NEXT: <B4notype>: | ||
@ CHECK-NEXT: movw r0, #4 | ||
|
||
.text | ||
|
||
// Eight global symbols: four code symbols and four data symbols. The A/B | ||
// prefix controls which symbol of each same-address pair is alphabetically | ||
// last; the digit identifies the pair. | ||
.globl A1function | ||
.globl B2function | ||
.globl A3notype | ||
.globl B4notype | ||
.globl B1object | ||
.globl A2object | ||
.globl B3object | ||
.globl A4object | ||
|
||
// Symbol types: %function and %notype are the code symbols under test, | ||
// %object marks the competing data symbols. | ||
.type A1function,%function | ||
.type B2function,%function | ||
.type A3notype,%notype | ||
.type B4notype,%notype | ||
.type B1object,%object | ||
.type A2object,%object | ||
.type B3object,%object | ||
.type A4object,%object | ||
|
||
// Pair 1: function vs object, with the object alphabetically last. | ||
A1function: | ||
B1object: | ||
movw r0, #1 | ||
// Pair 2: function vs object, with the function alphabetically last. | ||
A2object: | ||
B2function: | ||
movw r0, #2 | ||
// Pair 3: notype vs object, with the object alphabetically last. | ||
A3notype: | ||
B3object: | ||
movw r0, #3 | ||
// Pair 4: notype vs object, with the notype symbol alphabetically last. | ||
A4object: | ||
B4notype: | ||
movw r0, #4 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
// This test demonstrates that the alphabetical-order tie breaking between | ||
// multiple symbols defined at the same address is based on the raw symbol | ||
// name, not its demangled version. | ||
|
||
@ REQUIRES: arm-registered-target | ||
|
||
@ RUN: llvm-mc -triple armv8a-unknown-linux -filetype=obj %s -o %t.o | ||
|
||
// All the run lines below should generate some subset of this | ||
// display, with different parts included: | ||
|
||
@ COMMON: Disassembly of section .text: | ||
@ | ||
@ RAW-B: 00000000 <_Z4bbbbv>: | ||
@ NICE-B: 00000000 <bbbb()>: | ||
@ NO-B-NOT: bbbb | ||
@ A: 00000000 <aaaa>: | ||
@ COMMON: 0: e0800080 add r0, r0, r0, lsl #1 | ||
@ COMMON: 4: e12fff1e bx lr | ||
|
||
// The default disassembly chooses just the alphabetically later symbol, which | ||
// is aaaa, because the leading _ on a mangled name sorts before lowercase | ||
// ASCII. | ||
|
||
@ RUN: llvm-objdump --triple armv8a -d %t.o | FileCheck --check-prefixes=COMMON,NO-B,A %s | ||
|
||
// With the --show-all-symbols option, bbbb is also shown, in its raw form. | ||
|
||
@ RUN: llvm-objdump --triple armv8a --show-all-symbols -d %t.o | FileCheck --check-prefixes=COMMON,RAW-B,A %s | ||
|
||
// With --demangle as well, bbbb is demangled, but that doesn't change its | ||
// place in the sorting order. | ||
|
||
@ RUN: llvm-objdump --triple armv8a --show-all-symbols --demangle -d %t.o | FileCheck --check-prefixes=COMMON,NICE-B,A %s | ||
|
||
// Test input: two global symbols defined at the same address. aaaa is a | ||
// plain name; _Z4bbbbv is a mangled name that the NICE-B check expects to | ||
// be demangled as "bbbb()". The .globl directive must spell the name exactly | ||
// as the label does, otherwise the label stays local and an unrelated | ||
// undefined global is added to the symbol table. | ||
.text | ||
.globl aaaa | ||
.globl _Z4bbbbv | ||
aaaa: | ||
_Z4bbbbv: | ||
add r0, r0, r0, lsl #1 | ||
bx lr |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,97 @@ | ||
// This test checks the behavior of llvm-objdump's --disassemble-symbols and | ||
// --show-all-symbols options, in the presence of multiple symbols defined at | ||
// the same address in an object file. | ||
|
||
// The test input file contains an Arm and a Thumb function, each with two | ||
// function-type symbols defined at its entry point. Also, because it's Arm, | ||
// there's a $a mapping symbol defined at the start of the section, and a $t | ||
// mapping symbol at the point where Arm code stops and Thumb code begins. | ||
|
||
// By default, llvm-objdump will pick one of the symbols to disassemble at each | ||
// point where any are defined at all. The tie-break sorting criterion is | ||
// alphabetic, so it will be the alphabetically later symbol in each case: of | ||
// the names aaaa and bbbb for the Arm function it picks bbbb, and of cccc and | ||
// dddd for the Thumb function it picks dddd. | ||
|
||
// Including an Arm and a Thumb function also re-checks that these changes to | ||
// the display of symbols don't affect the recognition of mapping symbols for | ||
// the purpose of switching disassembly mode. | ||
|
||
@ REQUIRES: arm-registered-target | ||
|
||
@ RUN: llvm-mc -triple armv8a-unknown-linux -filetype=obj %s -o %t.o | ||
|
||
// All the run lines below should generate some subset of this | ||
// display, with different parts included: | ||
|
||
@ HEAD: Disassembly of section .text: | ||
@ HEAD-EMPTY: | ||
@ AMAP-NEXT: 00000000 <$a.0>: | ||
@ AAAA-NEXT: 00000000 <aaaa>: | ||
@ BBBB-NEXT: 00000000 <bbbb>: | ||
@ AABB-NEXT: 0: e0800080 add r0, r0, r0, lsl #1 | ||
@ AABB-NEXT: 4: e12fff1e bx lr | ||
@ BOTH-EMPTY: | ||
@ TMAP-NEXT: 00000008 <$t.1>: | ||
@ CCCC-NEXT: 00000008 <cccc>: | ||
@ DDDD-NEXT: 00000008 <dddd>: | ||
@ CCDD-NEXT: 8: eb00 0080 add.w r0, r0, r0, lsl #2 | ||
@ CCDD-NEXT: c: 4770 bx lr | ||
|
||
// The default disassembly chooses just the alphabetically later symbol of each | ||
// set, namely bbbb and dddd. | ||
|
||
@ RUN: llvm-objdump --triple armv8a -d %t.o | FileCheck --check-prefixes=HEAD,BBBB,AABB,BOTH,DDDD,CCDD %s | ||
|
||
// With the --show-all-symbols option, all the symbols are shown, including the | ||
// administrative mapping symbols. | ||
|
||
@ RUN: llvm-objdump --triple armv8a --show-all-symbols -d %t.o | FileCheck --check-prefixes=HEAD,AMAP,AAAA,BBBB,AABB,BOTH,TMAP,CCCC,DDDD,CCDD %s | ||
|
||
// If we use --disassemble-symbols to ask for the disassembly of aaaa or bbbb | ||
// or both, then we expect the second cccc/dddd function not to appear in the | ||
// output at all. Also, we want to see whichever symbol we asked about, or both | ||
// if we asked about both. | ||
|
||
@ RUN: llvm-objdump --triple armv8a --disassemble-symbols=aaaa -d %t.o | FileCheck --check-prefixes=HEAD,AAAA,AABB %s | ||
@ RUN: llvm-objdump --triple armv8a --disassemble-symbols=bbbb -d %t.o | FileCheck --check-prefixes=HEAD,BBBB,AABB %s | ||
@ RUN: llvm-objdump --triple armv8a --disassemble-symbols=aaaa,bbbb -d %t.o | FileCheck --check-prefixes=HEAD,AAAA,BBBB,AABB %s | ||
|
||
// With _any_ of those three options and also --show-all-symbols, the | ||
// disassembled code is still limited to just the symbol(s) you asked about, | ||
// but all symbols defined at the same address are mentioned, whether you asked | ||
// about them or not. | ||
|
||
@ RUN: llvm-objdump --triple armv8a --disassemble-symbols=aaaa --show-all-symbols -d %t.o | FileCheck --check-prefixes=HEAD,AMAP,AAAA,BBBB,AABB %s | ||
@ RUN: llvm-objdump --triple armv8a --disassemble-symbols=bbbb --show-all-symbols -d %t.o | FileCheck --check-prefixes=HEAD,AMAP,AAAA,BBBB,AABB %s | ||
@ RUN: llvm-objdump --triple armv8a --disassemble-symbols=aaaa,bbbb --show-all-symbols -d %t.o | FileCheck --check-prefixes=HEAD,AMAP,AAAA,BBBB,AABB %s | ||
|
||
// Similarly for the Thumb function and its symbols. This time we must check | ||
// that the aaaa/bbbb block of code was not disassembled _before_ the output | ||
// we're expecting. | ||
|
||
@ RUN: llvm-objdump --triple armv8a --disassemble-symbols=cccc -d %t.o | FileCheck --check-prefixes=HEAD,CCCC,CCDD %s | ||
@ RUN: llvm-objdump --triple armv8a --disassemble-symbols=dddd -d %t.o | FileCheck --check-prefixes=HEAD,DDDD,CCDD %s | ||
@ RUN: llvm-objdump --triple armv8a --disassemble-symbols=cccc,dddd -d %t.o | FileCheck --check-prefixes=HEAD,CCCC,DDDD,CCDD %s | ||
|
||
@ RUN: llvm-objdump --triple armv8a --disassemble-symbols=cccc --show-all-symbols -d %t.o | FileCheck --check-prefixes=HEAD,TMAP,CCCC,DDDD,CCDD %s | ||
@ RUN: llvm-objdump --triple armv8a --disassemble-symbols=dddd --show-all-symbols -d %t.o | FileCheck --check-prefixes=HEAD,TMAP,CCCC,DDDD,CCDD %s | ||
@ RUN: llvm-objdump --triple armv8a --disassemble-symbols=cccc,dddd --show-all-symbols -d %t.o | FileCheck --check-prefixes=HEAD,TMAP,CCCC,DDDD,CCDD %s | ||
|
||
// Test input: four global symbols, two per function entry point. | ||
.text | ||
.globl aaaa | ||
.globl bbbb | ||
.globl cccc | ||
.globl dddd | ||
|
||
// Arm-state function with two names at the same address (offset 0). | ||
// The AMAP check expects a $a mapping symbol here as well. | ||
.arm | ||
aaaa: | ||
bbbb: | ||
add r0, r0, r0, lsl #1 | ||
bx lr | ||
|
||
// Thumb-state function with two names at the same address (offset 8). | ||
// The TMAP check expects a $t mapping symbol here as well. | ||
.thumb | ||
cccc: | ||
dddd: | ||
add.w r0, r0, r0, lsl #2 | ||
bx lr |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.