Skip to content

Commit

Permalink
[llvm-objdump] Switch between ARM/Thumb based on mapping symbols.
Browse files Browse the repository at this point in the history
The ARMDisassembler changes allow changing between ARM and Thumb mode
based on the MCSubtargetInfo, rather than the Target, which simplifies
the other changes a bit.

I'm not really happy with adding more target-specific logic to
tools/llvm-objdump/, but there isn't any easy way around it: the logic
in question specifically applies to disassembling an object file, and
that code simply isn't located in lib/Target, at least at the moment.

Differential Revision: https://reviews.llvm.org/D60927

llvm-svn: 363903
  • Loading branch information
efriedma-quic committed Jun 20, 2019
1 parent e4c2e9b commit d88e28d
Show file tree
Hide file tree
Showing 5 changed files with 119 additions and 77 deletions.
3 changes: 2 additions & 1 deletion llvm/include/llvm/Object/ELFObjectFile.h
Expand Up @@ -54,7 +54,6 @@ class ELFObjectFileBase : public ObjectFile {
protected:
ELFObjectFileBase(unsigned int Type, MemoryBufferRef Source);

virtual uint16_t getEMachine() const = 0;
virtual uint64_t getSymbolSize(DataRefImpl Symb) const = 0;
virtual uint8_t getSymbolBinding(DataRefImpl Symb) const = 0;
virtual uint8_t getSymbolOther(DataRefImpl Symb) const = 0;
Expand Down Expand Up @@ -91,6 +90,8 @@ class ELFObjectFileBase : public ObjectFile {

virtual uint16_t getEType() const = 0;

virtual uint16_t getEMachine() const = 0;

std::vector<std::pair<DataRefImpl, uint64_t>> getPltAddresses() const;
};

Expand Down
57 changes: 28 additions & 29 deletions llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
Expand Up @@ -139,23 +139,18 @@ class ARMDisassembler : public MCDisassembler {
ArrayRef<uint8_t> Bytes, uint64_t Address,
raw_ostream &VStream,
raw_ostream &CStream) const override;
};

/// Thumb disassembler for all Thumb platforms.
class ThumbDisassembler : public MCDisassembler {
public:
ThumbDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx) :
MCDisassembler(STI, Ctx) {
}

~ThumbDisassembler() override = default;
private:
DecodeStatus getARMInstruction(MCInst &Instr, uint64_t &Size,
ArrayRef<uint8_t> Bytes, uint64_t Address,
raw_ostream &VStream,
raw_ostream &CStream) const;

DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size,
ArrayRef<uint8_t> Bytes, uint64_t Address,
raw_ostream &VStream,
raw_ostream &CStream) const override;
DecodeStatus getThumbInstruction(MCInst &Instr, uint64_t &Size,
ArrayRef<uint8_t> Bytes, uint64_t Address,
raw_ostream &VStream,
raw_ostream &CStream) const;

private:
mutable ITStatus ITBlock;
mutable VPTStatus VPTBlock;

Expand Down Expand Up @@ -519,12 +514,6 @@ static MCDisassembler *createARMDisassembler(const Target &T,
return new ARMDisassembler(STI, Ctx);
}

/// Factory that allocates a ThumbDisassembler for the given subtarget info
/// and MC context. The Target argument \p T is not read by the body.
/// The object is created with a raw `new`; presumably the caller (the target
/// registry machinery) takes ownership -- confirm against the call site.
static MCDisassembler *createThumbDisassembler(const Target &T,
const MCSubtargetInfo &STI,
MCContext &Ctx) {
return new ThumbDisassembler(STI, Ctx);
}

// Post-decoding checks
static DecodeStatus checkDecodedInstruction(MCInst &MI, uint64_t &Size,
uint64_t Address, raw_ostream &OS,
Expand Down Expand Up @@ -562,6 +551,16 @@ DecodeStatus ARMDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
ArrayRef<uint8_t> Bytes,
uint64_t Address, raw_ostream &OS,
raw_ostream &CS) const {
if (STI.getFeatureBits()[ARM::ModeThumb])
return getThumbInstruction(MI, Size, Bytes, Address, OS, CS);
return getARMInstruction(MI, Size, Bytes, Address, OS, CS);
}

DecodeStatus ARMDisassembler::getARMInstruction(MCInst &MI, uint64_t &Size,
ArrayRef<uint8_t> Bytes,
uint64_t Address,
raw_ostream &OS,
raw_ostream &CS) const {
CommentStream = &CS;

assert(!STI.getFeatureBits()[ARM::ModeThumb] &&
Expand Down Expand Up @@ -698,7 +697,7 @@ static bool isVectorPredicable(unsigned Opcode) {
// to fix up the predicate operands using this context information as a
// post-pass.
MCDisassembler::DecodeStatus
ThumbDisassembler::AddThumbPredicate(MCInst &MI) const {
ARMDisassembler::AddThumbPredicate(MCInst &MI) const {
MCDisassembler::DecodeStatus S = Success;

const FeatureBitset &FeatureBits = getSubtargetInfo().getFeatureBits();
Expand Down Expand Up @@ -813,7 +812,7 @@ ThumbDisassembler::AddThumbPredicate(MCInst &MI) const {
// mode, the auto-generated decoder will give them an (incorrect)
// predicate operand. We need to rewrite these operands based on the IT
// context as a post-pass.
void ThumbDisassembler::UpdateThumbVFPPredicate(
void ARMDisassembler::UpdateThumbVFPPredicate(
DecodeStatus &S, MCInst &MI) const {
unsigned CC;
CC = ITBlock.getITCC();
Expand Down Expand Up @@ -844,11 +843,11 @@ void ThumbDisassembler::UpdateThumbVFPPredicate(
}
}

DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
ArrayRef<uint8_t> Bytes,
uint64_t Address,
raw_ostream &OS,
raw_ostream &CS) const {
DecodeStatus ARMDisassembler::getThumbInstruction(MCInst &MI, uint64_t &Size,
ArrayRef<uint8_t> Bytes,
uint64_t Address,
raw_ostream &OS,
raw_ostream &CS) const {
CommentStream = &CS;

assert(STI.getFeatureBits()[ARM::ModeThumb] &&
Expand Down Expand Up @@ -1046,9 +1045,9 @@ extern "C" void LLVMInitializeARMDisassembler() {
TargetRegistry::RegisterMCDisassembler(getTheARMBETarget(),
createARMDisassembler);
TargetRegistry::RegisterMCDisassembler(getTheThumbLETarget(),
createThumbDisassembler);
createARMDisassembler);
TargetRegistry::RegisterMCDisassembler(getTheThumbBETarget(),
createThumbDisassembler);
createARMDisassembler);
}

static const uint16_t GPRDecoderTable[] = {
Expand Down
19 changes: 5 additions & 14 deletions llvm/test/CodeGen/ARM/inlineasm-switch-mode.ll
@@ -1,7 +1,4 @@
;RUN: llc -mtriple=thumbv7-linux-gnueabi < %s | llvm-mc -triple=thumbv7-linux-gnueabi -filetype=obj > %t
; Two pass decoding needed because llvm-objdump does not respect mapping symbols
;RUN: llvm-objdump -triple=armv7 -d %t | FileCheck %s --check-prefix=ARM
;RUN: llvm-objdump -triple=thumbv7 -d %t | FileCheck %s --check-prefix=THUMB
;RUN: llc -mtriple=thumbv7-linux-gnueabi < %s | llvm-mc -triple=thumbv7-linux-gnueabi -filetype=obj | llvm-objdump -d - | FileCheck %s

define hidden i32 @bah(i8* %start) #0 align 2 {
%1 = ptrtoint i8* %start to i32
Expand All @@ -10,13 +7,7 @@ define hidden i32 @bah(i8* %start) #0 align 2 {
ret i32 %3
}

; ARM: $a
; ARM-NEXT: 04 70 2d e5 str r7, [sp, #-4]!
; ARM: $t
; ARM-NEXT: 48 1c

; THUMB: $a{{.*}}:
; THUMB-NEXT: 04 70
; THUMB-NEXT: 2d e5
; THUMB: $t{{.*}}:
; THUMB-NEXT: 48 1c adds r0, r1, #1
; CHECK: $a{{.*}}:
; CHECK-NEXT: 04 70 2d e5 str r7, [sp, #-4]!
; CHECK: $t{{.*}}:
; CHECK-NEXT: 48 1c adds r0, r1, #1
8 changes: 4 additions & 4 deletions llvm/test/tools/llvm-objdump/ARM/v7r-subfeatures.s
@@ -1,5 +1,6 @@
@ RUN: llvm-mc < %s -triple armv7r -mattr=+hwdiv-arm -filetype=obj | llvm-objdump -triple=thumb -d - | FileCheck %s
@ RUN: llvm-mc < %s -triple armv7r -mattr=+hwdiv-arm -filetype=obj | llvm-objdump -triple=arm -d - | FileCheck %s --check-prefix=CHECK-ARM
@ RUN: llvm-mc < %s -triple armv7r -mattr=+hwdiv-arm -filetype=obj | llvm-objdump -d - | FileCheck %s
@ v7r implies Thumb hwdiv, but ARM hwdiv is optional
@ FIXME: Does that imply we should actually refuse to disassemble it?

.eabi_attribute Tag_CPU_arch, 10 // v7
.eabi_attribute Tag_CPU_arch_profile, 0x52 // 'R' profile
Expand All @@ -9,8 +10,7 @@ div_arm:
udiv r0, r1, r2

@CHECK-LABEL: div_arm
@CHECK-NOT: udiv r0, r1, r2
@CHECK-ARM-NOT: udiv r0, r1, r2
@CHECK: 11 f2 30 e7 <unknown>

.thumb
div_thumb:
Expand Down
109 changes: 80 additions & 29 deletions llvm/tools/llvm-objdump/llvm-objdump.cpp
Expand Up @@ -603,13 +603,18 @@ void SourcePrinter::printSourceLine(raw_ostream &OS,
OldLineInfo = LineInfo;
}

/// Returns true when \p Obj can be viewed as an ELFObjectFileBase and its
/// getEMachine() value equals ELF::EM_AARCH64; false otherwise.
static bool isAArch64Elf(const ObjectFile *Obj) {
  if (const auto *ElfObj = dyn_cast<ELFObjectFileBase>(Obj))
    return ElfObj->getEMachine() == ELF::EM_AARCH64;
  // Not an ELF object file at all.
  return false;
}

static bool isArmElf(const ObjectFile *Obj) {
return (Obj->isELF() &&
(Obj->getArch() == Triple::aarch64 ||
Obj->getArch() == Triple::aarch64_be ||
Obj->getArch() == Triple::arm || Obj->getArch() == Triple::armeb ||
Obj->getArch() == Triple::thumb ||
Obj->getArch() == Triple::thumbeb));
const auto *Elf = dyn_cast<ELFObjectFileBase>(Obj);
return Elf && Elf->getEMachine() == ELF::EM_ARM;
}

/// Returns true when \p Obj is classified as an ARM ELF object by isArmElf()
/// or as an AArch64 ELF object by isAArch64Elf().
static bool hasMappingSymbols(const ObjectFile *Obj) {
  // Same evaluation order as a short-circuit OR: ARM is checked first and
  // AArch64 is only consulted when the ARM check fails.
  if (isArmElf(Obj))
    return true;
  return isAArch64Elf(Obj);
}

static void printRelocation(const RelocationRef &Rel, uint64_t Address,
Expand Down Expand Up @@ -954,10 +959,24 @@ static bool shouldAdjustVA(const SectionRef &Section) {
return false;
}


/// A mapping symbol record: (address, kind character).
using MappingSymbolPair = std::pair<uint64_t, char>;

/// Returns the kind character of the mapping symbol that covers \p Address,
/// i.e. the entry immediately preceding the first entry whose address is
/// greater than \p Address.
///
/// Returns '\x00' when no entry precedes that point (including an empty
/// list), which tells the caller to keep the target's default disassembly
/// mode.
///
/// NOTE(review): the lookup uses bsearch, so \p MappingSymbols is assumed to
/// be ordered by address; the visible caller sorts the vector before use.
static char getMappingSymbolKind(ArrayRef<MappingSymbolPair> MappingSymbols,
                                 uint64_t Address) {
  // Locate the first entry that starts strictly after Address.
  const auto FirstAfter =
      bsearch(MappingSymbols, [Address](const MappingSymbolPair &Entry) {
        return Entry.first > Address;
      });
  // Nothing at or before Address: report "no mapping symbol".
  const bool NoPredecessor = FirstAfter == MappingSymbols.begin();
  return NoPredecessor ? '\x00' : (FirstAfter - 1)->second;
}

static uint64_t
dumpARMELFData(uint64_t SectionAddr, uint64_t Index, uint64_t End,
const ObjectFile *Obj, ArrayRef<uint8_t> Bytes,
const std::vector<uint64_t> &TextMappingSymsAddr) {
ArrayRef<MappingSymbolPair> MappingSymbols) {
support::endianness Endian =
Obj->isLittleEndian() ? support::little : support::big;
while (Index < End) {
Expand All @@ -981,8 +1000,7 @@ dumpARMELFData(uint64_t SectionAddr, uint64_t Index, uint64_t End,
++Index;
}
outs() << "\n";
if (std::binary_search(TextMappingSymsAddr.begin(),
TextMappingSymsAddr.end(), Index))
if (getMappingSymbolKind(MappingSymbols, Index) != 'd')
break;
}
return Index;
Expand Down Expand Up @@ -1023,10 +1041,19 @@ static void dumpELFData(uint64_t SectionAddr, uint64_t Index, uint64_t End,
}

static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj,
MCContext &Ctx, MCDisassembler *DisAsm,
MCContext &Ctx, MCDisassembler *PrimaryDisAsm,
MCDisassembler *SecondaryDisAsm,
const MCInstrAnalysis *MIA, MCInstPrinter *IP,
const MCSubtargetInfo *STI, PrettyPrinter &PIP,
const MCSubtargetInfo *PrimarySTI,
const MCSubtargetInfo *SecondarySTI,
PrettyPrinter &PIP,
SourcePrinter &SP, bool InlineRelocs) {
const MCSubtargetInfo *STI = PrimarySTI;
MCDisassembler *DisAsm = PrimaryDisAsm;
bool PrimaryIsThumb = false;
if (isArmElf(Obj))
PrimaryIsThumb = STI->checkFeatures("+thumb-mode");

std::map<SectionRef, std::vector<RelocationRef>> RelocMap;
if (InlineRelocs)
RelocMap = getRelocsMap(*Obj);
Expand Down Expand Up @@ -1113,25 +1140,23 @@ static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj,

// Get the list of all the symbols in this section.
SectionSymbolsTy &Symbols = AllSymbols[Section];
std::vector<uint64_t> DataMappingSymsAddr;
std::vector<uint64_t> TextMappingSymsAddr;
if (isArmElf(Obj)) {
std::vector<MappingSymbolPair> MappingSymbols;
if (hasMappingSymbols(Obj)) {
for (const auto &Symb : Symbols) {
uint64_t Address = std::get<0>(Symb);
StringRef Name = std::get<1>(Symb);
if (Name.startswith("$d"))
DataMappingSymsAddr.push_back(Address - SectionAddr);
MappingSymbols.emplace_back(Address - SectionAddr, 'd');
if (Name.startswith("$x"))
TextMappingSymsAddr.push_back(Address - SectionAddr);
MappingSymbols.emplace_back(Address - SectionAddr, 'x');
if (Name.startswith("$a"))
TextMappingSymsAddr.push_back(Address - SectionAddr);
MappingSymbols.emplace_back(Address - SectionAddr, 'a');
if (Name.startswith("$t"))
TextMappingSymsAddr.push_back(Address - SectionAddr);
MappingSymbols.emplace_back(Address - SectionAddr, 't');
}
}

llvm::sort(DataMappingSymsAddr);
llvm::sort(TextMappingSymsAddr);
llvm::sort(MappingSymbols);

if (Obj->isELF() && Obj->getArch() == Triple::amdgcn) {
// AMDGPU disassembler uses symbolizer for printing labels
Expand Down Expand Up @@ -1269,19 +1294,18 @@ static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj,
Index = End;
}

bool CheckARMELFData = isArmElf(Obj) &&
bool CheckARMELFData = hasMappingSymbols(Obj) &&
std::get<2>(Symbols[SI]) != ELF::STT_OBJECT &&
!DisassembleAll;
while (Index < End) {
// AArch64 ELF binaries can interleave data and text in the same
// section. We rely on the markers introduced to understand what we
// need to dump. If the data marker is within a function, it is
// ARM and AArch64 ELF binaries can interleave data and text in the
// same section. We rely on the markers introduced to understand what
// we need to dump. If the data marker is within a function, it is
// denoted as a word/short etc.
if (CheckARMELFData &&
std::binary_search(DataMappingSymsAddr.begin(),
DataMappingSymsAddr.end(), Index)) {
getMappingSymbolKind(MappingSymbols, Index) == 'd') {
Index = dumpARMELFData(SectionAddr, Index, End, Obj, Bytes,
TextMappingSymsAddr);
MappingSymbols);
continue;
}

Expand All @@ -1302,6 +1326,16 @@ static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj,
}
}

if (SecondarySTI) {
if (getMappingSymbolKind(MappingSymbols, Index) == 'a') {
STI = PrimaryIsThumb ? SecondarySTI : PrimarySTI;
DisAsm = PrimaryIsThumb ? SecondaryDisAsm : PrimaryDisAsm;
} else if (getMappingSymbolKind(MappingSymbols, Index) == 't') {
STI = PrimaryIsThumb ? PrimarySTI : SecondarySTI;
DisAsm = PrimaryIsThumb ? PrimaryDisAsm : SecondaryDisAsm;
}
}

// Disassemble a real instruction or a data when disassemble all is
// provided
MCInst Inst;
Expand Down Expand Up @@ -1459,6 +1493,22 @@ static void disassembleObject(const ObjectFile *Obj, bool InlineRelocs) {
report_error(Obj->getFileName(),
"no disassembler for target " + TripleName);

// If we have an ARM object file, we need a second disassembler, because
// ARM CPUs have two different instruction sets: ARM mode, and Thumb mode.
// We use mapping symbols to switch between the two disassemblers, where
// appropriate.
std::unique_ptr<MCDisassembler> SecondaryDisAsm;
std::unique_ptr<const MCSubtargetInfo> SecondarySTI;
if (isArmElf(Obj) && !STI->checkFeatures("+mclass")) {
if (STI->checkFeatures("+thumb-mode"))
Features.AddFeature("-thumb-mode");
else
Features.AddFeature("+thumb-mode");
SecondarySTI.reset(TheTarget->createMCSubtargetInfo(TripleName, MCPU,
Features.getString()));
SecondaryDisAsm.reset(TheTarget->createMCDisassembler(*SecondarySTI, Ctx));
}

std::unique_ptr<const MCInstrAnalysis> MIA(
TheTarget->createMCInstrAnalysis(MII.get()));

Expand All @@ -1477,8 +1527,9 @@ static void disassembleObject(const ObjectFile *Obj, bool InlineRelocs) {
if (!IP->applyTargetSpecificCLOption(Opt))
error("Unrecognized disassembler option: " + Opt);

disassembleObject(TheTarget, Obj, Ctx, DisAsm.get(), MIA.get(), IP.get(),
STI.get(), PIP, SP, InlineRelocs);
disassembleObject(TheTarget, Obj, Ctx, DisAsm.get(), SecondaryDisAsm.get(),
MIA.get(), IP.get(), STI.get(), SecondarySTI.get(), PIP,
SP, InlineRelocs);
}

void printRelocations(const ObjectFile *Obj) {
Expand Down

0 comments on commit d88e28d

Please sign in to comment.