Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[RISCV][LLD] Add RISCV zcmt optimise in linker relaxation #77884

Open
wants to merge 54 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
54 commits
Select commit Hold shift + click to select a range
4846c45
Add tablejump support in lld linker relaxation
Xinlong-Wu Sep 20, 2022
0d7cccc
reuse reloc type R_RISCV_JAL
Xinlong-Wu Sep 24, 2022
a57d853
format
Xinlong-Wu Sep 24, 2022
50502da
fix TODO
Xinlong-Wu Sep 24, 2022
a7545d8
fix name
Xinlong-Wu Sep 24, 2022
9153af3
format
Xinlong-Wu Sep 24, 2022
1b39df2
fix compile erroe
Xinlong-Wu Sep 24, 2022
59111f4
add testcase
Xinlong-Wu Sep 25, 2022
d0c5fcf
change the priority order of cm.jt/cm.jalt relax
Xinlong-Wu Sep 26, 2022
99e08c8
address comments
Xinlong-Wu Sep 26, 2022
adbd7f4
fix part of comments
Xinlong-Wu Sep 26, 2022
726883a
fmt
Xinlong-Wu Sep 26, 2022
9879205
tmp
Xinlong-Wu Sep 26, 2022
b912fef
update option start with `--`
Xinlong-Wu Oct 27, 2022
192f501
reimplement Zcmt relax
Xinlong-Wu Jan 6, 2023
74cf627
fix testcase
Xinlong-Wu Jan 6, 2023
96d0f36
rebase & update
Xinlong-Wu Jan 6, 2023
3e79817
git format
Xinlong-Wu Jan 6, 2023
451a817
write table entry to .riscv.jvt section
Xinlong-Wu Jan 6, 2023
e5f78da
format
Xinlong-Wu Jan 6, 2023
a7d74e9
address comments
Xinlong-Wu Jan 6, 2023
c116a18
format
Xinlong-Wu Jan 7, 2023
51b7fd3
move TableJumpSection to Arch/RISCV.cpp
Xinlong-Wu Jan 7, 2023
5aff069
format
Xinlong-Wu Jan 7, 2023
51be215
stop relax to cm.jalt if it has negative
Xinlong-Wu Mar 24, 2023
d317949
format
Xinlong-Wu Mar 24, 2023
14a988b
fix testcase
Xinlong-Wu Mar 24, 2023
48be3d1
store symbol instade of symbol name
Xinlong-Wu Apr 3, 2023
f9b6db9
format
Xinlong-Wu Apr 4, 2023
1590a32
extend sizeof InputSection
Xinlong-Wu Apr 22, 2023
6f07430
Revert "extend sizeof InputSection"
Xinlong-Wu Apr 24, 2023
9c4e5fa
use define instead of magic num
Xinlong-Wu Jun 27, 2023
29d11b6
address comments
Xinlong-Wu Jun 27, 2023
18d4a81
use INTERNAL_ relocation type for Zcmt
Xinlong-Wu Sep 7, 2023
565e679
fix error
Xinlong-Wu Oct 7, 2023
1927643
rename getEntry -> getIndex
Xinlong-Wu Nov 27, 2023
ff7981b
refactor getIndex
Xinlong-Wu Nov 27, 2023
aeb18bc
address comments
Xinlong-Wu Nov 28, 2023
18f35de
fmt
Xinlong-Wu Nov 28, 2023
9f59bab
fix symble of __jvt_base$
Xinlong-Wu Dec 6, 2023
0e48c43
add a warning when SizeReduction <= 0
Xinlong-Wu Dec 6, 2023
e562d31
add riscvTableJumpSection only when SizeReduction > 0
Xinlong-Wu Dec 8, 2023
750f493
fmt
Xinlong-Wu Dec 8, 2023
2d5ca0f
format the patch
Xinlong-Wu Jan 16, 2024
f0be958
format patch again
Xinlong-Wu Jan 16, 2024
f69a0a0
rename riscv-tbljal -> relax-tbljal, use int32_t to allow negative ef…
Xinlong-Wu Jan 25, 2024
8897e0f
format
Xinlong-Wu Jan 25, 2024
4e5ee18
address comments
Xinlong-Wu Mar 11, 2024
c50d354
fmt
Xinlong-Wu Mar 11, 2024
578e726
Merge branch 'main' into zce-zcmt-lld
Xinlong-Wu Mar 11, 2024
563d704
Merge remote-tracking branch 'LLVM_Upstream/main' into zce-zcmt-lld
Xinlong-Wu Jun 20, 2024
d1a83f1
update
Xinlong-Wu Jun 20, 2024
99b7c6a
Merge remote-tracking branch 'LLVM_Upstream/main' into zce-zcmt-lld
Xinlong-Wu Jun 20, 2024
b23e351
Merge remote-tracking branch 'github/zce-zcmt-lld' into zce-zcmt-lld
Xinlong-Wu Jun 20, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file added a.out
Binary file not shown.
271 changes: 270 additions & 1 deletion lld/ELF/Arch/RISCV.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,8 @@ class RISCV final : public TargetInfo {
void writePltHeader(uint8_t *buf) const override;
void writePlt(uint8_t *buf, const Symbol &sym,
uint64_t pltEntryAddr) const override;
void writeTableJumpHeader(uint8_t *buf) const override;
void writeTableJumpEntry(uint8_t *buf, const uint64_t symbol) const override;
RelType getDynRel(RelType type) const override;
RelExpr getRelExpr(RelType type, const Symbol &s,
const uint8_t *loc) const override;
Expand All @@ -54,6 +56,7 @@ class RISCV final : public TargetInfo {
// of the psABI spec.
#define INTERNAL_R_RISCV_GPREL_I 256
#define INTERNAL_R_RISCV_GPREL_S 257
#define INTERNAL_R_RISCV_TBJAL 258

const uint64_t dtpOffset = 0x800;

Expand Down Expand Up @@ -255,6 +258,20 @@ void RISCV::writePlt(uint8_t *buf, const Symbol &sym,
write32le(buf + 12, itype(ADDI, 0, 0, 0));
}

// Stores the virtual address of the main partition's dynamic section at `buf`
// as one little-endian word: 8 bytes when config->is64 is set, 4 bytes
// otherwise.
// NOTE(review): mainPart->dynamic is dereferenced unconditionally — confirm it
// is always non-null whenever the table jump section is written.
void RISCV::writeTableJumpHeader(uint8_t *buf) const {
  if (!config->is64) {
    write32le(buf, mainPart->dynamic->getVA());
    return;
  }
  write64le(buf, mainPart->dynamic->getVA());
}

// Stores `address` at `buf` as one little-endian table word: 8 bytes when
// config->is64 is set, otherwise the value is written as a 4-byte word.
void RISCV::writeTableJumpEntry(uint8_t *buf, const uint64_t address) const {
  if (!config->is64) {
    write32le(buf, address);
    return;
  }
  write64le(buf, address);
}

RelType RISCV::getDynRel(RelType type) const {
return type == target->symbolicRel ? type
: static_cast<RelType>(R_RISCV_NONE);
Expand Down Expand Up @@ -478,6 +495,9 @@ void RISCV::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const {
return;
}

case INTERNAL_R_RISCV_TBJAL:
return;

case R_RISCV_ADD8:
*loc += val;
return;
Expand Down Expand Up @@ -731,6 +751,30 @@ void elf::initSymbolAnchors() {
}
}

static bool relaxTableJump(const InputSection &sec, size_t i, uint64_t loc,
Relocation &r, uint32_t &remove) {
if (!in.riscvTableJumpSection || !in.riscvTableJumpSection->isFinalized)
return false;

const auto jalr = sec.contentMaybeDecompress().data()[r.offset + 4];
const uint8_t rd = extractBits(jalr, 11, 7);
int tblEntryIndex = -1;
if (rd == 0) {
tblEntryIndex = in.riscvTableJumpSection->getCMJTEntryIndex(r.sym);
} else if (rd == X_RA) {
tblEntryIndex = in.riscvTableJumpSection->getCMJALTEntryIndex(r.sym);
}

if (tblEntryIndex >= 0) {
sec.relaxAux->relocTypes[i] = INTERNAL_R_RISCV_TBJAL;
sec.relaxAux->writes.push_back(0xA002 |
(tblEntryIndex << 2)); // cm.jt or cm.jalt
remove = 6;
return true;
}
return false;
}

// Relax R_RISCV_CALL/R_RISCV_CALL_PLT auipc+jalr to c.j, c.jal, or jal.
static void relaxCall(const InputSection &sec, size_t i, uint64_t loc,
Relocation &r, uint32_t &remove) {
Expand All @@ -751,7 +795,7 @@ static void relaxCall(const InputSection &sec, size_t i, uint64_t loc,
sec.relaxAux->relocTypes[i] = R_RISCV_RVC_JUMP;
sec.relaxAux->writes.push_back(0x2001); // c.jal
remove = 6;
} else if (isInt<21>(displace)) {
} else if (!relaxTableJump(sec, i, loc, r, remove) && isInt<21>(displace)) {
sec.relaxAux->relocTypes[i] = R_RISCV_JAL;
sec.relaxAux->writes.push_back(0x6f | rd << 7); // jal
remove = 4;
Expand Down Expand Up @@ -859,6 +903,11 @@ static bool relax(InputSection &sec) {
if (relaxable(relocs, i))
relaxHi20Lo12(sec, i, loc, r, remove);
break;
case R_RISCV_JAL:
if (i + 1 != sec.relocations.size() &&
sec.relocations[i + 1].type == R_RISCV_RELAX)
relaxTableJump(sec, i, loc, r, remove);
break;
case R_RISCV_TLSDESC_HI20:
// For TLSDESC=>LE, we can use the short form if hi20 is zero.
tlsdescRelax = relaxable(relocs, i);
Expand Down Expand Up @@ -990,6 +1039,12 @@ void RISCV::finalizeRelax(int passes) const {
case INTERNAL_R_RISCV_GPREL_I:
case INTERNAL_R_RISCV_GPREL_S:
break;
case INTERNAL_R_RISCV_TBJAL:
assert(config->relaxTbljal);
assert((aux.writes[writesIdx] & 0xfc03) == 0xA002);
skip = 2;
write16le(p, aux.writes[writesIdx++]);
break;
case R_RISCV_RELAX:
// Used by relaxTlsLe to indicate the relocation is ignored.
break;
Expand All @@ -1001,6 +1056,8 @@ void RISCV::finalizeRelax(int passes) const {
skip = 4;
write32le(p, aux.writes[writesIdx++]);
break;
case R_RISCV_64:
break;
Xinlong-Wu marked this conversation as resolved.
Show resolved Hide resolved
case R_RISCV_32:
// Used by relaxTlsLe to write a uint32_t then suppress the handling
// in relocateAlloc.
Expand Down Expand Up @@ -1234,3 +1291,215 @@ TargetInfo *elf::getRISCVTargetInfo() {
static RISCV target;
return &target;
}

// Builds the synthetic ".riscv.jvt" section as an allocated, executable
// SHT_PROGBITS section. config->wordsize is forwarded as the third
// SyntheticSection argument (presumably the section alignment — confirm
// against the SyntheticSection constructor).
TableJumpSection::TableJumpSection()
    : SyntheticSection(SHF_EXECINSTR | SHF_ALLOC, SHT_PROGBITS,
                       config->wordsize, ".riscv.jvt") {}

void TableJumpSection::addCMJTEntryCandidate(const Symbol *symbol,
int csReduction) {
addEntry(symbol, CMJTEntryCandidates, csReduction);
}

// Returns the table index to encode in a cm.jt instruction targeting
// `symbol`, or -1 when the symbol is not among the finalized cm.jt entries.
int TableJumpSection::getCMJTEntryIndex(const Symbol *symbol) {
  const uint32_t pos =
      getIndex(symbol, maxCMJTEntrySize, finalizedCMJTEntries);
  // getIndex reports "not found" as the size of the list.
  if (pos >= finalizedCMJTEntries.size())
    return -1;
  return static_cast<int>(startCMJTEntryIdx + pos);
}

void TableJumpSection::addCMJALTEntryCandidate(const Symbol *symbol,
int csReduction) {
addEntry(symbol, CMJALTEntryCandidates, csReduction);
}

// Returns the table index to encode in a cm.jalt instruction targeting
// `symbol`, or -1 when the symbol is not among the finalized cm.jalt entries.
int TableJumpSection::getCMJALTEntryIndex(const Symbol *symbol) {
  const uint32_t pos =
      getIndex(symbol, maxCMJALTEntrySize, finalizedCMJALTEntries);
  // getIndex reports "not found" as the size of the list.
  if (pos >= finalizedCMJALTEntries.size())
    return -1;
  return static_cast<int>(startCMJALTEntryIdx + pos);
}

// Accumulates `csReduction` onto the running total kept for `symbol` in
// `entriesList`; a symbol seen for the first time starts from zero.
void TableJumpSection::addEntry(
    const Symbol *symbol, llvm::DenseMap<const Symbol *, int> &entriesList,
    int csReduction) {
  int &accumulated = entriesList[symbol];
  accumulated += csReduction;
}

// Linear search for `symbol` in the ordered list `entriesList`.
// Returns its zero-based position, or entriesList.size() when the symbol is
// not present. `maxSize` is only used to assert that the list has not grown
// beyond its permitted number of entries.
uint32_t TableJumpSection::getIndex(
    const Symbol *symbol, uint32_t maxSize,
    SmallVector<llvm::detail::DenseMapPair<const Symbol *, int>, 0>
        &entriesList) {
  assert(maxSize >= entriesList.size() &&
         "Finalized vector of entries exceeds maximum");

  uint32_t position = 0;
  for (const auto &entry : entriesList) {
    if (entry.first == symbol)
      return position;
    ++position;
  }
  // Not found: signal with the one-past-the-end position.
  return entriesList.size();
}

void TableJumpSection::scanTableJumpEntries(const InputSection &sec) const {
for (auto [i, r] : llvm::enumerate(sec.relocations)) {
Defined *definedSymbol = dyn_cast<Defined>(r.sym);
if (!definedSymbol)
continue;
if (i + 1 == sec.relocs().size() ||
sec.relocs()[i + 1].type != R_RISCV_RELAX)
continue;
switch (r.type) {
case R_RISCV_JAL:
case R_RISCV_CALL:
case R_RISCV_CALL_PLT: {
const auto jalr = sec.contentMaybeDecompress().data()[r.offset + 4];
const uint8_t rd = extractBits(jalr, 11, 7);

int csReduction = 6;
if (sec.relaxAux->relocTypes[i] == R_RISCV_RVC_JUMP)
continue;
else if (sec.relaxAux->relocTypes[i] == R_RISCV_JAL)
csReduction = 2;

if (rd == 0)
in.riscvTableJumpSection->addCMJTEntryCandidate(r.sym, csReduction);
else if (rd == X_RA)
in.riscvTableJumpSection->addCMJALTEntryCandidate(r.sym, csReduction);
}
}
}
}

// Freezes the table contents. Runs once; later calls are no-ops.
//
// The cm.jt candidates are finalized first and the resulting net saving is
// recorded. The cm.jalt candidates are then finalized; they are dropped again
// if including them yields a smaller net saving than cm.jt alone. If the
// table as a whole still does not reduce code size, a warning is issued and
// every entry is discarded so that no instruction is relaxed against a table
// that writeTo() will not emit.
void TableJumpSection::finalizeContents() {
  if (isFinalized)
    return;
  isFinalized = true;

  finalizedCMJTEntries = finalizeEntry(CMJTEntryCandidates, maxCMJTEntrySize);
  CMJTEntryCandidates.clear();
  // Net saving with only the cm.jt part of the table present.
  int32_t CMJTSizeReduction = getSizeReduction();
  finalizedCMJALTEntries =
      finalizeEntry(CMJALTEntryCandidates, maxCMJALTEntrySize);
  CMJALTEntryCandidates.clear();

  if (!finalizedCMJALTEntries.empty() &&
      getSizeReduction() < CMJTSizeReduction) {
    // Stop relaxing to cm.jalt when the bytes it saves are outweighed by the
    // table growth it requires, i.e. the total saving is smaller than with
    // cm.jt alone.
    finalizedCMJALTEntries.clear();
  }
  // If the table jump still has a negative (or zero) effect, give up.
  if (getSizeReduction() <= 0) {
    warn("Table Jump Relaxation didn't got any reduction for code size.");
    // Clear both lists: writeTo() emits nothing in this state, so neither
    // getCMJTEntryIndex() nor getCMJALTEntryIndex() may keep resolving
    // symbols.
    finalizedCMJTEntries.clear();
    finalizedCMJALTEntries.clear();
  }
}

// Turns a candidate map into the final, ordered list of table entries.
//
// `EntryMap` is taken by value and is not modified, and no section member is
// touched; the result is returned to the caller. Three things happen:
//   1. The candidates are sorted in decreasing order of the code-size
//      reduction they provide.
//   2. Everything beyond the first `maxSize` candidates is dropped, since the
//      tail provides the least reduction.
//   3. Trailing candidates whose reduction is smaller than config->wordsize
//      are dropped: they would increase code size, because the saving from
//      converting the instructions does not cover the table slot they need.
//
// Candidates with equal reduction are ordered by symbol name. DenseMap
// iteration order depends on pointer values, so without a tie-break the table
// layout could differ from one link to the next.
// NOTE(review): distinct symbols that share a name and a reduction still
// compare equal — consider a stronger tie-break if that can occur.
SmallVector<llvm::detail::DenseMapPair<const Symbol *, int>, 0>
TableJumpSection::finalizeEntry(llvm::DenseMap<const Symbol *, int> EntryMap,
                                uint32_t maxSize) {
  using Entry = llvm::detail::DenseMapPair<const Symbol *, int>;

  SmallVector<Entry, 0> entries;
  std::copy(EntryMap.begin(), EntryMap.end(), std::back_inserter(entries));

  std::sort(entries.begin(), entries.end(),
            [](const Entry &lhs, const Entry &rhs) {
              if (lhs.second != rhs.second)
                return lhs.second > rhs.second;
              return lhs.first->getName() < rhs.first->getName();
            });

  // Keep at most maxSize entries; shrink in place rather than copying.
  if (entries.size() > maxSize)
    entries.erase(entries.begin() + maxSize, entries.end());

  // Drop any items that have a negative effect (i.e. increase code size).
  // The list is sorted, so they can only sit at the tail.
  while (!entries.empty() && entries.back().second < config->wordsize)
    entries.pop_back();
  return entries;
}

// Size of the table in bytes.
//
// Before finalization the candidate maps are counted, afterwards the
// finalized lists. In both states the table extends to the end of the
// cm.jalt entries when there are any, and otherwise to the end of the cm.jt
// entries.
size_t TableJumpSection::getSize() const {
  if (!isFinalized) {
    if (CMJALTEntryCandidates.empty())
      return (startCMJTEntryIdx + CMJTEntryCandidates.size()) *
             config->wordsize;
    return (startCMJALTEntryIdx + CMJALTEntryCandidates.size()) *
           config->wordsize;
  }

  if (finalizedCMJALTEntries.empty())
    return (startCMJTEntryIdx + finalizedCMJTEntries.size()) *
           config->wordsize;
  return (startCMJALTEntryIdx + finalizedCMJALTEntries.size()) *
         config->wordsize;
}

int32_t TableJumpSection::getSizeReduction() {
Xinlong-Wu marked this conversation as resolved.
Show resolved Hide resolved
// The total reduction in code size is J + JA - JTS - JAE.
// Where:
// J = number of bytes saved for all the cm.jt instructions emitted
// JA = number of bytes saved for all the cm.jalt instructions emitted
// JTS = size of the part of the table for cm.jt jumps (i.e. 32 x wordsize)
// JAE = number of entries emitted for the cm.jalt jumps x wordsize

int32_t sizeReduction = -getSize();
for (auto entry : finalizedCMJTEntries) {
sizeReduction += entry.second;
}
for (auto entry : finalizedCMJALTEntries) {
sizeReduction += entry.second;
}
return sizeReduction;
}

// Emits the table into `buf`. Nothing is written when the table does not
// reduce code size.
//
// Layout, in words of config->wordsize bytes:
//   - the header written by the target at `buf`,
//   - the cm.jt entries starting at word startCMJTEntryIdx,
//   - if there are cm.jalt entries: zero padding from the end of the cm.jt
//     entries up to word startCMJALTEntryIdx, then the cm.jalt entries.
void TableJumpSection::writeTo(uint8_t *buf) {
  if (getSizeReduction() <= 0)
    return;
  target->writeTableJumpHeader(buf);
  writeEntries(buf + startCMJTEntryIdx * config->wordsize,
               finalizedCMJTEntries);
  if (!finalizedCMJALTEntries.empty()) {
    // Pad only the gap between the last cm.jt entry and the first cm.jalt
    // slot; padding more would run into (and past) the cm.jalt entries.
    const size_t firstPadIdx = startCMJTEntryIdx + finalizedCMJTEntries.size();
    assert(firstPadIdx <= startCMJALTEntryIdx &&
           "cm.jt entries overlap the cm.jalt part of the table");
    padWords(buf + firstPadIdx * config->wordsize,
             static_cast<uint8_t>(startCMJALTEntryIdx - firstPadIdx));
    writeEntries(buf + (startCMJALTEntryIdx * config->wordsize),
                 finalizedCMJALTEntries);
  }
}

// Zero-fills `maxWordCount` consecutive table words starting at `buf`. A word
// is 8 bytes when config->is64 is set and 4 bytes otherwise.
void TableJumpSection::padWords(uint8_t *buf, const uint8_t maxWordCount) {
  for (size_t i = 0; i < maxWordCount; ++i) {
    // Step by whole words, not bytes.
    uint8_t *slot = buf + i * config->wordsize;
    if (config->is64)
      write64le(slot, 0);
    else
      write32le(slot, 0);
  }
}

// Writes the address of every entry in `entriesList` to consecutive table
// words starting at `buf`, one word of config->wordsize bytes per entry.
//
// Slot k always belongs to entry k, because that is the position getIndex()
// reports and the relaxed instructions encode. An entry whose symbol is not
// defined therefore leaves its slot unwritten but still occupies it, instead
// of shifting all following entries down by one.
void TableJumpSection::writeEntries(
    uint8_t *buf,
    SmallVector<llvm::detail::DenseMapPair<const Symbol *, int>, 0>
        &entriesList) {
  for (const auto &entry : entriesList) {
    assert(entry.second > 0);
    // Only a defined symbol has a final address to write.
    if (entry.first->isDefined())
      target->writeTableJumpEntry(buf, entry.first->getVA());
    buf += config->wordsize;
  }
}
1 change: 1 addition & 0 deletions lld/ELF/Config.h
Original file line number Diff line number Diff line change
Expand Up @@ -289,6 +289,7 @@ struct Config {
bool resolveGroups;
bool relrGlibc = false;
bool relrPackDynRelocs = false;
bool relaxTbljal;
llvm::DenseSet<llvm::StringRef> saveTempsArgs;
llvm::SmallVector<std::pair<llvm::GlobPattern, uint32_t>, 0> shuffleSections;
bool singleRoRx;
Expand Down
1 change: 1 addition & 0 deletions lld/ELF/Driver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1461,6 +1461,7 @@ static void readConfigs(opt::InputArgList &args) {
config->whyExtract = args.getLastArgValue(OPT_why_extract);
config->zCombreloc = getZFlag(args, "combreloc", "nocombreloc", true);
config->zCopyreloc = getZFlag(args, "copyreloc", "nocopyreloc", true);
config->relaxTbljal = args.hasArg(OPT_relax_tbljal);
config->zForceBti = hasZOption(args, "force-bti");
config->zForceIbt = hasZOption(args, "force-ibt");
config->zGcs = getZGcs(args);
Expand Down
3 changes: 3 additions & 0 deletions lld/ELF/Options.td
Original file line number Diff line number Diff line change
Expand Up @@ -361,6 +361,9 @@ defm use_android_relr_tags: BB<"use-android-relr-tags",
"Use SHT_ANDROID_RELR / DT_ANDROID_RELR* tags instead of SHT_RELR / DT_RELR*",
"Use SHT_RELR / DT_RELR* tags (default)">;

def relax_tbljal: FF<"relax-tbljal">,
HelpText<"Enable conversion of call instructions to table jump instruction from the Zcmt extension for frequently called functions (RISC-V only)">;

def pic_veneer: F<"pic-veneer">,
HelpText<"Always generate position independent thunks (veneers)">;

Expand Down
10 changes: 10 additions & 0 deletions lld/ELF/SyntheticSections.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4833,6 +4833,16 @@ template <class ELFT> void elf::createSyntheticSections() {
add(*in.ppc64LongBranchTarget);
}

if (config->emachine == EM_RISCV && config->relaxTbljal) {
in.riscvTableJumpSection = std::make_unique<TableJumpSection>();
add(*in.riscvTableJumpSection);

Symbol *s = symtab.addSymbol(Defined{
/*file=*/nullptr, "__jvt_base$", STB_GLOBAL, STT_NOTYPE, STT_NOTYPE,
/*value=*/0, /*size=*/0, in.riscvTableJumpSection.get()});
s->isUsedInRegularObj = true;
}

in.gotPlt = std::make_unique<GotPltSection>();
add(*in.gotPlt);
in.igotPlt = std::make_unique<IgotPltSection>();
Expand Down
Loading