New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[ELF] Support R_RISCV_SET_ULEB128/R_RISCV_SUB_ULEB128 in non-SHF_ALLOC sections #72610
Conversation
@llvm/pr-subscribers-lld @llvm/pr-subscribers-lld-elf Author: Fangrui Song (MaskRay) Changes: For a label difference like
Full diff: https://github.com/llvm/llvm-project/pull/72610.diff 4 Files Affected:
diff --git a/lld/ELF/Arch/RISCV.cpp b/lld/ELF/Arch/RISCV.cpp
index 6413dcd7dcd7976..a556d89c36400d3 100644
--- a/lld/ELF/Arch/RISCV.cpp
+++ b/lld/ELF/Arch/RISCV.cpp
@@ -306,6 +306,8 @@ RelExpr RISCV::getRelExpr(const RelType type, const Symbol &s,
case R_RISCV_TPREL_ADD:
case R_RISCV_RELAX:
return config->relax ? R_RELAX_HINT : R_NONE;
+ case R_RISCV_SET_ULEB128:
+ return R_RISCV_LEB128;
default:
error(getErrorLocation(loc) + "unknown relocation (" + Twine(type) +
") against symbol " + toString(s));
diff --git a/lld/ELF/InputSection.cpp b/lld/ELF/InputSection.cpp
index e6942a928787a5b..2cf4de10ba062e3 100644
--- a/lld/ELF/InputSection.cpp
+++ b/lld/ELF/InputSection.cpp
@@ -19,6 +19,7 @@
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Compression.h"
#include "llvm/Support/Endian.h"
+#include "llvm/Support/LEB128.h"
#include "llvm/Support/xxhash.h"
#include <algorithm>
#include <mutex>
@@ -874,6 +875,16 @@ uint64_t InputSectionBase::getRelocTargetVA(const InputFile *file, RelType type,
}
}
+// Overwrite the ULEB128 value at bufLoc with val while keeping the original encoded length; returns what is left of val for the final byte, which the caller treats as overflow when it is >= 0x80.
+static uint64_t overwriteULEB128(uint8_t *bufLoc, uint64_t val) {
+ while (*bufLoc & 0x80) { // continuation bit set: the existing encoding has more bytes
+ *bufLoc++ = 0x80 | (val & 0x7f); // store the low 7 bits and keep the continuation bit
+ val >>= 7;
+ }
+ *bufLoc = val; // last byte of the existing encoding receives the remainder of val
+ return val;
+}
+
// This function applies relocations to sections without SHF_ALLOC bit.
// Such sections are never mapped to memory at runtime. Debug sections are
// an example. Relocations in non-alloc sections are much easier to
@@ -885,6 +896,7 @@ template <class ELFT, class RelTy>
void InputSection::relocateNonAlloc(uint8_t *buf, ArrayRef<RelTy> rels) {
const unsigned bits = sizeof(typename ELFT::uint) * 8;
const TargetInfo &target = *elf::target;
+ const auto emachine = config->emachine;
const bool isDebug = isDebugSection(*this);
const bool isDebugLocOrRanges =
isDebug && (name == ".debug_loc" || name == ".debug_ranges");
@@ -896,14 +908,15 @@ void InputSection::relocateNonAlloc(uint8_t *buf, ArrayRef<RelTy> rels) {
break;
}
- for (const RelTy &rel : rels) {
+ for (size_t i = 0, relsSize = rels.size(); i != relsSize; ++i) {
+ const RelTy &rel = rels[i];
RelType type = rel.getType(config->isMips64EL);
// GCC 8.0 or earlier have a bug that they emit R_386_GOTPC relocations
// against _GLOBAL_OFFSET_TABLE_ for .debug_info. The bug has been fixed
// in 2017 (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=82630), but we
// need to keep this bug-compatible code for a while.
- if (config->emachine == EM_386 && type == R_386_GOTPC)
+ if (emachine == EM_386 && type == R_386_GOTPC)
continue;
uint64_t offset = rel.r_offset;
@@ -977,6 +990,24 @@ void InputSection::relocateNonAlloc(uint8_t *buf, ArrayRef<RelTy> rels) {
continue;
}
+ if (emachine == EM_RISCV && type == R_RISCV_SET_ULEB128) {
+ if (++i < relsSize &&
+ rels[i].getType(/*isMips64EL=*/false) == R_RISCV_SUB_ULEB128 &&
+ rels[i].r_offset == offset) {
+ auto val = sym.getVA(addend) -
+ (getFile<ELFT>()->getRelocTargetSym(rels[i]).getVA(0) +
+ getAddend<ELFT>(rels[i]));
+ if (overwriteULEB128(bufLoc, val) >= 0x80)
+ errorOrWarn(getLocation(offset) + ": ULEB128 value " + Twine(val) +
+ " exceeds available space; references '" +
+ lld::toString(sym) + "'");
+ continue;
+ }
+ errorOrWarn(getLocation(offset) +
+ ": R_RISCV_SET_ULEB128 not paired with R_RISCV_SUB_SET128");
+ return;
+ }
+
std::string msg = getLocation(offset) + ": has non-ABS relocation " +
toString(type) + " against symbol '" + toString(sym) +
"'";
diff --git a/lld/ELF/Relocations.h b/lld/ELF/Relocations.h
index 15a2b5fc177c546..cfb9092149f3e0f 100644
--- a/lld/ELF/Relocations.h
+++ b/lld/ELF/Relocations.h
@@ -101,6 +101,7 @@ enum RelExpr {
R_PPC64_TOCBASE,
R_PPC64_RELAX_GOT_PC,
R_RISCV_ADD,
+ R_RISCV_LEB128,
R_RISCV_PC_INDIRECT,
// Same as R_PC but with page-aligned semantics.
R_LOONGARCH_PAGE_PC,
diff --git a/lld/test/ELF/riscv-reloc-leb128.s b/lld/test/ELF/riscv-reloc-leb128.s
new file mode 100644
index 000000000000000..a31fcaf811a3036
--- /dev/null
+++ b/lld/test/ELF/riscv-reloc-leb128.s
@@ -0,0 +1,101 @@
+# REQUIRES: riscv
+# RUN: rm -rf %t && split-file %s %t && cd %t
+# RUN: llvm-mc -filetype=obj -triple=riscv64 -mattr=+relax a.s -o a.o
+# RUN: llvm-readobj -r -x .debug_rnglists a.o | FileCheck %s --check-prefix=REL
+# RUN: ld.lld -shared a.o -o a.so
+# RUN: llvm-readelf -x .debug_rnglists a.so | FileCheck %s
+
+# REL: 0x0 R_RISCV_SET_ULEB128 w1 0x83
+# REL-NEXT: 0x0 R_RISCV_SUB_ULEB128 w2 0x0
+# REL-NEXT: 0x1 R_RISCV_SET_ULEB128 w2 0x78
+# REL-NEXT: 0x1 R_RISCV_SUB_ULEB128 w1 0x0
+# REL-NEXT: 0x3 R_RISCV_SET_ULEB128 w1 0x89
+# REL-NEXT: 0x3 R_RISCV_SUB_ULEB128 w2 0x0
+# REL-NEXT: 0x5 R_RISCV_SET_ULEB128 w2 0x3FF8
+# REL-NEXT: 0x5 R_RISCV_SUB_ULEB128 w1 0x0
+# REL-NEXT: 0x8 R_RISCV_SET_ULEB128 w1 0x4009
+# REL-NEXT: 0x8 R_RISCV_SUB_ULEB128 w2 0x0
+# REL-NEXT: 0xB R_RISCV_SET_ULEB128 w2 0x1FFFF8
+# REL-NEXT: 0xB R_RISCV_SUB_ULEB128 w1 0x0
+# REL-NEXT: 0xF R_RISCV_SET_ULEB128 w1 0x200009
+# REL-NEXT: 0xF R_RISCV_SUB_ULEB128 w2 0x0
+# REL-NEXT: 0x13 R_RISCV_SET_ULEB128 w2 0x3
+# REL-NEXT: 0x13 R_RISCV_SUB_ULEB128 w1 0x4
+# REL: Hex dump of section '.debug_rnglists':
+# REL-NEXT: 0x00000000 7b800181 01808001 81800180 80800181 {
+# REL-NEXT: 0x00000010 80800100 .
+
+# CHECK: Hex dump of section '.debug_rnglists':
+# CHECK-NEXT: 0x00000000 7ffc0085 01fcff00 858001fc ffff0085 .
+# CHECK-NEXT: 0x00000010 80800103 .
+
+# RUN: llvm-mc -filetype=obj -triple=riscv64 -mattr=+relax sub.s -o sub.o
+# RUN: not ld.lld -shared sub.o 2>&1 | FileCheck %s --check-prefix=SUB
+# SUB: error: sub.o:(.debug_rnglists+0x8): unknown relocation (61) against symbol w2
+
+# RUN: llvm-mc -filetype=obj -triple=riscv64 -mattr=+relax unpaired1.s -o unpaired1.o
+# RUN: not ld.lld -shared unpaired1.o 2>&1 | FileCheck %s --check-prefix=UNPAIRED
+# RUN: llvm-mc -filetype=obj -triple=riscv64 -mattr=+relax unpaired2.s -o unpaired2.o
+# RUN: not ld.lld -shared unpaired2.o 2>&1 | FileCheck %s --check-prefix=UNPAIRED
+# RUN: llvm-mc -filetype=obj -triple=riscv64 -mattr=+relax unpaired3.s -o unpaired3.o
+# RUN: not ld.lld -shared unpaired3.o 2>&1 | FileCheck %s --check-prefix=UNPAIRED
+# UNPAIRED: error: {{.*}}.o:(.debug_rnglists+0x8): R_RISCV_SET_ULEB128 not paired with R_RISCV_SUB_SET128
+
+# RUN: llvm-mc -filetype=obj -triple=riscv64 -mattr=+relax overflow.s -o overflow.o
+# RUN: not ld.lld -shared overflow.o 2>&1 | FileCheck %s --check-prefix=OVERFLOW
+# OVERFLOW: error: overflow.o:(.debug_rnglists+0x8): ULEB128 value 128 exceeds available space; references 'w2'
+
+#--- a.s
+w1:
+ call foo # 4 bytes after relaxation
+w2:
+
+.section .debug_rnglists
+.uleb128 w1-w2+131 # initial value: 0x7b
+.uleb128 w2-w1+120 # initial value: 0x0180
+.uleb128 w1-w2+137 # initial value: 0x0181
+.uleb128 w2-w1+16376 # initial value: 0x018080
+.uleb128 w1-w2+16393 # initial value: 0x018081
+.uleb128 w2-w1+2097144 # initial value: 0x01808080
+.uleb128 w1-w2+2097161 # initial value: 0x01808081
+.reloc ., R_RISCV_SET_ULEB128, w2+3
+.reloc ., R_RISCV_SUB_ULEB128, w1+4 # SUB with a non-zero addend
+.byte 0
+
+#--- sub.s
+w1: call foo; w2:
+.section .debug_rnglists
+.quad 0;
+.reloc ., R_RISCV_SUB_ULEB128, w2+120
+.byte 0x7f
+
+#--- unpaired1.s
+w1: call foo; w2:
+.section .debug_rnglists
+.quad 0;
+.reloc ., R_RISCV_SET_ULEB128, w2+120
+.byte 0x7f
+
+#--- unpaired2.s
+w1: call foo; w2:
+.section .debug_rnglists
+.quad 0
+.reloc ., R_RISCV_SET_ULEB128, w2+120
+.reloc .+1, R_RISCV_SUB_ULEB128, w1
+.byte 0x7f
+
+#--- unpaired3.s
+w1: call foo; w2:
+.section .debug_rnglists
+.quad 0
+.reloc ., R_RISCV_SET_ULEB128, w2+120
+.reloc ., R_RISCV_SUB64, w1
+.byte 0x7f
+
+#--- overflow.s
+w1: call foo; w2:
+.section .debug_rnglists
+.quad 0
+.reloc ., R_RISCV_SET_ULEB128, w2+124
+.reloc ., R_RISCV_SUB_ULEB128, w1
+.byte 0x7f
|
…C sections For a label difference like `.uleb128 A-B`, MC generates a pair of R_RISCV_SET_ULEB128/R_RISCV_SUB_ULEB128 if A-B cannot be folded as a constant. GNU assembler generates a pair of relocations in more cases (when A or B is in a code section with linker relaxation). `.uleb128 A-B` is primarily used by DWARF v5 .debug_loclists/.debug_rnglists (DW_LLE_offset_pair/DW_RLE_offset_pair entry kinds) implemented in Clang and GCC. `.uleb128 A-B` can be used in SHF_ALLOC sections as well (e.g. `.gcc_except_table`). This patch does not handle SHF_ALLOC. `-z dead-reloc-in-nonalloc=` can be used to change the relocated value, if the R_RISCV_SET_ULEB128 symbol is in a discarded section. We don't check the R_RISCV_SUB_ULEB128 symbol since for the expected cases A and B should be defined in the same input section.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It's a really elegant way to write and check for length at the same time!
|
||
# RUN: llvm-mc -filetype=obj -triple=riscv64 -mattr=+relax sub.s -o sub.o | ||
# RUN: not ld.lld -shared sub.o 2>&1 | FileCheck %s --check-prefix=SUB | ||
# SUB: error: sub.o:(.debug_rnglists+0x8): unknown relocation (61) against symbol w2 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Maybe we could have a better error message for an orphan R_RISCV_SUB_ULEB128?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We could, but I feel that the utility of the dedicated diagnostic will be very low. So it is probably not useful to have more code on it...
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yeah, I agree that's not useful and wouldn't bring much value... in general, if that happens it means something is really broken or corrupted.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM, but let's wait a few more days to give other people a chance to take a look :)
|
||
# RUN: llvm-mc -filetype=obj -triple=riscv64 -mattr=+relax sub.s -o sub.o | ||
# RUN: not ld.lld -shared sub.o 2>&1 | FileCheck %s --check-prefix=SUB | ||
# SUB: error: sub.o:(.debug_rnglists+0x8): unknown relocation (61) against symbol w2 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yeah, I agree that's not useful and wouldn't bring much value... in general, if that happens it means something is really broken or corrupted.
@MaskRay We've seen some issues building Fuchsia since 1df5ea2 landed. We've been triaging a variety of other issues that were breaking the build, so this wasn't noticed, but now that they're cleared up we're seeing these regularly when evaluating new Toolchains. From what I can tell, this patch should fix those types of errors, correct? |
Yes, if your issue is related to |
Awesome. Thanks for the update. Looking forward to this landing. :) |
I've received a report from another user and this patch addresses the non-SHF_ALLOC section problem. I plan to land this tomorrow. |
…ctions (llvm#77261) Complement llvm#72610 (non-SHF_ALLOC sections). GCC-generated .gcc_except_table has the SHF_ALLOC flag and may contain R_RISCV_SET_ULEB128/R_RISCV_SUB_ULEB128 relocations.
…C sections (llvm#72610) For a label difference like `.uleb128 A-B`, MC generates a pair of R_RISCV_SET_ULEB128/R_RISCV_SUB_ULEB128 if A-B cannot be folded as a constant. GNU assembler generates a pair of relocations in more cases (when A or B is in a code section with linker relaxation). `.uleb128 A-B` is primarily used by DWARF v5 .debug_loclists/.debug_rnglists (DW_LLE_offset_pair/DW_RLE_offset_pair entry kinds) implemented in Clang and GCC. `.uleb128 A-B` can be used in SHF_ALLOC sections as well (e.g. `.gcc_except_table`). This patch does not handle SHF_ALLOC. `-z dead-reloc-in-nonalloc=` can be used to change the relocated value, if the R_RISCV_SET_ULEB128 symbol is in a discarded section. We don't check the R_RISCV_SUB_ULEB128 symbol since for the expected cases A and B should be defined in the same input section.
…ctions (llvm#77261) Complement llvm#72610 (non-SHF_ALLOC sections). GCC-generated .gcc_except_table has the SHF_ALLOC flag and may contain R_RISCV_SET_ULEB128/R_RISCV_SUB_ULEB128 relocations.
…C sections (llvm#72610) For a label difference like `.uleb128 A-B`, MC generates a pair of R_RISCV_SET_ULEB128/R_RISCV_SUB_ULEB128 if A-B cannot be folded as a constant. GNU assembler generates a pair of relocations in more cases (when A or B is in a code section with linker relaxation). `.uleb128 A-B` is primarily used by DWARF v5 .debug_loclists/.debug_rnglists (DW_LLE_offset_pair/DW_RLE_offset_pair entry kinds) implemented in Clang and GCC. `.uleb128 A-B` can be used in SHF_ALLOC sections as well (e.g. `.gcc_except_table`). This patch does not handle SHF_ALLOC. `-z dead-reloc-in-nonalloc=` can be used to change the relocated value, if the R_RISCV_SET_ULEB128 symbol is in a discarded section. We don't check the R_RISCV_SUB_ULEB128 symbol since for the expected cases A and B should be defined in the same input section.
…ctions (llvm#77261) Complement llvm#72610 (non-SHF_ALLOC sections). GCC-generated .gcc_except_table has the SHF_ALLOC flag and may contain R_RISCV_SET_ULEB128/R_RISCV_SUB_ULEB128 relocations.
…C sections (llvm#72610) For a label difference like `.uleb128 A-B`, MC generates a pair of R_RISCV_SET_ULEB128/R_RISCV_SUB_ULEB128 if A-B cannot be folded as a constant. GNU assembler generates a pair of relocations in more cases (when A or B is in a code section with linker relaxation). `.uleb128 A-B` is primarily used by DWARF v5 .debug_loclists/.debug_rnglists (DW_LLE_offset_pair/DW_RLE_offset_pair entry kinds) implemented in Clang and GCC. `.uleb128 A-B` can be used in SHF_ALLOC sections as well (e.g. `.gcc_except_table`). This patch does not handle SHF_ALLOC. `-z dead-reloc-in-nonalloc=` can be used to change the relocated value, if the R_RISCV_SET_ULEB128 symbol is in a discarded section. We don't check the R_RISCV_SUB_ULEB128 symbol since for the expected cases A and B should be defined in the same input section.
…ctions (llvm#77261) Complement llvm#72610 (non-SHF_ALLOC sections). GCC-generated .gcc_except_table has the SHF_ALLOC flag and may contain R_RISCV_SET_ULEB128/R_RISCV_SUB_ULEB128 relocations.
For a label difference like `.uleb128 A-B`, MC generates a pair of
R_RISCV_SET_ULEB128/R_RISCV_SUB_ULEB128 if A-B cannot be folded as a
constant. GNU assembler generates a pair of relocations in more cases
(when A or B is in a code section with linker relaxation).
`.uleb128 A-B` is primarily used by DWARF v5
.debug_loclists/.debug_rnglists (DW_LLE_offset_pair/DW_RLE_offset_pair
entry kinds) implemented in Clang and GCC.
`.uleb128 A-B` can be used in SHF_ALLOC sections as well (e.g.
`.gcc_except_table`). This patch does not handle SHF_ALLOC.
`-z dead-reloc-in-nonalloc=` can be used to change the relocated value,
if the R_RISCV_SET_ULEB128 symbol is in a discarded section. We don't
check the R_RISCV_SUB_ULEB128 symbol since for the expected cases A and
B should be defined in the same input section.