-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[LLD][COFF] Fix tailMergeARM64 delayload thunk 128 MB range limitation #161844
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-platform-windows @llvm/pr-subscribers-lld-coff Author: Hans Wennborg (zmodem) Changes: lld would fail with "error: relocation out of range" if the thunk was laid out more than 128 MB away from __delayLoadHelper2. This patch changes the call sequence to load the offset into a register and call through that, allowing for 32-bit offsets. Fixes #161812 Full diff: https://github.com/llvm/llvm-project/pull/161844.diff 3 Files Affected:
diff --git a/lld/COFF/DLL.cpp b/lld/COFF/DLL.cpp
index 3ce8853adb2a2..f4284efee8d4d 100644
--- a/lld/COFF/DLL.cpp
+++ b/lld/COFF/DLL.cpp
@@ -333,7 +333,9 @@ static const uint8_t tailMergeARM64[] = {
0xe1, 0x03, 0x11, 0xaa, // mov x1, x17
0x00, 0x00, 0x00, 0x90, // adrp x0, #0 DELAY_IMPORT_DESCRIPTOR
0x00, 0x00, 0x00, 0x91, // add x0, x0, #0 :lo12:DELAY_IMPORT_DESCRIPTOR
- 0x00, 0x00, 0x00, 0x94, // bl #0 __delayLoadHelper2
+ 0x02, 0x00, 0x00, 0x90, // adrp x2, #0 __delayLoadHelper2
+ 0x42, 0x00, 0x00, 0x91, // add x2, x2, #0 :lo12:__delayLoadHelper2
+ 0x40, 0x00, 0x3f, 0xd6, // blr x2
0xf0, 0x03, 0x00, 0xaa, // mov x16, x0
0xe6, 0x9f, 0x45, 0xad, // ldp q6, q7, [sp, #176]
0xe4, 0x97, 0x44, 0xad, // ldp q4, q5, [sp, #144]
@@ -556,8 +558,10 @@ class TailMergeChunkARM64 : public NonSectionCodeChunk {
memcpy(buf, tailMergeARM64, sizeof(tailMergeARM64));
applyArm64Addr(buf + 44, desc->getRVA(), rva + 44, 12);
applyArm64Imm(buf + 48, desc->getRVA() & 0xfff, 0);
- if (helper)
- applyArm64Branch26(buf + 52, helper->getRVA() - rva - 52);
+ if (helper) {
+ applyArm64Addr(buf + 52, helper->getRVA(), rva + 52, 12);
+ applyArm64Imm(buf + 56, helper->getRVA() & 0xfff, 0);
+ }
}
Chunk *desc = nullptr;
diff --git a/lld/test/COFF/arm64-delayimport.yaml b/lld/test/COFF/arm64-delayimport.yaml
index abb9f25d5c379..7090206dea38a 100644
--- a/lld/test/COFF/arm64-delayimport.yaml
+++ b/lld/test/COFF/arm64-delayimport.yaml
@@ -21,18 +21,20 @@
# DISASM: 140001048: aa1103e1 mov x1, x17
# DISASM: 14000104c: b0000000 adrp x0, 0x140002000
# DISASM: 140001050: 91000000 add x0, x0, #0
-# DISASM: 140001054: 97ffffeb bl 0x140001000 <.text>
-# DISASM: 140001058: aa0003f0 mov x16, x0
-# DISASM: 14000105c: ad459fe6 ldp q6, q7, [sp, #176]
-# DISASM: 140001060: ad4497e4 ldp q4, q5, [sp, #144]
-# DISASM: 140001064: ad438fe2 ldp q2, q3, [sp, #112]
-# DISASM: 140001068: ad4287e0 ldp q0, q1, [sp, #80]
-# DISASM: 14000106c: a9441fe6 ldp x6, x7, [sp, #64]
-# DISASM: 140001070: a94317e4 ldp x4, x5, [sp, #48]
-# DISASM: 140001074: a9420fe2 ldp x2, x3, [sp, #32]
-# DISASM: 140001078: a94107e0 ldp x0, x1, [sp, #16]
-# DISASM: 14000107c: a8cd7bfd ldp x29, x30, [sp], #208
-# DISASM: 140001080: d61f0200 br x16
+# DISASM: 140001054: 90000002 adrp x2, 0x140001000 <.text>
+# DISASM: 140001058: 91000042 add x2, x2, #0
+# DISASM: 14000105c: d63f0040 blr x2
+# DISASM: 140001060: aa0003f0 mov x16, x0
+# DISASM: 140001064: ad459fe6 ldp q6, q7, [sp, #176]
+# DISASM: 140001068: ad4497e4 ldp q4, q5, [sp, #144]
+# DISASM: 14000106c: ad438fe2 ldp q2, q3, [sp, #112]
+# DISASM: 140001070: ad4287e0 ldp q0, q1, [sp, #80]
+# DISASM: 140001074: a9441fe6 ldp x6, x7, [sp, #64]
+# DISASM: 140001078: a94317e4 ldp x4, x5, [sp, #48]
+# DISASM: 14000107c: a9420fe2 ldp x2, x3, [sp, #32]
+# DISASM: 140001080: a94107e0 ldp x0, x1, [sp, #16]
+# DISASM: 140001084: a8cd7bfd ldp x29, x30, [sp], #208
+# DISASM: 140001088: d61f0200 br x16
# IMPORTS: Format: COFF-ARM64
# IMPORTS: Arch: aarch64
diff --git a/lld/test/COFF/arm64x-delayimport.test b/lld/test/COFF/arm64x-delayimport.test
index 2a68bce79baad..e22cc6d5c42fc 100644
--- a/lld/test/COFF/arm64x-delayimport.test
+++ b/lld/test/COFF/arm64x-delayimport.test
@@ -74,18 +74,20 @@ DISASM-NEXT: 180001044: ad059fe6 stp q6, q7, [sp, #0xb0]
DISASM-NEXT: 180001048: aa1103e1 mov x1, x17
DISASM-NEXT: 18000104c: f0000000 adrp x0, 0x180004000
DISASM-NEXT: 180001050: 910d2000 add x0, x0, #0x348
-DISASM-NEXT: 180001054: 97ffffeb bl 0x180001000 <.text>
-DISASM-NEXT: 180001058: aa0003f0 mov x16, x0
-DISASM-NEXT: 18000105c: ad459fe6 ldp q6, q7, [sp, #0xb0]
-DISASM-NEXT: 180001060: ad4497e4 ldp q4, q5, [sp, #0x90]
-DISASM-NEXT: 180001064: ad438fe2 ldp q2, q3, [sp, #0x70]
-DISASM-NEXT: 180001068: ad4287e0 ldp q0, q1, [sp, #0x50]
-DISASM-NEXT: 18000106c: a9441fe6 ldp x6, x7, [sp, #0x40]
-DISASM-NEXT: 180001070: a94317e4 ldp x4, x5, [sp, #0x30]
-DISASM-NEXT: 180001074: a9420fe2 ldp x2, x3, [sp, #0x20]
-DISASM-NEXT: 180001078: a94107e0 ldp x0, x1, [sp, #0x10]
-DISASM-NEXT: 18000107c: a8cd7bfd ldp x29, x30, [sp], #0xd0
-DISASM-NEXT: 180001080: d61f0200 br x16
+DISASM-NEXT: 180001054: 90000002 adrp x2, 0x180001000 <.text>
+DISASM-NEXT: 180001058: 91000042 add x2, x2, #0x0
+DISASM-NEXT: 18000105c: d63f0040 blr x2
+DISASM-NEXT: 180001060: aa0003f0 mov x16, x0
+DISASM-NEXT: 180001064: ad459fe6 ldp q6, q7, [sp, #0xb0]
+DISASM-NEXT: 180001068: ad4497e4 ldp q4, q5, [sp, #0x90]
+DISASM-NEXT: 18000106c: ad438fe2 ldp q2, q3, [sp, #0x70]
+DISASM-NEXT: 180001070: ad4287e0 ldp q0, q1, [sp, #0x50]
+DISASM-NEXT: 180001074: a9441fe6 ldp x6, x7, [sp, #0x40]
+DISASM-NEXT: 180001078: a94317e4 ldp x4, x5, [sp, #0x30]
+DISASM-NEXT: 18000107c: a9420fe2 ldp x2, x3, [sp, #0x20]
+DISASM-NEXT: 180001080: a94107e0 ldp x0, x1, [sp, #0x10]
+DISASM-NEXT: 180001084: a8cd7bfd ldp x29, x30, [sp], #0xd0
+DISASM-NEXT: 180001088: d61f0200 br x16
DISASM-NEXT: ...
DISASM-NEXT: 180002000: 52800040 mov w0, #0x2 // =2
DISASM-NEXT: 180002004: d65f03c0 ret
@@ -197,18 +199,20 @@ NATIVE-DISASM-NEXT: 180001044: ad059fe6 stp q6, q7, [sp, #0xb0]
NATIVE-DISASM-NEXT: 180001048: aa1103e1 mov x1, x17
NATIVE-DISASM-NEXT: 18000104c: d0000000 adrp x0, 0x180003000
NATIVE-DISASM-NEXT: 180001050: 910cc000 add x0, x0, #0x330
-NATIVE-DISASM-NEXT: 180001054: 97ffffeb bl 0x180001000 <.text>
-NATIVE-DISASM-NEXT: 180001058: aa0003f0 mov x16, x0
-NATIVE-DISASM-NEXT: 18000105c: ad459fe6 ldp q6, q7, [sp, #0xb0]
-NATIVE-DISASM-NEXT: 180001060: ad4497e4 ldp q4, q5, [sp, #0x90]
-NATIVE-DISASM-NEXT: 180001064: ad438fe2 ldp q2, q3, [sp, #0x70]
-NATIVE-DISASM-NEXT: 180001068: ad4287e0 ldp q0, q1, [sp, #0x50]
-NATIVE-DISASM-NEXT: 18000106c: a9441fe6 ldp x6, x7, [sp, #0x40]
-NATIVE-DISASM-NEXT: 180001070: a94317e4 ldp x4, x5, [sp, #0x30]
-NATIVE-DISASM-NEXT: 180001074: a9420fe2 ldp x2, x3, [sp, #0x20]
-NATIVE-DISASM-NEXT: 180001078: a94107e0 ldp x0, x1, [sp, #0x10]
-NATIVE-DISASM-NEXT: 18000107c: a8cd7bfd ldp x29, x30, [sp], #0xd0
-NATIVE-DISASM-NEXT: 180001080: d61f0200 br x16
+NATIVE-DISASM-NEXT: 180001054: 90000002 adrp x2, 0x180001000 <.text>
+NATIVE-DISASM-NEXT: 180001058: 91000042 add x2, x2, #0x0
+NATIVE-DISASM-NEXT: 18000105c: d63f0040 blr x2
+NATIVE-DISASM-NEXT: 180001060: aa0003f0 mov x16, x0
+NATIVE-DISASM-NEXT: 180001064: ad459fe6 ldp q6, q7, [sp, #0xb0]
+NATIVE-DISASM-NEXT: 180001068: ad4497e4 ldp q4, q5, [sp, #0x90]
+NATIVE-DISASM-NEXT: 18000106c: ad438fe2 ldp q2, q3, [sp, #0x70]
+NATIVE-DISASM-NEXT: 180001070: ad4287e0 ldp q0, q1, [sp, #0x50]
+NATIVE-DISASM-NEXT: 180001074: a9441fe6 ldp x6, x7, [sp, #0x40]
+NATIVE-DISASM-NEXT: 180001078: a94317e4 ldp x4, x5, [sp, #0x30]
+NATIVE-DISASM-NEXT: 18000107c: a9420fe2 ldp x2, x3, [sp, #0x20]
+NATIVE-DISASM-NEXT: 180001080: a94107e0 ldp x0, x1, [sp, #0x10]
+NATIVE-DISASM-NEXT: 180001084: a8cd7bfd ldp x29, x30, [sp], #0xd0
+NATIVE-DISASM-NEXT: 180001088: d61f0200 br x16
RUN: llvm-readobj --coff-load-config out-native.dll | FileCheck --check-prefix=NATIVE-LOADCFG %s
NATIVE-LOADCFG: AuxiliaryDelayloadIAT: 0x4000
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This LGTM, but I'd leave it open for comments from others as well, in particular if @cjacek has some arm64ec specific concern.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
lld would fail with "error: relocation out of range" if the thunk was laid out more than 128 MB away from __delayLoadHelper2.
This patch changes the call sequence to load the offset into a register and call through that, allowing for 32-bit offsets.
Fixes #161812