Skip to content

Commit

Permalink
[AArch64] Implement -fno-plt for SelectionDAG/GlobalISel
Browse files Browse the repository at this point in the history
Clang sets the nonlazybind attribute for certain ObjC features. The
AArch64 SelectionDAG implementation for non-intrinsic calls
(commit 46e36f0) is gated behind a cl option.

GCC implements -fno-plt for a few ELF targets. In Clang, -fno-plt also
sets the nonlazybind attribute. For SelectionDAG, make the cl option not
affect ELF, so that non-intrinsic calls to a dso_preemptable function go
through the GOT. Adjust AArch64TargetLowering::LowerCall to handle
intrinsic calls.

For FastISel, change `fastLowerCall` to bail out when a call is due to
-fno-plt.

For GlobalISel, handle non-intrinsic calls in CallLowering::lowerCall
and intrinsic calls in AArch64CallLowering::lowerCall (where the
target-independent CallLowering::lowerCall is not called).
The GlobalISel test in `call-rv-marker.ll` is therefore updated.

Note: the current -fno-plt -fpic implementation does not use the GOT for
a preemptable function.

Link: #78275

Pull Request: #78890
  • Loading branch information
MaskRay committed Mar 5, 2024
1 parent bf631c6 commit 201572e
Show file tree
Hide file tree
Showing 9 changed files with 116 additions and 68 deletions.
12 changes: 9 additions & 3 deletions llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -144,9 +144,15 @@ bool CallLowering::lowerCall(MachineIRBuilder &MIRBuilder, const CallBase &CB,
// Try looking through a bitcast from one function type to another.
// Commonly happens with calls to objc_msgSend().
const Value *CalleeV = CB.getCalledOperand()->stripPointerCasts();
if (const Function *F = dyn_cast<Function>(CalleeV))
Info.Callee = MachineOperand::CreateGA(F, 0);
else if (isa<GlobalIFunc>(CalleeV) || isa<GlobalAlias>(CalleeV)) {
if (const Function *F = dyn_cast<Function>(CalleeV)) {
if (F->hasFnAttribute(Attribute::NonLazyBind)) {
LLT Ty = getLLTForType(*F->getType(), DL);
Register Reg = MIRBuilder.buildGlobalValue(Ty, F).getReg(0);
Info.Callee = MachineOperand::CreateReg(Reg, false);
} else {
Info.Callee = MachineOperand::CreateGA(F, 0);
}
} else if (isa<GlobalIFunc>(CalleeV) || isa<GlobalAlias>(CalleeV)) {
// IR IFuncs and Aliases can't be forward declared (only defined), so the
// callee must be in the same TU and therefore we can direct-call it without
// worrying about it being out of range.
Expand Down
5 changes: 5 additions & 0 deletions llvm/lib/Target/AArch64/AArch64FastISel.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3172,6 +3172,11 @@ bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) {
if (CM == CodeModel::Large && !Subtarget->isTargetMachO())
return false;

// ELF -fno-plt compiled intrinsic calls do not have the nonlazybind
// attribute. Check "RtLibUseGOT" instead.
if (MF->getFunction().getParent()->getRtLibUseGOT())
return false;

// Let SDISel handle vararg functions.
if (IsVarArg)
return false;
Expand Down
9 changes: 5 additions & 4 deletions llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8211,13 +8211,14 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, OpFlags);
}
} else if (auto *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
if (getTargetMachine().getCodeModel() == CodeModel::Large &&
Subtarget->isTargetMachO()) {
const char *Sym = S->getSymbol();
bool UseGot = (getTargetMachine().getCodeModel() == CodeModel::Large &&
Subtarget->isTargetMachO()) ||
MF.getFunction().getParent()->getRtLibUseGOT();
const char *Sym = S->getSymbol();
if (UseGot) {
Callee = DAG.getTargetExternalSymbol(Sym, PtrVT, AArch64II::MO_GOT);
Callee = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, Callee);
} else {
const char *Sym = S->getSymbol();
Callee = DAG.getTargetExternalSymbol(Sym, PtrVT, 0);
}
}
Expand Down
11 changes: 6 additions & 5 deletions llvm/lib/Target/AArch64/AArch64Subtarget.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,10 +43,10 @@ static cl::opt<bool>
UseAddressTopByteIgnored("aarch64-use-tbi", cl::desc("Assume that top byte of "
"an address is ignored"), cl::init(false), cl::Hidden);

static cl::opt<bool>
UseNonLazyBind("aarch64-enable-nonlazybind",
cl::desc("Call nonlazybind functions via direct GOT load"),
cl::init(false), cl::Hidden);
static cl::opt<bool> MachOUseNonLazyBind(
"aarch64-macho-enable-nonlazybind",
cl::desc("Call nonlazybind functions via direct GOT load for Mach-O"),
cl::Hidden);

static cl::opt<bool> UseAA("aarch64-use-aa", cl::init(true),
cl::desc("Enable the use of AA during codegen."));
Expand Down Expand Up @@ -433,7 +433,8 @@ unsigned AArch64Subtarget::classifyGlobalFunctionReference(

// NonLazyBind goes via GOT unless we know it's available locally.
auto *F = dyn_cast<Function>(GV);
if (UseNonLazyBind && F && F->hasFnAttribute(Attribute::NonLazyBind) &&
if ((!isTargetMachO() || MachOUseNonLazyBind) && F &&
F->hasFnAttribute(Attribute::NonLazyBind) &&
!TM.shouldAssumeDSOLocal(*GV->getParent(), GV))
return AArch64II::MO_GOT;

Expand Down
11 changes: 10 additions & 1 deletion llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1301,8 +1301,17 @@ bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
!Subtarget.noBTIAtReturnTwice() &&
MF.getInfo<AArch64FunctionInfo>()->branchTargetEnforcement())
Opc = AArch64::BLR_BTI;
else
else {
// For an intrinsic call (e.g. memset), use GOT if "RtLibUseGOT" (-fno-plt)
// is set.
if (Info.Callee.isSymbol() && F.getParent()->getRtLibUseGOT()) {
auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_GLOBAL_VALUE);
DstOp(getLLTForType(*F.getType(), DL)).addDefToMIB(MRI, MIB);
MIB.addExternalSymbol(Info.Callee.getSymbolName(), AArch64II::MO_GOT);
Info.Callee = MachineOperand::CreateReg(MIB.getReg(0), false);
}
Opc = getCallOpcode(MF, Info.Callee.isReg(), false);
}

auto MIB = MIRBuilder.buildInstrNoInsert(Opc);
unsigned CalleeOpNo = 0;
Expand Down
16 changes: 12 additions & 4 deletions llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2804,11 +2804,19 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
}

case TargetOpcode::G_GLOBAL_VALUE: {
auto GV = I.getOperand(1).getGlobal();
if (GV->isThreadLocal())
return selectTLSGlobalValue(I, MRI);
const GlobalValue *GV = nullptr;
unsigned OpFlags;
if (I.getOperand(1).isSymbol()) {
OpFlags = I.getOperand(1).getTargetFlags();
// Currently only used by "RtLibUseGOT".
assert(OpFlags == AArch64II::MO_GOT);
} else {
GV = I.getOperand(1).getGlobal();
if (GV->isThreadLocal())
return selectTLSGlobalValue(I, MRI);
OpFlags = STI.ClassifyGlobalReference(GV, TM);
}

unsigned OpFlags = STI.ClassifyGlobalReference(GV, TM);
if (OpFlags & AArch64II::MO_GOT) {
I.setDesc(TII.get(AArch64::LOADgot));
I.getOperand(1).setTargetFlags(OpFlags);
Expand Down
3 changes: 3 additions & 0 deletions llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1345,6 +1345,9 @@ bool AArch64LegalizerInfo::legalizeSmallCMGlobalValue(
// By splitting this here, we can optimize accesses in the small code model by
// folding in the G_ADD_LOW into the load/store offset.
auto &GlobalOp = MI.getOperand(1);
// Don't modify an intrinsic call.
if (GlobalOp.isSymbol())
return true;
const auto* GV = GlobalOp.getGlobal();
if (GV->isThreadLocal())
return true; // Don't want to modify TLS vars.
Expand Down
36 changes: 28 additions & 8 deletions llvm/test/CodeGen/AArch64/call-rv-marker.ll
Original file line number Diff line number Diff line change
Expand Up @@ -201,17 +201,27 @@ define dso_local void @rv_marker_3() personality ptr @__gxx_personality_v0 {
; GISEL-NEXT: bl _objc_object
; GISEL-NEXT: Ltmp1:
; GISEL-NEXT: ; %bb.1: ; %invoke.cont
; GISEL-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
; GISEL-NEXT: Lloh0:
; GISEL-NEXT: adrp x1, _objc_release@GOTPAGE
; GISEL-NEXT: mov x0, x19
; GISEL-NEXT: Lloh1:
; GISEL-NEXT: ldr x1, [x1, _objc_release@GOTPAGEOFF]
; GISEL-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
; GISEL-NEXT: ldp x20, x19, [sp], #32 ; 16-byte Folded Reload
; GISEL-NEXT: b _objc_release
; GISEL-NEXT: br x1
; GISEL-NEXT: LBB3_2: ; %lpad
; GISEL-NEXT: Ltmp2:
; GISEL-NEXT: Lloh2:
; GISEL-NEXT: adrp x8, _objc_release@GOTPAGE
; GISEL-NEXT: mov x20, x0
; GISEL-NEXT: mov x0, x19
; GISEL-NEXT: bl _objc_release
; GISEL-NEXT: Lloh3:
; GISEL-NEXT: ldr x8, [x8, _objc_release@GOTPAGEOFF]
; GISEL-NEXT: blr x8
; GISEL-NEXT: mov x0, x20
; GISEL-NEXT: bl __Unwind_Resume
; GISEL-NEXT: .loh AdrpLdrGot Lloh0, Lloh1
; GISEL-NEXT: .loh AdrpLdrGot Lloh2, Lloh3
; GISEL-NEXT: Lfunc_end0:
; GISEL-NEXT: .cfi_endproc
; GISEL-NEXT: .section __TEXT,__gcc_except_tab
Expand Down Expand Up @@ -352,8 +362,12 @@ define dso_local void @rv_marker_4() personality ptr @__gxx_personality_v0 {
; GISEL-NEXT: bl _objc_object
; GISEL-NEXT: Ltmp7:
; GISEL-NEXT: ; %bb.2: ; %invoke.cont2
; GISEL-NEXT: Lloh4:
; GISEL-NEXT: adrp x8, _objc_release@GOTPAGE
; GISEL-NEXT: mov x0, x19
; GISEL-NEXT: bl _objc_release
; GISEL-NEXT: Lloh5:
; GISEL-NEXT: ldr x8, [x8, _objc_release@GOTPAGEOFF]
; GISEL-NEXT: blr x8
; GISEL-NEXT: add x0, sp, #15
; GISEL-NEXT: bl __ZN1SD1Ev
; GISEL-NEXT: ldp x29, x30, [sp, #32] ; 16-byte Folded Reload
Expand All @@ -362,9 +376,13 @@ define dso_local void @rv_marker_4() personality ptr @__gxx_personality_v0 {
; GISEL-NEXT: ret
; GISEL-NEXT: LBB4_3: ; %lpad1
; GISEL-NEXT: Ltmp8:
; GISEL-NEXT: Lloh6:
; GISEL-NEXT: adrp x8, _objc_release@GOTPAGE
; GISEL-NEXT: mov x20, x0
; GISEL-NEXT: mov x0, x19
; GISEL-NEXT: bl _objc_release
; GISEL-NEXT: Lloh7:
; GISEL-NEXT: ldr x8, [x8, _objc_release@GOTPAGEOFF]
; GISEL-NEXT: blr x8
; GISEL-NEXT: b LBB4_5
; GISEL-NEXT: LBB4_4: ; %lpad
; GISEL-NEXT: Ltmp5:
Expand All @@ -374,6 +392,8 @@ define dso_local void @rv_marker_4() personality ptr @__gxx_personality_v0 {
; GISEL-NEXT: bl __ZN1SD1Ev
; GISEL-NEXT: mov x0, x20
; GISEL-NEXT: bl __Unwind_Resume
; GISEL-NEXT: .loh AdrpLdrGot Lloh4, Lloh5
; GISEL-NEXT: .loh AdrpLdrGot Lloh6, Lloh7
; GISEL-NEXT: Lfunc_end1:
; GISEL-NEXT: .cfi_endproc
; GISEL-NEXT: .section __TEXT,__gcc_except_tab
Expand Down Expand Up @@ -467,9 +487,9 @@ define dso_local ptr @rv_marker_5_indirect_call() {
; GISEL-NEXT: .cfi_offset w29, -16
; GISEL-NEXT: .cfi_offset w19, -24
; GISEL-NEXT: .cfi_offset w20, -32
; GISEL-NEXT: Lloh0:
; GISEL-NEXT: Lloh8:
; GISEL-NEXT: adrp x8, _fptr@PAGE
; GISEL-NEXT: Lloh1:
; GISEL-NEXT: Lloh9:
; GISEL-NEXT: ldr x8, [x8, _fptr@PAGEOFF]
; GISEL-NEXT: blr x8
; GISEL-NEXT: mov x29, x29
Expand All @@ -480,7 +500,7 @@ define dso_local ptr @rv_marker_5_indirect_call() {
; GISEL-NEXT: mov x0, x19
; GISEL-NEXT: ldp x20, x19, [sp], #32 ; 16-byte Folded Reload
; GISEL-NEXT: ret
; GISEL-NEXT: .loh AdrpLdr Lloh0, Lloh1
; GISEL-NEXT: .loh AdrpLdr Lloh8, Lloh9
entry:
%0 = load ptr, ptr @fptr, align 8
%call = call ptr %0() [ "clang.arc.attachedcall"(ptr @objc_retainAutoreleasedReturnValue) ]
Expand Down
81 changes: 38 additions & 43 deletions llvm/test/CodeGen/AArch64/nonlazybind.ll
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc -mtriple=aarch64-apple-ios %s -o - -aarch64-enable-nonlazybind | FileCheck %s --check-prefix=MACHO
; RUN: llc -mtriple=aarch64-apple-ios %s -o - -aarch64-macho-enable-nonlazybind | FileCheck %s --check-prefix=MACHO
; RUN: llc -mtriple=aarch64-apple-ios %s -o - | FileCheck %s --check-prefix=MACHO-NORMAL
; RUN: llc -mtriple=aarch64 -fast-isel %s -o - | FileCheck %s --check-prefixes=ELF,ELF-FI
; RUN: llc -mtriple=aarch64 -global-isel %s -o - | FileCheck %s --check-prefixes=ELF,ELF-GI
Expand All @@ -19,13 +19,18 @@ define void @test_laziness(ptr %a) nounwind {
; MACHO-NEXT: Lloh1:
; MACHO-NEXT: ldr x8, [x8, _external@GOTPAGEOFF]
; MACHO-NEXT: blr x8
; MACHO-NEXT: Lloh2:
; MACHO-NEXT: adrp x8, _memset@GOTPAGE
; MACHO-NEXT: mov x0, x19
; MACHO-NEXT: mov w1, #1 ; =0x1
; MACHO-NEXT: Lloh3:
; MACHO-NEXT: ldr x8, [x8, _memset@GOTPAGEOFF]
; MACHO-NEXT: mov w2, #1000 ; =0x3e8
; MACHO-NEXT: bl _memset
; MACHO-NEXT: blr x8
; MACHO-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
; MACHO-NEXT: ldp x20, x19, [sp], #32 ; 16-byte Folded Reload
; MACHO-NEXT: ret
; MACHO-NEXT: .loh AdrpLdrGot Lloh2, Lloh3
; MACHO-NEXT: .loh AdrpLdrGot Lloh0, Lloh1
;
; MACHO-NORMAL-LABEL: test_laziness:
Expand All @@ -34,50 +39,34 @@ define void @test_laziness(ptr %a) nounwind {
; MACHO-NORMAL-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill
; MACHO-NORMAL-NEXT: mov x19, x0
; MACHO-NORMAL-NEXT: bl _external
; MACHO-NORMAL-NEXT: Lloh0:
; MACHO-NORMAL-NEXT: adrp x8, _memset@GOTPAGE
; MACHO-NORMAL-NEXT: mov x0, x19
; MACHO-NORMAL-NEXT: mov w1, #1 ; =0x1
; MACHO-NORMAL-NEXT: Lloh1:
; MACHO-NORMAL-NEXT: ldr x8, [x8, _memset@GOTPAGEOFF]
; MACHO-NORMAL-NEXT: mov w2, #1000 ; =0x3e8
; MACHO-NORMAL-NEXT: bl _memset
; MACHO-NORMAL-NEXT: blr x8
; MACHO-NORMAL-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
; MACHO-NORMAL-NEXT: ldp x20, x19, [sp], #32 ; 16-byte Folded Reload
; MACHO-NORMAL-NEXT: ret
; MACHO-NORMAL-NEXT: .loh AdrpLdrGot Lloh0, Lloh1
;
; ELF-FI-LABEL: test_laziness:
; ELF-FI: // %bb.0:
; ELF-FI-NEXT: stp x30, x19, [sp, #-16]! // 16-byte Folded Spill
; ELF-FI-NEXT: mov x19, x0
; ELF-FI-NEXT: bl external
; ELF-FI-NEXT: mov w8, #1 // =0x1
; ELF-FI-NEXT: mov x0, x19
; ELF-FI-NEXT: mov x2, #1000 // =0x3e8
; ELF-FI-NEXT: uxtb w1, w8
; ELF-FI-NEXT: bl memset
; ELF-FI-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload
; ELF-FI-NEXT: ret
;
; ELF-GI-LABEL: test_laziness:
; ELF-GI: // %bb.0:
; ELF-GI-NEXT: stp x30, x19, [sp, #-16]! // 16-byte Folded Spill
; ELF-GI-NEXT: mov x19, x0
; ELF-GI-NEXT: bl external
; ELF-GI-NEXT: mov x0, x19
; ELF-GI-NEXT: mov w1, #1 // =0x1
; ELF-GI-NEXT: mov w2, #1000 // =0x3e8
; ELF-GI-NEXT: bl memset
; ELF-GI-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload
; ELF-GI-NEXT: ret
;
; ELF-SDAG-LABEL: test_laziness:
; ELF-SDAG: // %bb.0:
; ELF-SDAG-NEXT: stp x30, x19, [sp, #-16]! // 16-byte Folded Spill
; ELF-SDAG-NEXT: mov x19, x0
; ELF-SDAG-NEXT: bl external
; ELF-SDAG-NEXT: mov x0, x19
; ELF-SDAG-NEXT: mov w1, #1 // =0x1
; ELF-SDAG-NEXT: mov w2, #1000 // =0x3e8
; ELF-SDAG-NEXT: bl memset
; ELF-SDAG-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload
; ELF-SDAG-NEXT: ret
; ELF-LABEL: test_laziness:
; ELF: // %bb.0:
; ELF-NEXT: stp x30, x19, [sp, #-16]! // 16-byte Folded Spill
; ELF-NEXT: adrp x8, :got:external
; ELF-NEXT: mov x19, x0
; ELF-NEXT: ldr x8, [x8, :got_lo12:external]
; ELF-NEXT: blr x8
; ELF-NEXT: adrp x8, :got:memset
; ELF-NEXT: mov x0, x19
; ELF-NEXT: mov w1, #1 // =0x1
; ELF-NEXT: ldr x8, [x8, :got_lo12:memset]
; ELF-NEXT: mov w2, #1000 // =0x3e8
; ELF-NEXT: blr x8
; ELF-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload
; ELF-NEXT: ret
call void @external()
call void @llvm.memset.p0.i64(ptr align 1 %a, i8 1, i64 1000, i1 false)
ret void
Expand All @@ -86,20 +75,22 @@ define void @test_laziness(ptr %a) nounwind {
define void @test_laziness_tail() nounwind {
; MACHO-LABEL: test_laziness_tail:
; MACHO: ; %bb.0:
; MACHO-NEXT: Lloh2:
; MACHO-NEXT: Lloh4:
; MACHO-NEXT: adrp x0, _external@GOTPAGE
; MACHO-NEXT: Lloh3:
; MACHO-NEXT: Lloh5:
; MACHO-NEXT: ldr x0, [x0, _external@GOTPAGEOFF]
; MACHO-NEXT: br x0
; MACHO-NEXT: .loh AdrpLdrGot Lloh2, Lloh3
; MACHO-NEXT: .loh AdrpLdrGot Lloh4, Lloh5
;
; MACHO-NORMAL-LABEL: test_laziness_tail:
; MACHO-NORMAL: ; %bb.0:
; MACHO-NORMAL-NEXT: b _external
;
; ELF-LABEL: test_laziness_tail:
; ELF: // %bb.0:
; ELF-NEXT: b external
; ELF-NEXT: adrp x0, :got:external
; ELF-NEXT: ldr x0, [x0, :got_lo12:external]
; ELF-NEXT: br x0
tail call void @external()
ret void
}
Expand All @@ -108,3 +99,7 @@ declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg)

!llvm.module.flags = !{!0}
!0 = !{i32 7, !"RtLibUseGOT", i32 1}
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; ELF-FI: {{.*}}
; ELF-GI: {{.*}}
; ELF-SDAG: {{.*}}

0 comments on commit 201572e

Please sign in to comment.