Skip to content

Commit

Permalink
ARM, AArch64, X86: Check preserved registers for tail calls.
Browse files Browse the repository at this point in the history
We can only perform a tail call to a callee that preserves all the
registers that the caller needs to preserve.

This situation happens with calling conventions like preserve_mostcc or
cxx_fast_tls. It was explicitly handled for fast_tls and failing for
preserve_most. This patch generalizes the check to any calling
convention.

Related to rdar://24207743

Differential Revision: http://reviews.llvm.org/D18680

llvm-svn: 265329
  • Loading branch information
MatzeB committed Apr 4, 2016
1 parent eb3219a commit 870c34f
Show file tree
Hide file tree
Showing 7 changed files with 80 additions and 22 deletions.
4 changes: 4 additions & 0 deletions llvm/include/llvm/Target/TargetRegisterInfo.h
Expand Up @@ -460,6 +460,10 @@ class TargetRegisterInfo : public MCRegisterInfo {
llvm_unreachable("target does not provide no preserved mask");
}

/// Return true if all bits that are set in mask \p mask0 are also set in
/// \p mask1.
bool regmaskSubsetEqual(const uint32_t *mask0, const uint32_t *mask1) const;

/// Return all the call-preserved register masks defined for this target.
virtual ArrayRef<const uint32_t *> getRegMasks() const = 0;
virtual ArrayRef<const char *> getRegMaskNames() const = 0;
Expand Down
9 changes: 9 additions & 0 deletions llvm/lib/CodeGen/TargetRegisterInfo.cpp
Expand Up @@ -388,6 +388,15 @@ bool TargetRegisterInfo::needsStackRealignment(
return false;
}

/// Returns true when every bit set in \p mask0 is also set in \p mask1,
/// i.e. \p mask0 is a subset of (or equal to) \p mask1. Returns false as soon
/// as a word of \p mask0 contains a bit that \p mask1 lacks.
///
/// Both pointers are indexed over (getNumRegs() + 31) / 32 32-bit words, so
/// each array must hold at least that many entries.
bool TargetRegisterInfo::regmaskSubsetEqual(const uint32_t *mask0,
                                            const uint32_t *mask1) const {
  // One mask word covers 32 registers; round up for a partial last word.
  const unsigned NumWords = (getNumRegs() + 31) / 32;
  for (unsigned Word = 0; Word != NumWords; ++Word) {
    // Bits present in mask0 but missing from mask1 break the subset relation.
    const uint32_t OnlyInMask0 = mask0[Word] & ~mask1[Word];
    if (OnlyInMask0 != 0)
      return false;
  }
  return true;
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void
TargetRegisterInfo::dumpReg(unsigned Reg, unsigned SubRegIndex,
Expand Down
14 changes: 7 additions & 7 deletions llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
Expand Up @@ -2816,13 +2816,6 @@ bool AArch64TargetLowering::isEligibleForTailCallOptimization(
CallingConv::ID CallerCC = CallerF->getCallingConv();
bool CCMatch = CallerCC == CalleeCC;

// Disable tailcall for CXX_FAST_TLS when callee and caller have different
// calling conventions, given that CXX_FAST_TLS has a bigger CSR set.
if (!CCMatch &&
(CallerCC == CallingConv::CXX_FAST_TLS ||
CalleeCC == CallingConv::CXX_FAST_TLS))
return false;

// Byval parameters hand the function a pointer directly into the stack area
// we want to reuse during a tail call. Working around this *is* possible (see
// X86) but less efficient and uglier in LowerCall.
Expand Down Expand Up @@ -2882,6 +2875,13 @@ bool AArch64TargetLowering::isEligibleForTailCallOptimization(
CCAssignFnForCall(CalleeCC, isVarArg),
CCAssignFnForCall(CallerCC, isVarArg)))
return false;
// The callee has to preserve all registers the caller needs to preserve.
if (!CCMatch) {
const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
if (!TRI->regmaskSubsetEqual(TRI->getCallPreservedMask(MF, CallerCC),
TRI->getCallPreservedMask(MF, CalleeCC)))
return false;
}

// Nothing more to check if the callee is taking no arguments
if (Outs.empty())
Expand Down
15 changes: 7 additions & 8 deletions llvm/lib/Target/ARM/ARMISelLowering.cpp
Expand Up @@ -2101,14 +2101,6 @@ ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
MachineFunction &MF = DAG.getMachineFunction();
const Function *CallerF = MF.getFunction();
CallingConv::ID CallerCC = CallerF->getCallingConv();
bool CCMatch = CallerCC == CalleeCC;

// Disable tailcall for CXX_FAST_TLS when callee and caller have different
// calling conventions, given that CXX_FAST_TLS has a bigger CSR set.
if (!CCMatch &&
(CallerCC == CallingConv::CXX_FAST_TLS ||
CalleeCC == CallingConv::CXX_FAST_TLS))
return false;

assert(Subtarget->supportsTailCall());

Expand Down Expand Up @@ -2152,6 +2144,13 @@ ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
CCAssignFnForNode(CalleeCC, true, isVarArg),
CCAssignFnForNode(CallerCC, true, isVarArg)))
return false;
// The callee has to preserve all registers the caller needs to preserve.
if (CalleeCC != CallerCC) {
const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();
if (!TRI->regmaskSubsetEqual(TRI->getCallPreservedMask(MF, CallerCC),
TRI->getCallPreservedMask(MF, CalleeCC)))
return false;
}

// If Caller's vararg or byval argument has been split between registers and
// stack, do not perform tail call, since part of the argument is in caller's
Expand Down
14 changes: 7 additions & 7 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
Expand Up @@ -3818,13 +3818,6 @@ bool X86TargetLowering::IsEligibleForTailCallOptimization(
if (IsCalleeWin64 != IsCallerWin64)
return false;

// Disable tailcall for CXX_FAST_TLS when callee and caller have different
// calling conventions, given that CXX_FAST_TLS has a bigger CSR set.
if (!CCMatch &&
(CallerCC == CallingConv::CXX_FAST_TLS ||
CalleeCC == CallingConv::CXX_FAST_TLS))
return false;

if (DAG.getTarget().Options.GuaranteedTailCallOpt) {
if (canGuaranteeTCO(CalleeCC) && CCMatch)
return true;
Expand Down Expand Up @@ -3888,6 +3881,13 @@ bool X86TargetLowering::IsEligibleForTailCallOptimization(
if (!CCState::resultsCompatible(CalleeCC, CallerCC, MF, C, Ins,
RetCC_X86, RetCC_X86))
return false;
// The callee has to preserve all registers the caller needs to preserve.
if (!CCMatch) {
const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
if (!TRI->regmaskSubsetEqual(TRI->getCallPreservedMask(MF, CallerCC),
TRI->getCallPreservedMask(MF, CalleeCC)))
return false;
}

unsigned StackArgsSize = 0;

Expand Down
24 changes: 24 additions & 0 deletions llvm/test/CodeGen/AArch64/tailcall-ccmismatch.ll
@@ -0,0 +1,24 @@
; RUN: llc -o - %s | FileCheck %s
target triple="aarch64--"

declare void @somefunc()
; Negative case: the caller is preserve_mostcc while the callee @somefunc is
; declared without an explicit calling convention, so the conventions differ.
define preserve_mostcc void @test_ccmismatch_notail() {
; Ensure that no tail call is used here, as the called function somefunc does
; not preserve enough registers for preserve_mostcc.
; The directives below require a `bl` (call) to somefunc and reject a plain
; `b` (the branch a tail call would produce) before it.
; CHECK-LABEL: test_ccmismatch_notail:
; CHECK-NOT: b somefunc
; CHECK: bl somefunc
tail call void @somefunc()
ret void
}

declare preserve_mostcc void @some_preserve_most_func()
; Positive case: the caller has no explicit calling convention while the callee
; is preserve_mostcc, so the conventions differ in the opposite direction.
define void @test_ccmismatch_tail() {
; We can perform a tail call here, because some_preserve_most_func preserves
; all registers necessary for test_ccmismatch_tail.
; The directives below require a plain `b` (tail-call branch) and reject a
; `bl` (ordinary call) before it.
; CHECK-LABEL: test_ccmismatch_tail:
; CHECK-NOT: bl some_preserve_most_func
; CHECK: b some_preserve_most_func
tail call preserve_mostcc void @some_preserve_most_func()
ret void
}
22 changes: 22 additions & 0 deletions llvm/test/CodeGen/ARM/cxx-tlscc.ll
Expand Up @@ -126,5 +126,27 @@ entry:
ret void
}

declare void @somefunc()
; Negative case: the caller is cxx_fast_tlscc while the callee @somefunc is
; declared without an explicit calling convention, so the conventions differ.
define cxx_fast_tlscc void @test_ccmismatch_notail() {
; A tail call is not possible here because somefunc does not preserve enough
; registers.
; The directives below require a `bl` (call) to _somefunc and reject a plain
; `b` (the branch a tail call would produce) before it.
; CHECK-LABEL: test_ccmismatch_notail:
; CHECK-NOT: b _somefunc
; CHECK: bl _somefunc
tail call void @somefunc()
ret void
}

declare cxx_fast_tlscc void @some_fast_tls_func()
; Positive case: the caller has no explicit calling convention while the callee
; is cxx_fast_tlscc, so the conventions differ in the opposite direction.
define void @test_ccmismatch_tail() {
; We can perform a tail call here because some_fast_tls_func preserves all
; necessary registers (and more).
; The directives below require a plain `b` (tail-call branch) and reject a
; `bl` (ordinary call) before it.
; CHECK-LABEL: test_ccmismatch_tail:
; CHECK-NOT: bl _some_fast_tls_func
; CHECK: b _some_fast_tls_func
tail call cxx_fast_tlscc void @some_fast_tls_func()
ret void
}

attributes #0 = { nounwind "no-frame-pointer-elim"="true" }
attributes #1 = { nounwind }

0 comments on commit 870c34f

Please sign in to comment.