From 870c34f0cfe0678dedcec33770fe4304b60c2e0c Mon Sep 17 00:00:00 2001 From: Matthias Braun Date: Mon, 4 Apr 2016 18:56:13 +0000 Subject: [PATCH] ARM, AArch64, X86: Check preserved registers for tail calls. We can only perform a tail call to a callee that preserves all the registers that the caller needs to preserve. This situation happens with calling conventions like preserve_mostcc or cxx_fast_tls. It was explicitly handled for fast_tls and failing for preserve_most. This patch generalizes the check to any calling convention. Related to rdar://24207743 Differential Revision: http://reviews.llvm.org/D18680 llvm-svn: 265329 --- llvm/include/llvm/Target/TargetRegisterInfo.h | 4 ++++ llvm/lib/CodeGen/TargetRegisterInfo.cpp | 9 +++++++ .../Target/AArch64/AArch64ISelLowering.cpp | 14 +++++------ llvm/lib/Target/ARM/ARMISelLowering.cpp | 15 ++++++------ llvm/lib/Target/X86/X86ISelLowering.cpp | 14 +++++------ .../CodeGen/AArch64/tailcall-ccmismatch.ll | 24 +++++++++++++++++++ llvm/test/CodeGen/ARM/cxx-tlscc.ll | 22 +++++++++++++++++ 7 files changed, 80 insertions(+), 22 deletions(-) create mode 100644 llvm/test/CodeGen/AArch64/tailcall-ccmismatch.ll diff --git a/llvm/include/llvm/Target/TargetRegisterInfo.h b/llvm/include/llvm/Target/TargetRegisterInfo.h index 90d1dd5d24cda..ff5147e6f1bd3 100644 --- a/llvm/include/llvm/Target/TargetRegisterInfo.h +++ b/llvm/include/llvm/Target/TargetRegisterInfo.h @@ -460,6 +460,10 @@ class TargetRegisterInfo : public MCRegisterInfo { llvm_unreachable("target does not provide no preserved mask"); } + /// Return true if all bits that are set in mask \p mask0 are also set in + /// \p mask1. + bool regmaskSubsetEqual(const uint32_t *mask0, const uint32_t *mask1) const; + /// Return all the call-preserved register masks defined for this target. 
virtual ArrayRef getRegMasks() const = 0; virtual ArrayRef getRegMaskNames() const = 0; diff --git a/llvm/lib/CodeGen/TargetRegisterInfo.cpp b/llvm/lib/CodeGen/TargetRegisterInfo.cpp index 0a7042ac3db54..fc88629b5c8ff 100644 --- a/llvm/lib/CodeGen/TargetRegisterInfo.cpp +++ b/llvm/lib/CodeGen/TargetRegisterInfo.cpp @@ -388,6 +388,15 @@ bool TargetRegisterInfo::needsStackRealignment( return false; } +bool TargetRegisterInfo::regmaskSubsetEqual(const uint32_t *mask0, + const uint32_t *mask1) const { + unsigned N = (getNumRegs()+31) / 32; + for (unsigned I = 0; I < N; ++I) + if ((mask0[I] & mask1[I]) != mask0[I]) + return false; + return true; +} + #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void TargetRegisterInfo::dumpReg(unsigned Reg, unsigned SubRegIndex, diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 318f7c9022066..6d8f3eebff3eb 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -2816,13 +2816,6 @@ bool AArch64TargetLowering::isEligibleForTailCallOptimization( CallingConv::ID CallerCC = CallerF->getCallingConv(); bool CCMatch = CallerCC == CalleeCC; - // Disable tailcall for CXX_FAST_TLS when callee and caller have different - // calling conventions, given that CXX_FAST_TLS has a bigger CSR set. - if (!CCMatch && - (CallerCC == CallingConv::CXX_FAST_TLS || - CalleeCC == CallingConv::CXX_FAST_TLS)) - return false; - // Byval parameters hand the function a pointer directly into the stack area // we want to reuse during a tail call. Working around this *is* possible (see // X86) but less efficient and uglier in LowerCall. @@ -2882,6 +2875,13 @@ bool AArch64TargetLowering::isEligibleForTailCallOptimization( CCAssignFnForCall(CalleeCC, isVarArg), CCAssignFnForCall(CallerCC, isVarArg))) return false; + // The callee has to preserve all registers the caller needs to preserve. 
+ if (!CCMatch) { + const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo(); + if (!TRI->regmaskSubsetEqual(TRI->getCallPreservedMask(MF, CallerCC), + TRI->getCallPreservedMask(MF, CalleeCC))) + return false; + } // Nothing more to check if the callee is taking no arguments if (Outs.empty()) diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index 908f40db58722..f950adb9159a0 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -2101,14 +2101,6 @@ ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, MachineFunction &MF = DAG.getMachineFunction(); const Function *CallerF = MF.getFunction(); CallingConv::ID CallerCC = CallerF->getCallingConv(); - bool CCMatch = CallerCC == CalleeCC; - - // Disable tailcall for CXX_FAST_TLS when callee and caller have different - // calling conventions, given that CXX_FAST_TLS has a bigger CSR set. - if (!CCMatch && - (CallerCC == CallingConv::CXX_FAST_TLS || - CalleeCC == CallingConv::CXX_FAST_TLS)) - return false; assert(Subtarget->supportsTailCall()); @@ -2152,6 +2144,13 @@ ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, CCAssignFnForNode(CalleeCC, true, isVarArg), CCAssignFnForNode(CallerCC, true, isVarArg))) return false; + // The callee has to preserve all registers the caller needs to preserve. 
+ if (CalleeCC != CallerCC) { + const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo(); + if (!TRI->regmaskSubsetEqual(TRI->getCallPreservedMask(MF, CallerCC), + TRI->getCallPreservedMask(MF, CalleeCC))) + return false; + } // If Caller's vararg or byval argument has been split between registers and // stack, do not perform tail call, since part of the argument is in caller's diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 8c17a47674daa..9ba8f1e8e0110 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -3818,13 +3818,6 @@ bool X86TargetLowering::IsEligibleForTailCallOptimization( if (IsCalleeWin64 != IsCallerWin64) return false; - // Disable tailcall for CXX_FAST_TLS when callee and caller have different - // calling conventions, given that CXX_FAST_TLS has a bigger CSR set. - if (!CCMatch && - (CallerCC == CallingConv::CXX_FAST_TLS || - CalleeCC == CallingConv::CXX_FAST_TLS)) - return false; - if (DAG.getTarget().Options.GuaranteedTailCallOpt) { if (canGuaranteeTCO(CalleeCC) && CCMatch) return true; @@ -3888,6 +3881,13 @@ bool X86TargetLowering::IsEligibleForTailCallOptimization( if (!CCState::resultsCompatible(CalleeCC, CallerCC, MF, C, Ins, RetCC_X86, RetCC_X86)) return false; + // The callee has to preserve all registers the caller needs to preserve. 
+ if (!CCMatch) { + const X86RegisterInfo *TRI = Subtarget.getRegisterInfo(); + if (!TRI->regmaskSubsetEqual(TRI->getCallPreservedMask(MF, CallerCC), + TRI->getCallPreservedMask(MF, CalleeCC))) + return false; + } unsigned StackArgsSize = 0; diff --git a/llvm/test/CodeGen/AArch64/tailcall-ccmismatch.ll b/llvm/test/CodeGen/AArch64/tailcall-ccmismatch.ll new file mode 100644 index 0000000000000..ab96e609dd468 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/tailcall-ccmismatch.ll @@ -0,0 +1,24 @@ +; RUN: llc -o - %s | FileCheck %s +target triple="aarch64--" + +declare void @somefunc() +define preserve_mostcc void @test_ccmismatch_notail() { +; Ensure that no tail call is used here, as the called function somefunc does +; not preserve enough registers for preserve_mostcc. +; CHECK-LABEL: test_ccmismatch_notail: +; CHECK-NOT: b somefunc +; CHECK: bl somefunc + tail call void @somefunc() + ret void +} + +declare preserve_mostcc void @some_preserve_most_func() +define void @test_ccmismatch_tail() { +; We can perform a tail call here, because some_preserve_most_func preserves +; all registers necessary for test_ccmismatch_tail. +; CHECK-LABEL: test_ccmismatch_tail: +; CHECK-NOT: bl some_preserve_most_func +; CHECK: b some_preserve_most_func + tail call preserve_mostcc void @some_preserve_most_func() + ret void +} diff --git a/llvm/test/CodeGen/ARM/cxx-tlscc.ll b/llvm/test/CodeGen/ARM/cxx-tlscc.ll index 48cce4f01be91..d49c61ab09368 100644 --- a/llvm/test/CodeGen/ARM/cxx-tlscc.ll +++ b/llvm/test/CodeGen/ARM/cxx-tlscc.ll @@ -126,5 +126,27 @@ entry: ret void } +declare void @somefunc() +define cxx_fast_tlscc void @test_ccmismatch_notail() { +; A tail call is not possible here because somefunc does not preserve enough +; registers. 
+; CHECK-LABEL: test_ccmismatch_notail: +; CHECK-NOT: b _somefunc +; CHECK: bl _somefunc + tail call void @somefunc() + ret void +} + +declare cxx_fast_tlscc void @some_fast_tls_func() +define void @test_ccmismatch_tail() { +; We can perform a tail call here because some_fast_tls_func preserves all +; necessary registers (and more). +; CHECK-LABEL: test_ccmismatch_tail: +; CHECK-NOT: bl _some_fast_tls_func +; CHECK: b _some_fast_tls_func + tail call cxx_fast_tlscc void @some_fast_tls_func() + ret void +} + attributes #0 = { nounwind "no-frame-pointer-elim"="true" } attributes #1 = { nounwind }