diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index 7b7dc1b46dd80..1e10f91ee3d7f 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -946,6 +946,10 @@ class TargetTransformInfo {
   /// should use coldcc calling convention.
   LLVM_ABI bool useColdCCForColdCall(Function &F) const;
 
+  /// Return true if the internal function \p F should be promoted to the
+  /// fastcc calling convention.
+  LLVM_ABI bool useFastCCForInternalCall(Function &F) const;
+
   LLVM_ABI bool isTargetIntrinsicTriviallyScalarizable(Intrinsic::ID ID) const;
 
   /// Identifies if the vector form of the intrinsic has a scalar operand.
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index 4cd607c0d0c8d..064e28c504af4 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -431,6 +431,8 @@ class TargetTransformInfoImplBase {
 
   virtual bool useColdCCForColdCall(Function &F) const { return false; }
 
+  virtual bool useFastCCForInternalCall(Function &F) const { return true; }
+
   virtual bool isTargetIntrinsicTriviallyScalarizable(Intrinsic::ID ID) const {
     return false;
   }
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
index c47a1c1b23a37..c63f57d4ad1c6 100644
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -609,6 +609,10 @@ bool TargetTransformInfo::useColdCCForColdCall(Function &F) const {
   return TTIImpl->useColdCCForColdCall(F);
 }
 
+bool TargetTransformInfo::useFastCCForInternalCall(Function &F) const {
+  return TTIImpl->useFastCCForInternalCall(F);
+}
+
 bool TargetTransformInfo::isTargetIntrinsicTriviallyScalarizable(
     Intrinsic::ID ID) const {
   return TTIImpl->isTargetIntrinsicTriviallyScalarizable(ID);
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index 3d8d0a236a3c1..ad0f077d25377 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -7223,3 +7223,21 @@ bool X86TTIImpl::isProfitableToSinkOperands(Instruction *I,
 
   return false;
 }
+
+// Allow fastcc promotion of \p F only if every direct caller is compiled with
+// the same EGPR setting as the subtarget this TTI was created for.
+bool X86TTIImpl::useFastCCForInternalCall(Function &F) const {
+  bool HasEGPR = ST->hasEGPR();
+  const TargetMachine &TM = getTLI()->getTargetMachine();
+
+  for (User *U : F.users()) {
+    CallBase *CB = dyn_cast<CallBase>(U);
+    if (!CB || CB->getCalledOperand() != &F)
+      continue;
+    Function *CallerFunc = CB->getFunction();
+    if (TM.getSubtarget<X86Subtarget>(*CallerFunc).hasEGPR() != HasEGPR)
+      return false;
+  }
+
+  return true;
+}
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.h b/llvm/lib/Target/X86/X86TargetTransformInfo.h
index 133b3668a46c8..32d5e301a59cc 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.h
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.h
@@ -319,6 +319,8 @@ class X86TTIImpl final : public BasicTTIImplBase<X86TTIImpl> {
   unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy,
                              Type *ScalarValTy) const override;
 
+  bool useFastCCForInternalCall(Function &F) const override;
+
 private:
   bool supportsGather() const;
   InstructionCost getGSVectorCost(unsigned Opcode, TTI::TargetCostKind CostKind,
diff --git a/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/llvm/lib/Transforms/IPO/GlobalOpt.cpp
index 99c4982c58b47..1516a5bb7a6c2 100644
--- a/llvm/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/llvm/lib/Transforms/IPO/GlobalOpt.cpp
@@ -2018,12 +2018,15 @@ OptimizeFunctions(Module &M,
 
     if (hasChangeableCC(&F, ChangeableCCCache)) {
       // If this function has a calling convention worth changing, is not a
-      // varargs function, and is only called directly, promote it to use the
-      // Fast calling convention.
-      F.setCallingConv(CallingConv::Fast);
-      ChangeCalleesToFastCall(&F);
-      ++NumFastCallFns;
-      Changed = true;
+      // varargs function, is only called directly, and is supported by the
+      // target, promote it to use the Fast calling convention.
+      TargetTransformInfo &TTI = GetTTI(F);
+      if (TTI.useFastCCForInternalCall(F)) {
+        F.setCallingConv(CallingConv::Fast);
+        ChangeCalleesToFastCall(&F);
+        ++NumFastCallFns;
+        Changed = true;
+      }
     }
 
     if (F.getAttributes().hasAttrSomewhere(Attribute::Nest) &&
diff --git a/llvm/test/Transforms/GlobalOpt/X86/apx.ll b/llvm/test/Transforms/GlobalOpt/X86/apx.ll
new file mode 100644
index 0000000000000..aaf6abac966e8
--- /dev/null
+++ b/llvm/test/Transforms/GlobalOpt/X86/apx.ll
@@ -0,0 +1,57 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt -mtriple=x86_64 -S -passes=globalopt -o - < %s | FileCheck %s
+
+; Internal callees are promoted to fastcc only when the callee and its caller
+; agree on the egpr target feature (callee3); mismatches keep the default CC.
+
+define void @caller1() {
+; CHECK-LABEL: define void @caller1() local_unnamed_addr {
+; CHECK-NEXT:    call void @callee1()
+; CHECK-NEXT:    ret void
+;
+  call void @callee1()
+  ret void
+}
+
+define internal void @callee1() "target-features"="+egpr" {
+; CHECK-LABEL: define internal void @callee1(
+; CHECK-SAME: ) unnamed_addr #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:    ret void
+;
+  ret void
+}
+
+define void @caller2() "target-features"="+egpr" {
+; CHECK-LABEL: define void @caller2(
+; CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT:    call void @callee2()
+; CHECK-NEXT:    ret void
+;
+  call void @callee2()
+  ret void
+}
+
+define internal void @callee2() {
+; CHECK-LABEL: define internal void @callee2() unnamed_addr {
+; CHECK-NEXT:    ret void
+;
+  ret void
+}
+
+define void @caller3() "target-features"="+egpr" {
+; CHECK-LABEL: define void @caller3(
+; CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT:    call fastcc void @callee3()
+; CHECK-NEXT:    ret void
+;
+  call void @callee3()
+  ret void
+}
+
+define internal void @callee3() "target-features"="+egpr" {
+; CHECK-LABEL: define internal fastcc void @callee3(
+; CHECK-SAME: ) unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT:    ret void
+;
+  ret void
+}