diff --git a/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h b/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
index 5a682e8c7b5eb..84bdd5c2379f1 100644
--- a/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
+++ b/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
@@ -93,11 +93,14 @@
 #include "llvm/Transforms/Utils/SizeOpts.h"
 
 namespace llvm {
-// Map of potential specializations for each function. The FunctionSpecializer
-// keeps the discovered specialisation opportunities for the module in a single
-// vector, where the specialisations of each function form a contiguous range.
-// This map's value is the beginning and the end of that range.
-using SpecMap = DenseMap<Function *, std::pair<unsigned, unsigned>>;
+struct Spec;
+
+// Map of potential specializations for each function.
+using SpecMap = DenseMap<Function *, SmallVector<unsigned>>;
+
+using CallUserT = SmallMapVector<
+    CallBase *,
+    std::pair<SmallVector<std::pair<unsigned, Constant *>, 4>, Function *>, 4>;
 
 // Just a shorter abbreviation to improve indentation.
 using Cost = InstructionCost;
@@ -124,6 +127,14 @@ struct SpecSig {
   }
 };
 
+enum CallSiteStatusT { AWAITING_PARENT, HAS_PARENT, NO_PARENT };
+
+struct SpecCall {
+  CallBase *CallSite;
+  CallSiteStatusT Status;
+  unsigned Parent;
+};
+
 // Specialization instance.
 struct Spec {
   // Original function.
@@ -141,13 +152,47 @@ struct Spec {
   // Number of instructions in the specialization.
   unsigned CodeSize;
 
+  // Cumulative function size of the chain.
+  unsigned FuncSize;
+
+  // Latency savings.
+  unsigned Latency;
+
+  // Benefit from inlining.
+  unsigned InlineScore;
+
   // List of call sites, matching this specialization.
-  SmallVector<CallBase *> CallSites;
+  SmallVector<SpecCall> CallSites;
 
-  Spec(Function *F, const SpecSig &S, unsigned Score, unsigned CodeSize)
-      : F(F), Sig(S), Score(Score), CodeSize(CodeSize) {}
-  Spec(Function *F, const SpecSig &&S, unsigned Score, unsigned CodeSize)
-      : F(F), Sig(S), Score(Score), CodeSize(CodeSize) {}
+  bool AllChains = true;
+
+  void addCall(SpecCall SC) {
+    CallSites.push_back(SC);
+    AllChains = AllChains && SC.Status != CallSiteStatusT::NO_PARENT;
+  }
+
+  // List of sub-specializations.
+  SmallVector<unsigned> SubSpecs;
+
+  // Index within AllSpecs.
+  unsigned Loc = 0;
+
+  bool SpecializeOnOwn = true;
+
+  Spec(Function *F, CallBase *CallSite, const SpecSig &S,
+       CallSiteStatusT Status)
+      : F(F), Clone(nullptr), Sig(S), Score(), CodeSize(), FuncSize(),
+        InlineScore(), CallSites() {
+    addCall({CallSite, Status, /*Parent*/ 0});
+  }
+  Spec(Function *F, CallBase *CallSite, CallSiteStatusT Status)
+      : F(F), Clone(nullptr), Sig(), Score(), CodeSize(), FuncSize(),
+        InlineScore(), CallSites() {
+    addCall({CallSite, Status, /*Parent*/ 0});
+  }
+  Spec(Function *F)
+      : F(F), Clone(nullptr), Sig(), Score(), CodeSize(), FuncSize(),
+        InlineScore(), CallSites() {}
 };
 
 class InstCostVisitor : public InstVisitor<InstCostVisitor, Constant *> {
@@ -180,9 +225,11 @@ class InstCostVisitor : public InstVisitor<InstCostVisitor, Constant *> {
     return Solver.isBlockExecutable(BB) && !DeadBlocks.contains(BB);
   }
 
-  LLVM_ABI Cost getCodeSizeSavingsForArg(Argument *A, Constant *C);
+  LLVM_ABI Cost getCodeSizeSavingsForArg(Argument *A, Constant *C,
+                                         CallUserT *CallUsers = nullptr);
 
-  LLVM_ABI Cost getCodeSizeSavingsFromPendingPHIs();
+  LLVM_ABI Cost
+  getCodeSizeSavingsFromPendingPHIs(CallUserT *CallUsers = nullptr);
 
   LLVM_ABI Cost getLatencySavingsForKnownConstants();
 
@@ -194,7 +241,9 @@ class InstCostVisitor : public InstVisitor<InstCostVisitor, Constant *> {
   bool canEliminateSuccessor(BasicBlock *BB, BasicBlock *Succ) const;
 
   Cost getCodeSizeSavingsForUser(Instruction *User, Value *Use = nullptr,
-                                 Constant *C = nullptr);
+                                 Constant *C = nullptr,
+                                 CallUserT *CallUsers = nullptr,
+                                 llvm::Use *UseEdge = nullptr);
 
   Cost estimateBasicBlocks(SmallVectorImpl<BasicBlock *> &WorkList);
   Cost estimateSwitchInst(SwitchInst &I);
@@ -247,6 +296,7 @@ class FunctionSpecializer {
   SmallPtrSet<Function *, 32> Specializations;
   SmallPtrSet<Function *, 32> DeadFunctions;
+  SmallPtrSet<Function *, 32> VisitedFunctions;
   DenseMap<Function *, CodeMetrics> FunctionMetrics;
   DenseMap<Function *, unsigned> FunctionGrowth;
   unsigned NGlobals = 0;
@@ -295,9 +345,26 @@ class FunctionSpecializer {
   /// @param FuncSize Cost of specializing a function.
   /// @param AllSpecs A vector to add potential specializations to.
   /// @param SM A map for a function's specialisation range
+  /// @param InS Specialization being built for this function.
+  /// @param UniqueSpecs Map of existing specializations.
+  /// @param CurrentChain Current chain of function calls.
+  /// @return True, if any potential specializations were found
+  bool findSpecializations(unsigned FuncSize, SmallVectorImpl<Spec> &AllSpecs,
+                           SpecMap &SM, Spec &InS,
+                           DenseMap<SpecSig, unsigned> &UniqueSpecs,
+                           SmallPtrSet<Function *, 8> &CurrentChain);
+
+  /// @brief Find specialization opportunities for a given function.
+  /// @param S Specialization to complete, possibly with a Callsite attached.
+  /// @param Chained Is this call part of a chain build?
+  /// @param SM A map for a function's specialisation range
+  /// @param AllSpecs A vector to add potential specializations to.
+  /// @param UniqueSpecs Map of existing specializations.
+  /// @param CurrentChain Current chain of function calls leading to this call
+  /// site.
   /// @return True, if any potential specializations were found
-  bool findSpecializations(Function *F, unsigned FuncSize,
-                           SmallVectorImpl<Spec> &AllSpecs, SpecMap &SM);
+  bool runOneSpec(Spec &S, bool Chained, SpecMap &SM,
+                  SmallVectorImpl<Spec> &AllSpecs,
+                  DenseMap<SpecSig, unsigned> &UniqueSpecs,
+                  SmallPtrSet<Function *, 8> CurrentChain);
 
   /// Compute the inlining bonus for replacing argument \p A with constant \p C.
   unsigned getInliningBonus(Argument *A, Constant *C);
@@ -308,7 +375,8 @@ class FunctionSpecializer {
   /// @param F Function to specialize
   /// @param S Which specialization to create
   /// @return The new, cloned function
-  Function *createSpecialization(Function *F, const SpecSig &S);
+  Function *createSpecialization(Function *F, const SpecSig &S,
+                                 ValueToValueMapTy &Mappings);
 
   /// Determine if it is possible to specialise the function for constant values
   /// of the formal parameter \p A.
@@ -320,9 +388,9 @@ class FunctionSpecializer {
 
   /// @brief Find and update calls to \p F, which match a specialization
   /// @param F Orginal function
-  /// @param Begin Start of a range of possibly matching specialisations
-  /// @param End End of a range (exclusive) of possibly matching specialisations
-  void updateCallSites(Function *F, const Spec *Begin, const Spec *End);
+  /// @param Specs Vector of possibly matching specialisations
+  void updateCallSites(Function *F, const SmallVector<unsigned> &Specs,
+                       const SmallVector<Spec> &AllSpecs);
 };
 
 } // namespace llvm
diff --git a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
index 150a2dc5d48e2..746193eb2e547 100644
--- a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
+++ b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
@@ -154,27 +154,31 @@ Constant *InstCostVisitor::findConstantFor(Value *V) const {
   return KnownConstants.lookup(V);
 }
 
-Cost InstCostVisitor::getCodeSizeSavingsFromPendingPHIs() {
+Cost InstCostVisitor::getCodeSizeSavingsFromPendingPHIs(CallUserT *CallUsers) {
  Cost CodeSize;
  while (!PendingPHIs.empty()) {
    Instruction *Phi = PendingPHIs.pop_back_val();
    // The pending PHIs could have been proven dead by now.
     if (isBlockExecutable(Phi->getParent()))
-      CodeSize += getCodeSizeSavingsForUser(Phi);
+      CodeSize +=
+          getCodeSizeSavingsForUser(Phi, nullptr, nullptr, CallUsers, nullptr);
   }
   return CodeSize;
 }
 
 /// Compute the codesize savings for replacing argument \p A with constant \p C.
-Cost InstCostVisitor::getCodeSizeSavingsForArg(Argument *A, Constant *C) {
+Cost InstCostVisitor::getCodeSizeSavingsForArg(Argument *A, Constant *C,
+                                               CallUserT *CallUsers) {
   LLVM_DEBUG(dbgs() << "FnSpecialization: Analysing bonus for constant: "
                     << C->getNameOrAsOperand() << "\n");
   Cost CodeSize;
-  for (auto *U : A->users())
-    if (auto *UI = dyn_cast<Instruction>(U))
+  for (Use &UseEdge : A->uses()) {
+    User *U = UseEdge.getUser();
+    if (auto *UI = dyn_cast<Instruction>(U)) {
       if (isBlockExecutable(UI->getParent()))
-        CodeSize += getCodeSizeSavingsForUser(UI, A, C);
-
+        CodeSize += getCodeSizeSavingsForUser(UI, A, C, CallUsers, &UseEdge);
+    }
+  }
   LLVM_DEBUG(dbgs() << "FnSpecialization: Accumulated bonus {CodeSize = "
                     << CodeSize << "} for argument " << *A << "\n");
   return CodeSize;
@@ -217,7 +221,9 @@ Cost InstCostVisitor::getLatencySavingsForKnownConstants() {
 }
 
 Cost InstCostVisitor::getCodeSizeSavingsForUser(Instruction *User, Value *Use,
-                                                Constant *C) {
+                                                Constant *C,
+                                                CallUserT *CallUsers,
+                                                llvm::Use *UseEdge) {
   // We have already propagated a constant for this user.
   if (KnownConstants.contains(User))
     return 0;
@@ -227,10 +233,45 @@ Cost InstCostVisitor::getCodeSizeSavingsForUser(Instruction *User, Value *Use,
                     : KnownConstants.end();
 
   Cost CodeSize = 0;
+  auto isChainableCall = [&](Instruction *I) -> bool {
+    if (!CallUsers || !UseEdge)
+      return false;
+    if (CallInst *CI = dyn_cast<CallInst>(I);
+        CI && CI->getIntrinsicID() == llvm::Intrinsic::not_intrinsic) {
+      LLVM_DEBUG(
+          dbgs() << "FnSpecialization: Found constant forwarded via a call "
+                 << *C << "\n");
+      Function *F = CI->getCalledFunction();
+      if (F) { // Avoid function pointers.
+        unsigned Idx = CI->getArgOperandNo(UseEdge);
+        LLVM_DEBUG(dbgs() << "FnSpecialization: Function called: "
+                          << F->getName() << " argument number: " << Idx
+                          << "\n");
+        (*CallUsers)[CI].first.push_back({Idx, C});
+        (*CallUsers)[CI].second = F;
+        return true;
+      } else if (Use == CI->getCalledOperand()) {
+        LLVM_DEBUG(dbgs() << "FnSpecialization: Found call to constant "
+                             "function pointer.\n");
+        Function *CF = dyn_cast<Function>(C);
+        assert(CF && "Indirect call to a non-Function type");
+        (*CallUsers)[CI].second = CF;
+      } else {
+        LLVM_DEBUG(
+            dbgs() << "FnSpecialization: Could not find call function.\n");
+        unsigned Idx = CI->getArgOperandNo(UseEdge);
+        (*CallUsers)[CI].first.push_back({Idx, C});
+      }
+    }
+    return false;
+  };
   if (auto *I = dyn_cast<SwitchInst>(User)) {
     CodeSize = estimateSwitchInst(*I);
   } else if (auto *I = dyn_cast<BranchInst>(User)) {
     CodeSize = estimateBranchInst(*I);
+  } else if (isChainableCall(User)) {
+    // Will get benefit from the recursive call to findSpecializations().
+    return 0;
   } else {
     C = visit(*User);
     if (!C)
@@ -246,11 +287,12 @@ Cost InstCostVisitor::getCodeSizeSavingsForUser(Instruction *User, Value *Use,
 
   LLVM_DEBUG(dbgs() << "FnSpecialization: {CodeSize = " << CodeSize
                     << "} for user " << *User << "\n");
-
-  for (auto *U : User->users())
+  for (llvm::Use &UE : User->uses()) {
+    llvm::User *U = UE.getUser();
     if (auto *UI = dyn_cast<Instruction>(U))
       if (UI != User && isBlockExecutable(UI->getParent()))
-        CodeSize += getCodeSizeSavingsForUser(UI, User, C);
+        CodeSize += getCodeSizeSavingsForUser(UI, User, C, CallUsers, &UE);
+  }
 
   return CodeSize;
 }
@@ -668,6 +710,82 @@ static unsigned getCostValue(const Cost &C) {
 
   return static_cast<unsigned>(Value);
 }
 
+bool FunctionSpecializer::runOneSpec(Spec &S, bool Chained, SpecMap &SM,
+                                     SmallVectorImpl<Spec> &AllSpecs,
+                                     DenseMap<SpecSig, unsigned> &UniqueSpecs,
+                                     SmallPtrSet<Function *, 8> CurrentChain) {
+  Function &F = *(S.F);
+  if (!isCandidateFunction(&F))
+    return false;
+
+  LLVM_DEBUG(dbgs() << "FnSpecialization: Trying function " << F.getName()
+                    << ", Chain=" << Chained << "\n");
+
+  auto [It, Inserted] = FunctionMetrics.try_emplace(&F);
+  CodeMetrics &Metrics = It->second;
+  // Analyze the function.
+  if (Inserted) {
+    SmallPtrSet<const Value *, 32> EphValues;
+    CodeMetrics::collectEphemeralValues(&F, &GetAC(F), EphValues);
+    for (BasicBlock &BB : F)
+      Metrics.analyzeBasicBlock(&BB, GetTTI(F), EphValues);
+  }
+
+  // When specializing literal constants is enabled, always require functions
+  // to be larger than MinFunctionSize, to prevent excessive specialization.
+  const bool RequireMinSize =
+      !ForceSpecialization &&
+      (SpecializeLiteralConstant || !F.hasFnAttribute(Attribute::NoInline));
+
+  // If the code metrics reveal that we shouldn't duplicate the function,
+  // or if the code size implies that this function is easy to get inlined,
+  // then we shouldn't specialize it.
+  if (Metrics.notDuplicatable || !Metrics.NumInsts.isValid())
+    return false;
+
+  if (RequireMinSize && Metrics.NumInsts < MinFunctionSize) {
+    if (Chained) {
+      // Still specialize as part of the chain, so that we can assess the
+      // chain specialization more accurately.
+      S.SpecializeOnOwn = false;
+    } else {
+      return false;
+    }
+  }
+
+  // When specialization on literal constants is disabled, only consider
+  // recursive functions when running multiple times to save wasted analysis,
+  // as we will not be able to specialize on any newly found literal constant
+  // return values.
+  if (!Chained && !SpecializeLiteralConstant &&
+      VisitedFunctions.contains(&F) && !Metrics.isRecursive)
+    return false;
+
+  // Don't mistake this chain visit for a full check of all of F's call sites.
+  if (!Chained)
+    VisitedFunctions.insert(&F);
+
+  int64_t Sz = Metrics.NumInsts.getValue();
+  assert(Sz > 0 && "CodeSize should be positive");
+  // It is safe to down cast from int64_t, NumInsts is always positive.
+  unsigned FuncSize = static_cast<unsigned>(Sz);
+
+  LLVM_DEBUG(dbgs() << "FnSpecialization: Specialization cost for "
+                    << F.getName() << " is " << FuncSize << "\n");
+
+  if (Inserted && Metrics.isRecursive)
+    promoteConstantStackValues(&F);
+
+  if (!findSpecializations(FuncSize, AllSpecs, SM, S, UniqueSpecs,
+                           CurrentChain)) {
+    LLVM_DEBUG(
+        dbgs() << "FnSpecialization: No possible specializations found for "
+               << F.getName() << "\n");
+    return false;
+  }
+  return true;
+}
+
 /// Attempt to specialize functions in the module to enable constant
 /// propagation across function boundaries.
 ///
@@ -676,63 +794,24 @@ bool FunctionSpecializer::run() {
   // Find possible specializations for each function.
   SpecMap SM;
   SmallVector<Spec, 32> AllSpecs;
+  // A mapping from a specialisation signature to the index of the respective
+  // entry in the all specialisation array. Used to ensure uniqueness of
+  // specialisations.
+  DenseMap<SpecSig, unsigned> UniqueSpecs;
   unsigned NumCandidates = 0;
   for (Function &F : M) {
-    if (!isCandidateFunction(&F))
-      continue;
-
-    auto [It, Inserted] = FunctionMetrics.try_emplace(&F);
-    CodeMetrics &Metrics = It->second;
-    // Analyze the function.
-    if (Inserted) {
-      SmallPtrSet<const Value *, 32> EphValues;
-      CodeMetrics::collectEphemeralValues(&F, &GetAC(F), EphValues);
-      for (BasicBlock &BB : F)
-        Metrics.analyzeBasicBlock(&BB, GetTTI(F), EphValues);
-    }
-
-    // When specializing literal constants is enabled, always require functions
-    // to be larger than MinFunctionSize, to prevent excessive specialization.
-    const bool RequireMinSize =
-        !ForceSpecialization &&
-        (SpecializeLiteralConstant || !F.hasFnAttribute(Attribute::NoInline));
-
-    // If the code metrics reveal that we shouldn't duplicate the function,
-    // or if the code size implies that this function is easy to get inlined,
-    // then we shouldn't specialize it.
-    if (Metrics.notDuplicatable || !Metrics.NumInsts.isValid() ||
-        (RequireMinSize && Metrics.NumInsts < MinFunctionSize))
-      continue;
-
-    // When specialization on literal constants is disabled, only consider
-    // recursive functions when running multiple times to save wasted analysis,
-    // as we will not be able to specialize on any newly found literal constant
-    // return values.
-    if (!SpecializeLiteralConstant && !Inserted && !Metrics.isRecursive)
-      continue;
-
-    int64_t Sz = Metrics.NumInsts.getValue();
-    assert(Sz > 0 && "CodeSize should be positive");
-    // It is safe to down cast from int64_t, NumInsts is always positive.
-    unsigned FuncSize = static_cast<unsigned>(Sz);
-
-    LLVM_DEBUG(dbgs() << "FnSpecialization: Specialization cost for "
-                      << F.getName() << " is " << FuncSize << "\n");
-
-    if (Inserted && Metrics.isRecursive)
-      promoteConstantStackValues(&F);
-
-    if (!findSpecializations(&F, FuncSize, AllSpecs, SM)) {
-      LLVM_DEBUG(
-          dbgs() << "FnSpecialization: No possible specializations found for "
-                 << F.getName() << "\n");
-      continue;
-    }
-
-    ++NumCandidates;
+    Spec S(&F);
+    SmallPtrSet<Function *, 8> CurrentChain;
+    if (runOneSpec(S, /*Chained*/ false, SM, AllSpecs, UniqueSpecs,
+                   CurrentChain))
+      ++NumCandidates;
   }
 
-  if (!NumCandidates) {
+  unsigned IndepSpecs = 0;
+  for (auto &S : AllSpecs)
+    if (S.SpecializeOnOwn && !S.AllChains)
+      ++IndepSpecs;
+  if (!NumCandidates || !IndepSpecs) {
     LLVM_DEBUG(
         dbgs()
         << "FnSpecialization: No possible specializations found in module\n");
@@ -743,12 +822,16 @@ bool FunctionSpecializer::run() {
   // specialization budget, which is derived from maximum number of
   // specializations per specialization candidate function.
   auto CompareScore = [&AllSpecs](unsigned I, unsigned J) {
+    if (!AllSpecs[J].SpecializeOnOwn || AllSpecs[J].AllChains)
+      return true;
+    if (!AllSpecs[I].SpecializeOnOwn || AllSpecs[I].AllChains)
+      return false;
     if (AllSpecs[I].Score != AllSpecs[J].Score)
       return AllSpecs[I].Score > AllSpecs[J].Score;
     return I > J;
   };
-  const unsigned NSpecs =
-      std::min(NumCandidates * MaxClones, unsigned(AllSpecs.size()));
+  const unsigned NSpecs = std::min(
+      {NumCandidates * MaxClones, unsigned(AllSpecs.size()), IndepSpecs});
   SmallVector<unsigned> BestSpecs(NSpecs + 1);
   std::iota(BestSpecs.begin(), BestSpecs.begin() + NSpecs, 0);
   if (AllSpecs.size() > NSpecs) {
@@ -780,47 +863,90 @@ bool FunctionSpecializer::run() {
   // Create the chosen specializations.
   SmallPtrSet<Function *, 8> OriginalFuncs;
   SmallVector<Function *> Clones;
+  // TODO: Does this also need to include the base function in the hash, or is
+  // the SpecSig sufficient?
+  DenseMap<SpecSig, Function *> UniqueClones;
   for (unsigned I = 0; I < NSpecs; ++I) {
     Spec &S = AllSpecs[BestSpecs[I]];
 
-    // Accumulate the codesize growth for the function, now we are creating the
-    // specialization.
-    FunctionGrowth[S.F] += S.CodeSize;
-
-    S.Clone = createSpecialization(S.F, S.Sig);
-    // Update the known call sites to call the clone.
-    for (CallBase *Call : S.CallSites) {
-      Function *Clone = S.Clone;
-      LLVM_DEBUG(dbgs() << "FnSpecialization: Redirecting " << *Call
-                        << " to call " << Clone->getName() << "\n");
-      Call->setCalledFunction(S.Clone);
-      auto &BFI = GetBFI(*Call->getFunction());
-      std::optional<uint64_t> Count =
-          BFI.getBlockProfileCount(Call->getParent());
-      if (Count && !ProfcheckDisableMetadataFixes) {
-        std::optional<Function::ProfileCount> MaybeCloneCount =
-            Clone->getEntryCount();
-        if (MaybeCloneCount) {
-          uint64_t CallCount = *Count + MaybeCloneCount->getCount();
-          Clone->setEntryCount(CallCount);
-          if (std::optional<Function::ProfileCount> MaybeOriginalCount =
-                  S.F->getEntryCount()) {
-            uint64_t OriginalCount = MaybeOriginalCount->getCount();
-            if (OriginalCount >= *Count) {
-              S.F->setEntryCount(OriginalCount - *Count);
-            } else {
-              // This should generally not happen as that would mean there are
-              // more computed calls to the function than what was recorded.
-              LLVM_DEBUG(S.F->setEntryCount(0));
+    ValueToValueMapTy Mappings;
+
+    auto actuallySpecialize = [&](auto &&actuallySpecialize, Spec &S,
+                                  CallSiteStatusT Status, unsigned Parent,
+                                  ValueToValueMapTy &Mappings) -> void {
+      if (Status == CallSiteStatusT::HAS_PARENT) {
+        for (auto &CS : S.CallSites) {
+          if (CS.Status == Status && CS.Parent == Parent) {
+            CallBase *&Call = CS.CallSite;
+            Value *V = Mappings[Call];
+            Call = dyn_cast<CallBase>(V);
+          }
+        }
+      }
+
+      bool NewClone;
+      ValueToValueMapTy CurrMappings;
+      if (auto It = UniqueClones.find(S.Sig); It != UniqueClones.end()) {
+        NewClone = false;
+        S.Clone = It->second;
+      } else {
+        NewClone = true;
+        S.Clone = createSpecialization(S.F, S.Sig, CurrMappings);
+
+        // Accumulate the codesize growth for the function, now we are creating
+        // the specialization.
+        FunctionGrowth[S.F] += S.CodeSize;
+
+        UniqueClones[S.Sig] = S.Clone;
+        Clones.push_back(S.Clone);
+        OriginalFuncs.insert(S.F);
+      }
+      for (auto &CS : S.CallSites) {
+        if (CS.Status == Status && CS.Parent == Parent) {
+          Function *Clone = S.Clone;
+          CallBase *&Call = CS.CallSite;
+          LLVM_DEBUG(dbgs() << "FnSpecialization: Redirecting " << *Call
+                            << " to call " << Clone->getName() << "\n");
+          Call->setCalledFunction(Clone);
+          auto &BFI = GetBFI(*Call->getFunction());
+          std::optional<uint64_t> Count =
+              BFI.getBlockProfileCount(Call->getParent());
+          if (Count && !ProfcheckDisableMetadataFixes) {
+            std::optional<Function::ProfileCount> MaybeCloneCount =
+                Clone->getEntryCount();
+            if (MaybeCloneCount) {
+              uint64_t CallCount = *Count + MaybeCloneCount->getCount();
+              Clone->setEntryCount(CallCount);
+              if (std::optional<Function::ProfileCount>
+                      MaybeOriginalCount = S.F->getEntryCount()) {
+                uint64_t OriginalCount = MaybeOriginalCount->getCount();
+                if (OriginalCount >= *Count) {
+                  S.F->setEntryCount(OriginalCount - *Count);
+                } else {
+                  // This should generally not happen as that would mean there
+                  // are more computed calls to the function than what was
+                  // recorded.
+                  LLVM_DEBUG(S.F->setEntryCount(0));
+                }
+              }
+            }
+          }
+        }
+      }
-    }
+      if (!NewClone)
+        return;
+      for (auto &SSI : S.SubSpecs) {
+        Spec &SS = AllSpecs[SSI];
+        actuallySpecialize(actuallySpecialize, SS,
+                           /*Status*/ CallSiteStatusT::HAS_PARENT,
+                           /*Parent*/ S.Loc, CurrMappings);
+      }
+    };
 
-    Clones.push_back(S.Clone);
-    OriginalFuncs.insert(S.F);
+    actuallySpecialize(actuallySpecialize, S,
+                       /*Status*/ CallSiteStatusT::NO_PARENT, /*Parent*/ 0,
+                       Mappings);
   }
 
   Solver.solveWhileResolvedUndefsIn(Clones);
@@ -828,10 +954,8 @@ bool FunctionSpecializer::run() {
   // Update the rest of the call sites - these are the recursive calls, calls
   // to discarded specialisations and calls that may match a specialisation
   // after the solver runs.
-  for (Function *F : OriginalFuncs) {
-    auto [Begin, End] = SM[F];
-    updateCallSites(F, AllSpecs.begin() + Begin, AllSpecs.begin() + End);
-  }
+  for (Function *F : OriginalFuncs)
+    updateCallSites(F, SM[F], AllSpecs);
 
   for (Function *F : Clones) {
     if (F->getReturnType()->isVoidTy())
@@ -890,21 +1014,20 @@ void FunctionSpecializer::removeDeadFunctions() {
 
 /// Clone the function \p F and remove the ssa_copy intrinsics added by
 /// the SCCPSolver in the cloned version.
-static Function *cloneCandidateFunction(Function *F, unsigned NSpecs) {
-  ValueToValueMapTy Mappings;
+static Function *cloneCandidateFunction(Function *F, unsigned NSpecs,
+                                        ValueToValueMapTy &Mappings) {
   Function *Clone = CloneFunction(F, Mappings);
   Clone->setName(F->getName() + ".specialized." + Twine(NSpecs));
   removeSSACopy(*Clone);
   return Clone;
 }
 
-bool FunctionSpecializer::findSpecializations(Function *F, unsigned FuncSize,
-                                              SmallVectorImpl<Spec> &AllSpecs,
-                                              SpecMap &SM) {
-  // A mapping from a specialisation signature to the index of the respective
-  // entry in the all specialisation array. Used to ensure uniqueness of
-  // specialisations.
-  DenseMap<SpecSig, unsigned> UniqueSpecs;
+bool FunctionSpecializer::findSpecializations(
+    unsigned FuncSize, SmallVectorImpl<Spec> &AllSpecs, SpecMap &SM, Spec &InS,
+    DenseMap<SpecSig, unsigned> &UniqueSpecs,
+    SmallPtrSet<Function *, 8> &CurrentChain) {
+  Function *F = InS.F;
+  bool FoundSpecialization = false;
 
   // Get a list of interesting arguments.
   SmallVector<Argument *, 4> Args;
@@ -915,15 +1038,32 @@ bool FunctionSpecializer::findSpecializations(Function *F, unsigned FuncSize,
   if (Args.empty())
     return false;
 
-  for (User *U : F->users()) {
-    if (!isa<CallInst>(U) && !isa<InvokeInst>(U))
-      continue;
-    auto &CS = *cast<CallBase>(U);
+  SmallVector<CallBase *> CallSites;
+  CallSiteStatusT Status;
+  if (InS.CallSites.size()) {
+    assert(InS.CallSites.size() == 1 &&
+           "Should only be passing single call spec as part of a chain");
+    CallSites.push_back(InS.CallSites[0].CallSite);
+    Status = CallSiteStatusT::AWAITING_PARENT;
+  } else {
+    Status = CallSiteStatusT::NO_PARENT;
+    for (User *U : F->users()) {
+      // If there are multiple candidate functions, check that the user
+      // actually calls this one.
+      if (!isa<CallInst>(U) && !isa<InvokeInst>(U))
+        continue;
+      auto *CS = cast<CallBase>(U);
 
-    // The user instruction does not call our function.
-    if (CS.getCalledFunction() != F)
-      continue;
+      // The user instruction does not call our function.
+      if (CS->getCalledFunction() != F)
+        continue;
+      CallSites.push_back(CS);
+    }
+  }
+
+  for (auto *CSP : CallSites) {
+    auto &CS = *CSP;
+    Spec Chain(F, /*CallSite*/ CSP, Status);
 
     // If the call site has attribute minsize set, that callsite won't be
     // specialized.
     if (CS.hasFnAttr(Attribute::MinSize))
@@ -938,18 +1078,44 @@ bool FunctionSpecializer::findSpecializations(Function *F, unsigned FuncSize,
     // constant operands of this call site.
     SpecSig S;
     for (Argument *A : Args) {
-      Constant *C = getCandidateConstant(CS.getArgOperand(A->getArgNo()));
-      if (!C)
-        continue;
-      LLVM_DEBUG(dbgs() << "FnSpecialization: Found interesting argument "
-                        << A->getName() << " : " << C->getNameOrAsOperand()
-                        << "\n");
-      S.Args.push_back({A, C});
+      // Check if this argument is constant from the call chain propagation.
+      unsigned Idx;
+      auto &As = InS.Sig.Args;
+      for (Idx = 0; Idx < As.size(); ++Idx) {
+        if (As[Idx].Formal == A)
+          break;
+      }
+      if (As.size() == Idx) {
+        unsigned ArgNo = A->getArgNo();
+        if (ArgNo >= CS.arg_size())
+          continue;
+        Value *PossC = CS.getArgOperand(ArgNo);
+        Constant *C = getCandidateConstant(PossC);
+        if (!C)
+          continue;
+        LLVM_DEBUG(dbgs() << "FnSpecialization: Found interesting argument "
                          << A->getName() << " : " << C->getNameOrAsOperand()
                          << "\n");
+        S.Args.push_back({A, C});
+        if (InS.CallSites.size()) {
+          assert(InS.CallSites.size() == 1 &&
+                 "Should only be passing single call spec as part of a chain");
+          InS.Sig.Args.push_back({A, C});
+        }
+      } else {
+        Constant *C = InS.Sig.Args[Idx].Actual;
+        S.Args.push_back({A, C});
+        LLVM_DEBUG(dbgs() << "FnSpecialization: Found passed argument "
+                          << A->getName() << " : " << C->getNameOrAsOperand()
+                          << "\n");
+      }
     }
 
     if (S.Args.empty())
       continue;
 
+    CallUserT CallUsers;
+
     // Check if we have encountered the same specialisation already.
     if (auto It = UniqueSpecs.find(S); It != UniqueSpecs.end()) {
       // Existing specialisation. Add the call to the list to rewrite, unless
@@ -961,21 +1127,80 @@ bool FunctionSpecializer::findSpecializations(Function *F, unsigned FuncSize,
       if (CS.getFunction() == F)
         continue;
       const unsigned Index = It->second;
-      AllSpecs[Index].CallSites.push_back(&CS);
+      AllSpecs[Index].addCall({&CS, Status, /*Parent*/ 0});
     } else {
       // Calculate the specialisation gain.
       Cost CodeSize;
       unsigned Score = 0;
       InstCostVisitor Visitor = getInstCostVisitorFor(F);
       for (ArgInfo &A : S.Args) {
-        CodeSize += Visitor.getCodeSizeSavingsForArg(A.Formal, A.Actual);
+        CodeSize +=
+            Visitor.getCodeSizeSavingsForArg(A.Formal, A.Actual, &CallUsers);
         Score += getInliningBonus(A.Formal, A.Actual);
       }
-      CodeSize += Visitor.getCodeSizeSavingsFromPendingPHIs();
+
+      CodeSize += Visitor.getCodeSizeSavingsFromPendingPHIs(&CallUsers);
+      CurrentChain.insert(F);
+
+      for (auto &CU : CallUsers) {
+        Function *NewF = CU.second.second;
+
+        // Recurse only if constants were found for the function.
+        if (!NewF || NewF->isVarArg())
+          continue;
+
+        // Don't allow any recursion in chains.
+        bool isRecursion = CurrentChain.contains(NewF);
+        if (isRecursion)
+          continue;
+
+        LLVM_DEBUG(
+            dbgs() << "FnSpecialization: Recursively calling runOneSpec() on "
+                   << NewF->getName() << "\n");
+
+        // Since the function might not yet be known when processing the
+        // constants due to a function pointer, wait to extract the argument
+        // pointer at a given index.
+        SpecSig NewS;
+        for (auto &P : CU.second.first)
+          NewS.Args.push_back({NewF->getArg(P.first), P.second});
+
+        Spec CallSpec(NewF, /*CallSite*/ CU.first, NewS,
+                      /*Status*/ CallSiteStatusT::AWAITING_PARENT);
+        runOneSpec(CallSpec, /*Chained*/ true, SM, AllSpecs, UniqueSpecs,
+                   CurrentChain);
+
+        // Use CallSpec.Sig, since it may have been extended within
+        // findSpecializations().
+        if (auto It = UniqueSpecs.find(CallSpec.Sig); It != UniqueSpecs.end()) {
+          const unsigned Index = It->second;
+          Chain.SubSpecs.push_back(Index);
+        }
+      }
 
       unsigned CodeSizeSavings = getCostValue(CodeSize);
       unsigned SpecSize = FuncSize - CodeSizeSavings;
 
+      // Cache savings information in the chain to use for profitability
+      // analysis of the entire chain.
+      Chain.CodeSize = SpecSize;
+      Chain.InlineScore = Score;
+      Chain.FuncSize = FuncSize;
+      unsigned CumulCodeSize = 0;
+      unsigned CumulFuncSize = 0;
+      unsigned CumulInlineScore = 0;
+      unsigned CumulLatency = 0;
+      auto getCumulScores = [&](auto &&getCumulScores, Spec &CurrSpec) -> void {
+        CumulCodeSize += CurrSpec.CodeSize;
+        CumulFuncSize += CurrSpec.FuncSize;
+        CumulInlineScore += CurrSpec.InlineScore;
+        CumulLatency += CurrSpec.Latency;
+        for (auto SSI : CurrSpec.SubSpecs) {
+          getCumulScores(getCumulScores, AllSpecs[SSI]);
+        }
+      };
+      getCumulScores(getCumulScores, Chain);
+      unsigned CumulCodeSizeSavings = CumulFuncSize - CumulCodeSize;
+
       auto IsProfitable = [&]() -> bool {
         // No check required.
         if (ForceSpecialization)
@@ -984,56 +1209,105 @@ bool FunctionSpecializer::findSpecializations(Function *F, unsigned FuncSize,
         LLVM_DEBUG(
             dbgs() << "FnSpecialization: Specialization bonus {Inlining = "
                    << Score << " (" << (Score * 100 / FuncSize) << "%)}\n");
+        LLVM_DEBUG(
+            dbgs()
+            << "FnSpecialization: Chain specialization bonus {Inlining = "
+            << CumulInlineScore << " ("
+            << (CumulInlineScore * 100 / CumulFuncSize) << "%)}\n");
 
         // Minimum inlining bonus.
-        if (Score > MinInliningBonus * FuncSize / 100)
+        if ((Score > MinInliningBonus * FuncSize / 100) &&
+            (CumulInlineScore > MinInliningBonus * CumulFuncSize / 100))
           return true;
 
         LLVM_DEBUG(
             dbgs() << "FnSpecialization: Specialization bonus {CodeSize = "
                    << CodeSizeSavings << " ("
                    << (CodeSizeSavings * 100 / FuncSize) << "%)}\n");
+        LLVM_DEBUG(dbgs() << "FnSpecialization: Cumulative specialization "
+                             "bonus {CodeSize = "
+                          << CumulCodeSizeSavings << " ("
+                          << (CumulCodeSizeSavings * 100 / CumulFuncSize)
+                          << "%)}\n");
 
         // Minimum codesize savings.
-        if (CodeSizeSavings < MinCodeSizeSavings * FuncSize / 100)
+        if ((CodeSizeSavings <= MinCodeSizeSavings * FuncSize / 100) &&
+            (CumulCodeSizeSavings <= MinCodeSizeSavings * CumulFuncSize / 100))
          return false;
 
        // Lazily compute the Latency, to avoid unnecessarily computing BFI.
-        unsigned LatencySavings =
+        Chain.Latency =
            getCostValue(Visitor.getLatencySavingsForKnownConstants());
+        CumulLatency += Chain.Latency;
 
        LLVM_DEBUG(
            dbgs() << "FnSpecialization: Specialization bonus {Latency = "
-                   << LatencySavings << " ("
-                   << (LatencySavings * 100 / FuncSize) << "%)}\n");
+                   << CumulLatency << " ("
+                   << (CumulLatency * 100 / CumulFuncSize) << "%)}\n");
 
        // Minimum latency savings.
-        if (LatencySavings < MinLatencySavings * FuncSize / 100)
+        if (CumulLatency < MinLatencySavings * CumulFuncSize / 100)
          return false;
        // Maximum codesize growth.
         if ((FunctionGrowth[F] + SpecSize) / FuncSize > MaxCodeSizeGrowth)
           return false;
 
-        Score += std::max(CodeSizeSavings, LatencySavings);
+        Score = CumulInlineScore + std::max(CumulCodeSizeSavings, CumulLatency);
         return true;
       };
 
-      // Discard unprofitable specialisations.
-      if (!IsProfitable())
+      auto RemoveFromSubSpecs = [&](Spec &S) -> void {
+        for (unsigned &SSI : S.SubSpecs) {
+          Spec &SS = AllSpecs[SSI];
+          auto NewEnd = std::remove_if(
+              SS.CallSites.begin(), SS.CallSites.end(),
+              [&](SpecCall &SC) -> bool {
+                return SC.Status == CallSiteStatusT::AWAITING_PARENT;
+              });
+          SS.CallSites.erase(NewEnd, SS.CallSites.end());
+        }
+      };
+
+      // Discard unprofitable specialisations.
+      if (!IsProfitable()) {
+        RemoveFromSubSpecs(Chain); // Remove the parent from its sub-specs.
         continue;
+      }
+
+      auto AddParentToSubSpecs = [&](Spec &S) -> void {
+        for (unsigned &SSI : S.SubSpecs) {
+          Spec &SS = AllSpecs[SSI];
+          for (SpecCall &SC : SS.CallSites) {
+            if (SC.Status == CallSiteStatusT::AWAITING_PARENT) {
+              SC.Status = CallSiteStatusT::HAS_PARENT;
+              SC.Parent = S.Loc;
+            }
+          }
+        }
+      };
 
       // Create a new specialisation entry.
-      auto &Spec = AllSpecs.emplace_back(F, S, Score, SpecSize);
-      if (CS.getFunction() != F)
-        Spec.CallSites.push_back(&CS);
+      auto &Spec = AllSpecs.emplace_back(Chain);
       const unsigned Index = AllSpecs.size() - 1;
+      Spec.Loc = Index;
+      AddParentToSubSpecs(Spec);
+      // Update the chain's Sig for any new constants at this level.
+      Spec.Sig = S;
+      Spec.Score = Score;
+
+      if (CS.getFunction() == F && !Spec.CallSites[0].Parent) {
+        Spec.CallSites.clear();
+        // Don't reset AllChains, since this can be specialized standalone.
+      }
       UniqueSpecs[S] = Index;
-      if (auto [It, Inserted] = SM.try_emplace(F, Index, Index + 1); !Inserted)
-        It->second.second = Index + 1;
+
+      FoundSpecialization = true;
+
+      SM[F].push_back(Index);
     }
   }
 
-  return !UniqueSpecs.empty();
+  return FoundSpecialization;
 }
 
 bool FunctionSpecializer::isCandidateFunction(Function *F) {
@@ -1065,9 +1339,11 @@ bool FunctionSpecializer::isCandidateFunction(Function *F) {
   return true;
 }
 
-Function *FunctionSpecializer::createSpecialization(Function *F,
-                                                    const SpecSig &S) {
-  Function *Clone = cloneCandidateFunction(F, Specializations.size() + 1);
+Function *
+FunctionSpecializer::createSpecialization(Function *F, const SpecSig &S,
+                                          ValueToValueMapTy &Mappings) {
+  Function *Clone =
+      cloneCandidateFunction(F, Specializations.size() + 1, Mappings);
 
   // The original function does not neccessarily have internal linkage, but the
   // clone must.
@@ -1207,8 +1483,9 @@ Constant *FunctionSpecializer::getCandidateConstant(Value *V) {
   return C;
 }
 
-void FunctionSpecializer::updateCallSites(Function *F, const Spec *Begin,
-                                          const Spec *End) {
+void FunctionSpecializer::updateCallSites(Function *F,
+                                          const SmallVector<unsigned> &Specs,
+                                          const SmallVector<Spec> &AllSpecs) {
   // Collect the call sites that need updating.
   SmallVector<CallBase *, 8> ToUpdate;
   for (User *U : F->users())
@@ -1223,7 +1500,8 @@ void FunctionSpecializer::updateCallSites(Function *F, const Spec *Begin,
 
     // Find the best matching specialisation.
const Spec *BestSpec = nullptr; - for (const Spec &S : make_range(Begin, End)) { + for (const unsigned SI : Specs) { + const auto &S = AllSpecs[SI]; if (!S.Clone || (BestSpec && S.Score <= BestSpec->Score)) continue; diff --git a/llvm/test/Transforms/FunctionSpecialization/compiler-crash-60191.ll b/llvm/test/Transforms/FunctionSpecialization/compiler-crash-60191.ll index 668929824cc6f..456480b2cc674 100644 --- a/llvm/test/Transforms/FunctionSpecialization/compiler-crash-60191.ll +++ b/llvm/test/Transforms/FunctionSpecialization/compiler-crash-60191.ll @@ -60,7 +60,7 @@ define i32 @f2(i32 %offset) { } ; Tests that `func` has been specialized and it didn't cause compiler crash. +; CHECK-DAG: func.specialized.4 +; CHECK-DAG: func.specialized.5 ; CHECK-DAG: func.specialized.1 -; CHECK-DAG: func.specialized.2 -; CHECK-DAG: func.specialized.3 diff --git a/llvm/test/Transforms/FunctionSpecialization/dead-gv-load.ll b/llvm/test/Transforms/FunctionSpecialization/dead-gv-load.ll index 134a79d349035..337780d0de2e4 100644 --- a/llvm/test/Transforms/FunctionSpecialization/dead-gv-load.ll +++ b/llvm/test/Transforms/FunctionSpecialization/dead-gv-load.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 -; RUN: opt -passes=ipsccp --funcspec-min-function-size=1 -S < %s | FileCheck %s - +; RUN: opt -passes=ipsccp --funcspec-min-function-size=1 \ +; RUN: -funcspec-min-codesize-savings=1 -S < %s | FileCheck %s @gv = internal global ptr null define i8 @caller() { diff --git a/llvm/test/Transforms/FunctionSpecialization/maxgrowth.ll b/llvm/test/Transforms/FunctionSpecialization/maxgrowth.ll index 82d1f7ae4a6e1..7dc7e8ec69f50 100644 --- a/llvm/test/Transforms/FunctionSpecialization/maxgrowth.ll +++ b/llvm/test/Transforms/FunctionSpecialization/maxgrowth.ll @@ -26,7 +26,7 @@ entry: ; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[NOTSPEC0:%.*]] = call i32 @add(i32 0, i32 [[N]]) ; CHECK-NEXT: [[NOTSPEC1:%.*]] = call i32 @add(i32 1, i32 [[N]]) -; CHECK-NEXT: [[SPEC:%.*]] = call i32 @add.specialized.1(i32 1, i32 1) +; CHECK-NEXT: [[SPEC:%.*]] = call i32 @add(i32 1, i32 1) ; CHECK-NEXT: ret void ; ; @@ -36,9 +36,3 @@ entry: ; CHECK-NEXT: [[RES:%.*]] = add i32 [[X]], [[Y]] ; CHECK-NEXT: ret i32 [[RES]] ; -; -; CHECK-LABEL: define internal i32 @add.specialized.1( -; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) { -; CHECK-NEXT: [[ENTRY:.*:]] -; CHECK-NEXT: ret i32 poison -; diff --git a/llvm/test/Transforms/FunctionSpecialization/recursive-penalty.ll b/llvm/test/Transforms/FunctionSpecialization/recursive-penalty.ll index fc17387dec94d..ff90634ddd424 100644 --- a/llvm/test/Transforms/FunctionSpecialization/recursive-penalty.ll +++ b/llvm/test/Transforms/FunctionSpecialization/recursive-penalty.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 ; REQUIRES: asserts ; RUN: opt -passes="ipsccp,inline,instcombine,simplifycfg" -S \ ; RUN: -funcspec-min-function-size=23 -funcspec-max-iters=100 \ @@ -6,11 +7,40 @@ ; Make sure the number of specializations created are not ; linear to the number of iterations (funcspec-max-iters). 
-; CHECK: FnSpecialization: Created 4 specializations in module - @Global = internal constant i32 1, align 4 define internal void @recursiveFunc(ptr readonly %arg) { +; CHECK-LABEL: define internal void @recursiveFunc( +; CHECK-SAME: ptr readonly [[ARG:%.*]]) { +; CHECK-NEXT: [[TEMP:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[ARG_LOAD:%.*]] = load i32, ptr [[ARG]], align 4 +; CHECK-NEXT: [[ARG_CMP:%.*]] = icmp slt i32 [[ARG_LOAD]], 10000 +; CHECK-NEXT: br i1 [[ARG_CMP]], label %[[LOOP1:.*]], label %[[RET_BLOCK:.*]] +; CHECK: [[LOOP1]]: +; CHECK-NEXT: br label %[[LOOP2:.*]] +; CHECK: [[LOOP2]]: +; CHECK-NEXT: br label %[[LOOP3:.*]] +; CHECK: [[LOOP3]]: +; CHECK-NEXT: br label %[[LOOP4:.*]] +; CHECK: [[LOOP4]]: +; CHECK-NEXT: call void @print_val(i32 [[ARG_LOAD]]) +; CHECK-NEXT: [[ARG_ADD:%.*]] = add nsw i32 [[ARG_LOAD]], 1 +; CHECK-NEXT: store i32 [[ARG_ADD]], ptr [[TEMP]], align 4 +; CHECK-NEXT: call void @recursiveFunc(ptr nonnull [[TEMP]]) +; CHECK-NEXT: [[EXIT_COND1:%.*]] = call i1 @exit_cond() +; CHECK-NEXT: br i1 [[EXIT_COND1]], label %[[LOOP4]], label %[[LOOP3_END:.*]] +; CHECK: [[LOOP3_END]]: +; CHECK-NEXT: [[EXIT_COND2:%.*]] = call i1 @exit_cond() +; CHECK-NEXT: br i1 [[EXIT_COND2]], label %[[LOOP3]], label %[[LOOP2_END:.*]] +; CHECK: [[LOOP2_END]]: +; CHECK-NEXT: [[EXIT_COND3:%.*]] = call i1 @exit_cond() +; CHECK-NEXT: br i1 [[EXIT_COND3]], label %[[LOOP2]], label %[[LOOP1_END:.*]] +; CHECK: [[LOOP1_END]]: +; CHECK-NEXT: [[EXIT_COND4:%.*]] = call i1 @exit_cond() +; CHECK-NEXT: br i1 [[EXIT_COND4]], label %[[LOOP1]], label %[[RET_BLOCK]] +; CHECK: [[RET_BLOCK]]: +; CHECK-NEXT: ret void +; %temp = alloca i32, align 4 %arg.load = load i32, ptr %arg, align 4 %arg.cmp = icmp slt i32 %arg.load, 10000 @@ -56,6 +86,10 @@ ret.block: } define i32 @main() { +; CHECK-LABEL: define i32 @main() { +; CHECK-NEXT: call void @recursiveFunc(ptr nonnull @Global) +; CHECK-NEXT: ret i32 0 +; call void @recursiveFunc(ptr @Global) ret i32 0 } diff --git a/llvm/test/Transforms/FunctionSpecialization/specialize-chain.ll b/llvm/test/Transforms/FunctionSpecialization/specialize-chain.ll new file mode 100644 index 0000000000000..e09f33e7fadaa --- /dev/null +++ b/llvm/test/Transforms/FunctionSpecialization/specialize-chain.ll @@ -0,0 +1,176 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs --version 5 +; REQUIRES: asserts +; RUN: opt -passes=ipsccp -S -funcspec-min-function-size=1 -funcspec-min-codesize-savings=20 -debug-only=function-specialization < %s 2>&1 | FileCheck %s + +define i32 @incr(i32 %a) { + %b = add nsw i32 %a, 1 + %c = add nsw i32 %b, 1 + %d = add nsw i32 %c, 1 + %e = add nsw i32 %d, 1 + %f = add nsw i32 %e, 1 + %g = add nsw i32 %f, 1 + ret i32 %g +} + +define i32 @forward_outer(i32 %a) { +entry: + %call = call i32 @forward_inner(i32 %a) + ret i32 %call +} + +define i32 @multi_call(i32 %a) { +entry: + %call = call i32 @incr(i32 %a) + %mul = mul nsw i32 %a, 2 + %mul_call = call i32 @incr(i32 %mul) + ret i32 %call +} + +define i32 @forward_inner(i32 %a) { +entry: + %call = call i32 @incr(i32 %a) + ret i32 %call +} + +define i32 @forward_unfold(i32 %a) { +entry: + %b = mul nsw i32 %a, 10 + %call = call i32 @incr(i32 %b) + %c = mul nsw i32 %call, 20 + ret i32 %c +} + +define dso_local signext i32 @intrinsic(i64 %a) { + %local_dest = alloca [1024 x i32], align 4 + %local_src = alloca [1024 x i32], align 4 + call void @llvm.memcpy.p0.p0.i64(ptr %local_dest, ptr %local_src, i64 %a, i1 false) + ret i32 0 +} + +define i32 @main() { +entry: 
+ %add = call i32 @incr(i32 10) + %int = call i32 @intrinsic(i32 3) + %fwd_unfold = call i32 @forward_unfold(i32 3) + %fwd_inner = call i32 @forward_inner(i32 3) + %fwd_outer = call i32 @forward_outer(i32 3) + %fwd_outer1 = call i32 @forward_outer(i32 3) + %multi_call = call i32 @multi_call(i32 5) + ret i32 %multi_call +} + + + + + +; CHECK-LABEL: define range(i32 -2147483642, -2147483648) i32 @incr( +; CHECK-SAME: i32 [[A:%.*]]) { +; CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[A]], 1 +; CHECK-NEXT: [[C:%.*]] = add nsw i32 [[INC]], 1 +; CHECK-NEXT: [[D:%.*]] = add nsw i32 [[C]], 1 +; CHECK-NEXT: [[E:%.*]] = add nsw i32 [[D]], 1 +; CHECK-NEXT: [[F:%.*]] = add nsw i32 [[E]], 1 +; CHECK-NEXT: [[G:%.*]] = add nsw i32 [[F]], 1 +; CHECK-NEXT: ret i32 [[G]] +; +; +; CHECK-LABEL: define range(i32 -2147483642, -2147483648) i32 @forward_outer( +; CHECK-SAME: i32 [[A:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[CALL:%.*]] = call i32 @forward_inner(i32 [[A]]) +; CHECK-NEXT: ret i32 [[CALL]] +; +; +; CHECK-LABEL: define range(i32 -2147483642, -2147483648) i32 @multi_call( +; CHECK-SAME: i32 [[A:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[CALL:%.*]] = call i32 @incr(i32 [[A]]) +; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[A]], 2 +; CHECK-NEXT: [[MUL_CALL:%.*]] = call i32 @incr(i32 [[MUL]]) +; CHECK-NEXT: ret i32 [[CALL]] +; +; +; CHECK-LABEL: define range(i32 -2147483642, -2147483648) i32 @forward_inner( +; CHECK-SAME: i32 [[A:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[CALL:%.*]] = call i32 @incr(i32 [[A]]) +; CHECK-NEXT: ret i32 [[CALL]] +; +; +; CHECK-LABEL: define i32 @forward_unfold( +; CHECK-SAME: i32 [[A:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[B:%.*]] = mul nsw i32 [[A]], 10 +; CHECK-NEXT: [[CALL:%.*]] = call i32 @incr(i32 [[B]]) +; CHECK-NEXT: [[C:%.*]] = mul nsw i32 [[CALL]], 20 +; CHECK-NEXT: ret i32 [[C]] +; +; +; CHECK-LABEL: define dso_local signext i32 @intrinsic( +; CHECK-SAME: i64 [[A:%.*]]) { +; CHECK-NEXT: [[LOCAL_DEST:%.*]] = alloca [1024 x i32], align 4 +; CHECK-NEXT: [[LOCAL_SRC:%.*]] = alloca [1024 x i32], align 4 +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[LOCAL_DEST]], ptr [[LOCAL_SRC]], i64 [[A]], i1 false) +; CHECK-NEXT: ret i32 0 +; +; +; CHECK-LABEL: define i32 @main() { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[ADD:%.*]] = call i32 @incr.specialized.1(i32 10) +; CHECK-NEXT: [[INT:%.*]] = call i32 @intrinsic(i32 3) +; CHECK-NEXT: [[FWD_UNFOLD:%.*]] = call i32 @forward_unfold.specialized.2(i32 3) +; CHECK-NEXT: [[FWD_INNER:%.*]] = call i32 @forward_inner.specialized.4(i32 3) +; CHECK-NEXT: [[FWD_OUTER:%.*]] = call i32 @forward_outer.specialized.6(i32 3) +; CHECK-NEXT: [[FWD_OUTER1:%.*]] = call i32 @forward_outer.specialized.6(i32 3) +; CHECK-NEXT: [[MULTI_CALL:%.*]] = call i32 @multi_call.specialized.7(i32 5) +; CHECK-NEXT: ret i32 11 +; +; +; CHECK-LABEL: define internal i32 @incr.specialized.1( +; CHECK-SAME: i32 [[A:%.*]]) { +; CHECK-NEXT: ret i32 poison +; +; +; CHECK-LABEL: define internal i32 @forward_unfold.specialized.2( +; CHECK-SAME: i32 [[A:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[CALL:%.*]] = call i32 @incr.specialized.3(i32 30) +; CHECK-NEXT: ret i32 poison +; +; +; CHECK-LABEL: define internal i32 @incr.specialized.3( +; CHECK-SAME: i32 [[A:%.*]]) { +; CHECK-NEXT: ret i32 poison +; +; +; CHECK-LABEL: define internal i32 @forward_inner.specialized.4( +; CHECK-SAME: i32 [[A:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[CALL:%.*]] = call i32 @incr.specialized.5(i32 3) +; CHECK-NEXT: ret i32 
poison +; +; +; CHECK-LABEL: define internal i32 @incr.specialized.5( +; CHECK-SAME: i32 [[A:%.*]]) { +; CHECK-NEXT: ret i32 poison +; +; +; CHECK-LABEL: define internal i32 @forward_outer.specialized.6( +; CHECK-SAME: i32 [[A:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[CALL:%.*]] = call i32 @forward_inner.specialized.4(i32 3) +; CHECK-NEXT: ret i32 poison +; +; +; CHECK-LABEL: define internal i32 @multi_call.specialized.7( +; CHECK-SAME: i32 [[A:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[CALL:%.*]] = call i32 @incr.specialized.8(i32 5) +; CHECK-NEXT: [[MUL_CALL:%.*]] = call i32 @incr.specialized.1(i32 10) +; CHECK-NEXT: ret i32 poison +; +; +; CHECK-LABEL: define internal i32 @incr.specialized.8( +; CHECK-SAME: i32 [[A:%.*]]) { +; CHECK-NEXT: ret i32 poison +; diff --git a/llvm/test/Transforms/FunctionSpecialization/track-ptr-return.ll b/llvm/test/Transforms/FunctionSpecialization/track-ptr-return.ll index f4ba0e72a1b43..ef40bf12ae59d 100644 --- a/llvm/test/Transforms/FunctionSpecialization/track-ptr-return.ll +++ b/llvm/test/Transforms/FunctionSpecialization/track-ptr-return.ll @@ -48,9 +48,8 @@ entry: ; CHECK-NEXT: [[OP1:%.*]] = call ptr @select_op.specialized.1(ptr @global_true) ; CHECK-NEXT: [[OP2:%.*]] = call ptr @select_op.specialized.2(ptr @global_false) ; CHECK-NEXT: [[C1:%.*]] = call i64 @compute.specialized.3(ptr @plus) -; CHECK-NEXT: [[C2:%.*]] = call i64 @compute.specialized.4(ptr @minus) -; CHECK-NEXT: [[ADD:%.*]] = add i64 [[C1]], [[C2]] -; CHECK-NEXT: ret i64 [[ADD]] +; CHECK-NEXT: [[C2:%.*]] = call i64 @compute.specialized.5(ptr @minus) +; CHECK-NEXT: ret i64 2 ; ; ; CHECK-LABEL: define ptr @select_op( @@ -87,15 +86,27 @@ entry: ; CHECK-LABEL: define internal i64 @compute.specialized.3( ; CHECK-SAME: ptr [[OP:%.*]]) { ; CHECK-NEXT: [[ENTRY:.*:]] -; CHECK-NEXT: [[RES:%.*]] = call i64 @plus(i64 1) -; CHECK-NEXT: ret i64 [[RES]] +; CHECK-NEXT: [[RES:%.*]] = call i64 @plus.specialized.4(i64 1) +; CHECK-NEXT: ret i64 poison ; ; -; CHECK-LABEL: define internal i64 @compute.specialized.4( +; CHECK-LABEL: define internal i64 @plus.specialized.4( +; CHECK-SAME: i64 [[X:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: ret i64 poison +; +; +; CHECK-LABEL: define internal i64 @compute.specialized.5( ; CHECK-SAME: ptr [[OP:%.*]]) { ; CHECK-NEXT: [[ENTRY:.*:]] -; CHECK-NEXT: [[RES:%.*]] = call i64 @minus(i64 1) -; CHECK-NEXT: ret i64 [[RES]] +; CHECK-NEXT: [[RES:%.*]] = call i64 @minus.specialized.6(i64 1) +; CHECK-NEXT: ret i64 poison +; +; +; CHECK-LABEL: define internal i64 @minus.specialized.6( +; CHECK-SAME: i64 [[X:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: ret i64 poison ; ; ; NOLIT-LABEL: define i64 @main() { diff --git a/llvm/test/Transforms/FunctionSpecialization/track-return.ll b/llvm/test/Transforms/FunctionSpecialization/track-return.ll index aaff6c138bbaa..d03d7c872ed79 100644 --- a/llvm/test/Transforms/FunctionSpecialization/track-return.ll +++ b/llvm/test/Transforms/FunctionSpecialization/track-return.ll @@ -4,7 +4,7 @@ define i64 @main() { ; CHECK: define i64 @main ; CHECK-NEXT: entry: ; CHECK-NEXT: [[C1:%.*]] = call i64 @foo.specialized.1(i1 true, i64 3, i64 1) -; CHECK-NEXT: [[C2:%.*]] = call i64 @foo.specialized.2(i1 false, i64 4, i64 -1) +; CHECK-NEXT: [[C2:%.*]] = call i64 @foo.specialized.3(i1 false, i64 4, i64 -1) ; CHECK-NEXT: ret i64 8 ; entry: @@ -15,27 +15,6 @@ entry: } define internal i64 @foo(i1 %flag, i64 %m, i64 %n) { -; -; CHECK: define internal i64 @foo.specialized.1 -; CHECK-NEXT: entry: -; CHECK-NEXT: br label 
%plus -; CHECK: plus: -; CHECK-NEXT: [[N0:%.*]] = call i64 @binop.specialized.4(i64 3, i64 1) -; CHECK-NEXT: [[RES0:%.*]] = call i64 @bar.specialized.6(i64 4) -; CHECK-NEXT: br label %merge -; CHECK: merge: -; CHECK-NEXT: ret i64 poison -; -; CHECK: define internal i64 @foo.specialized.2 -; CHECK-NEXT: entry: -; CHECK-NEXT: br label %minus -; CHECK: minus: -; CHECK-NEXT: [[N1:%.*]] = call i64 @binop.specialized.3(i64 4, i64 -1) -; CHECK-NEXT: [[RES1:%.*]] = call i64 @bar.specialized.5(i64 3) -; CHECK-NEXT: br label %merge -; CHECK: merge: -; CHECK-NEXT: ret i64 poison -; entry: br i1 %flag, label %plus, label %minus @@ -55,21 +34,61 @@ merge: } define internal i64 @binop(i64 %x, i64 %y) { +entry: + %z = add i64 %x, %y + ret i64 %z +} + +define internal i64 @bar(i64 %n) { +entry: + %cmp = icmp sgt i64 %n, 3 + br i1 %cmp, label %if.then, label %if.else + +if.then: + %res0 = sdiv i64 %n, 2 + br label %if.end + +if.else: + %res1 = mul i64 %n, 2 + br label %if.end + +if.end: + %res = phi i64 [ %res0, %if.then ], [ %res1, %if.else] + ret i64 %res +} + +; +; CHECK: define internal i64 @foo.specialized.1 +; CHECK-NEXT: entry: +; CHECK-NEXT: br label %plus +; CHECK: plus: +; CHECK-NEXT: [[N0:%.*]] = call i64 @binop.specialized.2(i64 3, i64 1) +; CHECK-NEXT: [[RES0:%.*]] = call i64 @bar.specialized.6(i64 4) +; CHECK-NEXT: br label %merge +; CHECK: merge: +; CHECK-NEXT: ret i64 poison +; ; -; CHECK: define internal i64 @binop.specialized.3 +; CHECK: define internal i64 @binop.specialized.2 ; CHECK-NEXT: entry: ; CHECK-NEXT: ret i64 poison ; +; +; CHECK: define internal i64 @foo.specialized.3 +; CHECK-NEXT: entry: +; CHECK-NEXT: br label %minus +; CHECK: minus: +; CHECK-NEXT: [[N1:%.*]] = call i64 @binop.specialized.4(i64 4, i64 -1) +; CHECK-NEXT: [[RES1:%.*]] = call i64 @bar.specialized.5(i64 3) +; CHECK-NEXT: br label %merge +; CHECK: merge: +; CHECK-NEXT: ret i64 poison +; +; ; CHECK: define internal i64 @binop.specialized.4 ; CHECK-NEXT: entry: ; CHECK-NEXT: ret i64 poison ; -entry: - %z = add i64 %x, %y - ret i64 %z -} - -define internal i64 @bar(i64 %n) { ; ; CHECK: define internal i64 @bar.specialized.5 ; CHECK-NEXT: entry: @@ -87,20 +106,3 @@ define internal i64 @bar(i64 %n) { ; CHECK: if.end: ; CHECK-NEXT: ret i64 poison ; -entry: - %cmp = icmp sgt i64 %n, 3 - br i1 %cmp, label %if.then, label %if.else - -if.then: - %res0 = sdiv i64 %n, 2 - br label %if.end - -if.else: - %res1 = mul i64 %n, 2 - br label %if.end - -if.end: - %res = phi i64 [ %res0, %if.then ], [ %res1, %if.else] - ret i64 %res -} -
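
A minimal sketch of the kind of chain this patch targets (hypothetical module, not part of the patch; whether both levels are actually cloned depends on the -funcspec-* thresholds in effect, as exercised by specialize-chain.ll above):

define internal i32 @leaf(i32 %n) {
  %a = add nsw i32 %n, 1
  %b = add nsw i32 %a, 1
  ret i32 %b
}

define internal i32 @wrapper(i32 %n) {
  ; %n is forwarded unchanged, so a constant seen at @wrapper's call site is
  ; recorded in CallUserT and chains into a candidate specialization of @leaf.
  %r = call i32 @leaf(i32 %n)
  ret i32 %r
}

define i32 @main() {
  %r = call i32 @wrapper(i32 7)
  ret i32 %r
}

Previously @leaf saw no constant argument in the round that specialized @wrapper; with the chain walk, @leaf(i32 7) becomes a sub-specialization (SubSpecs) of the @wrapper specialization, and the whole chain is accepted or discarded together by the cumulative profitability check.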