diff --git a/llvm/include/llvm/Transforms/Scalar/LoopReroll.h b/llvm/include/llvm/Transforms/Scalar/LoopReroll.h deleted file mode 100644 index 496e8df85ea05..0000000000000 --- a/llvm/include/llvm/Transforms/Scalar/LoopReroll.h +++ /dev/null @@ -1,25 +0,0 @@ -//===- LoopReroll.h - Loop rerolling pass ---------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_TRANSFORMS_SCALAR_LOOPREROLL_H -#define LLVM_TRANSFORMS_SCALAR_LOOPREROLL_H - -#include "llvm/IR/PassManager.h" -#include "llvm/Transforms/Scalar/LoopPassManager.h" - -namespace llvm { - -class LoopRerollPass : public PassInfoMixin { -public: - PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, - LoopStandardAnalysisResults &AR, LPMUpdater &U); -}; - -} // end namespace llvm - -#endif // LLVM_TRANSFORMS_SCALAR_LOOPREROLL_H diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp index 7c306c4a21daf..007dc76f7ff6c 100644 --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -216,7 +216,6 @@ #include "llvm/Transforms/Scalar/LoopLoadElimination.h" #include "llvm/Transforms/Scalar/LoopPassManager.h" #include "llvm/Transforms/Scalar/LoopPredication.h" -#include "llvm/Transforms/Scalar/LoopReroll.h" #include "llvm/Transforms/Scalar/LoopRotation.h" #include "llvm/Transforms/Scalar/LoopSimplifyCFG.h" #include "llvm/Transforms/Scalar/LoopSink.h" diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def index 44511800ccff8..6cb87fba42646 100644 --- a/llvm/lib/Passes/PassRegistry.def +++ b/llvm/lib/Passes/PassRegistry.def @@ -599,7 +599,6 @@ LOOP_PASS("loop-idiom", LoopIdiomRecognizePass()) LOOP_PASS("loop-instsimplify", LoopInstSimplifyPass()) LOOP_PASS("loop-predication", LoopPredicationPass()) LOOP_PASS("loop-reduce", LoopStrengthReducePass()) -LOOP_PASS("loop-reroll", LoopRerollPass()) LOOP_PASS("loop-simplifycfg", LoopSimplifyCFGPass()) LOOP_PASS("loop-unroll-full", LoopFullUnrollPass()) LOOP_PASS("loop-versioning-licm", LoopVersioningLICMPass()) diff --git a/llvm/lib/Transforms/Scalar/CMakeLists.txt b/llvm/lib/Transforms/Scalar/CMakeLists.txt index 2dd27037a17de..5527efa9cb63a 100644 --- a/llvm/lib/Transforms/Scalar/CMakeLists.txt +++ b/llvm/lib/Transforms/Scalar/CMakeLists.txt @@ -40,7 +40,6 @@ add_llvm_component_library(LLVMScalarOpts LoopLoadElimination.cpp LoopPassManager.cpp LoopPredication.cpp - LoopRerollPass.cpp LoopRotation.cpp LoopSimplifyCFG.cpp LoopStrengthReduce.cpp diff --git a/llvm/lib/Transforms/Scalar/LoopRerollPass.cpp b/llvm/lib/Transforms/Scalar/LoopRerollPass.cpp deleted file mode 100644 index 7f62526a4f6db..0000000000000 --- a/llvm/lib/Transforms/Scalar/LoopRerollPass.cpp +++ /dev/null @@ -1,1679 +0,0 @@ -//===- LoopReroll.cpp - Loop rerolling pass -------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This pass implements a simple loop reroller. -// -//===----------------------------------------------------------------------===// - -#include "llvm/ADT/APInt.h" -#include "llvm/ADT/BitVector.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/DenseSet.h" -#include "llvm/ADT/MapVector.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Analysis/AliasSetTracker.h" -#include "llvm/Analysis/LoopInfo.h" -#include "llvm/Analysis/LoopPass.h" -#include "llvm/Analysis/ScalarEvolution.h" -#include "llvm/Analysis/ScalarEvolutionExpressions.h" -#include "llvm/Analysis/TargetLibraryInfo.h" -#include "llvm/Analysis/ValueTracking.h" -#include "llvm/IR/BasicBlock.h" -#include "llvm/IR/Constants.h" -#include "llvm/IR/Dominators.h" -#include "llvm/IR/InstrTypes.h" -#include "llvm/IR/Instruction.h" -#include "llvm/IR/Instructions.h" -#include "llvm/IR/IntrinsicInst.h" -#include "llvm/IR/Module.h" -#include "llvm/IR/Type.h" -#include "llvm/IR/Use.h" -#include "llvm/IR/User.h" -#include "llvm/IR/Value.h" -#include "llvm/Support/Casting.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/Transforms/Scalar/LoopReroll.h" -#include "llvm/Transforms/Utils.h" -#include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include "llvm/Transforms/Utils/Local.h" -#include "llvm/Transforms/Utils/LoopUtils.h" -#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h" -#include -#include -#include -#include -#include -#include - -using namespace llvm; - -#define DEBUG_TYPE "loop-reroll" - -STATISTIC(NumRerolledLoops, "Number of rerolled loops"); - -static cl::opt -NumToleratedFailedMatches("reroll-num-tolerated-failed-matches", cl::init(400), - cl::Hidden, - cl::desc("The maximum number of failures to tolerate" - " during fuzzy matching. (default: 400)")); - -// This loop re-rolling transformation aims to transform loops like this: -// -// int foo(int a); -// void bar(int *x) { -// for (int i = 0; i < 500; i += 3) { -// foo(i); -// foo(i+1); -// foo(i+2); -// } -// } -// -// into a loop like this: -// -// void bar(int *x) { -// for (int i = 0; i < 500; ++i) -// foo(i); -// } -// -// It does this by looking for loops that, besides the latch code, are composed -// of isomorphic DAGs of instructions, with each DAG rooted at some increment -// to the induction variable, and where each DAG is isomorphic to the DAG -// rooted at the induction variable (excepting the sub-DAGs which root the -// other induction-variable increments). In other words, we're looking for loop -// bodies of the form: -// -// %iv = phi [ (preheader, ...), (body, %iv.next) ] -// f(%iv) -// %iv.1 = add %iv, 1 <-- a root increment -// f(%iv.1) -// %iv.2 = add %iv, 2 <-- a root increment -// f(%iv.2) -// %iv.scale_m_1 = add %iv, scale-1 <-- a root increment -// f(%iv.scale_m_1) -// ... -// %iv.next = add %iv, scale -// %cmp = icmp(%iv, ...) -// br %cmp, header, exit -// -// where each f(i) is a set of instructions that, collectively, are a function -// only of i (and other loop-invariant values). -// -// As a special case, we can also reroll loops like this: -// -// int foo(int); -// void bar(int *x) { -// for (int i = 0; i < 500; ++i) { -// x[3*i] = foo(0); -// x[3*i+1] = foo(0); -// x[3*i+2] = foo(0); -// } -// } -// -// into this: -// -// void bar(int *x) { -// for (int i = 0; i < 1500; ++i) -// x[i] = foo(0); -// } -// -// in which case, we're looking for inputs like this: -// -// %iv = phi [ (preheader, ...), (body, %iv.next) ] -// %scaled.iv = mul %iv, scale -// f(%scaled.iv) -// %scaled.iv.1 = add %scaled.iv, 1 -// f(%scaled.iv.1) -// %scaled.iv.2 = add %scaled.iv, 2 -// f(%scaled.iv.2) -// %scaled.iv.scale_m_1 = add %scaled.iv, scale-1 -// f(%scaled.iv.scale_m_1) -// ... -// %iv.next = add %iv, 1 -// %cmp = icmp(%iv, ...) -// br %cmp, header, exit - -namespace { - - enum IterationLimits { - /// The maximum number of iterations that we'll try and reroll. - IL_MaxRerollIterations = 32, - /// The bitvector index used by loop induction variables and other - /// instructions that belong to all iterations. - IL_All, - IL_End - }; - - class LoopReroll { - public: - LoopReroll(AliasAnalysis *AA, LoopInfo *LI, ScalarEvolution *SE, - TargetLibraryInfo *TLI, DominatorTree *DT, bool PreserveLCSSA) - : AA(AA), LI(LI), SE(SE), TLI(TLI), DT(DT), - PreserveLCSSA(PreserveLCSSA) {} - bool runOnLoop(Loop *L); - - protected: - AliasAnalysis *AA; - LoopInfo *LI; - ScalarEvolution *SE; - TargetLibraryInfo *TLI; - DominatorTree *DT; - bool PreserveLCSSA; - - using SmallInstructionVector = SmallVector; - using SmallInstructionSet = SmallPtrSet; - using TinyInstructionVector = SmallVector; - - // Map between induction variable and its increment - DenseMap IVToIncMap; - - // For loop with multiple induction variables, remember the ones used only to - // control the loop. - TinyInstructionVector LoopControlIVs; - - // A chain of isomorphic instructions, identified by a single-use PHI - // representing a reduction. Only the last value may be used outside the - // loop. - struct SimpleLoopReduction { - SimpleLoopReduction(Instruction *P, Loop *L) : Instructions(1, P) { - assert(isa(P) && "First reduction instruction must be a PHI"); - add(L); - } - - bool valid() const { - return Valid; - } - - Instruction *getPHI() const { - assert(Valid && "Using invalid reduction"); - return Instructions.front(); - } - - Instruction *getReducedValue() const { - assert(Valid && "Using invalid reduction"); - return Instructions.back(); - } - - Instruction *get(size_t i) const { - assert(Valid && "Using invalid reduction"); - return Instructions[i+1]; - } - - Instruction *operator [] (size_t i) const { return get(i); } - - // The size, ignoring the initial PHI. - size_t size() const { - assert(Valid && "Using invalid reduction"); - return Instructions.size()-1; - } - - using iterator = SmallInstructionVector::iterator; - using const_iterator = SmallInstructionVector::const_iterator; - - iterator begin() { - assert(Valid && "Using invalid reduction"); - return std::next(Instructions.begin()); - } - - const_iterator begin() const { - assert(Valid && "Using invalid reduction"); - return std::next(Instructions.begin()); - } - - iterator end() { return Instructions.end(); } - const_iterator end() const { return Instructions.end(); } - - protected: - bool Valid = false; - SmallInstructionVector Instructions; - - void add(Loop *L); - }; - - // The set of all reductions, and state tracking of possible reductions - // during loop instruction processing. - struct ReductionTracker { - using SmallReductionVector = SmallVector; - - // Add a new possible reduction. - void addSLR(SimpleLoopReduction &SLR) { PossibleReds.push_back(SLR); } - - // Setup to track possible reductions corresponding to the provided - // rerolling scale. Only reductions with a number of non-PHI instructions - // that is divisible by the scale are considered. Three instructions sets - // are filled in: - // - A set of all possible instructions in eligible reductions. - // - A set of all PHIs in eligible reductions - // - A set of all reduced values (last instructions) in eligible - // reductions. - void restrictToScale(uint64_t Scale, - SmallInstructionSet &PossibleRedSet, - SmallInstructionSet &PossibleRedPHISet, - SmallInstructionSet &PossibleRedLastSet) { - PossibleRedIdx.clear(); - PossibleRedIter.clear(); - Reds.clear(); - - for (unsigned i = 0, e = PossibleReds.size(); i != e; ++i) - if (PossibleReds[i].size() % Scale == 0) { - PossibleRedLastSet.insert(PossibleReds[i].getReducedValue()); - PossibleRedPHISet.insert(PossibleReds[i].getPHI()); - - PossibleRedSet.insert(PossibleReds[i].getPHI()); - PossibleRedIdx[PossibleReds[i].getPHI()] = i; - for (Instruction *J : PossibleReds[i]) { - PossibleRedSet.insert(J); - PossibleRedIdx[J] = i; - } - } - } - - // The functions below are used while processing the loop instructions. - - // Are the two instructions both from reductions, and furthermore, from - // the same reduction? - bool isPairInSame(Instruction *J1, Instruction *J2) { - DenseMap::iterator J1I = PossibleRedIdx.find(J1); - if (J1I != PossibleRedIdx.end()) { - DenseMap::iterator J2I = PossibleRedIdx.find(J2); - if (J2I != PossibleRedIdx.end() && J1I->second == J2I->second) - return true; - } - - return false; - } - - // The two provided instructions, the first from the base iteration, and - // the second from iteration i, form a matched pair. If these are part of - // a reduction, record that fact. - void recordPair(Instruction *J1, Instruction *J2, unsigned i) { - if (PossibleRedIdx.count(J1)) { - assert(PossibleRedIdx.count(J2) && - "Recording reduction vs. non-reduction instruction?"); - - PossibleRedIter[J1] = 0; - PossibleRedIter[J2] = i; - - int Idx = PossibleRedIdx[J1]; - assert(Idx == PossibleRedIdx[J2] && - "Recording pair from different reductions?"); - Reds.insert(Idx); - } - } - - // The functions below can be called after we've finished processing all - // instructions in the loop, and we know which reductions were selected. - - bool validateSelected(); - void replaceSelected(); - - protected: - // The vector of all possible reductions (for any scale). - SmallReductionVector PossibleReds; - - DenseMap PossibleRedIdx; - DenseMap PossibleRedIter; - DenseSet Reds; - }; - - // A DAGRootSet models an induction variable being used in a rerollable - // loop. For example, - // - // x[i*3+0] = y1 - // x[i*3+1] = y2 - // x[i*3+2] = y3 - // - // Base instruction -> i*3 - // +---+----+ - // / | \ - // ST[y1] +1 +2 <-- Roots - // | | - // ST[y2] ST[y3] - // - // There may be multiple DAGRoots, for example: - // - // x[i*2+0] = ... (1) - // x[i*2+1] = ... (1) - // x[i*2+4] = ... (2) - // x[i*2+5] = ... (2) - // x[(i+1234)*2+5678] = ... (3) - // x[(i+1234)*2+5679] = ... (3) - // - // The loop will be rerolled by adding a new loop induction variable, - // one for the Base instruction in each DAGRootSet. - // - struct DAGRootSet { - Instruction *BaseInst; - SmallInstructionVector Roots; - - // The instructions between IV and BaseInst (but not including BaseInst). - SmallInstructionSet SubsumedInsts; - }; - - // The set of all DAG roots, and state tracking of all roots - // for a particular induction variable. - struct DAGRootTracker { - DAGRootTracker(LoopReroll *Parent, Loop *L, Instruction *IV, - ScalarEvolution *SE, AliasAnalysis *AA, - TargetLibraryInfo *TLI, DominatorTree *DT, LoopInfo *LI, - bool PreserveLCSSA, - DenseMap &IncrMap, - TinyInstructionVector LoopCtrlIVs) - : Parent(Parent), L(L), SE(SE), AA(AA), TLI(TLI), DT(DT), LI(LI), - PreserveLCSSA(PreserveLCSSA), IV(IV), IVToIncMap(IncrMap), - LoopControlIVs(LoopCtrlIVs) {} - - /// Stage 1: Find all the DAG roots for the induction variable. - bool findRoots(); - - /// Stage 2: Validate if the found roots are valid. - bool validate(ReductionTracker &Reductions); - - /// Stage 3: Assuming validate() returned true, perform the - /// replacement. - /// @param BackedgeTakenCount The backedge-taken count of L. - void replace(const SCEV *BackedgeTakenCount); - - protected: - using UsesTy = MapVector; - - void findRootsRecursive(Instruction *IVU, - SmallInstructionSet SubsumedInsts); - bool findRootsBase(Instruction *IVU, SmallInstructionSet SubsumedInsts); - bool collectPossibleRoots(Instruction *Base, - std::map &Roots); - bool validateRootSet(DAGRootSet &DRS); - - bool collectUsedInstructions(SmallInstructionSet &PossibleRedSet); - void collectInLoopUserSet(const SmallInstructionVector &Roots, - const SmallInstructionSet &Exclude, - const SmallInstructionSet &Final, - DenseSet &Users); - void collectInLoopUserSet(Instruction *Root, - const SmallInstructionSet &Exclude, - const SmallInstructionSet &Final, - DenseSet &Users); - - UsesTy::iterator nextInstr(int Val, UsesTy &In, - const SmallInstructionSet &Exclude, - UsesTy::iterator *StartI=nullptr); - bool isBaseInst(Instruction *I); - bool isRootInst(Instruction *I); - bool instrDependsOn(Instruction *I, - UsesTy::iterator Start, - UsesTy::iterator End); - void replaceIV(DAGRootSet &DRS, const SCEV *Start, const SCEV *IncrExpr); - - LoopReroll *Parent; - - // Members of Parent, replicated here for brevity. - Loop *L; - ScalarEvolution *SE; - AliasAnalysis *AA; - TargetLibraryInfo *TLI; - DominatorTree *DT; - LoopInfo *LI; - bool PreserveLCSSA; - - // The loop induction variable. - Instruction *IV; - - // Loop step amount. - int64_t Inc; - - // Loop reroll count; if Inc == 1, this records the scaling applied - // to the indvar: a[i*2+0] = ...; a[i*2+1] = ... ; - // If Inc is not 1, Scale = Inc. - uint64_t Scale; - - // The roots themselves. - SmallVector RootSets; - - // All increment instructions for IV. - SmallInstructionVector LoopIncs; - - // Map of all instructions in the loop (in order) to the iterations - // they are used in (or specially, IL_All for instructions - // used in the loop increment mechanism). - UsesTy Uses; - - // Map between induction variable and its increment - DenseMap &IVToIncMap; - - TinyInstructionVector LoopControlIVs; - }; - - // Check if it is a compare-like instruction whose user is a branch - bool isCompareUsedByBranch(Instruction *I) { - auto *TI = I->getParent()->getTerminator(); - if (!isa(TI) || !isa(I)) - return false; - return I->hasOneUse() && TI->getOperand(0) == I; - }; - - bool isLoopControlIV(Loop *L, Instruction *IV); - void collectPossibleIVs(Loop *L, SmallInstructionVector &PossibleIVs); - void collectPossibleReductions(Loop *L, - ReductionTracker &Reductions); - bool reroll(Instruction *IV, Loop *L, BasicBlock *Header, - const SCEV *BackedgeTakenCount, ReductionTracker &Reductions); - }; - -} // end anonymous namespace - -// Returns true if the provided instruction is used outside the given loop. -// This operates like Instruction::isUsedOutsideOfBlock, but considers PHIs in -// non-loop blocks to be outside the loop. -static bool hasUsesOutsideLoop(Instruction *I, Loop *L) { - for (User *U : I->users()) { - if (!L->contains(cast(U))) - return true; - } - return false; -} - -// Check if an IV is only used to control the loop. There are two cases: -// 1. It only has one use which is loop increment, and the increment is only -// used by comparison and the PHI (could has sext with nsw in between), and the -// comparison is only used by branch. -// 2. It is used by loop increment and the comparison, the loop increment is -// only used by the PHI, and the comparison is used only by the branch. -bool LoopReroll::isLoopControlIV(Loop *L, Instruction *IV) { - unsigned IVUses = IV->getNumUses(); - if (IVUses != 2 && IVUses != 1) - return false; - - for (auto *User : IV->users()) { - int32_t IncOrCmpUses = User->getNumUses(); - bool IsCompInst = isCompareUsedByBranch(cast(User)); - - // User can only have one or two uses. - if (IncOrCmpUses != 2 && IncOrCmpUses != 1) - return false; - - // Case 1 - if (IVUses == 1) { - // The only user must be the loop increment. - // The loop increment must have two uses. - if (IsCompInst || IncOrCmpUses != 2) - return false; - } - - // Case 2 - if (IVUses == 2 && IncOrCmpUses != 1) - return false; - - // The users of the IV must be a binary operation or a comparison - if (auto *BO = dyn_cast(User)) { - if (BO->getOpcode() == Instruction::Add) { - // Loop Increment - // User of Loop Increment should be either PHI or CMP - for (auto *UU : User->users()) { - if (PHINode *PN = dyn_cast(UU)) { - if (PN != IV) - return false; - } - // Must be a CMP or an ext (of a value with nsw) then CMP - else { - auto *UUser = cast(UU); - // Skip SExt if we are extending an nsw value - // TODO: Allow ZExt too - if (BO->hasNoSignedWrap() && UUser->hasOneUse() && - isa(UUser)) - UUser = cast(*(UUser->user_begin())); - if (!isCompareUsedByBranch(UUser)) - return false; - } - } - } else - return false; - // Compare : can only have one use, and must be branch - } else if (!IsCompInst) - return false; - } - return true; -} - -// Collect the list of loop induction variables with respect to which it might -// be possible to reroll the loop. -void LoopReroll::collectPossibleIVs(Loop *L, - SmallInstructionVector &PossibleIVs) { - for (Instruction &IV : L->getHeader()->phis()) { - if (!IV.getType()->isIntegerTy() && !IV.getType()->isPointerTy()) - continue; - - if (const SCEVAddRecExpr *PHISCEV = - dyn_cast(SE->getSCEV(&IV))) { - if (PHISCEV->getLoop() != L) - continue; - if (!PHISCEV->isAffine()) - continue; - const auto *IncSCEV = dyn_cast(PHISCEV->getStepRecurrence(*SE)); - if (IncSCEV) { - IVToIncMap[&IV] = IncSCEV->getValue()->getSExtValue(); - LLVM_DEBUG(dbgs() << "LRR: Possible IV: " << IV << " = " << *PHISCEV - << "\n"); - - if (isLoopControlIV(L, &IV)) { - LoopControlIVs.push_back(&IV); - LLVM_DEBUG(dbgs() << "LRR: Loop control only IV: " << IV - << " = " << *PHISCEV << "\n"); - } else - PossibleIVs.push_back(&IV); - } - } - } -} - -// Add the remainder of the reduction-variable chain to the instruction vector -// (the initial PHINode has already been added). If successful, the object is -// marked as valid. -void LoopReroll::SimpleLoopReduction::add(Loop *L) { - assert(!Valid && "Cannot add to an already-valid chain"); - - // The reduction variable must be a chain of single-use instructions - // (including the PHI), except for the last value (which is used by the PHI - // and also outside the loop). - Instruction *C = Instructions.front(); - if (C->user_empty()) - return; - - do { - C = cast(*C->user_begin()); - if (C->hasOneUse()) { - if (!C->isBinaryOp()) - return; - - if (!(isa(Instructions.back()) || - C->isSameOperationAs(Instructions.back()))) - return; - - Instructions.push_back(C); - } - } while (C->hasOneUse()); - - if (Instructions.size() < 2 || - !C->isSameOperationAs(Instructions.back()) || - C->use_empty()) - return; - - // C is now the (potential) last instruction in the reduction chain. - for (User *U : C->users()) { - // The only in-loop user can be the initial PHI. - if (L->contains(cast(U))) - if (cast(U) != Instructions.front()) - return; - } - - Instructions.push_back(C); - Valid = true; -} - -// Collect the vector of possible reduction variables. -void LoopReroll::collectPossibleReductions(Loop *L, - ReductionTracker &Reductions) { - BasicBlock *Header = L->getHeader(); - for (BasicBlock::iterator I = Header->begin(), - IE = Header->getFirstInsertionPt(); I != IE; ++I) { - if (!isa(I)) - continue; - if (!I->getType()->isSingleValueType()) - continue; - - SimpleLoopReduction SLR(&*I, L); - if (!SLR.valid()) - continue; - - LLVM_DEBUG(dbgs() << "LRR: Possible reduction: " << *I << " (with " - << SLR.size() << " chained instructions)\n"); - Reductions.addSLR(SLR); - } -} - -// Collect the set of all users of the provided root instruction. This set of -// users contains not only the direct users of the root instruction, but also -// all users of those users, and so on. There are two exceptions: -// -// 1. Instructions in the set of excluded instructions are never added to the -// use set (even if they are users). This is used, for example, to exclude -// including root increments in the use set of the primary IV. -// -// 2. Instructions in the set of final instructions are added to the use set -// if they are users, but their users are not added. This is used, for -// example, to prevent a reduction update from forcing all later reduction -// updates into the use set. -void LoopReroll::DAGRootTracker::collectInLoopUserSet( - Instruction *Root, const SmallInstructionSet &Exclude, - const SmallInstructionSet &Final, - DenseSet &Users) { - SmallInstructionVector Queue(1, Root); - while (!Queue.empty()) { - Instruction *I = Queue.pop_back_val(); - if (!Users.insert(I).second) - continue; - - if (!Final.count(I)) - for (Use &U : I->uses()) { - Instruction *User = cast(U.getUser()); - if (PHINode *PN = dyn_cast(User)) { - // Ignore "wrap-around" uses to PHIs of this loop's header. - if (PN->getIncomingBlock(U) == L->getHeader()) - continue; - } - - if (L->contains(User) && !Exclude.count(User)) { - Queue.push_back(User); - } - } - - // We also want to collect single-user "feeder" values. - for (Use &U : I->operands()) { - if (Instruction *Op = dyn_cast(U)) - if (Op->hasOneUse() && L->contains(Op) && !Exclude.count(Op) && - !Final.count(Op)) - Queue.push_back(Op); - } - } -} - -// Collect all of the users of all of the provided root instructions (combined -// into a single set). -void LoopReroll::DAGRootTracker::collectInLoopUserSet( - const SmallInstructionVector &Roots, - const SmallInstructionSet &Exclude, - const SmallInstructionSet &Final, - DenseSet &Users) { - for (Instruction *Root : Roots) - collectInLoopUserSet(Root, Exclude, Final, Users); -} - -static bool isUnorderedLoadStore(Instruction *I) { - if (LoadInst *LI = dyn_cast(I)) - return LI->isUnordered(); - if (StoreInst *SI = dyn_cast(I)) - return SI->isUnordered(); - if (MemIntrinsic *MI = dyn_cast(I)) - return !MI->isVolatile(); - return false; -} - -/// Return true if IVU is a "simple" arithmetic operation. -/// This is used for narrowing the search space for DAGRoots; only arithmetic -/// and GEPs can be part of a DAGRoot. -static bool isSimpleArithmeticOp(User *IVU) { - if (Instruction *I = dyn_cast(IVU)) { - switch (I->getOpcode()) { - default: return false; - case Instruction::Add: - case Instruction::Sub: - case Instruction::Mul: - case Instruction::Shl: - case Instruction::AShr: - case Instruction::LShr: - case Instruction::GetElementPtr: - case Instruction::Trunc: - case Instruction::ZExt: - case Instruction::SExt: - return true; - } - } - return false; -} - -static bool isLoopIncrement(User *U, Instruction *IV) { - BinaryOperator *BO = dyn_cast(U); - - if ((BO && BO->getOpcode() != Instruction::Add) || - (!BO && !isa(U))) - return false; - - for (auto *UU : U->users()) { - PHINode *PN = dyn_cast(UU); - if (PN && PN == IV) - return true; - } - return false; -} - -bool LoopReroll::DAGRootTracker:: -collectPossibleRoots(Instruction *Base, std::map &Roots) { - SmallInstructionVector BaseUsers; - - for (auto *I : Base->users()) { - ConstantInt *CI = nullptr; - - if (isLoopIncrement(I, IV)) { - LoopIncs.push_back(cast(I)); - continue; - } - - // The root nodes must be either GEPs, ORs or ADDs. - if (auto *BO = dyn_cast(I)) { - if (BO->getOpcode() == Instruction::Add || - BO->getOpcode() == Instruction::Or) - CI = dyn_cast(BO->getOperand(1)); - } else if (auto *GEP = dyn_cast(I)) { - Value *LastOperand = GEP->getOperand(GEP->getNumOperands()-1); - CI = dyn_cast(LastOperand); - } - - if (!CI) { - if (Instruction *II = dyn_cast(I)) { - BaseUsers.push_back(II); - continue; - } else { - LLVM_DEBUG(dbgs() << "LRR: Aborting due to non-instruction: " << *I - << "\n"); - return false; - } - } - - int64_t V = std::abs(CI->getValue().getSExtValue()); - if (Roots.find(V) != Roots.end()) - // No duplicates, please. - return false; - - Roots[V] = cast(I); - } - - // Make sure we have at least two roots. - if (Roots.empty() || (Roots.size() == 1 && BaseUsers.empty())) - return false; - - // If we found non-loop-inc, non-root users of Base, assume they are - // for the zeroth root index. This is because "add %a, 0" gets optimized - // away. - if (BaseUsers.size()) { - if (Roots.find(0) != Roots.end()) { - LLVM_DEBUG(dbgs() << "LRR: Multiple roots found for base - aborting!\n"); - return false; - } - Roots[0] = Base; - } - - // Calculate the number of users of the base, or lowest indexed, iteration. - unsigned NumBaseUses = BaseUsers.size(); - if (NumBaseUses == 0) - NumBaseUses = Roots.begin()->second->getNumUses(); - - // Check that every node has the same number of users. - for (auto &KV : Roots) { - if (KV.first == 0) - continue; - if (!KV.second->hasNUses(NumBaseUses)) { - LLVM_DEBUG(dbgs() << "LRR: Aborting - Root and Base #users not the same: " - << "#Base=" << NumBaseUses - << ", #Root=" << KV.second->getNumUses() << "\n"); - return false; - } - } - - return true; -} - -void LoopReroll::DAGRootTracker:: -findRootsRecursive(Instruction *I, SmallInstructionSet SubsumedInsts) { - // Does the user look like it could be part of a root set? - // All its users must be simple arithmetic ops. - if (I->hasNUsesOrMore(IL_MaxRerollIterations + 1)) - return; - - if (I != IV && findRootsBase(I, SubsumedInsts)) - return; - - SubsumedInsts.insert(I); - - for (User *V : I->users()) { - Instruction *I = cast(V); - if (is_contained(LoopIncs, I)) - continue; - - if (!isSimpleArithmeticOp(I)) - continue; - - // The recursive call makes a copy of SubsumedInsts. - findRootsRecursive(I, SubsumedInsts); - } -} - -bool LoopReroll::DAGRootTracker::validateRootSet(DAGRootSet &DRS) { - if (DRS.Roots.empty()) - return false; - - // If the value of the base instruction is used outside the loop, we cannot - // reroll the loop. Check for other root instructions is unnecessary because - // they don't match any base instructions if their values are used outside. - if (hasUsesOutsideLoop(DRS.BaseInst, L)) - return false; - - // Consider a DAGRootSet with N-1 roots (so N different values including - // BaseInst). - // Define d = Roots[0] - BaseInst, which should be the same as - // Roots[I] - Roots[I-1] for all I in [1..N). - // Define D = BaseInst@J - BaseInst@J-1, where "@J" means the value at the - // loop iteration J. - // - // Now, For the loop iterations to be consecutive: - // D = d * N - const auto *ADR = dyn_cast(SE->getSCEV(DRS.BaseInst)); - if (!ADR) - return false; - - // Check that the first root is evenly spaced. - unsigned N = DRS.Roots.size() + 1; - const SCEV *StepSCEV = SE->getMinusSCEV(SE->getSCEV(DRS.Roots[0]), ADR); - if (isa(StepSCEV) || StepSCEV->getType()->isPointerTy()) - return false; - const SCEV *ScaleSCEV = SE->getConstant(StepSCEV->getType(), N); - if (ADR->getStepRecurrence(*SE) != SE->getMulExpr(StepSCEV, ScaleSCEV)) - return false; - - // Check that the remainling roots are evenly spaced. - for (unsigned i = 1; i < N - 1; ++i) { - const SCEV *NewStepSCEV = SE->getMinusSCEV(SE->getSCEV(DRS.Roots[i]), - SE->getSCEV(DRS.Roots[i-1])); - if (NewStepSCEV != StepSCEV) - return false; - } - - return true; -} - -bool LoopReroll::DAGRootTracker:: -findRootsBase(Instruction *IVU, SmallInstructionSet SubsumedInsts) { - // The base of a RootSet must be an AddRec, so it can be erased. - const auto *IVU_ADR = dyn_cast(SE->getSCEV(IVU)); - if (!IVU_ADR || IVU_ADR->getLoop() != L) - return false; - - std::map V; - if (!collectPossibleRoots(IVU, V)) - return false; - - // If we didn't get a root for index zero, then IVU must be - // subsumed. - if (V.find(0) == V.end()) - SubsumedInsts.insert(IVU); - - // Partition the vector into monotonically increasing indexes. - DAGRootSet DRS; - DRS.BaseInst = nullptr; - - SmallVector PotentialRootSets; - - for (auto &KV : V) { - if (!DRS.BaseInst) { - DRS.BaseInst = KV.second; - DRS.SubsumedInsts = SubsumedInsts; - } else if (DRS.Roots.empty()) { - DRS.Roots.push_back(KV.second); - } else if (V.find(KV.first - 1) != V.end()) { - DRS.Roots.push_back(KV.second); - } else { - // Linear sequence terminated. - if (!validateRootSet(DRS)) - return false; - - // Construct a new DAGRootSet with the next sequence. - PotentialRootSets.push_back(DRS); - DRS.BaseInst = KV.second; - DRS.Roots.clear(); - } - } - - if (!validateRootSet(DRS)) - return false; - - PotentialRootSets.push_back(DRS); - - RootSets.append(PotentialRootSets.begin(), PotentialRootSets.end()); - - return true; -} - -bool LoopReroll::DAGRootTracker::findRoots() { - Inc = IVToIncMap[IV]; - - assert(RootSets.empty() && "Unclean state!"); - if (std::abs(Inc) == 1) { - for (auto *IVU : IV->users()) { - if (isLoopIncrement(IVU, IV)) - LoopIncs.push_back(cast(IVU)); - } - findRootsRecursive(IV, SmallInstructionSet()); - LoopIncs.push_back(IV); - } else { - if (!findRootsBase(IV, SmallInstructionSet())) - return false; - } - - // Ensure all sets have the same size. - if (RootSets.empty()) { - LLVM_DEBUG(dbgs() << "LRR: Aborting because no root sets found!\n"); - return false; - } - for (auto &V : RootSets) { - if (V.Roots.empty() || V.Roots.size() != RootSets[0].Roots.size()) { - LLVM_DEBUG( - dbgs() - << "LRR: Aborting because not all root sets have the same size\n"); - return false; - } - } - - Scale = RootSets[0].Roots.size() + 1; - - if (Scale > IL_MaxRerollIterations) { - LLVM_DEBUG(dbgs() << "LRR: Aborting - too many iterations found. " - << "#Found=" << Scale - << ", #Max=" << IL_MaxRerollIterations << "\n"); - return false; - } - - LLVM_DEBUG(dbgs() << "LRR: Successfully found roots: Scale=" << Scale - << "\n"); - - return true; -} - -bool LoopReroll::DAGRootTracker::collectUsedInstructions(SmallInstructionSet &PossibleRedSet) { - // Populate the MapVector with all instructions in the block, in order first, - // so we can iterate over the contents later in perfect order. - for (auto &I : *L->getHeader()) { - Uses[&I].resize(IL_End); - } - - SmallInstructionSet Exclude; - for (auto &DRS : RootSets) { - Exclude.insert(DRS.Roots.begin(), DRS.Roots.end()); - Exclude.insert(DRS.SubsumedInsts.begin(), DRS.SubsumedInsts.end()); - Exclude.insert(DRS.BaseInst); - } - Exclude.insert(LoopIncs.begin(), LoopIncs.end()); - - for (auto &DRS : RootSets) { - DenseSet VBase; - collectInLoopUserSet(DRS.BaseInst, Exclude, PossibleRedSet, VBase); - for (auto *I : VBase) { - Uses[I].set(0); - } - - unsigned Idx = 1; - for (auto *Root : DRS.Roots) { - DenseSet V; - collectInLoopUserSet(Root, Exclude, PossibleRedSet, V); - - // While we're here, check the use sets are the same size. - if (V.size() != VBase.size()) { - LLVM_DEBUG(dbgs() << "LRR: Aborting - use sets are different sizes\n"); - return false; - } - - for (auto *I : V) { - Uses[I].set(Idx); - } - ++Idx; - } - - // Make sure our subsumed instructions are remembered too. - for (auto *I : DRS.SubsumedInsts) { - Uses[I].set(IL_All); - } - } - - // Make sure the loop increments are also accounted for. - - Exclude.clear(); - for (auto &DRS : RootSets) { - Exclude.insert(DRS.Roots.begin(), DRS.Roots.end()); - Exclude.insert(DRS.SubsumedInsts.begin(), DRS.SubsumedInsts.end()); - Exclude.insert(DRS.BaseInst); - } - - DenseSet V; - collectInLoopUserSet(LoopIncs, Exclude, PossibleRedSet, V); - for (auto *I : V) { - if (I->mayHaveSideEffects()) { - LLVM_DEBUG(dbgs() << "LRR: Aborting - " - << "An instruction which does not belong to any root " - << "sets must not have side effects: " << *I); - return false; - } - Uses[I].set(IL_All); - } - - return true; -} - -/// Get the next instruction in "In" that is a member of set Val. -/// Start searching from StartI, and do not return anything in Exclude. -/// If StartI is not given, start from In.begin(). -LoopReroll::DAGRootTracker::UsesTy::iterator -LoopReroll::DAGRootTracker::nextInstr(int Val, UsesTy &In, - const SmallInstructionSet &Exclude, - UsesTy::iterator *StartI) { - UsesTy::iterator I = StartI ? *StartI : In.begin(); - while (I != In.end() && (I->second.test(Val) == 0 || - Exclude.contains(I->first))) - ++I; - return I; -} - -bool LoopReroll::DAGRootTracker::isBaseInst(Instruction *I) { - for (auto &DRS : RootSets) { - if (DRS.BaseInst == I) - return true; - } - return false; -} - -bool LoopReroll::DAGRootTracker::isRootInst(Instruction *I) { - for (auto &DRS : RootSets) { - if (is_contained(DRS.Roots, I)) - return true; - } - return false; -} - -/// Return true if instruction I depends on any instruction between -/// Start and End. -bool LoopReroll::DAGRootTracker::instrDependsOn(Instruction *I, - UsesTy::iterator Start, - UsesTy::iterator End) { - for (auto *U : I->users()) { - for (auto It = Start; It != End; ++It) - if (U == It->first) - return true; - } - return false; -} - -static bool isIgnorableInst(const Instruction *I) { - if (isa(I)) - return true; - const IntrinsicInst* II = dyn_cast(I); - if (!II) - return false; - switch (II->getIntrinsicID()) { - default: - return false; - case Intrinsic::annotation: - case Intrinsic::ptr_annotation: - case Intrinsic::var_annotation: - // TODO: the following intrinsics may also be allowed: - // lifetime_start, lifetime_end, invariant_start, invariant_end - return true; - } - return false; -} - -bool LoopReroll::DAGRootTracker::validate(ReductionTracker &Reductions) { - // We now need to check for equivalence of the use graph of each root with - // that of the primary induction variable (excluding the roots). Our goal - // here is not to solve the full graph isomorphism problem, but rather to - // catch common cases without a lot of work. As a result, we will assume - // that the relative order of the instructions in each unrolled iteration - // is the same (although we will not make an assumption about how the - // different iterations are intermixed). Note that while the order must be - // the same, the instructions may not be in the same basic block. - - // An array of just the possible reductions for this scale factor. When we - // collect the set of all users of some root instructions, these reduction - // instructions are treated as 'final' (their uses are not considered). - // This is important because we don't want the root use set to search down - // the reduction chain. - SmallInstructionSet PossibleRedSet; - SmallInstructionSet PossibleRedLastSet; - SmallInstructionSet PossibleRedPHISet; - Reductions.restrictToScale(Scale, PossibleRedSet, - PossibleRedPHISet, PossibleRedLastSet); - - // Populate "Uses" with where each instruction is used. - if (!collectUsedInstructions(PossibleRedSet)) - return false; - - // Make sure we mark the reduction PHIs as used in all iterations. - for (auto *I : PossibleRedPHISet) { - Uses[I].set(IL_All); - } - - // Make sure we mark loop-control-only PHIs as used in all iterations. See - // comment above LoopReroll::isLoopControlIV for more information. - BasicBlock *Header = L->getHeader(); - for (Instruction *LoopControlIV : LoopControlIVs) { - for (auto *U : LoopControlIV->users()) { - Instruction *IVUser = dyn_cast(U); - // IVUser could be loop increment or compare - Uses[IVUser].set(IL_All); - for (auto *UU : IVUser->users()) { - Instruction *UUser = dyn_cast(UU); - // UUser could be compare, PHI or branch - Uses[UUser].set(IL_All); - // Skip SExt - if (isa(UUser)) { - UUser = dyn_cast(*(UUser->user_begin())); - Uses[UUser].set(IL_All); - } - // Is UUser a compare instruction? - if (UU->hasOneUse()) { - Instruction *BI = dyn_cast(*UUser->user_begin()); - if (BI == cast(Header->getTerminator())) - Uses[BI].set(IL_All); - } - } - } - } - - // Make sure all instructions in the loop are in one and only one - // set. - for (auto &KV : Uses) { - if (KV.second.count() != 1 && !isIgnorableInst(KV.first)) { - LLVM_DEBUG( - dbgs() << "LRR: Aborting - instruction is not used in 1 iteration: " - << *KV.first << " (#uses=" << KV.second.count() << ")\n"); - return false; - } - } - - LLVM_DEBUG(for (auto &KV - : Uses) { - dbgs() << "LRR: " << KV.second.find_first() << "\t" << *KV.first << "\n"; - }); - - BatchAAResults BatchAA(*AA); - for (unsigned Iter = 1; Iter < Scale; ++Iter) { - // In addition to regular aliasing information, we need to look for - // instructions from later (future) iterations that have side effects - // preventing us from reordering them past other instructions with side - // effects. - bool FutureSideEffects = false; - AliasSetTracker AST(BatchAA); - // The map between instructions in f(%iv.(i+1)) and f(%iv). - DenseMap BaseMap; - - // Compare iteration Iter to the base. - SmallInstructionSet Visited; - auto BaseIt = nextInstr(0, Uses, Visited); - auto RootIt = nextInstr(Iter, Uses, Visited); - auto LastRootIt = Uses.begin(); - - while (BaseIt != Uses.end() && RootIt != Uses.end()) { - Instruction *BaseInst = BaseIt->first; - Instruction *RootInst = RootIt->first; - - // Skip over the IV or root instructions; only match their users. - bool Continue = false; - if (isBaseInst(BaseInst)) { - Visited.insert(BaseInst); - BaseIt = nextInstr(0, Uses, Visited); - Continue = true; - } - if (isRootInst(RootInst)) { - LastRootIt = RootIt; - Visited.insert(RootInst); - RootIt = nextInstr(Iter, Uses, Visited); - Continue = true; - } - if (Continue) continue; - - if (!BaseInst->isSameOperationAs(RootInst)) { - // Last chance saloon. We don't try and solve the full isomorphism - // problem, but try and at least catch the case where two instructions - // *of different types* are round the wrong way. We won't be able to - // efficiently tell, given two ADD instructions, which way around we - // should match them, but given an ADD and a SUB, we can at least infer - // which one is which. - // - // This should allow us to deal with a greater subset of the isomorphism - // problem. It does however change a linear algorithm into a quadratic - // one, so limit the number of probes we do. - auto TryIt = RootIt; - unsigned N = NumToleratedFailedMatches; - while (TryIt != Uses.end() && - !BaseInst->isSameOperationAs(TryIt->first) && - N--) { - ++TryIt; - TryIt = nextInstr(Iter, Uses, Visited, &TryIt); - } - - if (TryIt == Uses.end() || TryIt == RootIt || - instrDependsOn(TryIt->first, RootIt, TryIt)) { - LLVM_DEBUG(dbgs() << "LRR: iteration root match failed at " - << *BaseInst << " vs. " << *RootInst << "\n"); - return false; - } - - RootIt = TryIt; - RootInst = TryIt->first; - } - - // All instructions between the last root and this root - // may belong to some other iteration. If they belong to a - // future iteration, then they're dangerous to alias with. - // - // Note that because we allow a limited amount of flexibility in the order - // that we visit nodes, LastRootIt might be *before* RootIt, in which - // case we've already checked this set of instructions so we shouldn't - // do anything. - for (; LastRootIt < RootIt; ++LastRootIt) { - Instruction *I = LastRootIt->first; - if (LastRootIt->second.find_first() < (int)Iter) - continue; - if (I->mayWriteToMemory()) - AST.add(I); - // Note: This is specifically guarded by a check on isa, - // which while a valid (somewhat arbitrary) micro-optimization, is - // needed because otherwise isSafeToSpeculativelyExecute returns - // false on PHI nodes. - if (!isa(I) && !isUnorderedLoadStore(I) && - !isSafeToSpeculativelyExecute(I)) - // Intervening instructions cause side effects. - FutureSideEffects = true; - } - - // Make sure that this instruction, which is in the use set of this - // root instruction, does not also belong to the base set or the set of - // some other root instruction. - if (RootIt->second.count() > 1) { - LLVM_DEBUG(dbgs() << "LRR: iteration root match failed at " << *BaseInst - << " vs. " << *RootInst << " (prev. case overlap)\n"); - return false; - } - - // Make sure that we don't alias with any instruction in the alias set - // tracker. If we do, then we depend on a future iteration, and we - // can't reroll. - if (RootInst->mayReadFromMemory()) { - for (auto &K : AST) { - if (isModOrRefSet(K.aliasesUnknownInst(RootInst, BatchAA))) { - LLVM_DEBUG(dbgs() << "LRR: iteration root match failed at " - << *BaseInst << " vs. " << *RootInst - << " (depends on future store)\n"); - return false; - } - } - } - - // If we've past an instruction from a future iteration that may have - // side effects, and this instruction might also, then we can't reorder - // them, and this matching fails. As an exception, we allow the alias - // set tracker to handle regular (unordered) load/store dependencies. - if (FutureSideEffects && ((!isUnorderedLoadStore(BaseInst) && - !isSafeToSpeculativelyExecute(BaseInst)) || - (!isUnorderedLoadStore(RootInst) && - !isSafeToSpeculativelyExecute(RootInst)))) { - LLVM_DEBUG(dbgs() << "LRR: iteration root match failed at " << *BaseInst - << " vs. " << *RootInst - << " (side effects prevent reordering)\n"); - return false; - } - - // For instructions that are part of a reduction, if the operation is - // associative, then don't bother matching the operands (because we - // already know that the instructions are isomorphic, and the order - // within the iteration does not matter). For non-associative reductions, - // we do need to match the operands, because we need to reject - // out-of-order instructions within an iteration! - // For example (assume floating-point addition), we need to reject this: - // x += a[i]; x += b[i]; - // x += a[i+1]; x += b[i+1]; - // x += b[i+2]; x += a[i+2]; - bool InReduction = Reductions.isPairInSame(BaseInst, RootInst); - - if (!(InReduction && BaseInst->isAssociative())) { - bool Swapped = false, SomeOpMatched = false; - for (unsigned j = 0; j < BaseInst->getNumOperands(); ++j) { - Value *Op2 = RootInst->getOperand(j); - - // If this is part of a reduction (and the operation is not - // associatve), then we match all operands, but not those that are - // part of the reduction. - if (InReduction) - if (Instruction *Op2I = dyn_cast(Op2)) - if (Reductions.isPairInSame(RootInst, Op2I)) - continue; - - DenseMap::iterator BMI = BaseMap.find(Op2); - if (BMI != BaseMap.end()) { - Op2 = BMI->second; - } else { - for (auto &DRS : RootSets) { - if (DRS.Roots[Iter-1] == (Instruction*) Op2) { - Op2 = DRS.BaseInst; - break; - } - } - } - - if (BaseInst->getOperand(Swapped ? unsigned(!j) : j) != Op2) { - // If we've not already decided to swap the matched operands, and - // we've not already matched our first operand (note that we could - // have skipped matching the first operand because it is part of a - // reduction above), and the instruction is commutative, then try - // the swapped match. - if (!Swapped && BaseInst->isCommutative() && !SomeOpMatched && - BaseInst->getOperand(!j) == Op2) { - Swapped = true; - } else { - LLVM_DEBUG(dbgs() - << "LRR: iteration root match failed at " << *BaseInst - << " vs. " << *RootInst << " (operand " << j << ")\n"); - return false; - } - } - - SomeOpMatched = true; - } - } - - if ((!PossibleRedLastSet.count(BaseInst) && - hasUsesOutsideLoop(BaseInst, L)) || - (!PossibleRedLastSet.count(RootInst) && - hasUsesOutsideLoop(RootInst, L))) { - LLVM_DEBUG(dbgs() << "LRR: iteration root match failed at " << *BaseInst - << " vs. " << *RootInst << " (uses outside loop)\n"); - return false; - } - - Reductions.recordPair(BaseInst, RootInst, Iter); - BaseMap.insert(std::make_pair(RootInst, BaseInst)); - - LastRootIt = RootIt; - Visited.insert(BaseInst); - Visited.insert(RootInst); - BaseIt = nextInstr(0, Uses, Visited); - RootIt = nextInstr(Iter, Uses, Visited); - } - assert(BaseIt == Uses.end() && RootIt == Uses.end() && - "Mismatched set sizes!"); - } - - LLVM_DEBUG(dbgs() << "LRR: Matched all iteration increments for " << *IV - << "\n"); - - return true; -} - -void LoopReroll::DAGRootTracker::replace(const SCEV *BackedgeTakenCount) { - BasicBlock *Header = L->getHeader(); - - // Compute the start and increment for each BaseInst before we start erasing - // instructions. - SmallVector StartExprs; - SmallVector IncrExprs; - for (auto &DRS : RootSets) { - const SCEVAddRecExpr *IVSCEV = - cast(SE->getSCEV(DRS.BaseInst)); - StartExprs.push_back(IVSCEV->getStart()); - IncrExprs.push_back(SE->getMinusSCEV(SE->getSCEV(DRS.Roots[0]), IVSCEV)); - } - - // Remove instructions associated with non-base iterations. - for (Instruction &Inst : llvm::make_early_inc_range(llvm::reverse(*Header))) { - unsigned I = Uses[&Inst].find_first(); - if (I > 0 && I < IL_All) { - LLVM_DEBUG(dbgs() << "LRR: removing: " << Inst << "\n"); - Inst.eraseFromParent(); - } - } - - // Rewrite each BaseInst using SCEV. - for (size_t i = 0, e = RootSets.size(); i != e; ++i) - // Insert the new induction variable. - replaceIV(RootSets[i], StartExprs[i], IncrExprs[i]); - - { // Limit the lifetime of SCEVExpander. - BranchInst *BI = cast(Header->getTerminator()); - const DataLayout &DL = Header->getModule()->getDataLayout(); - SCEVExpander Expander(*SE, DL, "reroll"); - auto Zero = SE->getZero(BackedgeTakenCount->getType()); - auto One = SE->getOne(BackedgeTakenCount->getType()); - auto NewIVSCEV = SE->getAddRecExpr(Zero, One, L, SCEV::FlagAnyWrap); - Value *NewIV = - Expander.expandCodeFor(NewIVSCEV, BackedgeTakenCount->getType(), - Header->getFirstNonPHIOrDbg()); - // FIXME: This arithmetic can overflow. - auto TripCount = SE->getAddExpr(BackedgeTakenCount, One); - auto ScaledTripCount = SE->getMulExpr( - TripCount, SE->getConstant(BackedgeTakenCount->getType(), Scale)); - auto ScaledBECount = SE->getMinusSCEV(ScaledTripCount, One); - Value *TakenCount = - Expander.expandCodeFor(ScaledBECount, BackedgeTakenCount->getType(), - Header->getFirstNonPHIOrDbg()); - Value *Cond = - new ICmpInst(BI, CmpInst::ICMP_EQ, NewIV, TakenCount, "exitcond"); - BI->setCondition(Cond); - - if (BI->getSuccessor(1) != Header) - BI->swapSuccessors(); - } - - SimplifyInstructionsInBlock(Header, TLI); - DeleteDeadPHIs(Header, TLI); -} - -void LoopReroll::DAGRootTracker::replaceIV(DAGRootSet &DRS, - const SCEV *Start, - const SCEV *IncrExpr) { - BasicBlock *Header = L->getHeader(); - Instruction *Inst = DRS.BaseInst; - - const SCEV *NewIVSCEV = - SE->getAddRecExpr(Start, IncrExpr, L, SCEV::FlagAnyWrap); - - { // Limit the lifetime of SCEVExpander. - const DataLayout &DL = Header->getModule()->getDataLayout(); - SCEVExpander Expander(*SE, DL, "reroll"); - Value *NewIV = Expander.expandCodeFor(NewIVSCEV, Inst->getType(), - Header->getFirstNonPHIOrDbg()); - - for (auto &KV : Uses) - if (KV.second.find_first() == 0) - KV.first->replaceUsesOfWith(Inst, NewIV); - } -} - -// Validate the selected reductions. All iterations must have an isomorphic -// part of the reduction chain and, for non-associative reductions, the chain -// entries must appear in order. -bool LoopReroll::ReductionTracker::validateSelected() { - // For a non-associative reduction, the chain entries must appear in order. - for (int i : Reds) { - int PrevIter = 0, BaseCount = 0, Count = 0; - for (Instruction *J : PossibleReds[i]) { - // Note that all instructions in the chain must have been found because - // all instructions in the function must have been assigned to some - // iteration. - int Iter = PossibleRedIter[J]; - if (Iter != PrevIter && Iter != PrevIter + 1 && - !PossibleReds[i].getReducedValue()->isAssociative()) { - LLVM_DEBUG(dbgs() << "LRR: Out-of-order non-associative reduction: " - << J << "\n"); - return false; - } - - if (Iter != PrevIter) { - if (Count != BaseCount) { - LLVM_DEBUG(dbgs() - << "LRR: Iteration " << PrevIter << " reduction use count " - << Count << " is not equal to the base use count " - << BaseCount << "\n"); - return false; - } - - Count = 0; - } - - ++Count; - if (Iter == 0) - ++BaseCount; - - PrevIter = Iter; - } - } - - return true; -} - -// For all selected reductions, remove all parts except those in the first -// iteration (and the PHI). Replace outside uses of the reduced value with uses -// of the first-iteration reduced value (in other words, reroll the selected -// reductions). -void LoopReroll::ReductionTracker::replaceSelected() { - // Fixup reductions to refer to the last instruction associated with the - // first iteration (not the last). - for (int i : Reds) { - int j = 0; - for (int e = PossibleReds[i].size(); j != e; ++j) - if (PossibleRedIter[PossibleReds[i][j]] != 0) { - --j; - break; - } - - // Replace users with the new end-of-chain value. - SmallInstructionVector Users; - for (User *U : PossibleReds[i].getReducedValue()->users()) { - Users.push_back(cast(U)); - } - - for (Instruction *User : Users) - User->replaceUsesOfWith(PossibleReds[i].getReducedValue(), - PossibleReds[i][j]); - } -} - -// Reroll the provided loop with respect to the provided induction variable. -// Generally, we're looking for a loop like this: -// -// %iv = phi [ (preheader, ...), (body, %iv.next) ] -// f(%iv) -// %iv.1 = add %iv, 1 <-- a root increment -// f(%iv.1) -// %iv.2 = add %iv, 2 <-- a root increment -// f(%iv.2) -// %iv.scale_m_1 = add %iv, scale-1 <-- a root increment -// f(%iv.scale_m_1) -// ... -// %iv.next = add %iv, scale -// %cmp = icmp(%iv, ...) -// br %cmp, header, exit -// -// Notably, we do not require that f(%iv), f(%iv.1), etc. be isolated groups of -// instructions. In other words, the instructions in f(%iv), f(%iv.1), etc. can -// be intermixed with eachother. The restriction imposed by this algorithm is -// that the relative order of the isomorphic instructions in f(%iv), f(%iv.1), -// etc. be the same. -// -// First, we collect the use set of %iv, excluding the other increment roots. -// This gives us f(%iv). Then we iterate over the loop instructions (scale-1) -// times, having collected the use set of f(%iv.(i+1)), during which we: -// - Ensure that the next unmatched instruction in f(%iv) is isomorphic to -// the next unmatched instruction in f(%iv.(i+1)). -// - Ensure that both matched instructions don't have any external users -// (with the exception of last-in-chain reduction instructions). -// - Track the (aliasing) write set, and other side effects, of all -// instructions that belong to future iterations that come before the matched -// instructions. If the matched instructions read from that write set, then -// f(%iv) or f(%iv.(i+1)) has some dependency on instructions in -// f(%iv.(j+1)) for some j > i, and we cannot reroll the loop. Similarly, -// if any of these future instructions had side effects (could not be -// speculatively executed), and so do the matched instructions, when we -// cannot reorder those side-effect-producing instructions, and rerolling -// fails. -// -// Finally, we make sure that all loop instructions are either loop increment -// roots, belong to simple latch code, parts of validated reductions, part of -// f(%iv) or part of some f(%iv.i). If all of that is true (and all reductions -// have been validated), then we reroll the loop. -bool LoopReroll::reroll(Instruction *IV, Loop *L, BasicBlock *Header, - const SCEV *BackedgeTakenCount, - ReductionTracker &Reductions) { - DAGRootTracker DAGRoots(this, L, IV, SE, AA, TLI, DT, LI, PreserveLCSSA, - IVToIncMap, LoopControlIVs); - - if (!DAGRoots.findRoots()) - return false; - LLVM_DEBUG(dbgs() << "LRR: Found all root induction increments for: " << *IV - << "\n"); - - if (!DAGRoots.validate(Reductions)) - return false; - if (!Reductions.validateSelected()) - return false; - // At this point, we've validated the rerolling, and we're committed to - // making changes! - - Reductions.replaceSelected(); - DAGRoots.replace(BackedgeTakenCount); - - ++NumRerolledLoops; - return true; -} - -bool LoopReroll::runOnLoop(Loop *L) { - BasicBlock *Header = L->getHeader(); - LLVM_DEBUG(dbgs() << "LRR: F[" << Header->getParent()->getName() << "] Loop %" - << Header->getName() << " (" << L->getNumBlocks() - << " block(s))\n"); - - // For now, we'll handle only single BB loops. - if (L->getNumBlocks() > 1) - return false; - - if (!SE->hasLoopInvariantBackedgeTakenCount(L)) - return false; - - const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L); - LLVM_DEBUG(dbgs() << "\n Before Reroll:\n" << *(L->getHeader()) << "\n"); - LLVM_DEBUG(dbgs() << "LRR: backedge-taken count = " << *BackedgeTakenCount - << "\n"); - - // First, we need to find the induction variable with respect to which we can - // reroll (there may be several possible options). - SmallInstructionVector PossibleIVs; - IVToIncMap.clear(); - LoopControlIVs.clear(); - collectPossibleIVs(L, PossibleIVs); - - if (PossibleIVs.empty()) { - LLVM_DEBUG(dbgs() << "LRR: No possible IVs found\n"); - return false; - } - - ReductionTracker Reductions; - collectPossibleReductions(L, Reductions); - bool Changed = false; - - // For each possible IV, collect the associated possible set of 'root' nodes - // (i+1, i+2, etc.). - for (Instruction *PossibleIV : PossibleIVs) - if (reroll(PossibleIV, L, Header, BackedgeTakenCount, Reductions)) { - Changed = true; - break; - } - LLVM_DEBUG(dbgs() << "\n After Reroll:\n" << *(L->getHeader()) << "\n"); - - // Trip count of L has changed so SE must be re-evaluated. - if (Changed) - SE->forgetLoop(L); - - return Changed; -} - -PreservedAnalyses LoopRerollPass::run(Loop &L, LoopAnalysisManager &AM, - LoopStandardAnalysisResults &AR, - LPMUpdater &U) { - return LoopReroll(&AR.AA, &AR.LI, &AR.SE, &AR.TLI, &AR.DT, true).runOnLoop(&L) - ? getLoopPassPreservedAnalyses() - : PreservedAnalyses::all(); -} diff --git a/llvm/test/Transforms/LoopReroll/basic.ll b/llvm/test/Transforms/LoopReroll/basic.ll deleted file mode 100644 index 92d3456505759..0000000000000 --- a/llvm/test/Transforms/LoopReroll/basic.ll +++ /dev/null @@ -1,976 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 -; RUN: opt < %s -passes=loop-reroll -S | FileCheck %s -target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -; int foo(int a); -; void bar(int *x) { -; for (int i = 0; i < 500; i += 3) { -; foo(i); -; foo(i+1); -; foo(i+2); -; } -; } - -define void @bar(ptr nocapture readnone %x) #0 { -; CHECK-LABEL: define void @bar -; CHECK-SAME: (ptr nocapture readnone [[X:%.*]]) #[[ATTR0:[0-9]+]] { -; CHECK-NEXT: entry: -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[INDVAR:%.*]] = phi i32 [ [[INDVAR_NEXT:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[CALL:%.*]] = tail call i32 @foo(i32 [[INDVAR]]) #[[ATTR1:[0-9]+]] -; CHECK-NEXT: [[INDVAR_NEXT]] = add i32 [[INDVAR]], 1 -; CHECK-NEXT: [[EXITCOND1:%.*]] = icmp eq i32 [[INDVAR]], 500 -; CHECK-NEXT: br i1 [[EXITCOND1]], label [[FOR_END:%.*]], label [[FOR_BODY]] -; CHECK: for.end: -; CHECK-NEXT: ret void -; -entry: - br label %for.body - -for.body: ; preds = %for.body, %entry - %i.08 = phi i32 [ 0, %entry ], [ %add3, %for.body ] - %call = tail call i32 @foo(i32 %i.08) #1 - %add = add nsw i32 %i.08, 1 - %call1 = tail call i32 @foo(i32 %add) #1 - %add2 = add nsw i32 %i.08, 2 - %call3 = tail call i32 @foo(i32 %add2) #1 - %add3 = add nsw i32 %i.08, 3 - %exitcond = icmp sge i32 %add3, 500 - br i1 %exitcond, label %for.end, label %for.body - -for.end: ; preds = %for.body - ret void -} - -declare i32 @foo(i32) - -; void hi1(int *x) { -; for (int i = 0; i < 1500; i += 3) { -; x[i] = foo(0); -; x[i+1] = foo(0); -; x[i+2] = foo(0); -; } -; } - -; Function Attrs: nounwind uwtable -define void @hi1(ptr nocapture %x) #0 { -; CHECK-LABEL: define void @hi1 -; CHECK-SAME: (ptr nocapture [[X:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: entry: -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ [[INDVAR_NEXT:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[TMP0:%.*]] = trunc i64 [[INDVAR]] to i32 -; CHECK-NEXT: [[CALL:%.*]] = tail call i32 @foo(i32 0) #[[ATTR1]] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[INDVAR]] -; CHECK-NEXT: store i32 [[CALL]], ptr [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[TMP0]], 1499 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]] -; CHECK: for.end: -; CHECK-NEXT: ret void -; -entry: - br label %for.body - -for.body: ; preds = %entry, %for.body - %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] - %call = tail call i32 @foo(i32 0) #1 - %arrayidx = getelementptr inbounds i32, ptr %x, i64 %indvars.iv - store i32 %call, ptr %arrayidx, align 4 - %call1 = tail call i32 @foo(i32 0) #1 - %0 = add nsw i64 %indvars.iv, 1 - %arrayidx3 = getelementptr inbounds i32, ptr %x, i64 %0 - store i32 %call1, ptr %arrayidx3, align 4 - %call4 = tail call i32 @foo(i32 0) #1 - %1 = add nsw i64 %indvars.iv, 2 - %arrayidx7 = getelementptr inbounds i32, ptr %x, i64 %1 - store i32 %call4, ptr %arrayidx7, align 4 - %indvars.iv.next = add nuw nsw i64 %indvars.iv, 3 - %2 = trunc i64 %indvars.iv.next to i32 - %cmp = icmp slt i32 %2, 1500 - br i1 %cmp, label %for.body, label %for.end - -for.end: ; preds = %for.body - ret void -} - -; void hi2(int *x) { -; for (int i = 0; i < 500; ++i) { -; x[3*i] = foo(0); -; x[3*i+1] = foo(0); -; x[3*i+2] = foo(0); -; } -; } - -; Function Attrs: nounwind uwtable -define void @hi2(ptr nocapture %x) #0 { -; CHECK-LABEL: define void @hi2 -; CHECK-SAME: (ptr nocapture [[X:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: entry: -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[CALL:%.*]] = tail call i32 @foo(i32 0) #[[ATTR1]] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[INDVARS_IV]] -; CHECK-NEXT: store i32 [[CALL]], ptr [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[EXITCOND1:%.*]] = icmp eq i64 [[INDVARS_IV]], 1499 -; CHECK-NEXT: br i1 [[EXITCOND1]], label [[FOR_END:%.*]], label [[FOR_BODY]] -; CHECK: for.end: -; CHECK-NEXT: ret void -; -entry: - br label %for.body - -for.body: ; preds = %for.body, %entry - %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] - %call = tail call i32 @foo(i32 0) #1 - %0 = mul nsw i64 %indvars.iv, 3 - %arrayidx = getelementptr inbounds i32, ptr %x, i64 %0 - store i32 %call, ptr %arrayidx, align 4 - %call1 = tail call i32 @foo(i32 0) #1 - %1 = add nsw i64 %0, 1 - %arrayidx4 = getelementptr inbounds i32, ptr %x, i64 %1 - store i32 %call1, ptr %arrayidx4, align 4 - %call5 = tail call i32 @foo(i32 0) #1 - %2 = add nsw i64 %0, 2 - %arrayidx9 = getelementptr inbounds i32, ptr %x, i64 %2 - store i32 %call5, ptr %arrayidx9, align 4 - %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 - %exitcond = icmp eq i64 %indvars.iv.next, 500 - br i1 %exitcond, label %for.end, label %for.body - -for.end: ; preds = %for.body - ret void -} - -; void goo(float alpha, float *a, float *b) { -; for (int i = 0; i < 3200; i += 5) { -; a[i] += alpha * b[i]; -; a[i + 1] += alpha * b[i + 1]; -; a[i + 2] += alpha * b[i + 2]; -; a[i + 3] += alpha * b[i + 3]; -; a[i + 4] += alpha * b[i + 4]; -; } -; } - -; Function Attrs: nounwind uwtable -define void @goo(float %alpha, ptr nocapture %a, ptr nocapture readonly %b) #0 { -; CHECK-LABEL: define void @goo -; CHECK-SAME: (float [[ALPHA:%.*]], ptr nocapture [[A:%.*]], ptr nocapture readonly [[B:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: entry: -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ [[INDVAR_NEXT:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[TMP0:%.*]] = trunc i64 [[INDVAR]] to i32 -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDVAR]] -; CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[MUL:%.*]] = fmul float [[TMP1]], [[ALPHA]] -; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVAR]] -; CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[ARRAYIDX2]], align 4 -; CHECK-NEXT: [[ADD:%.*]] = fadd float [[TMP2]], [[MUL]] -; CHECK-NEXT: store float [[ADD]], ptr [[ARRAYIDX2]], align 4 -; CHECK-NEXT: [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[TMP0]], 3199 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]] -; CHECK: for.end: -; CHECK-NEXT: ret void -; -entry: - br label %for.body - -for.body: ; preds = %entry, %for.body - %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] - %arrayidx = getelementptr inbounds float, ptr %b, i64 %indvars.iv - %0 = load float, ptr %arrayidx, align 4 - %mul = fmul float %0, %alpha - %arrayidx2 = getelementptr inbounds float, ptr %a, i64 %indvars.iv - %1 = load float, ptr %arrayidx2, align 4 - %add = fadd float %1, %mul - store float %add, ptr %arrayidx2, align 4 - %2 = add nsw i64 %indvars.iv, 1 - %arrayidx5 = getelementptr inbounds float, ptr %b, i64 %2 - %3 = load float, ptr %arrayidx5, align 4 - %mul6 = fmul float %3, %alpha - %arrayidx9 = getelementptr inbounds float, ptr %a, i64 %2 - %4 = load float, ptr %arrayidx9, align 4 - %add10 = fadd float %4, %mul6 - store float %add10, ptr %arrayidx9, align 4 - %5 = add nsw i64 %indvars.iv, 2 - %arrayidx13 = getelementptr inbounds float, ptr %b, i64 %5 - %6 = load float, ptr %arrayidx13, align 4 - %mul14 = fmul float %6, %alpha - %arrayidx17 = getelementptr inbounds float, ptr %a, i64 %5 - %7 = load float, ptr %arrayidx17, align 4 - %add18 = fadd float %7, %mul14 - store float %add18, ptr %arrayidx17, align 4 - %8 = add nsw i64 %indvars.iv, 3 - %arrayidx21 = getelementptr inbounds float, ptr %b, i64 %8 - %9 = load float, ptr %arrayidx21, align 4 - %mul22 = fmul float %9, %alpha - %arrayidx25 = getelementptr inbounds float, ptr %a, i64 %8 - %10 = load float, ptr %arrayidx25, align 4 - %add26 = fadd float %10, %mul22 - store float %add26, ptr %arrayidx25, align 4 - %11 = add nsw i64 %indvars.iv, 4 - %arrayidx29 = getelementptr inbounds float, ptr %b, i64 %11 - %12 = load float, ptr %arrayidx29, align 4 - %mul30 = fmul float %12, %alpha - %arrayidx33 = getelementptr inbounds float, ptr %a, i64 %11 - %13 = load float, ptr %arrayidx33, align 4 - %add34 = fadd float %13, %mul30 - store float %add34, ptr %arrayidx33, align 4 - %indvars.iv.next = add nuw nsw i64 %indvars.iv, 5 - %14 = trunc i64 %indvars.iv.next to i32 - %cmp = icmp slt i32 %14, 3200 - br i1 %cmp, label %for.body, label %for.end - -for.end: ; preds = %for.body - ret void -} - -; void hoo(float alpha, float *a, float *b, int *ip) { -; for (int i = 0; i < 3200; i += 5) { -; a[i] += alpha * b[ip[i]]; -; a[i + 1] += alpha * b[ip[i + 1]]; -; a[i + 2] += alpha * b[ip[i + 2]]; -; a[i + 3] += alpha * b[ip[i + 3]]; -; a[i + 4] += alpha * b[ip[i + 4]]; -; } -; } - -; Function Attrs: nounwind uwtable -define void @hoo(float %alpha, ptr nocapture %a, ptr nocapture readonly %b, ptr nocapture readonly %ip) #0 { -; CHECK-LABEL: define void @hoo -; CHECK-SAME: (float [[ALPHA:%.*]], ptr nocapture [[A:%.*]], ptr nocapture readonly [[B:%.*]], ptr nocapture readonly [[IP:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: entry: -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ [[INDVAR_NEXT:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[TMP0:%.*]] = trunc i64 [[INDVAR]] to i32 -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[IP]], i64 [[INDVAR]] -; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[IDXPROM1:%.*]] = sext i32 [[TMP1]] to i64 -; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[IDXPROM1]] -; CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[ARRAYIDX2]], align 4 -; CHECK-NEXT: [[MUL:%.*]] = fmul float [[TMP2]], [[ALPHA]] -; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVAR]] -; CHECK-NEXT: [[TMP3:%.*]] = load float, ptr [[ARRAYIDX4]], align 4 -; CHECK-NEXT: [[ADD:%.*]] = fadd float [[TMP3]], [[MUL]] -; CHECK-NEXT: store float [[ADD]], ptr [[ARRAYIDX4]], align 4 -; CHECK-NEXT: [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[TMP0]], 3199 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]] -; CHECK: for.end: -; CHECK-NEXT: ret void -; -entry: - br label %for.body - -for.body: ; preds = %entry, %for.body - %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] - %arrayidx = getelementptr inbounds i32, ptr %ip, i64 %indvars.iv - %0 = load i32, ptr %arrayidx, align 4 - %idxprom1 = sext i32 %0 to i64 - %arrayidx2 = getelementptr inbounds float, ptr %b, i64 %idxprom1 - %1 = load float, ptr %arrayidx2, align 4 - %mul = fmul float %1, %alpha - %arrayidx4 = getelementptr inbounds float, ptr %a, i64 %indvars.iv - %2 = load float, ptr %arrayidx4, align 4 - %add = fadd float %2, %mul - store float %add, ptr %arrayidx4, align 4 - %3 = add nsw i64 %indvars.iv, 1 - %arrayidx7 = getelementptr inbounds i32, ptr %ip, i64 %3 - %4 = load i32, ptr %arrayidx7, align 4 - %idxprom8 = sext i32 %4 to i64 - %arrayidx9 = getelementptr inbounds float, ptr %b, i64 %idxprom8 - %5 = load float, ptr %arrayidx9, align 4 - %mul10 = fmul float %5, %alpha - %arrayidx13 = getelementptr inbounds float, ptr %a, i64 %3 - %6 = load float, ptr %arrayidx13, align 4 - %add14 = fadd float %6, %mul10 - store float %add14, ptr %arrayidx13, align 4 - %7 = add nsw i64 %indvars.iv, 2 - %arrayidx17 = getelementptr inbounds i32, ptr %ip, i64 %7 - %8 = load i32, ptr %arrayidx17, align 4 - %idxprom18 = sext i32 %8 to i64 - %arrayidx19 = getelementptr inbounds float, ptr %b, i64 %idxprom18 - %9 = load float, ptr %arrayidx19, align 4 - %mul20 = fmul float %9, %alpha - %arrayidx23 = getelementptr inbounds float, ptr %a, i64 %7 - %10 = load float, ptr %arrayidx23, align 4 - %add24 = fadd float %10, %mul20 - store float %add24, ptr %arrayidx23, align 4 - %11 = add nsw i64 %indvars.iv, 3 - %arrayidx27 = getelementptr inbounds i32, ptr %ip, i64 %11 - %12 = load i32, ptr %arrayidx27, align 4 - %idxprom28 = sext i32 %12 to i64 - %arrayidx29 = getelementptr inbounds float, ptr %b, i64 %idxprom28 - %13 = load float, ptr %arrayidx29, align 4 - %mul30 = fmul float %13, %alpha - %arrayidx33 = getelementptr inbounds float, ptr %a, i64 %11 - %14 = load float, ptr %arrayidx33, align 4 - %add34 = fadd float %14, %mul30 - store float %add34, ptr %arrayidx33, align 4 - %15 = add nsw i64 %indvars.iv, 4 - %arrayidx37 = getelementptr inbounds i32, ptr %ip, i64 %15 - %16 = load i32, ptr %arrayidx37, align 4 - %idxprom38 = sext i32 %16 to i64 - %arrayidx39 = getelementptr inbounds float, ptr %b, i64 %idxprom38 - %17 = load float, ptr %arrayidx39, align 4 - %mul40 = fmul float %17, %alpha - %arrayidx43 = getelementptr inbounds float, ptr %a, i64 %15 - %18 = load float, ptr %arrayidx43, align 4 - %add44 = fadd float %18, %mul40 - store float %add44, ptr %arrayidx43, align 4 - %indvars.iv.next = add nuw nsw i64 %indvars.iv, 5 - %19 = trunc i64 %indvars.iv.next to i32 - %cmp = icmp slt i32 %19, 3200 - br i1 %cmp, label %for.body, label %for.end - - - - -for.end: ; preds = %for.body - ret void -} - -; void multi1(int *x) { -; y = foo(0) -; for (int i = 0; i < 500; ++i) { -; x[3*i] = y; -; x[3*i+1] = y; -; x[3*i+2] = y; -; x[3*i+6] = y; -; x[3*i+7] = y; -; x[3*i+8] = y; -; } -; } - -; Function Attrs: nounwind uwtable -define void @multi1(ptr nocapture %x) #0 { -; CHECK-LABEL: define void @multi1 -; CHECK-SAME: (ptr nocapture [[X:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: entry: -; CHECK-NEXT: [[CALL:%.*]] = tail call i32 @foo(i32 0) #[[ATTR1]] -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDVARS_IV]], 6 -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[INDVARS_IV]] -; CHECK-NEXT: store i32 [[CALL]], ptr [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[TMP0]] -; CHECK-NEXT: store i32 [[CALL]], ptr [[ARRAYIDX6]], align 4 -; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[EXITCOND1:%.*]] = icmp eq i64 [[INDVARS_IV]], 1499 -; CHECK-NEXT: br i1 [[EXITCOND1]], label [[FOR_END:%.*]], label [[FOR_BODY]] -; CHECK: for.end: -; CHECK-NEXT: ret void -; -entry: - %call = tail call i32 @foo(i32 0) #1 - br label %for.body - -for.body: ; preds = %for.body, %entry - %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] - %0 = mul nsw i64 %indvars.iv, 3 - %arrayidx = getelementptr inbounds i32, ptr %x, i64 %0 - store i32 %call, ptr %arrayidx, align 4 - %1 = add nsw i64 %0, 1 - %arrayidx4 = getelementptr inbounds i32, ptr %x, i64 %1 - store i32 %call, ptr %arrayidx4, align 4 - %2 = add nsw i64 %0, 2 - %arrayidx9 = getelementptr inbounds i32, ptr %x, i64 %2 - store i32 %call, ptr %arrayidx9, align 4 - %3 = add nsw i64 %0, 6 - %arrayidx6 = getelementptr inbounds i32, ptr %x, i64 %3 - store i32 %call, ptr %arrayidx6, align 4 - %4 = add nsw i64 %0, 7 - %arrayidx7 = getelementptr inbounds i32, ptr %x, i64 %4 - store i32 %call, ptr %arrayidx7, align 4 - %5 = add nsw i64 %0, 8 - %arrayidx8 = getelementptr inbounds i32, ptr %x, i64 %5 - store i32 %call, ptr %arrayidx8, align 4 - %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 - %exitcond = icmp eq i64 %indvars.iv.next, 500 - br i1 %exitcond, label %for.end, label %for.body - - - -for.end: ; preds = %for.body - ret void -} - -; void multi2(int *x) { -; y = foo(0) -; for (int i = 0; i < 500; ++i) { -; x[3*i] = y; -; x[3*i+1] = y; -; x[3*i+2] = y; -; x[3*(i+1)] = y; -; x[3*(i+1)+1] = y; -; x[3*(i+1)+2] = y; -; } -; } - -; Function Attrs: nounwind uwtable -define void @multi2(ptr nocapture %x) #0 { -; CHECK-LABEL: define void @multi2 -; CHECK-SAME: (ptr nocapture [[X:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: entry: -; CHECK-NEXT: [[CALL:%.*]] = tail call i32 @foo(i32 0) #[[ATTR1]] -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDVARS_IV]], 3 -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[INDVARS_IV]] -; CHECK-NEXT: store i32 [[CALL]], ptr [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[TMP0]] -; CHECK-NEXT: store i32 [[CALL]], ptr [[ARRAYIDX6]], align 4 -; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[EXITCOND1:%.*]] = icmp eq i64 [[INDVARS_IV]], 1499 -; CHECK-NEXT: br i1 [[EXITCOND1]], label [[FOR_END:%.*]], label [[FOR_BODY]] -; CHECK: for.end: -; CHECK-NEXT: ret void -; -entry: - %call = tail call i32 @foo(i32 0) #1 - br label %for.body - -for.body: ; preds = %for.body, %entry - %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] - %0 = mul nsw i64 %indvars.iv, 3 - %add = add nsw i64 %indvars.iv, 1 - %newmul = mul nsw i64 %add, 3 - %arrayidx = getelementptr inbounds i32, ptr %x, i64 %0 - store i32 %call, ptr %arrayidx, align 4 - %1 = add nsw i64 %0, 1 - %arrayidx4 = getelementptr inbounds i32, ptr %x, i64 %1 - store i32 %call, ptr %arrayidx4, align 4 - %2 = add nsw i64 %0, 2 - %arrayidx9 = getelementptr inbounds i32, ptr %x, i64 %2 - store i32 %call, ptr %arrayidx9, align 4 - %arrayidx6 = getelementptr inbounds i32, ptr %x, i64 %newmul - store i32 %call, ptr %arrayidx6, align 4 - %3 = add nsw i64 %newmul, 1 - %arrayidx7 = getelementptr inbounds i32, ptr %x, i64 %3 - store i32 %call, ptr %arrayidx7, align 4 - %4 = add nsw i64 %newmul, 2 - %arrayidx8 = getelementptr inbounds i32, ptr %x, i64 %4 - store i32 %call, ptr %arrayidx8, align 4 - %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 - %exitcond = icmp eq i64 %indvars.iv.next, 500 - br i1 %exitcond, label %for.end, label %for.body - - - -for.end: ; preds = %for.body - ret void -} - -; void multi3(int *x) { -; y = foo(0) -; for (int i = 0; i < 500; ++i) { -; // Note: No zero index -; x[3*i+3] = y; -; x[3*i+4] = y; -; x[3*i+5] = y; -; } -; } - -; Function Attrs: nounwind uwtable -define void @multi3(ptr nocapture %x) #0 { -; CHECK-LABEL: define void @multi3 -; CHECK-SAME: (ptr nocapture [[X:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: entry: -; CHECK-NEXT: [[CALL:%.*]] = tail call i32 @foo(i32 0) #[[ATTR1]] -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDVARS_IV]], 3 -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[TMP0]] -; CHECK-NEXT: store i32 [[CALL]], ptr [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[EXITCOND1:%.*]] = icmp eq i64 [[INDVARS_IV]], 1499 -; CHECK-NEXT: br i1 [[EXITCOND1]], label [[FOR_END:%.*]], label [[FOR_BODY]] -; CHECK: for.end: -; CHECK-NEXT: ret void -; -entry: - %call = tail call i32 @foo(i32 0) #1 - br label %for.body - -for.body: ; preds = %for.body, %entry - %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] - %0 = mul nsw i64 %indvars.iv, 3 - %x0 = add nsw i64 %0, 3 - %add = add nsw i64 %indvars.iv, 1 - %arrayidx = getelementptr inbounds i32, ptr %x, i64 %x0 - store i32 %call, ptr %arrayidx, align 4 - %1 = add nsw i64 %0, 4 - %arrayidx4 = getelementptr inbounds i32, ptr %x, i64 %1 - store i32 %call, ptr %arrayidx4, align 4 - %2 = add nsw i64 %0, 5 - %arrayidx9 = getelementptr inbounds i32, ptr %x, i64 %2 - store i32 %call, ptr %arrayidx9, align 4 - %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 - %exitcond = icmp eq i64 %indvars.iv.next, 500 - br i1 %exitcond, label %for.end, label %for.body - - -for.end: ; preds = %for.body - ret void -} - -; int foo(int a); -; void bar2(int *x, int y, int z) { -; for (int i = 0; i < 500; i += 3) { -; foo(i+y+i*z); // Slightly reordered instruction order -; foo(i+1+y+(i+1)*z); -; foo(i+2+y+(i+2)*z); -; } -; } - -; Function Attrs: nounwind uwtable -define void @bar2(ptr nocapture readnone %x, i32 %y, i32 %z) #0 { -; CHECK-LABEL: define void @bar2 -; CHECK-SAME: (ptr nocapture readnone [[X:%.*]], i32 [[Y:%.*]], i32 [[Z:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: entry: -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[INDVAR:%.*]] = phi i32 [ [[INDVAR_NEXT:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[INDVAR]], [[Y]] -; CHECK-NEXT: [[TMP2:%.*]] = mul i32 [[INDVAR]], [[Z]] -; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[TMP2]], [[TMP1]] -; CHECK-NEXT: [[CALL:%.*]] = tail call i32 @foo(i32 [[TMP3]]) #[[ATTR1]] -; CHECK-NEXT: [[INDVAR_NEXT]] = add i32 [[INDVAR]], 1 -; CHECK-NEXT: [[EXITCOND1:%.*]] = icmp eq i32 [[INDVAR]], 500 -; CHECK-NEXT: br i1 [[EXITCOND1]], label [[FOR_END:%.*]], label [[FOR_BODY]] -; CHECK: for.end: -; CHECK-NEXT: ret void -; -entry: - br label %for.body - -for.body: ; preds = %for.body, %entry - %i.08 = phi i32 [ 0, %entry ], [ %add3, %for.body ] - - %tmp1 = add i32 %i.08, %y - %tmp2 = mul i32 %i.08, %z - %tmp3 = add i32 %tmp2, %tmp1 - %call = tail call i32 @foo(i32 %tmp3) #1 - - %add = add nsw i32 %i.08, 1 - %tmp2a = mul i32 %add, %z - %tmp1a = add i32 %add, %y - %tmp3a = add i32 %tmp2a, %tmp1a - %calla = tail call i32 @foo(i32 %tmp3a) #1 - - %add2 = add nsw i32 %i.08, 2 - %tmp2b = mul i32 %add2, %z - %tmp1b = add i32 %add2, %y - %tmp3b = add i32 %tmp2b, %tmp1b - %callb = tail call i32 @foo(i32 %tmp3b) #1 - - %add3 = add nsw i32 %i.08, 3 - - %exitcond = icmp sge i32 %add3, 500 - br i1 %exitcond, label %for.end, label %for.body - -for.end: ; preds = %for.body - ret void -} - -%struct.s = type { i32, i32 } - -; Function Attrs: nounwind uwtable -define void @gep1(ptr nocapture %x) #0 { -; CHECK-LABEL: define void @gep1 -; CHECK-SAME: (ptr nocapture [[X:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: entry: -; CHECK-NEXT: [[CALL:%.*]] = tail call i32 @foo(i32 0) #[[ATTR1]] -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = mul nsw i64 [[INDVARS_IV]], 3 -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], ptr [[X]], i64 [[TMP0]], i32 0 -; CHECK-NEXT: store i32 [[CALL]], ptr [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[TMP1:%.*]] = add nsw i64 [[TMP0]], 1 -; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[X]], i64 [[TMP1]], i32 0 -; CHECK-NEXT: store i32 [[CALL]], ptr [[ARRAYIDX4]], align 4 -; CHECK-NEXT: [[TMP2:%.*]] = add nsw i64 [[TMP0]], 2 -; CHECK-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[X]], i64 [[TMP2]], i32 0 -; CHECK-NEXT: store i32 [[CALL]], ptr [[ARRAYIDX9]], align 4 -; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 500 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]] -; CHECK: for.end: -; CHECK-NEXT: ret void -; -entry: - %call = tail call i32 @foo(i32 0) #1 - br label %for.body - -for.body: ; preds = %for.body, %entry - %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] - %0 = mul nsw i64 %indvars.iv, 3 - %arrayidx = getelementptr inbounds %struct.s, ptr %x, i64 %0, i32 0 - store i32 %call, ptr %arrayidx, align 4 - %1 = add nsw i64 %0, 1 - %arrayidx4 = getelementptr inbounds %struct.s, ptr %x, i64 %1, i32 0 - store i32 %call, ptr %arrayidx4, align 4 - %2 = add nsw i64 %0, 2 - %arrayidx9 = getelementptr inbounds %struct.s, ptr %x, i64 %2, i32 0 - store i32 %call, ptr %arrayidx9, align 4 - %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 - %exitcond = icmp eq i64 %indvars.iv.next, 500 - br i1 %exitcond, label %for.end, label %for.body - -; This test is a crash test only. -for.end: ; preds = %for.body - ret void -} - -define void @gep-indexing(ptr nocapture %x) { -; CHECK-LABEL: define void @gep-indexing -; CHECK-SAME: (ptr nocapture [[X:%.*]]) { -; CHECK-NEXT: entry: -; CHECK-NEXT: [[CALL:%.*]] = tail call i32 @foo(i32 0) #[[ATTR1]] -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = shl nuw nsw i64 [[INDVARS_IV]], 2 -; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[X]], i64 [[TMP0]] -; CHECK-NEXT: store i32 [[CALL]], ptr [[SCEVGEP]], align 4 -; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[EXITCOND1:%.*]] = icmp eq i64 [[INDVARS_IV]], 1499 -; CHECK-NEXT: br i1 [[EXITCOND1]], label [[FOR_END:%.*]], label [[FOR_BODY]] -; CHECK: for.end: -; CHECK-NEXT: ret void -; -entry: - %call = tail call i32 @foo(i32 0) #1 - br label %for.body - -for.body: ; preds = %for.body, %entry - %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] - %0 = mul nsw i64 %indvars.iv, 3 - %arrayidx = getelementptr inbounds i32, ptr %x, i64 %0 - store i32 %call, ptr %arrayidx, align 4 - %arrayidx4 = getelementptr inbounds i32, ptr %arrayidx, i64 1 - store i32 %call, ptr %arrayidx4, align 4 - %arrayidx9 = getelementptr inbounds i32, ptr %arrayidx, i64 2 - store i32 %call, ptr %arrayidx9, align 4 - %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 - %exitcond = icmp eq i64 %indvars.iv.next, 500 - br i1 %exitcond, label %for.end, label %for.body - -for.end: ; preds = %for.body - ret void -} - - -define void @unordered_atomic_ops(ptr noalias %buf_0, ptr noalias %buf_1) { -; CHECK-LABEL: define void @unordered_atomic_ops -; CHECK-SAME: (ptr noalias [[BUF_0:%.*]], ptr noalias [[BUF_1:%.*]]) { -; CHECK-NEXT: entry: -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[INDVAR:%.*]] = phi i32 [ [[INDVAR_NEXT:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[BUF0_A:%.*]] = getelementptr i32, ptr [[BUF_0]], i32 [[INDVAR]] -; CHECK-NEXT: [[BUF1_A:%.*]] = getelementptr i32, ptr [[BUF_1]], i32 [[INDVAR]] -; CHECK-NEXT: [[VA:%.*]] = load atomic i32, ptr [[BUF0_A]] unordered, align 4 -; CHECK-NEXT: store atomic i32 [[VA]], ptr [[BUF1_A]] unordered, align 4 -; CHECK-NEXT: [[INDVAR_NEXT]] = add i32 [[INDVAR]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INDVAR]], 3199 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]] -; CHECK: for.end: -; CHECK-NEXT: ret void -; -entry: - br label %for.body - -for.body: - %indvars.iv = phi i32 [ 0, %entry ], [ %indvars.iv.next, %for.body ] - %indvars.iv.next = add i32 %indvars.iv, 2 - %indvars.mid = add i32 %indvars.iv, 1 - %buf0_a = getelementptr i32, ptr %buf_0, i32 %indvars.iv - %buf0_b = getelementptr i32, ptr %buf_0, i32 %indvars.mid - %buf1_a = getelementptr i32, ptr %buf_1, i32 %indvars.iv - %buf1_b = getelementptr i32, ptr %buf_1, i32 %indvars.mid - %va = load atomic i32, ptr %buf0_a unordered, align 4 - %vb = load atomic i32, ptr %buf0_b unordered, align 4 - store atomic i32 %va, ptr %buf1_a unordered, align 4 - store atomic i32 %vb, ptr %buf1_b unordered, align 4 - %cmp = icmp slt i32 %indvars.iv.next, 3200 - br i1 %cmp, label %for.body, label %for.end - -for.end: - ret void -} - -define void @unordered_atomic_ops_nomatch(ptr noalias %buf_0, ptr noalias %buf_1) { -; Negative test -; CHECK-LABEL: define void @unordered_atomic_ops_nomatch -; CHECK-SAME: (ptr noalias [[BUF_0:%.*]], ptr noalias [[BUF_1:%.*]]) { -; CHECK-NEXT: entry: -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i32 [[INDVARS_IV]], 2 -; CHECK-NEXT: [[INDVARS_MID:%.*]] = add i32 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[BUF0_A:%.*]] = getelementptr i32, ptr [[BUF_0]], i32 [[INDVARS_IV]] -; CHECK-NEXT: [[BUF0_B:%.*]] = getelementptr i32, ptr [[BUF_0]], i32 [[INDVARS_MID]] -; CHECK-NEXT: [[BUF1_A:%.*]] = getelementptr i32, ptr [[BUF_1]], i32 [[INDVARS_IV]] -; CHECK-NEXT: [[BUF1_B:%.*]] = getelementptr i32, ptr [[BUF_1]], i32 [[INDVARS_MID]] -; CHECK-NEXT: [[VA:%.*]] = load atomic i32, ptr [[BUF0_A]] unordered, align 4 -; CHECK-NEXT: [[VB:%.*]] = load atomic i32, ptr [[BUF0_B]] unordered, align 4 -; CHECK-NEXT: store i32 [[VA]], ptr [[BUF1_A]], align 4 -; CHECK-NEXT: store atomic i32 [[VB]], ptr [[BUF1_B]] unordered, align 4 -; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[INDVARS_IV_NEXT]], 3200 -; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]] -; CHECK: for.end: -; CHECK-NEXT: ret void -; -entry: - br label %for.body - -for.body: - - %indvars.iv = phi i32 [ 0, %entry ], [ %indvars.iv.next, %for.body ] - %indvars.iv.next = add i32 %indvars.iv, 2 - %indvars.mid = add i32 %indvars.iv, 1 - %buf0_a = getelementptr i32, ptr %buf_0, i32 %indvars.iv - %buf0_b = getelementptr i32, ptr %buf_0, i32 %indvars.mid - %buf1_a = getelementptr i32, ptr %buf_1, i32 %indvars.iv - %buf1_b = getelementptr i32, ptr %buf_1, i32 %indvars.mid - %va = load atomic i32, ptr %buf0_a unordered, align 4 - %vb = load atomic i32, ptr %buf0_b unordered, align 4 - store i32 %va, ptr %buf1_a, align 4 ;; Not atomic - store atomic i32 %vb, ptr %buf1_b unordered, align 4 - %cmp = icmp slt i32 %indvars.iv.next, 3200 - br i1 %cmp, label %for.body, label %for.end - -for.end: - ret void -} - -define void @ordered_atomic_ops(ptr noalias %buf_0, ptr noalias %buf_1) { -; Negative test -; CHECK-LABEL: define void @ordered_atomic_ops -; CHECK-SAME: (ptr noalias [[BUF_0:%.*]], ptr noalias [[BUF_1:%.*]]) { -; CHECK-NEXT: entry: -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i32 [[INDVARS_IV]], 2 -; CHECK-NEXT: [[INDVARS_MID:%.*]] = add i32 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[BUF0_A:%.*]] = getelementptr i32, ptr [[BUF_0]], i32 [[INDVARS_IV]] -; CHECK-NEXT: [[BUF0_B:%.*]] = getelementptr i32, ptr [[BUF_0]], i32 [[INDVARS_MID]] -; CHECK-NEXT: [[BUF1_A:%.*]] = getelementptr i32, ptr [[BUF_1]], i32 [[INDVARS_IV]] -; CHECK-NEXT: [[BUF1_B:%.*]] = getelementptr i32, ptr [[BUF_1]], i32 [[INDVARS_MID]] -; CHECK-NEXT: [[VA:%.*]] = load atomic i32, ptr [[BUF0_A]] acquire, align 4 -; CHECK-NEXT: [[VB:%.*]] = load atomic i32, ptr [[BUF0_B]] acquire, align 4 -; CHECK-NEXT: store atomic i32 [[VA]], ptr [[BUF1_A]] release, align 4 -; CHECK-NEXT: store atomic i32 [[VB]], ptr [[BUF1_B]] release, align 4 -; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[INDVARS_IV_NEXT]], 3200 -; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]] -; CHECK: for.end: -; CHECK-NEXT: ret void -; -entry: - br label %for.body - -for.body: - - %indvars.iv = phi i32 [ 0, %entry ], [ %indvars.iv.next, %for.body ] - %indvars.iv.next = add i32 %indvars.iv, 2 - %indvars.mid = add i32 %indvars.iv, 1 - %buf0_a = getelementptr i32, ptr %buf_0, i32 %indvars.iv - %buf0_b = getelementptr i32, ptr %buf_0, i32 %indvars.mid - %buf1_a = getelementptr i32, ptr %buf_1, i32 %indvars.iv - %buf1_b = getelementptr i32, ptr %buf_1, i32 %indvars.mid - %va = load atomic i32, ptr %buf0_a acquire, align 4 - %vb = load atomic i32, ptr %buf0_b acquire, align 4 - store atomic i32 %va, ptr %buf1_a release, align 4 - store atomic i32 %vb, ptr %buf1_b release, align 4 - %cmp = icmp slt i32 %indvars.iv.next, 3200 - br i1 %cmp, label %for.body, label %for.end - -for.end: - ret void -} - -define void @unordered_atomic_ops_with_fence(ptr noalias %buf_0, ptr noalias %buf_1) { -; CHECK-LABEL: define void @unordered_atomic_ops_with_fence -; CHECK-SAME: (ptr noalias [[BUF_0:%.*]], ptr noalias [[BUF_1:%.*]]) { -; CHECK-NEXT: entry: -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i32 [[INDVARS_IV]], 2 -; CHECK-NEXT: [[INDVARS_MID:%.*]] = add i32 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[BUF0_A:%.*]] = getelementptr i32, ptr [[BUF_0]], i32 [[INDVARS_IV]] -; CHECK-NEXT: [[BUF0_B:%.*]] = getelementptr i32, ptr [[BUF_0]], i32 [[INDVARS_MID]] -; CHECK-NEXT: [[BUF1_A:%.*]] = getelementptr i32, ptr [[BUF_1]], i32 [[INDVARS_IV]] -; CHECK-NEXT: [[BUF1_B:%.*]] = getelementptr i32, ptr [[BUF_1]], i32 [[INDVARS_MID]] -; CHECK-NEXT: [[VA:%.*]] = load atomic i32, ptr [[BUF0_A]] unordered, align 4 -; CHECK-NEXT: [[VB:%.*]] = load atomic i32, ptr [[BUF0_B]] unordered, align 4 -; CHECK-NEXT: fence seq_cst -; CHECK-NEXT: store atomic i32 [[VA]], ptr [[BUF1_A]] unordered, align 4 -; CHECK-NEXT: store atomic i32 [[VB]], ptr [[BUF1_B]] unordered, align 4 -; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[INDVARS_IV_NEXT]], 3200 -; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]] -; CHECK: for.end: -; CHECK-NEXT: ret void -; -entry: - br label %for.body - -for.body: - - %indvars.iv = phi i32 [ 0, %entry ], [ %indvars.iv.next, %for.body ] - %indvars.iv.next = add i32 %indvars.iv, 2 - %indvars.mid = add i32 %indvars.iv, 1 - %buf0_a = getelementptr i32, ptr %buf_0, i32 %indvars.iv - %buf0_b = getelementptr i32, ptr %buf_0, i32 %indvars.mid - %buf1_a = getelementptr i32, ptr %buf_1, i32 %indvars.iv - %buf1_b = getelementptr i32, ptr %buf_1, i32 %indvars.mid - %va = load atomic i32, ptr %buf0_a unordered, align 4 - %vb = load atomic i32, ptr %buf0_b unordered, align 4 - fence seq_cst - store atomic i32 %va, ptr %buf1_a unordered, align 4 - store atomic i32 %vb, ptr %buf1_b unordered, align 4 - %cmp = icmp slt i32 %indvars.iv.next, 3200 - br i1 %cmp, label %for.body, label %for.end - -for.end: - ret void -} - -define void @pointer_bitcast_baseinst(ptr %arg, ptr %arg1, i64 %arg2) { -; CHECK-LABEL: define void @pointer_bitcast_baseinst -; CHECK-SAME: (ptr [[ARG:%.*]], ptr [[ARG1:%.*]], i64 [[ARG2:%.*]]) { -; CHECK-NEXT: bb: -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[ARG2]], -17 -; CHECK-NEXT: [[TMP1:%.*]] = lshr i64 [[TMP0]], 4 -; CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 1 -; CHECK-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1 -; CHECK-NEXT: br label [[BB3:%.*]] -; CHECK: bb3: -; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ [[INDVAR_NEXT:%.*]], [[BB3]] ], [ 0, [[BB:%.*]] ] -; CHECK-NEXT: [[TMP4:%.*]] = shl nuw i64 [[INDVAR]], 3 -; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[TMP4]], 1 -; CHECK-NEXT: [[INST5:%.*]] = shl nuw i64 [[TMP5]], 1 -; CHECK-NEXT: [[INST6:%.*]] = getelementptr i8, ptr [[ARG1]], i64 [[INST5]] -; CHECK-NEXT: [[INST8:%.*]] = load <8 x i16>, ptr [[INST6]], align 2 -; CHECK-NEXT: [[INST13:%.*]] = getelementptr i16, ptr [[ARG]], i64 [[TMP5]] -; CHECK-NEXT: store <8 x i16> [[INST8]], ptr [[INST13]], align 2 -; CHECK-NEXT: [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVAR]], [[TMP3]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[BB19:%.*]], label [[BB3]] -; CHECK: bb19: -; CHECK-NEXT: ret void -; -bb: - br label %bb3 - -bb3: ; preds = %bb3, %bb - %inst = phi i64 [ 1, %bb ], [ %inst17, %bb3 ] - %inst4 = add nuw i64 %inst, 8 - %inst5 = shl nuw i64 %inst, 1 - %inst6 = getelementptr i8, ptr %arg1, i64 %inst5 - %inst8 = load <8 x i16>, ptr %inst6, align 2 - %inst9 = shl i64 %inst4, 1 - %inst10 = getelementptr i8, ptr %arg1, i64 %inst9 - %inst12 = load <8 x i16>, ptr %inst10, align 2 - %inst13 = getelementptr i16, ptr %arg, i64 %inst - store <8 x i16> %inst8, ptr %inst13, align 2 - %inst15 = getelementptr i16, ptr %arg, i64 %inst4 - store <8 x i16> %inst12, ptr %inst15, align 2 - %inst17 = add nuw nsw i64 %inst, 16 - %inst18 = icmp eq i64 %inst17, %arg2 - br i1 %inst18, label %bb19, label %bb3 - -bb19: ; preds = %bb3 - ret void -} - -define void @bad_step(ptr nocapture readnone %x) #0 { -; CHECK-LABEL: define void @bad_step -; CHECK-SAME: (ptr nocapture readnone [[X:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: entry: -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[I_08:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD3:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[CALL:%.*]] = tail call i32 @foo(i32 [[I_08]]) #[[ATTR1]] -; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[I_08]], 2 -; CHECK-NEXT: [[CALL1:%.*]] = tail call i32 @foo(i32 [[ADD]]) #[[ATTR1]] -; CHECK-NEXT: [[ADD2:%.*]] = add nsw i32 [[I_08]], 3 -; CHECK-NEXT: [[CALL3:%.*]] = tail call i32 @foo(i32 [[ADD2]]) #[[ATTR1]] -; CHECK-NEXT: [[ADD3]] = add nsw i32 [[I_08]], 6 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp sge i32 [[ADD3]], 500 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]] -; CHECK: for.end: -; CHECK-NEXT: ret void -; -entry: - br label %for.body - -for.body: ; preds = %for.body, %entry - %i.08 = phi i32 [ 0, %entry ], [ %add3, %for.body ] - %call = tail call i32 @foo(i32 %i.08) #1 - %add = add nsw i32 %i.08, 2 - %call1 = tail call i32 @foo(i32 %add) #1 - %add2 = add nsw i32 %i.08, 3 - %call3 = tail call i32 @foo(i32 %add2) #1 - %add3 = add nsw i32 %i.08, 6 - %exitcond = icmp sge i32 %add3, 500 - br i1 %exitcond, label %for.end, label %for.body - - -for.end: ; preds = %for.body - ret void -} - -@a = external global [2 x [512 x i64]], align 16 -@b = external global [512 x [4 x i64]], align 16 - -define void @ptr_step_crash() { -; CHECK-LABEL: define void @ptr_step_crash() { -; CHECK-NEXT: entry: -; CHECK-NEXT: br label [[FOR_BODY42_3:%.*]] -; CHECK: for.body42.3: -; CHECK-NEXT: [[K_2207_3:%.*]] = phi i32 [ -512, [[ENTRY:%.*]] ], [ [[INC63_3:%.*]], [[FOR_BODY42_3]] ] -; CHECK-NEXT: [[SUB46_3:%.*]] = add nsw i32 [[K_2207_3]], 512 -; CHECK-NEXT: [[IDXPROM47_3:%.*]] = zext i32 [[SUB46_3]] to i64 -; CHECK-NEXT: [[ARRAYIDX48_3:%.*]] = getelementptr inbounds [2 x [512 x i64]], ptr @a, i64 0, i64 0, i64 [[IDXPROM47_3]] -; CHECK-NEXT: [[ARRAYIDX55_3:%.*]] = getelementptr inbounds [512 x [4 x i64]], ptr @b, i64 0, i64 [[IDXPROM47_3]], i64 3 -; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[ARRAYIDX55_3]], align 8 -; CHECK-NEXT: [[INC63_3]] = add nsw i32 [[K_2207_3]], 1 -; CHECK-NEXT: br i1 true, label [[FOR_INC65_3:%.*]], label [[FOR_BODY42_3]] -; CHECK: for.inc65.3: -; CHECK-NEXT: ret void -; -entry: - br label %for.body42.3 - -for.body42.3: ; preds = %for.body42.3, %entry - %k.2207.3 = phi i32 [ -512, %entry ], [ %inc63.3, %for.body42.3 ] - %sub46.3 = add nsw i32 %k.2207.3, 512 - %idxprom47.3 = zext i32 %sub46.3 to i64 - %arrayidx48.3 = getelementptr inbounds [2 x [512 x i64]], ptr @a, i64 0, i64 0, i64 %idxprom47.3 - %arrayidx55.3 = getelementptr inbounds [512 x [4 x i64]], ptr @b, i64 0, i64 %idxprom47.3, i64 3 - %0 = load i64, ptr %arrayidx55.3, align 8 - %inc63.3 = add nsw i32 %k.2207.3, 1 - br i1 undef, label %for.inc65.3, label %for.body42.3 - -for.inc65.3: ; preds = %for.body42.3 - ret void -} - -attributes #0 = { nounwind uwtable } -attributes #1 = { nounwind } diff --git a/llvm/test/Transforms/LoopReroll/basic32iters.ll b/llvm/test/Transforms/LoopReroll/basic32iters.ll deleted file mode 100644 index edf38cb3eb18d..0000000000000 --- a/llvm/test/Transforms/LoopReroll/basic32iters.ll +++ /dev/null @@ -1,328 +0,0 @@ -; RUN: opt < %s -passes=loop-reroll -verify-scev -S | FileCheck %s -target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -; void goo32(float alpha, float *a, float *b) { -; for (int i = 0; i < 3200; i += 32) { -; a[i] += alpha * b[i]; -; a[i + 1] += alpha * b[i + 1]; -; a[i + 2] += alpha * b[i + 2]; -; a[i + 3] += alpha * b[i + 3]; -; a[i + 4] += alpha * b[i + 4]; -; a[i + 5] += alpha * b[i + 5]; -; a[i + 6] += alpha * b[i + 6]; -; a[i + 7] += alpha * b[i + 7]; -; a[i + 8] += alpha * b[i + 8]; -; a[i + 9] += alpha * b[i + 9]; -; a[i + 10] += alpha * b[i + 10]; -; a[i + 11] += alpha * b[i + 11]; -; a[i + 12] += alpha * b[i + 12]; -; a[i + 13] += alpha * b[i + 13]; -; a[i + 14] += alpha * b[i + 14]; -; a[i + 15] += alpha * b[i + 15]; -; a[i + 16] += alpha * b[i + 16]; -; a[i + 17] += alpha * b[i + 17]; -; a[i + 18] += alpha * b[i + 18]; -; a[i + 19] += alpha * b[i + 19]; -; a[i + 20] += alpha * b[i + 20]; -; a[i + 21] += alpha * b[i + 21]; -; a[i + 22] += alpha * b[i + 22]; -; a[i + 23] += alpha * b[i + 23]; -; a[i + 24] += alpha * b[i + 24]; -; a[i + 25] += alpha * b[i + 25]; -; a[i + 26] += alpha * b[i + 26]; -; a[i + 27] += alpha * b[i + 27]; -; a[i + 28] += alpha * b[i + 28]; -; a[i + 29] += alpha * b[i + 29]; -; a[i + 30] += alpha * b[i + 30]; -; a[i + 31] += alpha * b[i + 31]; -; } -; } - -; Function Attrs: norecurse nounwind uwtable -define void @goo32(float %alpha, ptr %a, ptr readonly %b) #0 { -entry: - br label %for.body - -for.body: ; preds = %entry, %for.body - %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] - %arrayidx = getelementptr inbounds float, ptr %b, i64 %indvars.iv - %0 = load float, ptr %arrayidx, align 4 - %mul = fmul float %0, %alpha - %arrayidx2 = getelementptr inbounds float, ptr %a, i64 %indvars.iv - %1 = load float, ptr %arrayidx2, align 4 - %add = fadd float %1, %mul - store float %add, ptr %arrayidx2, align 4 - %2 = or disjoint i64 %indvars.iv, 1 - %arrayidx5 = getelementptr inbounds float, ptr %b, i64 %2 - %3 = load float, ptr %arrayidx5, align 4 - %mul6 = fmul float %3, %alpha - %arrayidx9 = getelementptr inbounds float, ptr %a, i64 %2 - %4 = load float, ptr %arrayidx9, align 4 - %add10 = fadd float %4, %mul6 - store float %add10, ptr %arrayidx9, align 4 - %5 = or disjoint i64 %indvars.iv, 2 - %arrayidx13 = getelementptr inbounds float, ptr %b, i64 %5 - %6 = load float, ptr %arrayidx13, align 4 - %mul14 = fmul float %6, %alpha - %arrayidx17 = getelementptr inbounds float, ptr %a, i64 %5 - %7 = load float, ptr %arrayidx17, align 4 - %add18 = fadd float %7, %mul14 - store float %add18, ptr %arrayidx17, align 4 - %8 = or disjoint i64 %indvars.iv, 3 - %arrayidx21 = getelementptr inbounds float, ptr %b, i64 %8 - %9 = load float, ptr %arrayidx21, align 4 - %mul22 = fmul float %9, %alpha - %arrayidx25 = getelementptr inbounds float, ptr %a, i64 %8 - %10 = load float, ptr %arrayidx25, align 4 - %add26 = fadd float %10, %mul22 - store float %add26, ptr %arrayidx25, align 4 - %11 = or disjoint i64 %indvars.iv, 4 - %arrayidx29 = getelementptr inbounds float, ptr %b, i64 %11 - %12 = load float, ptr %arrayidx29, align 4 - %mul30 = fmul float %12, %alpha - %arrayidx33 = getelementptr inbounds float, ptr %a, i64 %11 - %13 = load float, ptr %arrayidx33, align 4 - %add34 = fadd float %13, %mul30 - store float %add34, ptr %arrayidx33, align 4 - %14 = or disjoint i64 %indvars.iv, 5 - %arrayidx37 = getelementptr inbounds float, ptr %b, i64 %14 - %15 = load float, ptr %arrayidx37, align 4 - %mul38 = fmul float %15, %alpha - %arrayidx41 = getelementptr inbounds float, ptr %a, i64 %14 - %16 = load float, ptr %arrayidx41, align 4 - %add42 = fadd float %16, %mul38 - store float %add42, ptr %arrayidx41, align 4 - %17 = or disjoint i64 %indvars.iv, 6 - %arrayidx45 = getelementptr inbounds float, ptr %b, i64 %17 - %18 = load float, ptr %arrayidx45, align 4 - %mul46 = fmul float %18, %alpha - %arrayidx49 = getelementptr inbounds float, ptr %a, i64 %17 - %19 = load float, ptr %arrayidx49, align 4 - %add50 = fadd float %19, %mul46 - store float %add50, ptr %arrayidx49, align 4 - %20 = or disjoint i64 %indvars.iv, 7 - %arrayidx53 = getelementptr inbounds float, ptr %b, i64 %20 - %21 = load float, ptr %arrayidx53, align 4 - %mul54 = fmul float %21, %alpha - %arrayidx57 = getelementptr inbounds float, ptr %a, i64 %20 - %22 = load float, ptr %arrayidx57, align 4 - %add58 = fadd float %22, %mul54 - store float %add58, ptr %arrayidx57, align 4 - %23 = or disjoint i64 %indvars.iv, 8 - %arrayidx61 = getelementptr inbounds float, ptr %b, i64 %23 - %24 = load float, ptr %arrayidx61, align 4 - %mul62 = fmul float %24, %alpha - %arrayidx65 = getelementptr inbounds float, ptr %a, i64 %23 - %25 = load float, ptr %arrayidx65, align 4 - %add66 = fadd float %25, %mul62 - store float %add66, ptr %arrayidx65, align 4 - %26 = or disjoint i64 %indvars.iv, 9 - %arrayidx69 = getelementptr inbounds float, ptr %b, i64 %26 - %27 = load float, ptr %arrayidx69, align 4 - %mul70 = fmul float %27, %alpha - %arrayidx73 = getelementptr inbounds float, ptr %a, i64 %26 - %28 = load float, ptr %arrayidx73, align 4 - %add74 = fadd float %28, %mul70 - store float %add74, ptr %arrayidx73, align 4 - %29 = or disjoint i64 %indvars.iv, 10 - %arrayidx77 = getelementptr inbounds float, ptr %b, i64 %29 - %30 = load float, ptr %arrayidx77, align 4 - %mul78 = fmul float %30, %alpha - %arrayidx81 = getelementptr inbounds float, ptr %a, i64 %29 - %31 = load float, ptr %arrayidx81, align 4 - %add82 = fadd float %31, %mul78 - store float %add82, ptr %arrayidx81, align 4 - %32 = or disjoint i64 %indvars.iv, 11 - %arrayidx85 = getelementptr inbounds float, ptr %b, i64 %32 - %33 = load float, ptr %arrayidx85, align 4 - %mul86 = fmul float %33, %alpha - %arrayidx89 = getelementptr inbounds float, ptr %a, i64 %32 - %34 = load float, ptr %arrayidx89, align 4 - %add90 = fadd float %34, %mul86 - store float %add90, ptr %arrayidx89, align 4 - %35 = or disjoint i64 %indvars.iv, 12 - %arrayidx93 = getelementptr inbounds float, ptr %b, i64 %35 - %36 = load float, ptr %arrayidx93, align 4 - %mul94 = fmul float %36, %alpha - %arrayidx97 = getelementptr inbounds float, ptr %a, i64 %35 - %37 = load float, ptr %arrayidx97, align 4 - %add98 = fadd float %37, %mul94 - store float %add98, ptr %arrayidx97, align 4 - %38 = or disjoint i64 %indvars.iv, 13 - %arrayidx101 = getelementptr inbounds float, ptr %b, i64 %38 - %39 = load float, ptr %arrayidx101, align 4 - %mul102 = fmul float %39, %alpha - %arrayidx105 = getelementptr inbounds float, ptr %a, i64 %38 - %40 = load float, ptr %arrayidx105, align 4 - %add106 = fadd float %40, %mul102 - store float %add106, ptr %arrayidx105, align 4 - %41 = or disjoint i64 %indvars.iv, 14 - %arrayidx109 = getelementptr inbounds float, ptr %b, i64 %41 - %42 = load float, ptr %arrayidx109, align 4 - %mul110 = fmul float %42, %alpha - %arrayidx113 = getelementptr inbounds float, ptr %a, i64 %41 - %43 = load float, ptr %arrayidx113, align 4 - %add114 = fadd float %43, %mul110 - store float %add114, ptr %arrayidx113, align 4 - %44 = or disjoint i64 %indvars.iv, 15 - %arrayidx117 = getelementptr inbounds float, ptr %b, i64 %44 - %45 = load float, ptr %arrayidx117, align 4 - %mul118 = fmul float %45, %alpha - %arrayidx121 = getelementptr inbounds float, ptr %a, i64 %44 - %46 = load float, ptr %arrayidx121, align 4 - %add122 = fadd float %46, %mul118 - store float %add122, ptr %arrayidx121, align 4 - %47 = or disjoint i64 %indvars.iv, 16 - %arrayidx125 = getelementptr inbounds float, ptr %b, i64 %47 - %48 = load float, ptr %arrayidx125, align 4 - %mul126 = fmul float %48, %alpha - %arrayidx129 = getelementptr inbounds float, ptr %a, i64 %47 - %49 = load float, ptr %arrayidx129, align 4 - %add130 = fadd float %49, %mul126 - store float %add130, ptr %arrayidx129, align 4 - %50 = or disjoint i64 %indvars.iv, 17 - %arrayidx133 = getelementptr inbounds float, ptr %b, i64 %50 - %51 = load float, ptr %arrayidx133, align 4 - %mul134 = fmul float %51, %alpha - %arrayidx137 = getelementptr inbounds float, ptr %a, i64 %50 - %52 = load float, ptr %arrayidx137, align 4 - %add138 = fadd float %52, %mul134 - store float %add138, ptr %arrayidx137, align 4 - %53 = or disjoint i64 %indvars.iv, 18 - %arrayidx141 = getelementptr inbounds float, ptr %b, i64 %53 - %54 = load float, ptr %arrayidx141, align 4 - %mul142 = fmul float %54, %alpha - %arrayidx145 = getelementptr inbounds float, ptr %a, i64 %53 - %55 = load float, ptr %arrayidx145, align 4 - %add146 = fadd float %55, %mul142 - store float %add146, ptr %arrayidx145, align 4 - %56 = or disjoint i64 %indvars.iv, 19 - %arrayidx149 = getelementptr inbounds float, ptr %b, i64 %56 - %57 = load float, ptr %arrayidx149, align 4 - %mul150 = fmul float %57, %alpha - %arrayidx153 = getelementptr inbounds float, ptr %a, i64 %56 - %58 = load float, ptr %arrayidx153, align 4 - %add154 = fadd float %58, %mul150 - store float %add154, ptr %arrayidx153, align 4 - %59 = or disjoint i64 %indvars.iv, 20 - %arrayidx157 = getelementptr inbounds float, ptr %b, i64 %59 - %60 = load float, ptr %arrayidx157, align 4 - %mul158 = fmul float %60, %alpha - %arrayidx161 = getelementptr inbounds float, ptr %a, i64 %59 - %61 = load float, ptr %arrayidx161, align 4 - %add162 = fadd float %61, %mul158 - store float %add162, ptr %arrayidx161, align 4 - %62 = or disjoint i64 %indvars.iv, 21 - %arrayidx165 = getelementptr inbounds float, ptr %b, i64 %62 - %63 = load float, ptr %arrayidx165, align 4 - %mul166 = fmul float %63, %alpha - %arrayidx169 = getelementptr inbounds float, ptr %a, i64 %62 - %64 = load float, ptr %arrayidx169, align 4 - %add170 = fadd float %64, %mul166 - store float %add170, ptr %arrayidx169, align 4 - %65 = or disjoint i64 %indvars.iv, 22 - %arrayidx173 = getelementptr inbounds float, ptr %b, i64 %65 - %66 = load float, ptr %arrayidx173, align 4 - %mul174 = fmul float %66, %alpha - %arrayidx177 = getelementptr inbounds float, ptr %a, i64 %65 - %67 = load float, ptr %arrayidx177, align 4 - %add178 = fadd float %67, %mul174 - store float %add178, ptr %arrayidx177, align 4 - %68 = or disjoint i64 %indvars.iv, 23 - %arrayidx181 = getelementptr inbounds float, ptr %b, i64 %68 - %69 = load float, ptr %arrayidx181, align 4 - %mul182 = fmul float %69, %alpha - %arrayidx185 = getelementptr inbounds float, ptr %a, i64 %68 - %70 = load float, ptr %arrayidx185, align 4 - %add186 = fadd float %70, %mul182 - store float %add186, ptr %arrayidx185, align 4 - %71 = or disjoint i64 %indvars.iv, 24 - %arrayidx189 = getelementptr inbounds float, ptr %b, i64 %71 - %72 = load float, ptr %arrayidx189, align 4 - %mul190 = fmul float %72, %alpha - %arrayidx193 = getelementptr inbounds float, ptr %a, i64 %71 - %73 = load float, ptr %arrayidx193, align 4 - %add194 = fadd float %73, %mul190 - store float %add194, ptr %arrayidx193, align 4 - %74 = or disjoint i64 %indvars.iv, 25 - %arrayidx197 = getelementptr inbounds float, ptr %b, i64 %74 - %75 = load float, ptr %arrayidx197, align 4 - %mul198 = fmul float %75, %alpha - %arrayidx201 = getelementptr inbounds float, ptr %a, i64 %74 - %76 = load float, ptr %arrayidx201, align 4 - %add202 = fadd float %76, %mul198 - store float %add202, ptr %arrayidx201, align 4 - %77 = or disjoint i64 %indvars.iv, 26 - %arrayidx205 = getelementptr inbounds float, ptr %b, i64 %77 - %78 = load float, ptr %arrayidx205, align 4 - %mul206 = fmul float %78, %alpha - %arrayidx209 = getelementptr inbounds float, ptr %a, i64 %77 - %79 = load float, ptr %arrayidx209, align 4 - %add210 = fadd float %79, %mul206 - store float %add210, ptr %arrayidx209, align 4 - %80 = or disjoint i64 %indvars.iv, 27 - %arrayidx213 = getelementptr inbounds float, ptr %b, i64 %80 - %81 = load float, ptr %arrayidx213, align 4 - %mul214 = fmul float %81, %alpha - %arrayidx217 = getelementptr inbounds float, ptr %a, i64 %80 - %82 = load float, ptr %arrayidx217, align 4 - %add218 = fadd float %82, %mul214 - store float %add218, ptr %arrayidx217, align 4 - %83 = or disjoint i64 %indvars.iv, 28 - %arrayidx221 = getelementptr inbounds float, ptr %b, i64 %83 - %84 = load float, ptr %arrayidx221, align 4 - %mul222 = fmul float %84, %alpha - %arrayidx225 = getelementptr inbounds float, ptr %a, i64 %83 - %85 = load float, ptr %arrayidx225, align 4 - %add226 = fadd float %85, %mul222 - store float %add226, ptr %arrayidx225, align 4 - %86 = or disjoint i64 %indvars.iv, 29 - %arrayidx229 = getelementptr inbounds float, ptr %b, i64 %86 - %87 = load float, ptr %arrayidx229, align 4 - %mul230 = fmul float %87, %alpha - %arrayidx233 = getelementptr inbounds float, ptr %a, i64 %86 - %88 = load float, ptr %arrayidx233, align 4 - %add234 = fadd float %88, %mul230 - store float %add234, ptr %arrayidx233, align 4 - %89 = or disjoint i64 %indvars.iv, 30 - %arrayidx237 = getelementptr inbounds float, ptr %b, i64 %89 - %90 = load float, ptr %arrayidx237, align 4 - %mul238 = fmul float %90, %alpha - %arrayidx241 = getelementptr inbounds float, ptr %a, i64 %89 - %91 = load float, ptr %arrayidx241, align 4 - %add242 = fadd float %91, %mul238 - store float %add242, ptr %arrayidx241, align 4 - %92 = or disjoint i64 %indvars.iv, 31 - %arrayidx245 = getelementptr inbounds float, ptr %b, i64 %92 - %93 = load float, ptr %arrayidx245, align 4 - %mul246 = fmul float %93, %alpha - %arrayidx249 = getelementptr inbounds float, ptr %a, i64 %92 - %94 = load float, ptr %arrayidx249, align 4 - %add250 = fadd float %94, %mul246 - store float %add250, ptr %arrayidx249, align 4 - %indvars.iv.next = add nuw nsw i64 %indvars.iv, 32 - %cmp = icmp slt i64 %indvars.iv.next, 3200 - br i1 %cmp, label %for.body, label %for.end - -; CHECK-LABEL: @goo32 - -; CHECK: for.body: -; CHECK: %indvar = phi i64 [ %indvar.next, %for.body ], [ 0, %entry ] -; CHECK: %arrayidx = getelementptr inbounds float, ptr %b, i64 %indvar -; CHECK: %0 = load float, ptr %arrayidx, align 4 -; CHECK: %mul = fmul float %0, %alpha -; CHECK: %arrayidx2 = getelementptr inbounds float, ptr %a, i64 %indvar -; CHECK: %1 = load float, ptr %arrayidx2, align 4 -; CHECK: %add = fadd float %1, %mul -; CHECK: store float %add, ptr %arrayidx2, align 4 -; CHECK: %indvar.next = add i64 %indvar, 1 -; CHECK: %exitcond = icmp eq i64 %indvar, 3199 -; CHECK: br i1 %exitcond, label %for.end, label %for.body -; CHECK: ret - -for.end: ; preds = %for.body - ret void -} - -attributes #0 = { nounwind uwtable } diff --git a/llvm/test/Transforms/LoopReroll/complex_reroll.ll b/llvm/test/Transforms/LoopReroll/complex_reroll.ll deleted file mode 100644 index 27139eeecf8ce..0000000000000 --- a/llvm/test/Transforms/LoopReroll/complex_reroll.ll +++ /dev/null @@ -1,237 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -passes=loop-reroll %s | FileCheck %s -declare i32 @goo(i32, i32) - -@buf = external global ptr -@aaa = global [16 x i8] c"\01\02\03\04\05\06\07\08\09\0A\0B\0C\0D\0E\0F\10", align 1 - -define i32 @test1(i32 %len) { -; CHECK-LABEL: @test1( -; CHECK-NEXT: entry: -; CHECK-NEXT: br label [[WHILE_BODY:%.*]] -; CHECK: while.body: -; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ [[INDVAR_NEXT:%.*]], [[WHILE_BODY]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[SUM44_020:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[ADD:%.*]], [[WHILE_BODY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = trunc i64 [[INDVAR]] to i32 -; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr @aaa, i64 [[INDVAR]] -; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr [[SCEVGEP]], align 1 -; CHECK-NEXT: [[CONV:%.*]] = zext i8 [[TMP1]] to i64 -; CHECK-NEXT: [[ADD]] = add i64 [[CONV]], [[SUM44_020]] -; CHECK-NEXT: [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[TMP0]], 15 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[WHILE_END:%.*]], label [[WHILE_BODY]] -; CHECK: while.end: -; CHECK-NEXT: [[ADD9_LCSSA:%.*]] = phi i64 [ [[ADD]], [[WHILE_BODY]] ] -; CHECK-NEXT: [[CONV11:%.*]] = trunc i64 [[ADD9_LCSSA]] to i32 -; CHECK-NEXT: [[CALL:%.*]] = tail call i32 @goo(i32 0, i32 [[CONV11]]) -; CHECK-NEXT: unreachable -; -entry: - br label %while.body - -while.body: - - %dec22 = phi i32 [ 4, %entry ], [ %dec, %while.body ] - %buf.021 = phi ptr [ @aaa, %entry ], [ %add.ptr, %while.body ] - %sum44.020 = phi i64 [ 0, %entry ], [ %add9, %while.body ] - %0 = load i8, ptr %buf.021, align 1 - %conv = zext i8 %0 to i64 - %add = add i64 %conv, %sum44.020 - %arrayidx1 = getelementptr inbounds i8, ptr %buf.021, i64 1 - %1 = load i8, ptr %arrayidx1, align 1 - %conv2 = zext i8 %1 to i64 - %add3 = add i64 %add, %conv2 - %arrayidx4 = getelementptr inbounds i8, ptr %buf.021, i64 2 - %2 = load i8, ptr %arrayidx4, align 1 - %conv5 = zext i8 %2 to i64 - %add6 = add i64 %add3, %conv5 - %arrayidx7 = getelementptr inbounds i8, ptr %buf.021, i64 3 - %3 = load i8, ptr %arrayidx7, align 1 - %conv8 = zext i8 %3 to i64 - %add9 = add i64 %add6, %conv8 - %add.ptr = getelementptr inbounds i8, ptr %buf.021, i64 4 - %dec = add nsw i32 %dec22, -1 - %tobool = icmp eq i32 %dec, 0 - br i1 %tobool, label %while.end, label %while.body - -while.end: ; preds = %while.body - %conv11 = trunc i64 %add9 to i32 - %call = tail call i32 @goo(i32 0, i32 %conv11) - unreachable -} - -define i32 @test2(i32 %N, ptr nocapture readonly %a, i32 %S) { -; CHECK-LABEL: @test2( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[CMP_9:%.*]] = icmp sgt i32 [[N:%.*]], 0 -; CHECK-NEXT: br i1 [[CMP_9]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_COND_CLEANUP:%.*]] -; CHECK: for.body.lr.ph: -; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1 -; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[TMP0]], 1 -; CHECK-NEXT: [[TMP2:%.*]] = shl nuw i32 [[TMP1]], 1 -; CHECK-NEXT: [[TMP3:%.*]] = add nuw nsw i32 [[TMP2]], 1 -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.cond.for.cond.cleanup_crit_edge: -; CHECK-NEXT: [[ADD2_LCSSA:%.*]] = phi i32 [ [[ADD:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] -; CHECK: for.cond.cleanup: -; CHECK-NEXT: [[S_ADDR_0_LCSSA:%.*]] = phi i32 [ [[ADD2_LCSSA]], [[FOR_COND_FOR_COND_CLEANUP_CRIT_EDGE:%.*]] ], [ [[S:%.*]], [[ENTRY:%.*]] ] -; CHECK-NEXT: ret i32 [[S_ADDR_0_LCSSA]] -; CHECK: for.body: -; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ [[INDVAR_NEXT:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_LR_PH]] ] -; CHECK-NEXT: [[S_ADDR_011:%.*]] = phi i32 [ [[S]], [[FOR_BODY_LR_PH]] ], [ [[ADD]], [[FOR_BODY]] ] -; CHECK-NEXT: [[TMP4:%.*]] = trunc i64 [[INDVAR]] to i32 -; CHECK-NEXT: [[TMP5:%.*]] = shl nuw nsw i64 [[INDVAR]], 2 -; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 [[TMP5]] -; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[SCEVGEP]], align 4 -; CHECK-NEXT: [[ADD]] = add nsw i32 [[TMP6]], [[S_ADDR_011]] -; CHECK-NEXT: [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[TMP4]], [[TMP3]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_FOR_COND_CLEANUP_CRIT_EDGE]], label [[FOR_BODY]] -; -entry: - %cmp.9 = icmp sgt i32 %N, 0 - br i1 %cmp.9, label %for.body.lr.ph, label %for.cond.cleanup - -for.body.lr.ph: - br label %for.body - -for.cond.for.cond.cleanup_crit_edge: - br label %for.cond.cleanup - -for.cond.cleanup: - %S.addr.0.lcssa = phi i32 [ %add2, %for.cond.for.cond.cleanup_crit_edge ], [ %S, %entry ] - ret i32 %S.addr.0.lcssa - -for.body: - - %i.012 = phi i32 [ 0, %for.body.lr.ph ], [ %add3, %for.body ] - %S.addr.011 = phi i32 [ %S, %for.body.lr.ph ], [ %add2, %for.body ] - %a.addr.010 = phi ptr [ %a, %for.body.lr.ph ], [ %incdec.ptr1, %for.body ] - %incdec.ptr = getelementptr inbounds i32, ptr %a.addr.010, i64 1 - %0 = load i32, ptr %a.addr.010, align 4 - %add = add nsw i32 %0, %S.addr.011 - %incdec.ptr1 = getelementptr inbounds i32, ptr %a.addr.010, i64 2 - %1 = load i32, ptr %incdec.ptr, align 4 - %add2 = add nsw i32 %add, %1 - %add3 = add nsw i32 %i.012, 2 - %cmp = icmp slt i32 %add3, %N - br i1 %cmp, label %for.body, label %for.cond.for.cond.cleanup_crit_edge -} - -define i32 @test3(ptr nocapture readonly %buf, i32 %len) #0 { -; CHECK-LABEL: @test3( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[LEN:%.*]], 1 -; CHECK-NEXT: br i1 [[CMP10]], label [[WHILE_BODY_PREHEADER:%.*]], label [[WHILE_END:%.*]] -; CHECK: while.body.preheader: -; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[LEN]], -2 -; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[TMP0]], 1 -; CHECK-NEXT: [[TMP2:%.*]] = shl nuw i32 [[TMP1]], 1 -; CHECK-NEXT: [[TMP3:%.*]] = add nuw nsw i32 [[TMP2]], 1 -; CHECK-NEXT: br label [[WHILE_BODY:%.*]] -; CHECK: while.body: -; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ [[INDVAR_NEXT:%.*]], [[WHILE_BODY]] ], [ 0, [[WHILE_BODY_PREHEADER]] ] -; CHECK-NEXT: [[S_012:%.*]] = phi i32 [ [[ADD:%.*]], [[WHILE_BODY]] ], [ undef, [[WHILE_BODY_PREHEADER]] ] -; CHECK-NEXT: [[TMP4:%.*]] = trunc i64 [[INDVAR]] to i32 -; CHECK-NEXT: [[TMP5:%.*]] = mul nsw i64 [[INDVAR]], -4 -; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[BUF:%.*]], i64 [[TMP5]] -; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[SCEVGEP]], align 4 -; CHECK-NEXT: [[ADD]] = add nsw i32 [[TMP6]], [[S_012]] -; CHECK-NEXT: [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[TMP4]], [[TMP3]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[WHILE_END_LOOPEXIT:%.*]], label [[WHILE_BODY]] -; CHECK: while.end.loopexit: -; CHECK-NEXT: [[ADD2_LCSSA:%.*]] = phi i32 [ [[ADD]], [[WHILE_BODY]] ] -; CHECK-NEXT: br label [[WHILE_END]] -; CHECK: while.end: -; CHECK-NEXT: [[S_0_LCSSA:%.*]] = phi i32 [ undef, [[ENTRY:%.*]] ], [ [[ADD2_LCSSA]], [[WHILE_END_LOOPEXIT]] ] -; CHECK-NEXT: ret i32 [[S_0_LCSSA]] -; -entry: - %cmp10 = icmp sgt i32 %len, 1 - br i1 %cmp10, label %while.body.preheader, label %while.end - -while.body.preheader: ; preds = %entry - br label %while.body - -while.body: ; preds = %while.body.preheader, %while.body - - %i.013 = phi i32 [ %sub, %while.body ], [ %len, %while.body.preheader ] - %S.012 = phi i32 [ %add2, %while.body ], [ undef, %while.body.preheader ] - %buf.addr.011 = phi ptr [ %add.ptr, %while.body ], [ %buf, %while.body.preheader ] - %0 = load i32, ptr %buf.addr.011, align 4 - %add = add nsw i32 %0, %S.012 - %arrayidx1 = getelementptr inbounds i32, ptr %buf.addr.011, i64 -1 - %1 = load i32, ptr %arrayidx1, align 4 - %add2 = add nsw i32 %add, %1 - %add.ptr = getelementptr inbounds i32, ptr %buf.addr.011, i64 -2 - %sub = add nsw i32 %i.013, -2 - %cmp = icmp sgt i32 %sub, 1 - br i1 %cmp, label %while.body, label %while.end.loopexit - -while.end.loopexit: ; preds = %while.body - br label %while.end - -while.end: ; preds = %while.end.loopexit, %entry - %S.0.lcssa = phi i32 [ undef, %entry ], [ %add2, %while.end.loopexit ] - ret i32 %S.0.lcssa -} - -define i32 @test4(i32 %len) { -; CHECK-LABEL: @test4( -; CHECK-NEXT: entry: -; CHECK-NEXT: br label [[WHILE_BODY:%.*]] -; CHECK: while.body: -; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ [[INDVAR_NEXT:%.*]], [[WHILE_BODY]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[SUM44_020:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[ADD:%.*]], [[WHILE_BODY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = trunc i64 [[INDVAR]] to i32 -; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr @aaa, i64 [[INDVAR]] -; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr [[SCEVGEP]], align 1 -; CHECK-NEXT: [[CONV:%.*]] = zext i8 [[TMP1]] to i64 -; CHECK-NEXT: [[ADD]] = add i64 [[CONV]], [[SUM44_020]] -; CHECK-NEXT: [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[TMP0]], 23 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[WHILE_END:%.*]], label [[WHILE_BODY]] -; CHECK: while.end: -; CHECK-NEXT: [[ADD9_LCSSA:%.*]] = phi i64 [ [[ADD]], [[WHILE_BODY]] ] -; CHECK-NEXT: [[CONV11:%.*]] = trunc i64 [[ADD9_LCSSA]] to i32 -; CHECK-NEXT: [[CALL:%.*]] = tail call i32 @goo(i32 0, i32 [[CONV11]]) -; CHECK-NEXT: unreachable -; -entry: - br label %while.body - -while.body: - %a = phi i32 [ 4, %entry ], [ %a.next, %while.body ] - %b = phi i32 [ 6, %entry ], [ %b.next, %while.body ] - %buf.021 = phi ptr [ @aaa, %entry ], [ %add.ptr, %while.body ] - %sum44.020 = phi i64 [ 0, %entry ], [ %add9, %while.body ] - %0 = load i8, ptr %buf.021, align 1 - %conv = zext i8 %0 to i64 - %add = add i64 %conv, %sum44.020 - %arrayidx1 = getelementptr inbounds i8, ptr %buf.021, i64 1 - %1 = load i8, ptr %arrayidx1, align 1 - %conv2 = zext i8 %1 to i64 - %add3 = add i64 %add, %conv2 - %arrayidx4 = getelementptr inbounds i8, ptr %buf.021, i64 2 - %2 = load i8, ptr %arrayidx4, align 1 - %conv5 = zext i8 %2 to i64 - %add6 = add i64 %add3, %conv5 - %arrayidx7 = getelementptr inbounds i8, ptr %buf.021, i64 3 - %3 = load i8, ptr %arrayidx7, align 1 - %conv8 = zext i8 %3 to i64 - %add9 = add i64 %add6, %conv8 - %add.ptr = getelementptr inbounds i8, ptr %buf.021, i64 4 - %a.next = add nsw i32 %a, -1 - %b.next = add nsw i32 %b, -1 - %cond = add nsw i32 %a, %b - %tobool = icmp eq i32 %cond, 0 - br i1 %tobool, label %while.end, label %while.body - -while.end: ; preds = %while.body - %conv11 = trunc i64 %add9 to i32 - %call = tail call i32 @goo(i32 0, i32 %conv11) - unreachable -} - diff --git a/llvm/test/Transforms/LoopReroll/external_use.ll b/llvm/test/Transforms/LoopReroll/external_use.ll deleted file mode 100644 index 2124f3b15cd2d..0000000000000 --- a/llvm/test/Transforms/LoopReroll/external_use.ll +++ /dev/null @@ -1,60 +0,0 @@ -; RUN: opt < %s -passes=loop-reroll -S | FileCheck %s - -; Check whether rerolling is rejected if values of the base and root -; instruction are used outside the loop block. - -; Only the base/root instructions except a loop increment instruction -define void @test1() { -entry: - br label %loop1 - -loop1: -;CHECK-LABEL: loop1: -;CHECK-NEXT: %indvar = phi i64 [ 0, %entry ], [ %indvar.next, %loop1 ] -;CHECK-NEXT: %indvar.1 = add nsw i64 %indvar, 1 - - %indvar = phi i64 [ 0, %entry ], [ %indvar.next, %loop1 ] - %indvar.1 = add nsw i64 %indvar, 1 - %indvar.next = add nsw i64 %indvar, 2 - %cmp = icmp slt i64 %indvar.next, 200 - br i1 %cmp, label %loop1, label %exit - -exit: - %var1 = phi i64 [ %indvar.1, %loop1 ] - %var2 = phi i64 [ %indvar, %loop1 ] - ret void -} - -; Both the base/root instructions and reduction instructions -define void @test2() { -entry: - br label %loop2 - -loop2: -;CHECK-LABEL: loop2: -;CHECK-NEXT: %indvar = phi i32 [ 0, %entry ], [ %indvar.next, %loop2 ] -;CHECK-NEXT: %redvar = phi i32 [ 0, %entry ], [ %add.2, %loop2 ] -;CHECK-NEXT: %indvar.1 = add nuw nsw i32 %indvar, 1 -;CHECK-NEXT: %indvar.2 = add nuw nsw i32 %indvar, 2 - - %indvar = phi i32 [ 0, %entry ], [ %indvar.next, %loop2 ] - %redvar = phi i32 [ 0, %entry ], [ %add.2, %loop2 ] - %indvar.1 = add nuw nsw i32 %indvar, 1 - %indvar.2 = add nuw nsw i32 %indvar, 2 - %mul.0 = mul nsw i32 %indvar, %indvar - %mul.1 = mul nsw i32 %indvar.1, %indvar.1 - %mul.2 = mul nsw i32 %indvar.2, %indvar.2 - %add.0 = add nsw i32 %redvar, %mul.0 - %add.1 = add nsw i32 %add.0, %mul.1 - %add.2 = add nsw i32 %add.1, %mul.2 - %indvar.next = add nuw nsw i32 %indvar, 3 - %cmp = icmp slt i32 %indvar.next, 300 - br i1 %cmp, label %loop2, label %exit - -exit: - %a = phi i32 [ %indvar, %loop2 ] - %b = phi i32 [ %indvar.1, %loop2 ] - %c = phi i32 [ %indvar.2, %loop2 ] - %x = phi i32 [ %add.2, %loop2 ] - ret void -} diff --git a/llvm/test/Transforms/LoopReroll/extra_instr.ll b/llvm/test/Transforms/LoopReroll/extra_instr.ll deleted file mode 100644 index 3114463a3e041..0000000000000 --- a/llvm/test/Transforms/LoopReroll/extra_instr.ll +++ /dev/null @@ -1,361 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 -; RUN: opt -S -passes=loop-reroll %s | FileCheck %s -target triple = "aarch64--linux-gnu" - -define void @rerollable1(ptr nocapture %a) { -; CHECK-LABEL: define void @rerollable1 -; CHECK-SAME: (ptr nocapture [[A:%.*]]) { -; CHECK-NEXT: entry: -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[TMP0:%.*]] = shl nuw nsw i64 [[IV]], 2 -; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[TMP0]], 160 -; CHECK-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP1]] -; CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[IV]], 2 -; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[TMP2]], 80 -; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP3]] -; CHECK-NEXT: [[VALUE0:%.*]] = load i32, ptr [[SCEVGEP1]], align 4 -; CHECK-NEXT: store i32 [[VALUE0]], ptr [[SCEVGEP]], align 4 -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[EXITCOND2:%.*]] = icmp eq i64 [[IV]], 9 -; CHECK-NEXT: br i1 [[EXITCOND2]], label [[EXIT:%.*]], label [[LOOP]] -; CHECK: exit: -; CHECK-NEXT: ret void -; -entry: - br label %loop - -loop: - - - ; base instruction - %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] - - ; NO unrerollable instructions - - ; extra simple arithmetic operations, used by root instructions - %plus20 = add nuw nsw i64 %iv, 20 - %plus10 = add nuw nsw i64 %iv, 10 - - ; root instruction 0 - %ldptr0 = getelementptr inbounds [2 x i32], ptr %a, i64 %plus20, i64 0 - %value0 = load i32, ptr %ldptr0, align 4 - %stptr0 = getelementptr inbounds [2 x i32], ptr %a, i64 %plus10, i64 0 - store i32 %value0, ptr %stptr0, align 4 - - ; root instruction 1 - %ldptr1 = getelementptr inbounds [2 x i32], ptr %a, i64 %plus20, i64 1 - %value1 = load i32, ptr %ldptr1, align 4 - %stptr1 = getelementptr inbounds [2 x i32], ptr %a, i64 %plus10, i64 1 - store i32 %value1, ptr %stptr1, align 4 - - ; loop-increment - %iv.next = add nuw nsw i64 %iv, 1 - - ; latch - %exitcond = icmp eq i64 %iv.next, 5 - br i1 %exitcond, label %exit, label %loop - -exit: - ret void -} - -define void @unrerollable1(ptr nocapture %a) { -; CHECK-LABEL: define void @unrerollable1 -; CHECK-SAME: (ptr nocapture [[A:%.*]]) { -; CHECK-NEXT: entry: -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[STPTRX:%.*]] = getelementptr inbounds [2 x i32], ptr [[A]], i64 [[IV]], i64 0 -; CHECK-NEXT: store i32 999, ptr [[STPTRX]], align 4 -; CHECK-NEXT: [[PLUS20:%.*]] = add nuw nsw i64 [[IV]], 20 -; CHECK-NEXT: [[PLUS10:%.*]] = add nuw nsw i64 [[IV]], 10 -; CHECK-NEXT: [[LDPTR0:%.*]] = getelementptr inbounds [2 x i32], ptr [[A]], i64 [[PLUS20]], i64 0 -; CHECK-NEXT: [[VALUE0:%.*]] = load i32, ptr [[LDPTR0]], align 4 -; CHECK-NEXT: [[STPTR0:%.*]] = getelementptr inbounds [2 x i32], ptr [[A]], i64 [[PLUS10]], i64 0 -; CHECK-NEXT: store i32 [[VALUE0]], ptr [[STPTR0]], align 4 -; CHECK-NEXT: [[LDPTR1:%.*]] = getelementptr inbounds [2 x i32], ptr [[A]], i64 [[PLUS20]], i64 1 -; CHECK-NEXT: [[VALUE1:%.*]] = load i32, ptr [[LDPTR1]], align 4 -; CHECK-NEXT: [[STPTR1:%.*]] = getelementptr inbounds [2 x i32], ptr [[A]], i64 [[PLUS10]], i64 1 -; CHECK-NEXT: store i32 [[VALUE1]], ptr [[STPTR1]], align 4 -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], 5 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT:%.*]], label [[LOOP]] -; CHECK: exit: -; CHECK-NEXT: ret void -; -entry: - br label %loop - -loop: - - - ; base instruction - %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] - - ; unrerollable instructions using %iv - %stptrx = getelementptr inbounds [2 x i32], ptr %a, i64 %iv, i64 0 - store i32 999, ptr %stptrx, align 4 - - ; extra simple arithmetic operations, used by root instructions - %plus20 = add nuw nsw i64 %iv, 20 - %plus10 = add nuw nsw i64 %iv, 10 - - ; root instruction 0 - %ldptr0 = getelementptr inbounds [2 x i32], ptr %a, i64 %plus20, i64 0 - %value0 = load i32, ptr %ldptr0, align 4 - %stptr0 = getelementptr inbounds [2 x i32], ptr %a, i64 %plus10, i64 0 - store i32 %value0, ptr %stptr0, align 4 - - ; root instruction 1 - %ldptr1 = getelementptr inbounds [2 x i32], ptr %a, i64 %plus20, i64 1 - %value1 = load i32, ptr %ldptr1, align 4 - %stptr1 = getelementptr inbounds [2 x i32], ptr %a, i64 %plus10, i64 1 - store i32 %value1, ptr %stptr1, align 4 - - ; loop-increment - %iv.next = add nuw nsw i64 %iv, 1 - - ; latch - %exitcond = icmp eq i64 %iv.next, 5 - br i1 %exitcond, label %exit, label %loop - -exit: - ret void -} - -define void @unrerollable2(ptr nocapture %a) { -; CHECK-LABEL: define void @unrerollable2 -; CHECK-SAME: (ptr nocapture [[A:%.*]]) { -; CHECK-NEXT: entry: -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[STPTRX:%.*]] = getelementptr inbounds [2 x i32], ptr [[A]], i64 [[IV_NEXT]], i64 0 -; CHECK-NEXT: store i32 999, ptr [[STPTRX]], align 4 -; CHECK-NEXT: [[PLUS20:%.*]] = add nuw nsw i64 [[IV]], 20 -; CHECK-NEXT: [[PLUS10:%.*]] = add nuw nsw i64 [[IV]], 10 -; CHECK-NEXT: [[LDPTR0:%.*]] = getelementptr inbounds [2 x i32], ptr [[A]], i64 [[PLUS20]], i64 0 -; CHECK-NEXT: [[VALUE0:%.*]] = load i32, ptr [[LDPTR0]], align 4 -; CHECK-NEXT: [[STPTR0:%.*]] = getelementptr inbounds [2 x i32], ptr [[A]], i64 [[PLUS10]], i64 0 -; CHECK-NEXT: store i32 [[VALUE0]], ptr [[STPTR0]], align 4 -; CHECK-NEXT: [[LDPTR1:%.*]] = getelementptr inbounds [2 x i32], ptr [[A]], i64 [[PLUS20]], i64 1 -; CHECK-NEXT: [[VALUE1:%.*]] = load i32, ptr [[LDPTR1]], align 4 -; CHECK-NEXT: [[STPTR1:%.*]] = getelementptr inbounds [2 x i32], ptr [[A]], i64 [[PLUS10]], i64 1 -; CHECK-NEXT: store i32 [[VALUE1]], ptr [[STPTR1]], align 4 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], 5 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT:%.*]], label [[LOOP]] -; CHECK: exit: -; CHECK-NEXT: ret void -; -entry: - br label %loop - -loop: - - - ; base instruction - %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] - - ; loop-increment - %iv.next = add nuw nsw i64 %iv, 1 - - ; unrerollable instructions using %iv.next - %stptrx = getelementptr inbounds [2 x i32], ptr %a, i64 %iv.next, i64 0 - store i32 999, ptr %stptrx, align 4 - - ; extra simple arithmetic operations, used by root instructions - %plus20 = add nuw nsw i64 %iv, 20 - %plus10 = add nuw nsw i64 %iv, 10 - - ; root instruction 0 - %ldptr0 = getelementptr inbounds [2 x i32], ptr %a, i64 %plus20, i64 0 - %value0 = load i32, ptr %ldptr0, align 4 - %stptr0 = getelementptr inbounds [2 x i32], ptr %a, i64 %plus10, i64 0 - store i32 %value0, ptr %stptr0, align 4 - - ; root instruction 1 - %ldptr1 = getelementptr inbounds [2 x i32], ptr %a, i64 %plus20, i64 1 - %value1 = load i32, ptr %ldptr1, align 4 - %stptr1 = getelementptr inbounds [2 x i32], ptr %a, i64 %plus10, i64 1 - store i32 %value1, ptr %stptr1, align 4 - - ; latch - %exitcond = icmp eq i64 %iv.next, 5 - br i1 %exitcond, label %exit, label %loop - -exit: - ret void -} - -define dso_local void @rerollable2() { -; CHECK-LABEL: define dso_local void @rerollable2() { -; CHECK-NEXT: entry: -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[IV]], 24 -; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[IV]], 20 -; CHECK-NEXT: [[IV_SCALED_DIV5:%.*]] = udiv i32 [[TMP1]], 5 -; CHECK-NEXT: tail call void @bar(i32 [[IV_SCALED_DIV5]]) -; CHECK-NEXT: [[IV_SCALED_ADD4_DIV5:%.*]] = udiv i32 [[TMP0]], 5 -; CHECK-NEXT: tail call void @bar(i32 [[IV_SCALED_ADD4_DIV5]]) -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[IV]], 8 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT:%.*]], label [[LOOP]] -; CHECK: exit: -; CHECK-NEXT: ret void -; -entry: - br label %loop - -loop: - - - ; induction variable - %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ] - - ; scale instruction - %iv.mul3 = mul nuw nsw i32 %iv, 3 - - ; extra simple arithmetic operations, used by root instructions - %iv.scaled = add nuw nsw i32 %iv.mul3, 20 - - ; NO unrerollable instructions - - ; root set 1 - - ; base instruction - %iv.scaled.div5 = udiv i32 %iv.scaled, 5 - tail call void @bar(i32 %iv.scaled.div5) - ; root instruction 0 - %iv.scaled.add1 = add nuw nsw i32 %iv.scaled, 1 - %iv.scaled.add1.div5 = udiv i32 %iv.scaled.add1, 5 - tail call void @bar(i32 %iv.scaled.add1.div5) - ; root instruction 2 - %iv.scaled.add2 = add nuw nsw i32 %iv.scaled, 2 - %iv.scaled.add2.div5 = udiv i32 %iv.scaled.add2, 5 - tail call void @bar(i32 %iv.scaled.add2.div5) - - ; root set 2 - - ; base instruction - %iv.scaled.add4 = add nuw nsw i32 %iv.scaled, 4 - %iv.scaled.add4.div5 = udiv i32 %iv.scaled.add4, 5 - tail call void @bar(i32 %iv.scaled.add4.div5) - ; root instruction 0 - %iv.scaled.add5 = add nuw nsw i32 %iv.scaled, 5 - %iv.scaled.add5.div5 = udiv i32 %iv.scaled.add5, 5 - tail call void @bar(i32 %iv.scaled.add5.div5) - ; root instruction 2 - %iv.scaled.add6 = add nuw nsw i32 %iv.scaled, 6 - %iv.scaled.add6.div5 = udiv i32 %iv.scaled.add6, 5 - tail call void @bar(i32 %iv.scaled.add6.div5) - - ; loop-increment - %iv.next = add nuw nsw i32 %iv, 1 - - ; latch - %cmp = icmp ult i32 %iv.next, 3 - br i1 %cmp, label %loop, label %exit - -exit: - ret void -} - -define dso_local void @unrerollable3() { -; CHECK-LABEL: define dso_local void @unrerollable3() { -; CHECK-NEXT: entry: -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[IV_MUL3:%.*]] = mul nuw nsw i32 [[IV]], 3 -; CHECK-NEXT: [[IV_SCALED:%.*]] = add nuw nsw i32 [[IV_MUL3]], 20 -; CHECK-NEXT: [[IV_MUL7:%.*]] = mul nuw nsw i32 [[IV]], 7 -; CHECK-NEXT: tail call void @bar(i32 [[IV_MUL7]]) -; CHECK-NEXT: [[IV_SCALED_DIV5:%.*]] = udiv i32 [[IV_SCALED]], 5 -; CHECK-NEXT: tail call void @bar(i32 [[IV_SCALED_DIV5]]) -; CHECK-NEXT: [[IV_SCALED_ADD1:%.*]] = add nuw nsw i32 [[IV_SCALED]], 1 -; CHECK-NEXT: [[IV_SCALED_ADD1_DIV5:%.*]] = udiv i32 [[IV_SCALED_ADD1]], 5 -; CHECK-NEXT: tail call void @bar(i32 [[IV_SCALED_ADD1_DIV5]]) -; CHECK-NEXT: [[IV_SCALED_ADD2:%.*]] = add nuw nsw i32 [[IV_SCALED]], 2 -; CHECK-NEXT: [[IV_SCALED_ADD2_DIV5:%.*]] = udiv i32 [[IV_SCALED_ADD2]], 5 -; CHECK-NEXT: tail call void @bar(i32 [[IV_SCALED_ADD2_DIV5]]) -; CHECK-NEXT: [[IV_SCALED_ADD4:%.*]] = add nuw nsw i32 [[IV_SCALED]], 4 -; CHECK-NEXT: [[IV_SCALED_ADD4_DIV5:%.*]] = udiv i32 [[IV_SCALED_ADD4]], 5 -; CHECK-NEXT: tail call void @bar(i32 [[IV_SCALED_ADD4_DIV5]]) -; CHECK-NEXT: [[IV_SCALED_ADD5:%.*]] = add nuw nsw i32 [[IV_SCALED]], 5 -; CHECK-NEXT: [[IV_SCALED_ADD5_DIV5:%.*]] = udiv i32 [[IV_SCALED_ADD5]], 5 -; CHECK-NEXT: tail call void @bar(i32 [[IV_SCALED_ADD5_DIV5]]) -; CHECK-NEXT: [[IV_SCALED_ADD6:%.*]] = add nuw nsw i32 [[IV_SCALED]], 6 -; CHECK-NEXT: [[IV_SCALED_ADD6_DIV5:%.*]] = udiv i32 [[IV_SCALED_ADD6]], 5 -; CHECK-NEXT: tail call void @bar(i32 [[IV_SCALED_ADD6_DIV5]]) -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1 -; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[IV_NEXT]], 3 -; CHECK-NEXT: br i1 [[CMP]], label [[LOOP]], label [[EXIT:%.*]] -; CHECK: exit: -; CHECK-NEXT: ret void -; -entry: - br label %loop - -loop: - - - ; induction variable - %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ] - - ; scale instruction - %iv.mul3 = mul nuw nsw i32 %iv, 3 - - ; extra simple arithmetic operations, used by root instructions - %iv.scaled = add nuw nsw i32 %iv.mul3, 20 - - ; unrerollable instructions using %iv - %iv.mul7 = mul nuw nsw i32 %iv, 7 - tail call void @bar(i32 %iv.mul7) - - ; root set 1 - - ; base instruction - %iv.scaled.div5 = udiv i32 %iv.scaled, 5 - tail call void @bar(i32 %iv.scaled.div5) - ; root instruction 0 - %iv.scaled.add1 = add nuw nsw i32 %iv.scaled, 1 - %iv.scaled.add1.div5 = udiv i32 %iv.scaled.add1, 5 - tail call void @bar(i32 %iv.scaled.add1.div5) - ; root instruction 2 - %iv.scaled.add2 = add nuw nsw i32 %iv.scaled, 2 - %iv.scaled.add2.div5 = udiv i32 %iv.scaled.add2, 5 - tail call void @bar(i32 %iv.scaled.add2.div5) - - ; root set 2 - - ; base instruction - %iv.scaled.add4 = add nuw nsw i32 %iv.scaled, 4 - %iv.scaled.add4.div5 = udiv i32 %iv.scaled.add4, 5 - tail call void @bar(i32 %iv.scaled.add4.div5) - ; root instruction 0 - %iv.scaled.add5 = add nuw nsw i32 %iv.scaled, 5 - %iv.scaled.add5.div5 = udiv i32 %iv.scaled.add5, 5 - tail call void @bar(i32 %iv.scaled.add5.div5) - ; root instruction 2 - %iv.scaled.add6 = add nuw nsw i32 %iv.scaled, 6 - %iv.scaled.add6.div5 = udiv i32 %iv.scaled.add6, 5 - tail call void @bar(i32 %iv.scaled.add6.div5) - - ; loop-increment - %iv.next = add nuw nsw i32 %iv, 1 - - ; latch - %cmp = icmp ult i32 %iv.next, 3 - br i1 %cmp, label %loop, label %exit - -exit: - ret void -} - -declare dso_local void @bar(i32) diff --git a/llvm/test/Transforms/LoopReroll/indvar_with_ext.ll b/llvm/test/Transforms/LoopReroll/indvar_with_ext.ll deleted file mode 100644 index 3fcd43f1866a6..0000000000000 --- a/llvm/test/Transforms/LoopReroll/indvar_with_ext.ll +++ /dev/null @@ -1,184 +0,0 @@ -; RUN: opt -S -passes=loop-reroll %s | FileCheck %s -target triple = "aarch64--linux-gnu" - -define void @test(i32 %n, ptr %arrayidx200, ptr %arrayidx164, ptr %arrayidx172) { -entry: - %rem.i = srem i32 %n, 4 - %t22 = load float, ptr %arrayidx172, align 4 - %cmp.9 = icmp eq i32 %n, 0 - %t7 = sext i32 %n to i64 - br i1 %cmp.9, label %while.end, label %while.body.preheader - -while.body.preheader: - br label %while.body - -while.body: -;CHECK-LABEL: while.body: -;CHECK-NEXT: %indvar = phi i64 [ %indvar.next, %while.body ], [ 0, %while.body.preheader ] -;CHECK-NEXT: %arrayidx62.i = getelementptr inbounds float, ptr %arrayidx200, i64 %indvar -;CHECK-NEXT: %t1 = load float, ptr %arrayidx62.i, align 4 -;CHECK-NEXT: %arrayidx64.i = getelementptr inbounds float, ptr %arrayidx164, i64 %indvar -;CHECK-NEXT: %t2 = load float, ptr %arrayidx64.i, align 4 -;CHECK-NEXT: %mul65.i = fmul fast float %t2, %t22 -;CHECK-NEXT: %add66.i = fadd fast float %mul65.i, %t1 -;CHECK-NEXT: store float %add66.i, ptr %arrayidx62.i, align 4 -;CHECK-NEXT: %indvar.next = add i64 %indvar, 1 -;CHECK-NEXT: %exitcond = icmp eq i64 %indvar, %{{[0-9]+}} -;CHECK-NEXT: br i1 %exitcond, label %while.end.loopexit, label %while.body - - %indvars.iv.i423 = phi i64 [ %indvars.iv.next.i424, %while.body ], [ 0, %while.body.preheader ] - %i.22.i = phi i32 [ %add103.i, %while.body ], [ %rem.i, %while.body.preheader ] - %arrayidx62.i = getelementptr inbounds float, ptr %arrayidx200, i64 %indvars.iv.i423 - %t1 = load float, ptr %arrayidx62.i, align 4 - %arrayidx64.i = getelementptr inbounds float, ptr %arrayidx164, i64 %indvars.iv.i423 - %t2 = load float, ptr %arrayidx64.i, align 4 - %mul65.i = fmul fast float %t2, %t22 - %add66.i = fadd fast float %mul65.i, %t1 - store float %add66.i, ptr %arrayidx62.i, align 4 - %t3 = add nsw i64 %indvars.iv.i423, 1 - %arrayidx71.i = getelementptr inbounds float, ptr %arrayidx200, i64 %t3 - %t4 = load float, ptr %arrayidx71.i, align 4 - %arrayidx74.i = getelementptr inbounds float, ptr %arrayidx164, i64 %t3 - %t5 = load float, ptr %arrayidx74.i, align 4 - %mul75.i = fmul fast float %t5, %t22 - %add76.i = fadd fast float %mul75.i, %t4 - store float %add76.i, ptr %arrayidx71.i, align 4 - %add103.i = add nsw i32 %i.22.i, 2 - %t6 = sext i32 %add103.i to i64 - %cmp58.i = icmp slt i64 %t6, %t7 - %indvars.iv.next.i424 = add i64 %indvars.iv.i423, 2 - br i1 %cmp58.i, label %while.body, label %while.end.loopexit - -while.end.loopexit: - br label %while.end - -while.end: - ret void -} - -; Function Attrs: noinline norecurse nounwind -define i32 @test2(i64 %n, ptr nocapture %x, ptr nocapture readonly %y) { -entry: - %cmp18 = icmp sgt i64 %n, 0 - br i1 %cmp18, label %for.body.preheader, label %for.end - -for.body.preheader: ; preds = %entry - br label %for.body - -for.body: ; preds = %for.body.preheader, %for.body - -;CHECK-LABEL: for.body: -;CHECK-NEXT: %indvar = phi i64 [ %indvar.next, %for.body ], [ 0, %for.body.preheader ] -;CHECK-NEXT: %arrayidx = getelementptr inbounds i32, ptr %y, i64 %indvar -;CHECK-NEXT: [[T1:%[0-9]+]] = load i32, ptr %arrayidx, align 4 -;CHECK-NEXT: %arrayidx3 = getelementptr inbounds i32, ptr %x, i64 %indvar -;CHECK-NEXT: store i32 [[T1]], ptr %arrayidx3, align 4 -;CHECK-NEXT: %indvar.next = add i64 %indvar, 1 -;CHECK-NEXT: %exitcond = icmp eq i64 %indvar, %{{[0-9]+}} -;CHECK-NEXT: br i1 %exitcond, label %for.end.loopexit, label %for.body - - %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ] - %arrayidx = getelementptr inbounds i32, ptr %y, i64 %indvars.iv - %0 = load i32, ptr %arrayidx, align 4 - %arrayidx3 = getelementptr inbounds i32, ptr %x, i64 %indvars.iv - store i32 %0, ptr %arrayidx3, align 4 - %1 = or disjoint i64 %indvars.iv, 1 - %arrayidx5 = getelementptr inbounds i32, ptr %y, i64 %1 - %2 = load i32, ptr %arrayidx5, align 4 - %arrayidx8 = getelementptr inbounds i32, ptr %x, i64 %1 - store i32 %2, ptr %arrayidx8, align 4 - %indvars.iv.next = add nuw nsw i64 %indvars.iv, 2 - %cmp = icmp slt i64 %indvars.iv.next, %n - br i1 %cmp, label %for.body, label %for.end.loopexit - -for.end.loopexit: ; preds = %for.body - br label %for.end - -for.end: ; preds = %for.end.loopexit, %entry - ret i32 0 -} - -; Function Attrs: noinline norecurse nounwind -define i32 @test3(i32 %n, ptr nocapture %x, ptr nocapture readonly %y) { -entry: - %cmp21 = icmp sgt i32 %n, 0 - br i1 %cmp21, label %for.body.preheader, label %for.end - -for.body.preheader: ; preds = %entry - br label %for.body - -for.body: ; preds = %for.body.preheader, %for.body - -;CHECK-LABEL: for.body: -;CHECK: %add12 = add i8 %i.022, 2 -;CHECK-NEXT: %conv = sext i8 %add12 to i32 -;CHECK-NEXT: %cmp = icmp slt i32 %conv, %n -;CHECK-NEXT: br i1 %cmp, label %for.body, label %for.end.loopexit - - %conv23 = phi i32 [ %conv, %for.body ], [ 0, %for.body.preheader ] - %i.022 = phi i8 [ %add12, %for.body ], [ 0, %for.body.preheader ] - %idxprom = sext i8 %i.022 to i64 - %arrayidx = getelementptr inbounds i32, ptr %y, i64 %idxprom - %0 = load i32, ptr %arrayidx, align 4 - %arrayidx3 = getelementptr inbounds i32, ptr %x, i64 %idxprom - store i32 %0, ptr %arrayidx3, align 4 - %add = or disjoint i32 %conv23, 1 - %idxprom5 = sext i32 %add to i64 - %arrayidx6 = getelementptr inbounds i32, ptr %y, i64 %idxprom5 - %1 = load i32, ptr %arrayidx6, align 4 - %arrayidx10 = getelementptr inbounds i32, ptr %x, i64 %idxprom5 - store i32 %1, ptr %arrayidx10, align 4 - %add12 = add i8 %i.022, 2 - %conv = sext i8 %add12 to i32 - %cmp = icmp slt i32 %conv, %n - br i1 %cmp, label %for.body, label %for.end.loopexit - -for.end.loopexit: ; preds = %for.body - br label %for.end - -for.end: ; preds = %for.end.loopexit, %entry - ret i32 0 -} - -; Function Attrs: noinline norecurse nounwind -define i32 @test4(i64 %n, ptr nocapture %x, ptr nocapture readonly %y) { -entry: - %cmp18 = icmp eq i64 %n, 0 - br i1 %cmp18, label %for.end, label %for.body.preheader - -for.body.preheader: ; preds = %entry - br label %for.body - -for.body: ; preds = %for.body.preheader, %for.body - -;CHECK-LABEL: for.body: -;CHECK-NEXT: %indvar = phi i64 [ %indvar.next, %for.body ], [ 0, %for.body.preheader ] -;CHECK-NEXT: %arrayidx = getelementptr inbounds i32, ptr %y, i64 %indvar -;CHECK-NEXT: [[T1:%[0-9]+]] = load i32, ptr %arrayidx, align 4 -;CHECK-NEXT: %arrayidx3 = getelementptr inbounds i32, ptr %x, i64 %indvar -;CHECK-NEXT: store i32 [[T1]], ptr %arrayidx3, align 4 -;CHECK-NEXT: %indvar.next = add i64 %indvar, 1 -;CHECK-NEXT: %exitcond = icmp eq i64 %indvar, %{{[0-9]+}} -;CHECK-NEXT: br i1 %exitcond, label %for.end.loopexit, label %for.body - - %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ] - %arrayidx = getelementptr inbounds i32, ptr %y, i64 %indvars.iv - %0 = load i32, ptr %arrayidx, align 4 - %arrayidx3 = getelementptr inbounds i32, ptr %x, i64 %indvars.iv - store i32 %0, ptr %arrayidx3, align 4 - %1 = or disjoint i64 %indvars.iv, 1 - %arrayidx5 = getelementptr inbounds i32, ptr %y, i64 %1 - %2 = load i32, ptr %arrayidx5, align 4 - %arrayidx8 = getelementptr inbounds i32, ptr %x, i64 %1 - store i32 %2, ptr %arrayidx8, align 4 - %indvars.iv.next = add nuw nsw i64 %indvars.iv, 2 - %cmp = icmp ult i64 %indvars.iv.next, %n - br i1 %cmp, label %for.body, label %for.end.loopexit - -for.end.loopexit: ; preds = %for.body - br label %for.end - -for.end: ; preds = %for.end.loopexit, %entry - ret i32 0 -} - diff --git a/llvm/test/Transforms/LoopReroll/negative.ll b/llvm/test/Transforms/LoopReroll/negative.ll deleted file mode 100644 index ef850c0e23cfb..0000000000000 --- a/llvm/test/Transforms/LoopReroll/negative.ll +++ /dev/null @@ -1,48 +0,0 @@ -; RUN: opt -S -passes=loop-reroll %s | FileCheck %s -target triple = "aarch64--linux-gnu" -@buf = global [16 x i8] c"\0A\0A\0A\0A\0A\0A\0A\0A\0A\0A\0A\0A\0A\0A\0A\0A", align 1 - -define i32 @test1(i32 %len, ptr nocapture readonly %buf) #0 { -entry: - %cmp.13 = icmp sgt i32 %len, 1 - br i1 %cmp.13, label %while.body.lr.ph, label %while.end - -while.body.lr.ph: ; preds = %entry - br label %while.body - -while.body: -;CHECK-LABEL: while.body: -;CHECK-NEXT: %indvar = phi i32 [ %indvar.next, %while.body ], [ 0, %while.body.lr.ph ] -;CHECK-NEXT: %sum4.015 = phi i64 [ 0, %while.body.lr.ph ], [ %add, %while.body ] -;CHECK-NOT: %sub5 = add nsw i32 %len.addr.014, -1 -;CHECK-NOT: %sub5 = add nsw i32 %len.addr.014, -2 -;CHECK: br i1 %exitcond, label %while.cond.while.end_crit_edge, label %while.body - - %sum4.015 = phi i64 [ 0, %while.body.lr.ph ], [ %add4, %while.body ] - %len.addr.014 = phi i32 [ %len, %while.body.lr.ph ], [ %sub5, %while.body ] - %idxprom = sext i32 %len.addr.014 to i64 - %arrayidx = getelementptr inbounds i8, ptr %buf, i64 %idxprom - %0 = load i8, ptr %arrayidx, align 1 - %conv = zext i8 %0 to i64 - %add = add i64 %conv, %sum4.015 - %sub = add nsw i32 %len.addr.014, -1 - %idxprom1 = sext i32 %sub to i64 - %arrayidx2 = getelementptr inbounds i8, ptr %buf, i64 %idxprom1 - %1 = load i8, ptr %arrayidx2, align 1 - %conv3 = zext i8 %1 to i64 - %add4 = add i64 %add, %conv3 - %sub5 = add nsw i32 %len.addr.014, -2 - %cmp = icmp sgt i32 %sub5, 1 - br i1 %cmp, label %while.body, label %while.cond.while.end_crit_edge - -while.cond.while.end_crit_edge: ; preds = %while.body - %add4.lcssa = phi i64 [ %add4, %while.body ] - %phitmp = trunc i64 %add4.lcssa to i32 - br label %while.end - -while.end: ; preds = %while.cond.while.end_crit_edge, %entry - %sum4.0.lcssa = phi i32 [ %phitmp, %while.cond.while.end_crit_edge ], [ 0, %entry ] - ret i32 %sum4.0.lcssa - unreachable -} - diff --git a/llvm/test/Transforms/LoopReroll/nonconst_lb.ll b/llvm/test/Transforms/LoopReroll/nonconst_lb.ll deleted file mode 100644 index 80ea0506f774c..0000000000000 --- a/llvm/test/Transforms/LoopReroll/nonconst_lb.ll +++ /dev/null @@ -1,168 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -passes=loop-reroll -S | FileCheck %s -target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32" -target triple = "thumbv7-none-linux" - -;void foo(int *A, int *B, int m, int n) { -; for (int i = m; i < n; i+=4) { -; A[i+0] = B[i+0] * 4; -; A[i+1] = B[i+1] * 4; -; A[i+2] = B[i+2] * 4; -; A[i+3] = B[i+3] * 4; -; } -;} -define void @foo(ptr nocapture %A, ptr nocapture readonly %B, i32 %m, i32 %n) { -; CHECK-LABEL: @foo( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[CMP34:%.*]] = icmp slt i32 [[M:%.*]], [[N:%.*]] -; CHECK-NEXT: br i1 [[CMP34]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]] -; CHECK: for.body.preheader: -; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[M]], 4 -; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N]], i32 [[TMP0]]) -; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[SMAX]], -1 -; CHECK-NEXT: [[TMP2:%.*]] = sub i32 [[TMP1]], [[M]] -; CHECK-NEXT: [[TMP3:%.*]] = lshr i32 [[TMP2]], 2 -; CHECK-NEXT: [[TMP4:%.*]] = shl nuw i32 [[TMP3]], 2 -; CHECK-NEXT: [[TMP5:%.*]] = add nuw nsw i32 [[TMP4]], 3 -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[INDVAR:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[INDVAR_NEXT:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[TMP6:%.*]] = add i32 [[M]], [[INDVAR]] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i32 [[TMP6]] -; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[MUL:%.*]] = shl nsw i32 [[TMP7]], 2 -; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[TMP6]] -; CHECK-NEXT: store i32 [[MUL]], ptr [[ARRAYIDX2]], align 4 -; CHECK-NEXT: [[INDVAR_NEXT]] = add i32 [[INDVAR]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INDVAR]], [[TMP5]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT:%.*]], label [[FOR_BODY]] -; CHECK: for.end.loopexit: -; CHECK-NEXT: br label [[FOR_END]] -; CHECK: for.end: -; CHECK-NEXT: ret void -; -entry: - %cmp34 = icmp slt i32 %m, %n - br i1 %cmp34, label %for.body, label %for.end - -for.body: ; preds = %entry, %for.body - %i.035 = phi i32 [ %add18, %for.body ], [ %m, %entry ] - %arrayidx = getelementptr inbounds i32, ptr %B, i32 %i.035 - %0 = load i32, ptr %arrayidx, align 4 - %mul = shl nsw i32 %0, 2 - %arrayidx2 = getelementptr inbounds i32, ptr %A, i32 %i.035 - store i32 %mul, ptr %arrayidx2, align 4 - %add3 = add nsw i32 %i.035, 1 - %arrayidx4 = getelementptr inbounds i32, ptr %B, i32 %add3 - %1 = load i32, ptr %arrayidx4, align 4 - %mul5 = shl nsw i32 %1, 2 - %arrayidx7 = getelementptr inbounds i32, ptr %A, i32 %add3 - store i32 %mul5, ptr %arrayidx7, align 4 - %add8 = add nsw i32 %i.035, 2 - %arrayidx9 = getelementptr inbounds i32, ptr %B, i32 %add8 - %2 = load i32, ptr %arrayidx9, align 4 - %mul10 = shl nsw i32 %2, 2 - %arrayidx12 = getelementptr inbounds i32, ptr %A, i32 %add8 - store i32 %mul10, ptr %arrayidx12, align 4 - %add13 = add nsw i32 %i.035, 3 - %arrayidx14 = getelementptr inbounds i32, ptr %B, i32 %add13 - %3 = load i32, ptr %arrayidx14, align 4 - %mul15 = shl nsw i32 %3, 2 - %arrayidx17 = getelementptr inbounds i32, ptr %A, i32 %add13 - store i32 %mul15, ptr %arrayidx17, align 4 - %add18 = add nsw i32 %i.035, 4 - %cmp = icmp slt i32 %add18, %n - br i1 %cmp, label %for.body, label %for.end - -for.end: ; preds = %for.body, %entry - ret void -} - -;void daxpy_ur(int n,float da,ptr dx,ptr dy) -; { -; int m = n % 4; -; for (int i = m; i < n; i = i + 4) -; { -; dy[i] = dy[i] + da*dx[i]; -; dy[i+1] = dy[i+1] + da*dx[i+1]; -; dy[i+2] = dy[i+2] + da*dx[i+2]; -; dy[i+3] = dy[i+3] + da*dx[i+3]; -; } -; } -define void @daxpy_ur(i32 %n, float %da, ptr nocapture readonly %dx, ptr nocapture %dy) { -; CHECK-LABEL: @daxpy_ur( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[REM:%.*]] = srem i32 [[N:%.*]], 4 -; CHECK-NEXT: [[CMP55:%.*]] = icmp slt i32 [[REM]], [[N]] -; CHECK-NEXT: br i1 [[CMP55]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]] -; CHECK: for.body.preheader: -; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1 -; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[TMP0]], [[REM]] -; CHECK-NEXT: [[TMP2:%.*]] = lshr i32 [[TMP1]], 2 -; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i32 [[TMP2]], 2 -; CHECK-NEXT: [[TMP4:%.*]] = add nuw nsw i32 [[TMP3]], 3 -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[INDVAR:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[INDVAR_NEXT:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[REM]], [[INDVAR]] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[DY:%.*]], i32 [[TMP5]] -; CHECK-NEXT: [[TMP6:%.*]] = load float, ptr [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds float, ptr [[DX:%.*]], i32 [[TMP5]] -; CHECK-NEXT: [[TMP7:%.*]] = load float, ptr [[ARRAYIDX1]], align 4 -; CHECK-NEXT: [[MUL:%.*]] = fmul float [[TMP7]], [[DA:%.*]] -; CHECK-NEXT: [[ADD:%.*]] = fadd float [[TMP6]], [[MUL]] -; CHECK-NEXT: store float [[ADD]], ptr [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[INDVAR_NEXT]] = add i32 [[INDVAR]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INDVAR]], [[TMP4]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT:%.*]], label [[FOR_BODY]] -; CHECK: for.end.loopexit: -; CHECK-NEXT: br label [[FOR_END]] -; CHECK: for.end: -; CHECK-NEXT: ret void -; -entry: - %rem = srem i32 %n, 4 - %cmp55 = icmp slt i32 %rem, %n - br i1 %cmp55, label %for.body, label %for.end - -for.body: ; preds = %entry, %for.body - %i.056 = phi i32 [ %add27, %for.body ], [ %rem, %entry ] - %arrayidx = getelementptr inbounds float, ptr %dy, i32 %i.056 - %0 = load float, ptr %arrayidx, align 4 - %arrayidx1 = getelementptr inbounds float, ptr %dx, i32 %i.056 - %1 = load float, ptr %arrayidx1, align 4 - %mul = fmul float %1, %da - %add = fadd float %0, %mul - store float %add, ptr %arrayidx, align 4 - %add3 = add nsw i32 %i.056, 1 - %arrayidx4 = getelementptr inbounds float, ptr %dy, i32 %add3 - %2 = load float, ptr %arrayidx4, align 4 - %arrayidx6 = getelementptr inbounds float, ptr %dx, i32 %add3 - %3 = load float, ptr %arrayidx6, align 4 - %mul7 = fmul float %3, %da - %add8 = fadd float %2, %mul7 - store float %add8, ptr %arrayidx4, align 4 - %add11 = add nsw i32 %i.056, 2 - %arrayidx12 = getelementptr inbounds float, ptr %dy, i32 %add11 - %4 = load float, ptr %arrayidx12, align 4 - %arrayidx14 = getelementptr inbounds float, ptr %dx, i32 %add11 - %5 = load float, ptr %arrayidx14, align 4 - %mul15 = fmul float %5, %da - %add16 = fadd float %4, %mul15 - store float %add16, ptr %arrayidx12, align 4 - %add19 = add nsw i32 %i.056, 3 - %arrayidx20 = getelementptr inbounds float, ptr %dy, i32 %add19 - %6 = load float, ptr %arrayidx20, align 4 - %arrayidx22 = getelementptr inbounds float, ptr %dx, i32 %add19 - %7 = load float, ptr %arrayidx22, align 4 - %mul23 = fmul float %7, %da - %add24 = fadd float %6, %mul23 - store float %add24, ptr %arrayidx20, align 4 - %add27 = add nsw i32 %i.056, 4 - %cmp = icmp slt i32 %add27, %n - br i1 %cmp, label %for.body, label %for.end - -for.end: ; preds = %for.body, %entry - ret void -} - diff --git a/llvm/test/Transforms/LoopReroll/ptrindvar.ll b/llvm/test/Transforms/LoopReroll/ptrindvar.ll deleted file mode 100644 index 90f6353197b24..0000000000000 --- a/llvm/test/Transforms/LoopReroll/ptrindvar.ll +++ /dev/null @@ -1,125 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 -; RUN: opt -S -passes=loop-reroll %s | FileCheck %s -target triple = "aarch64--linux-gnu" - -define i32 @test(ptr readonly %buf, ptr readnone %end) #0 { -; CHECK-LABEL: define i32 @test -; CHECK-SAME: (ptr readonly [[BUF:%.*]], ptr readnone [[END:%.*]]) { -; CHECK-NEXT: entry: -; CHECK-NEXT: [[BUF2:%.*]] = ptrtoint ptr [[BUF]] to i64 -; CHECK-NEXT: [[END1:%.*]] = ptrtoint ptr [[END]] to i64 -; CHECK-NEXT: [[CMP_9:%.*]] = icmp eq ptr [[BUF]], [[END]] -; CHECK-NEXT: br i1 [[CMP_9]], label [[WHILE_END:%.*]], label [[WHILE_BODY_PREHEADER:%.*]] -; CHECK: while.body.preheader: -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[END1]], -8 -; CHECK-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], [[BUF2]] -; CHECK-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP1]], 3 -; CHECK-NEXT: [[TMP3:%.*]] = shl nuw nsw i64 [[TMP2]], 1 -; CHECK-NEXT: [[TMP4:%.*]] = add nuw nsw i64 [[TMP3]], 1 -; CHECK-NEXT: br label [[WHILE_BODY:%.*]] -; CHECK: while.body: -; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ [[INDVAR_NEXT:%.*]], [[WHILE_BODY]] ], [ 0, [[WHILE_BODY_PREHEADER]] ] -; CHECK-NEXT: [[S_011:%.*]] = phi i32 [ [[ADD:%.*]], [[WHILE_BODY]] ], [ undef, [[WHILE_BODY_PREHEADER]] ] -; CHECK-NEXT: [[TMP5:%.*]] = shl nuw i64 [[INDVAR]], 2 -; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[BUF]], i64 [[TMP5]] -; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[SCEVGEP]], align 4 -; CHECK-NEXT: [[ADD]] = add nsw i32 [[TMP6]], [[S_011]] -; CHECK-NEXT: [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVAR]], [[TMP4]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[WHILE_END_LOOPEXIT:%.*]], label [[WHILE_BODY]] -; CHECK: while.end.loopexit: -; CHECK-NEXT: [[ADD2_LCSSA:%.*]] = phi i32 [ [[ADD]], [[WHILE_BODY]] ] -; CHECK-NEXT: br label [[WHILE_END]] -; CHECK: while.end: -; CHECK-NEXT: [[S_0_LCSSA:%.*]] = phi i32 [ undef, [[ENTRY:%.*]] ], [ [[ADD2_LCSSA]], [[WHILE_END_LOOPEXIT]] ] -; CHECK-NEXT: ret i32 [[S_0_LCSSA]] -; -entry: - %cmp.9 = icmp eq ptr %buf, %end - br i1 %cmp.9, label %while.end, label %while.body.preheader - -while.body.preheader: - br label %while.body - -while.body: - - %S.011 = phi i32 [ %add2, %while.body ], [ undef, %while.body.preheader ] - %buf.addr.010 = phi ptr [ %add.ptr, %while.body ], [ %buf, %while.body.preheader ] - %0 = load i32, ptr %buf.addr.010, align 4 - %add = add nsw i32 %0, %S.011 - %arrayidx1 = getelementptr inbounds i32, ptr %buf.addr.010, i64 1 - %1 = load i32, ptr %arrayidx1, align 4 - %add2 = add nsw i32 %add, %1 - %add.ptr = getelementptr inbounds i32, ptr %buf.addr.010, i64 2 - %cmp = icmp eq ptr %add.ptr, %end - br i1 %cmp, label %while.end.loopexit, label %while.body - -while.end.loopexit: - %add2.lcssa = phi i32 [ %add2, %while.body ] - br label %while.end - -while.end: - %S.0.lcssa = phi i32 [ undef, %entry ], [ %add2.lcssa, %while.end.loopexit ] - ret i32 %S.0.lcssa -} - -define i32 @test2(ptr readonly %buf, ptr readnone %end) #0 { -; CHECK-LABEL: define i32 @test2 -; CHECK-SAME: (ptr readonly [[BUF:%.*]], ptr readnone [[END:%.*]]) { -; CHECK-NEXT: entry: -; CHECK-NEXT: [[END2:%.*]] = ptrtoint ptr [[END]] to i64 -; CHECK-NEXT: [[BUF1:%.*]] = ptrtoint ptr [[BUF]] to i64 -; CHECK-NEXT: [[CMP_9:%.*]] = icmp eq ptr [[BUF]], [[END]] -; CHECK-NEXT: br i1 [[CMP_9]], label [[WHILE_END:%.*]], label [[WHILE_BODY_PREHEADER:%.*]] -; CHECK: while.body.preheader: -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[BUF1]], -8 -; CHECK-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], [[END2]] -; CHECK-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP1]], 3 -; CHECK-NEXT: [[TMP3:%.*]] = shl nuw nsw i64 [[TMP2]], 1 -; CHECK-NEXT: [[TMP4:%.*]] = add nuw nsw i64 [[TMP3]], 1 -; CHECK-NEXT: br label [[WHILE_BODY:%.*]] -; CHECK: while.body: -; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ [[INDVAR_NEXT:%.*]], [[WHILE_BODY]] ], [ 0, [[WHILE_BODY_PREHEADER]] ] -; CHECK-NEXT: [[S_011:%.*]] = phi i32 [ [[ADD:%.*]], [[WHILE_BODY]] ], [ undef, [[WHILE_BODY_PREHEADER]] ] -; CHECK-NEXT: [[TMP5:%.*]] = mul nsw i64 [[INDVAR]], -4 -; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[BUF]], i64 [[TMP5]] -; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[SCEVGEP]], align 4 -; CHECK-NEXT: [[ADD]] = add nsw i32 [[TMP6]], [[S_011]] -; CHECK-NEXT: [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVAR]], [[TMP4]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[WHILE_END_LOOPEXIT:%.*]], label [[WHILE_BODY]] -; CHECK: while.end.loopexit: -; CHECK-NEXT: [[ADD2_LCSSA:%.*]] = phi i32 [ [[ADD]], [[WHILE_BODY]] ] -; CHECK-NEXT: br label [[WHILE_END]] -; CHECK: while.end: -; CHECK-NEXT: [[S_0_LCSSA:%.*]] = phi i32 [ undef, [[ENTRY:%.*]] ], [ [[ADD2_LCSSA]], [[WHILE_END_LOOPEXIT]] ] -; CHECK-NEXT: ret i32 [[S_0_LCSSA]] -; -entry: - %cmp.9 = icmp eq ptr %buf, %end - br i1 %cmp.9, label %while.end, label %while.body.preheader - -while.body.preheader: - br label %while.body - -while.body: - - %S.011 = phi i32 [ %add2, %while.body ], [ undef, %while.body.preheader ] - %buf.addr.010 = phi ptr [ %add.ptr, %while.body ], [ %buf, %while.body.preheader ] - %0 = load i32, ptr %buf.addr.010, align 4 - %add = add nsw i32 %0, %S.011 - %arrayidx1 = getelementptr inbounds i32, ptr %buf.addr.010, i64 -1 - %1 = load i32, ptr %arrayidx1, align 4 - %add2 = add nsw i32 %add, %1 - %add.ptr = getelementptr inbounds i32, ptr %buf.addr.010, i64 -2 - %cmp = icmp eq ptr %add.ptr, %end - br i1 %cmp, label %while.end.loopexit, label %while.body - -while.end.loopexit: - %add2.lcssa = phi i32 [ %add2, %while.body ] - br label %while.end - -while.end: - %S.0.lcssa = phi i32 [ undef, %entry ], [ %add2.lcssa, %while.end.loopexit ] - ret i32 %S.0.lcssa -} diff --git a/llvm/test/Transforms/LoopReroll/reduction.ll b/llvm/test/Transforms/LoopReroll/reduction.ll deleted file mode 100644 index 94f4d53bfbf68..0000000000000 --- a/llvm/test/Transforms/LoopReroll/reduction.ll +++ /dev/null @@ -1,132 +0,0 @@ -; RUN: opt < %s -passes=loop-reroll -S | FileCheck %s -target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -define i32 @foo(ptr nocapture readonly %x) #0 { -entry: - br label %for.body - -for.body: ; preds = %entry, %for.body - %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] - %r.029 = phi i32 [ 0, %entry ], [ %add12, %for.body ] - %arrayidx = getelementptr inbounds i32, ptr %x, i64 %indvars.iv - %0 = load i32, ptr %arrayidx, align 4 - %add = add nsw i32 %0, %r.029 - %1 = or disjoint i64 %indvars.iv, 1 - %arrayidx3 = getelementptr inbounds i32, ptr %x, i64 %1 - %2 = load i32, ptr %arrayidx3, align 4 - %add4 = add nsw i32 %add, %2 - %3 = or disjoint i64 %indvars.iv, 2 - %arrayidx7 = getelementptr inbounds i32, ptr %x, i64 %3 - %4 = load i32, ptr %arrayidx7, align 4 - %add8 = add nsw i32 %add4, %4 - %5 = or disjoint i64 %indvars.iv, 3 - %arrayidx11 = getelementptr inbounds i32, ptr %x, i64 %5 - %6 = load i32, ptr %arrayidx11, align 4 - %add12 = add nsw i32 %add8, %6 - %indvars.iv.next = add nuw nsw i64 %indvars.iv, 4 - %7 = trunc i64 %indvars.iv.next to i32 - %cmp = icmp slt i32 %7, 400 - br i1 %cmp, label %for.body, label %for.end - -; CHECK-LABEL: @foo - -; CHECK: for.body: -; CHECK: %indvar = phi i64 [ %indvar.next, %for.body ], [ 0, %entry ] -; CHECK: %r.029 = phi i32 [ 0, %entry ], [ %add, %for.body ] -; CHECK: %arrayidx = getelementptr inbounds i32, ptr %x, i64 %indvar -; CHECK: %1 = load i32, ptr %arrayidx, align 4 -; CHECK: %add = add nsw i32 %1, %r.029 -; CHECK: %indvar.next = add i64 %indvar, 1 -; CHECK: %exitcond = icmp eq i32 %0, 399 -; CHECK: br i1 %exitcond, label %for.end, label %for.body - -; CHECK: ret - -for.end: ; preds = %for.body - ret i32 %add12 -} - -define float @bar(ptr nocapture readonly %x) #0 { -entry: - br label %for.body - -for.body: ; preds = %entry, %for.body - %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] - %r.029 = phi float [ 0.0, %entry ], [ %add12, %for.body ] - %arrayidx = getelementptr inbounds float, ptr %x, i64 %indvars.iv - %0 = load float, ptr %arrayidx, align 4 - %add = fadd float %0, %r.029 - %1 = or disjoint i64 %indvars.iv, 1 - %arrayidx3 = getelementptr inbounds float, ptr %x, i64 %1 - %2 = load float, ptr %arrayidx3, align 4 - %add4 = fadd float %add, %2 - %3 = or disjoint i64 %indvars.iv, 2 - %arrayidx7 = getelementptr inbounds float, ptr %x, i64 %3 - %4 = load float, ptr %arrayidx7, align 4 - %add8 = fadd float %add4, %4 - %5 = or disjoint i64 %indvars.iv, 3 - %arrayidx11 = getelementptr inbounds float, ptr %x, i64 %5 - %6 = load float, ptr %arrayidx11, align 4 - %add12 = fadd float %add8, %6 - %indvars.iv.next = add nuw nsw i64 %indvars.iv, 4 - %7 = trunc i64 %indvars.iv.next to i32 - %cmp = icmp slt i32 %7, 400 - br i1 %cmp, label %for.body, label %for.end - -; CHECK-LABEL: @bar - -; CHECK: for.body: -; CHECK: %indvar = phi i64 [ %indvar.next, %for.body ], [ 0, %entry ] -; CHECK: %r.029 = phi float [ 0.000000e+00, %entry ], [ %add, %for.body ] -; CHECK: %arrayidx = getelementptr inbounds float, ptr %x, i64 %indvar -; CHECK: %1 = load float, ptr %arrayidx, align 4 -; CHECK: %add = fadd float %1, %r.029 -; CHECK: %indvar.next = add i64 %indvar, 1 -; CHECK: %exitcond = icmp eq i32 %0, 399 -; CHECK: br i1 %exitcond, label %for.end, label %for.body - -; CHECK: ret - -for.end: ; preds = %for.body - ret float %add12 -} - -define i32 @foo_unusedphi(ptr nocapture readonly %x) #0 { -entry: - br label %for.body - -for.body: ; preds = %entry, %for.body - %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] - %r.029 = phi i32 [ 0, %entry ], [ %add12, %for.body ] - %arrayidx = getelementptr inbounds i32, ptr %x, i64 %indvars.iv - %0 = load i32, ptr %arrayidx, align 4 - %add = add nsw i32 %0, %0 - %1 = or disjoint i64 %indvars.iv, 1 - %arrayidx3 = getelementptr inbounds i32, ptr %x, i64 %1 - %2 = load i32, ptr %arrayidx3, align 4 - %add4 = add nsw i32 %add, %2 - %3 = or disjoint i64 %indvars.iv, 2 - %arrayidx7 = getelementptr inbounds i32, ptr %x, i64 %3 - %4 = load i32, ptr %arrayidx7, align 4 - %add8 = add nsw i32 %add4, %4 - %5 = or disjoint i64 %indvars.iv, 3 - %arrayidx11 = getelementptr inbounds i32, ptr %x, i64 %5 - %6 = load i32, ptr %arrayidx11, align 4 - %add12 = add nsw i32 %add8, %6 - %indvars.iv.next = add nuw nsw i64 %indvars.iv, 4 - %7 = trunc i64 %indvars.iv.next to i32 - %cmp = icmp slt i32 %7, 400 - br i1 %cmp, label %for.body, label %for.end - -; CHECK-LABEL: @foo_unusedphi -; The above is just testing for a crash - no specific output expected. - -; CHECK: ret - -for.end: ; preds = %for.body - ret i32 %add12 -} - -attributes #0 = { nounwind readonly uwtable } - diff --git a/llvm/test/Transforms/LoopReroll/reroll_with_dbg.ll b/llvm/test/Transforms/LoopReroll/reroll_with_dbg.ll deleted file mode 100644 index e720e761f4d6c..0000000000000 --- a/llvm/test/Transforms/LoopReroll/reroll_with_dbg.ll +++ /dev/null @@ -1,130 +0,0 @@ -;RUN: opt < %s -passes=loop-reroll -S | FileCheck %s -;void foo(ptr restrict a, ptr restrict b, int n) { -; for(int i = 0; i < n; i+=4) { -; a[i] = b[i]; -; a[i+1] = b[i+1]; -; a[i+2] = b[i+2]; -; a[i+3] = b[i+3]; -; } -;} -target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" -target triple = "armv4t--linux-gnueabi" - -; Function Attrs: nounwind -define void @foo(ptr noalias nocapture %a, ptr noalias nocapture readonly %b, i32 %n) #0 !dbg !4 { -entry: -;CHECK-LABEL: @foo - - tail call void @llvm.dbg.value(metadata ptr %a, metadata !12, metadata !22), !dbg !23 - tail call void @llvm.dbg.value(metadata ptr %b, metadata !13, metadata !22), !dbg !24 - tail call void @llvm.dbg.value(metadata i32 %n, metadata !14, metadata !22), !dbg !25 - tail call void @llvm.dbg.value(metadata i32 0, metadata !15, metadata !22), !dbg !26 - %cmp.30 = icmp sgt i32 %n, 0, !dbg !27 - br i1 %cmp.30, label %for.body.preheader, label %for.cond.cleanup, !dbg !29 - -for.body.preheader: ; preds = %entry - br label %for.body, !dbg !30 - -for.cond.cleanup.loopexit: ; preds = %for.body - br label %for.cond.cleanup, !dbg !32 - -for.cond.cleanup: ; preds = %for.cond.cleanup.loopexit, %entry - ret void, !dbg !32 - -for.body: ; preds = %for.body.preheader, %for.body -;CHECK: for.body: -;CHECK: %indvar = phi i32 [ %indvar.next, %for.body ], [ 0, {{.*}} ] -;CHECK: load -;CHECK: store -;CHECK-NOT: load -;CHECK-NOT: store -;CHECK: call void @llvm.dbg.value -;CHECK: %indvar.next = add i32 %indvar, 1 -;CHECK: icmp eq i32 %indvar - %i.031 = phi i32 [ %add13, %for.body ], [ 0, %for.body.preheader ] - %arrayidx = getelementptr inbounds float, ptr %b, i32 %i.031, !dbg !30 - %0 = load i32, ptr %arrayidx, align 4, !dbg !30, !tbaa !33 - %arrayidx1 = getelementptr inbounds float, ptr %a, i32 %i.031, !dbg !37 - store i32 %0, ptr %arrayidx1, align 4, !dbg !38, !tbaa !33 - %add = or disjoint i32 %i.031, 1, !dbg !39 - %arrayidx2 = getelementptr inbounds float, ptr %b, i32 %add, !dbg !40 - %1 = load i32, ptr %arrayidx2, align 4, !dbg !40, !tbaa !33 - %arrayidx4 = getelementptr inbounds float, ptr %a, i32 %add, !dbg !41 - store i32 %1, ptr %arrayidx4, align 4, !dbg !42, !tbaa !33 - %add5 = or disjoint i32 %i.031, 2, !dbg !43 - %arrayidx6 = getelementptr inbounds float, ptr %b, i32 %add5, !dbg !44 - %2 = load i32, ptr %arrayidx6, align 4, !dbg !44, !tbaa !33 - %arrayidx8 = getelementptr inbounds float, ptr %a, i32 %add5, !dbg !45 - store i32 %2, ptr %arrayidx8, align 4, !dbg !46, !tbaa !33 - %add9 = or disjoint i32 %i.031, 3, !dbg !47 - %arrayidx10 = getelementptr inbounds float, ptr %b, i32 %add9, !dbg !48 - %3 = load i32, ptr %arrayidx10, align 4, !dbg !48, !tbaa !33 - %arrayidx12 = getelementptr inbounds float, ptr %a, i32 %add9, !dbg !49 - store i32 %3, ptr %arrayidx12, align 4, !dbg !50, !tbaa !33 - %add13 = add nuw nsw i32 %i.031, 4, !dbg !51 - tail call void @llvm.dbg.value(metadata i32 %add13, metadata !15, metadata !22), !dbg !26 - %cmp = icmp slt i32 %add13, %n, !dbg !27 - br i1 %cmp, label %for.body, label %for.cond.cleanup.loopexit, !dbg !29 -} - -; Function Attrs: nounwind readnone -declare void @llvm.dbg.value(metadata, metadata, metadata) #1 - -attributes #0 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="arm7tdmi" "target-features"="+strict-align" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #1 = { nounwind readnone } - -!llvm.dbg.cu = !{!0} -!llvm.module.flags = !{!17, !18, !19, !20} -!llvm.ident = !{!21} - -!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.8.0", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2) -!1 = !DIFile(filename: "test.c", directory: "/home/weimingz/llvm-build/release/community-tip") -!2 = !{} -!4 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 1, type: !5, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: true, unit: !0, retainedNodes: !11) -!5 = !DISubroutineType(types: !6) -!6 = !{null, !7, !7, !10} -!7 = !DIDerivedType(tag: DW_TAG_restrict_type, baseType: !8) -!8 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !9, size: 32, align: 32) -!9 = !DIBasicType(name: "float", size: 32, align: 32, encoding: DW_ATE_float) -!10 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed) -!11 = !{!12, !13, !14, !15} -!12 = !DILocalVariable(name: "a", arg: 1, scope: !4, file: !1, line: 1, type: !7) -!13 = !DILocalVariable(name: "b", arg: 2, scope: !4, file: !1, line: 1, type: !7) -!14 = !DILocalVariable(name: "n", arg: 3, scope: !4, file: !1, line: 1, type: !10) -!15 = !DILocalVariable(name: "i", scope: !16, file: !1, line: 2, type: !10) -!16 = distinct !DILexicalBlock(scope: !4, file: !1, line: 2, column: 3) -!17 = !{i32 2, !"Dwarf Version", i32 4} -!18 = !{i32 2, !"Debug Info Version", i32 3} -!19 = !{i32 1, !"wchar_size", i32 4} -!20 = !{i32 1, !"min_enum_size", i32 4} -!21 = !{!"clang version 3.8.0"} -!22 = !DIExpression() -!23 = !DILocation(line: 1, column: 27, scope: !4) -!24 = !DILocation(line: 1, column: 47, scope: !4) -!25 = !DILocation(line: 1, column: 54, scope: !4) -!26 = !DILocation(line: 2, column: 11, scope: !16) -!27 = !DILocation(line: 2, column: 20, scope: !28) -!28 = distinct !DILexicalBlock(scope: !16, file: !1, line: 2, column: 3) -!29 = !DILocation(line: 2, column: 3, scope: !16) -!30 = !DILocation(line: 3, column: 12, scope: !31) -!31 = distinct !DILexicalBlock(scope: !28, file: !1, line: 2, column: 31) -!32 = !DILocation(line: 8, column: 1, scope: !4) -!33 = !{!34, !34, i64 0} -!34 = !{!"float", !35, i64 0} -!35 = !{!"omnipotent char", !36, i64 0} -!36 = !{!"Simple C/C++ TBAA"} -!37 = !DILocation(line: 3, column: 5, scope: !31) -!38 = !DILocation(line: 3, column: 10, scope: !31) -!39 = !DILocation(line: 4, column: 17, scope: !31) -!40 = !DILocation(line: 4, column: 14, scope: !31) -!41 = !DILocation(line: 4, column: 5, scope: !31) -!42 = !DILocation(line: 4, column: 12, scope: !31) -!43 = !DILocation(line: 5, column: 17, scope: !31) -!44 = !DILocation(line: 5, column: 14, scope: !31) -!45 = !DILocation(line: 5, column: 5, scope: !31) -!46 = !DILocation(line: 5, column: 12, scope: !31) -!47 = !DILocation(line: 6, column: 17, scope: !31) -!48 = !DILocation(line: 6, column: 14, scope: !31) -!49 = !DILocation(line: 6, column: 5, scope: !31) -!50 = !DILocation(line: 6, column: 12, scope: !31) -!51 = !DILocation(line: 2, column: 26, scope: !28)