diff --git a/llvm/include/llvm/Analysis/DependenceAnalysis.h b/llvm/include/llvm/Analysis/DependenceAnalysis.h index f66c79d915665..ae260cd153177 100644 --- a/llvm/include/llvm/Analysis/DependenceAnalysis.h +++ b/llvm/include/llvm/Analysis/DependenceAnalysis.h @@ -302,48 +302,6 @@ class DependenceInfo { depends(Instruction *Src, Instruction *Dst, bool UnderRuntimeAssumptions = false); - /// getSplitIteration - Give a dependence that's splittable at some - /// particular level, return the iteration that should be used to split - /// the loop. - /// - /// Generally, the dependence analyzer will be used to build - /// a dependence graph for a function (basically a map from instructions - /// to dependences). Looking for cycles in the graph shows us loops - /// that cannot be trivially vectorized/parallelized. - /// - /// We can try to improve the situation by examining all the dependences - /// that make up the cycle, looking for ones we can break. - /// Sometimes, peeling the first or last iteration of a loop will break - /// dependences, and there are flags for those possibilities. - /// Sometimes, splitting a loop at some other iteration will do the trick, - /// and we've got a flag for that case. Rather than waste the space to - /// record the exact iteration (since we rarely know), we provide - /// a method that calculates the iteration. It's a drag that it must work - /// from scratch, but wonderful in that it's possible. - /// - /// Here's an example: - /// - /// for (i = 0; i < 10; i++) - /// A[i] = ... - /// ... = A[11 - i] - /// - /// There's a loop-carried flow dependence from the store to the load, - /// found by the weak-crossing SIV test. The dependence will have a flag, - /// indicating that the dependence can be broken by splitting the loop. - /// Calling getSplitIteration will return 5. - /// Splitting the loop breaks the dependence, like so: - /// - /// for (i = 0; i <= 5; i++) - /// A[i] = ... - /// ... 
= A[11 - i] - /// for (i = 6; i < 10; i++) - /// A[i] = ... - /// ... = A[11 - i] - /// - /// breaks the dependence and allows us to vectorize/parallelize - /// both loops. - LLVM_ABI const SCEV *getSplitIteration(const Dependence &Dep, unsigned Level); - Function *getFunction() const { return F; } /// getRuntimeAssumptions - Returns all the runtime assumptions under which @@ -623,8 +581,7 @@ class DependenceInfo { /// If the dependence isn't proven to exist, /// marks the Result as inconsistent. bool testSIV(const SCEV *Src, const SCEV *Dst, unsigned &Level, - FullDependence &Result, Constraint &NewConstraint, - const SCEV *&SplitIter) const; + FullDependence &Result, Constraint &NewConstraint) const; /// testRDIV - Tests the RDIV subscript pair (Src and Dst) for dependence. /// Things of the form [c1 + a1*i] and [c2 + a2*j] @@ -669,8 +626,7 @@ class DependenceInfo { bool weakCrossingSIVtest(const SCEV *SrcCoeff, const SCEV *SrcConst, const SCEV *DstConst, const Loop *CurrentLoop, unsigned Level, FullDependence &Result, - Constraint &NewConstraint, - const SCEV *&SplitIter) const; + Constraint &NewConstraint) const; /// ExactSIVtest - Tests the SIV subscript pair /// (Src and Dst) for dependence. @@ -838,35 +794,6 @@ class DependenceInfo { /// of the Constraints X and Y. Returns true if X has changed. bool intersectConstraints(Constraint *X, const Constraint *Y); - /// propagate - Review the constraints, looking for opportunities - /// to simplify a subscript pair (Src and Dst). - /// Return true if some simplification occurs. - /// If the simplification isn't exact (that is, if it is conservative - /// in terms of dependence), set consistent to false. - bool propagate(const SCEV *&Src, const SCEV *&Dst, SmallBitVector &Loops, - SmallVectorImpl &Constraints, bool &Consistent); - - /// propagateDistance - Attempt to propagate a distance - /// constraint into a subscript pair (Src and Dst). - /// Return true if some simplification occurs. 
- /// If the simplification isn't exact (that is, if it is conservative - /// in terms of dependence), set consistent to false. - bool propagateDistance(const SCEV *&Src, const SCEV *&Dst, - Constraint &CurConstraint, bool &Consistent); - - /// propagatePoint - Attempt to propagate a point - /// constraint into a subscript pair (Src and Dst). - /// Return true if some simplification occurs. - bool propagatePoint(const SCEV *&Src, const SCEV *&Dst, - Constraint &CurConstraint); - - /// propagateLine - Attempt to propagate a line - /// constraint into a subscript pair (Src and Dst). - /// Return true if some simplification occurs. - /// If the simplification isn't exact (that is, if it is conservative - /// in terms of dependence), set consistent to false. - bool propagateLine(const SCEV *&Src, const SCEV *&Dst, - Constraint &CurConstraint, bool &Consistent); /// findCoefficient - Given a linear SCEV, /// return the coefficient corresponding to specified loop. diff --git a/llvm/lib/Analysis/DependenceAnalysis.cpp b/llvm/lib/Analysis/DependenceAnalysis.cpp index da86a8d2cc9c0..a20f509b189de 100644 --- a/llvm/lib/Analysis/DependenceAnalysis.cpp +++ b/llvm/lib/Analysis/DependenceAnalysis.cpp @@ -73,8 +73,6 @@ using namespace llvm; // statistics STATISTIC(TotalArrayPairs, "Array pairs tested"); -STATISTIC(SeparableSubscriptPairs, "Separable subscript pairs"); -STATISTIC(CoupledSubscriptPairs, "Coupled subscript pairs"); STATISTIC(NonlinearSubscriptPairs, "Nonlinear subscript pairs"); STATISTIC(ZIVapplications, "ZIV applications"); STATISTIC(ZIVindependence, "ZIV independence"); @@ -96,8 +94,6 @@ STATISTIC(SymbolicRDIVapplications, "Symbolic RDIV applications"); STATISTIC(SymbolicRDIVindependence, "Symbolic RDIV independence"); STATISTIC(DeltaApplications, "Delta applications"); STATISTIC(DeltaSuccesses, "Delta successes"); -STATISTIC(DeltaIndependence, "Delta independence"); -STATISTIC(DeltaPropagations, "Delta propagations"); STATISTIC(GCDapplications, "GCD 
applications"); STATISTIC(GCDsuccesses, "GCD successes"); STATISTIC(GCDindependence, "GCD independence"); @@ -121,6 +117,10 @@ static cl::opt MIVMaxLevelThreshold( cl::desc("Maximum depth allowed for the recursive algorithm used to " "explore MIV direction vectors.")); +static cl::opt<bool> EnableBanerjeeMIVTest( + "da-enable-banerjee-miv-test", cl::init(false), cl::Hidden, + cl::desc("Enable Banerjee MIV test in dependence analysis.")); + //===----------------------------------------------------------------------===// // basics @@ -208,7 +208,6 @@ static void dumpExampleDependence(raw_ostream &OS, DependenceInfo *DA, for (unsigned Level = 1; Level <= D->getLevels(); Level++) { if (D->isSplitable(Level)) { OS << " da analyze - split level = " << Level; - OS << ", iteration = " << *DA->getSplitIteration(*D, Level); OS << "!\n"; } } @@ -1351,10 +1350,12 @@ bool DependenceInfo::strongSIVtest(const SCEV *Coeff, const SCEV *SrcConst, // Can determine iteration for splitting. // // Return true if dependence disproved. 
-bool DependenceInfo::weakCrossingSIVtest( - const SCEV *Coeff, const SCEV *SrcConst, const SCEV *DstConst, - const Loop *CurLoop, unsigned Level, FullDependence &Result, - Constraint &NewConstraint, const SCEV *&SplitIter) const { +bool DependenceInfo::weakCrossingSIVtest(const SCEV *Coeff, + const SCEV *SrcConst, + const SCEV *DstConst, + const Loop *CurLoop, unsigned Level, + FullDependence &Result, + Constraint &NewConstraint) const { LLVM_DEBUG(dbgs() << "\tWeak-Crossing SIV test\n"); LLVM_DEBUG(dbgs() << "\t Coeff = " << *Coeff << "\n"); LLVM_DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); @@ -1390,12 +1391,6 @@ bool DependenceInfo::weakCrossingSIVtest( } assert(SE->isKnownPositive(ConstCoeff) && "ConstCoeff should be positive"); - // compute SplitIter for use by DependenceInfo::getSplitIteration() - SplitIter = SE->getUDivExpr( - SE->getSMaxExpr(SE->getZero(Delta->getType()), Delta), - SE->getMulExpr(SE->getConstant(Delta->getType(), 2), ConstCoeff)); - LLVM_DEBUG(dbgs() << "\t Split iter = " << *SplitIter << "\n"); - const SCEVConstant *ConstDelta = dyn_cast(Delta); if (!ConstDelta) return false; @@ -2232,8 +2227,8 @@ bool DependenceInfo::symbolicRDIVtest(const SCEV *A1, const SCEV *A2, // // Return true if dependence disproved. 
bool DependenceInfo::testSIV(const SCEV *Src, const SCEV *Dst, unsigned &Level, - FullDependence &Result, Constraint &NewConstraint, - const SCEV *&SplitIter) const { + FullDependence &Result, + Constraint &NewConstraint) const { LLVM_DEBUG(dbgs() << " src = " << *Src << "\n"); LLVM_DEBUG(dbgs() << " dst = " << *Dst << "\n"); const SCEVAddRecExpr *SrcAddRec = dyn_cast(Src); @@ -2253,7 +2248,7 @@ bool DependenceInfo::testSIV(const SCEV *Src, const SCEV *Dst, unsigned &Level, Result, NewConstraint); else if (SrcCoeff == SE->getNegativeSCEV(DstCoeff)) disproven = weakCrossingSIVtest(SrcCoeff, SrcConst, DstConst, CurLoop, - Level, Result, NewConstraint, SplitIter); + Level, Result, NewConstraint); else disproven = exactSIVtest(SrcCoeff, DstCoeff, SrcConst, DstConst, CurLoop, Level, Result, NewConstraint); @@ -2599,6 +2594,9 @@ bool DependenceInfo::gcdMIVtest(const SCEV *Src, const SCEV *Dst, bool DependenceInfo::banerjeeMIVtest(const SCEV *Src, const SCEV *Dst, const SmallBitVector &Loops, FullDependence &Result) const { + if (!EnableBanerjeeMIVTest) + return false; + LLVM_DEBUG(dbgs() << "starting Banerjee\n"); ++BanerjeeApplications; LLVM_DEBUG(dbgs() << " Src = " << *Src << '\n'); @@ -3116,153 +3114,8 @@ const SCEV *DependenceInfo::addToCoefficient(const SCEV *Expr, AddRec->getNoWrapFlags()); } -// Review the constraints, looking for opportunities -// to simplify a subscript pair (Src and Dst). -// Return true if some simplification occurs. -// If the simplification isn't exact (that is, if it is conservative -// in terms of dependence), set consistent to false. 
-// Corresponds to Figure 5 from the paper -// -// Practical Dependence Testing -// Goff, Kennedy, Tseng -// PLDI 1991 -bool DependenceInfo::propagate(const SCEV *&Src, const SCEV *&Dst, - SmallBitVector &Loops, - SmallVectorImpl &Constraints, - bool &Consistent) { - bool Result = false; - for (unsigned LI : Loops.set_bits()) { - LLVM_DEBUG(dbgs() << "\t Constraint[" << LI << "] is"); - LLVM_DEBUG(Constraints[LI].dump(dbgs())); - if (Constraints[LI].isDistance()) - Result |= propagateDistance(Src, Dst, Constraints[LI], Consistent); - else if (Constraints[LI].isLine()) - Result |= propagateLine(Src, Dst, Constraints[LI], Consistent); - else if (Constraints[LI].isPoint()) - Result |= propagatePoint(Src, Dst, Constraints[LI]); - } - return Result; -} -// Attempt to propagate a distance -// constraint into a subscript pair (Src and Dst). -// Return true if some simplification occurs. -// If the simplification isn't exact (that is, if it is conservative -// in terms of dependence), set consistent to false. -bool DependenceInfo::propagateDistance(const SCEV *&Src, const SCEV *&Dst, - Constraint &CurConstraint, - bool &Consistent) { - const Loop *CurLoop = CurConstraint.getAssociatedLoop(); - LLVM_DEBUG(dbgs() << "\t\tSrc is " << *Src << "\n"); - const SCEV *A_K = findCoefficient(Src, CurLoop); - if (A_K->isZero()) - return false; - const SCEV *DA_K = SE->getMulExpr(A_K, CurConstraint.getD()); - Src = SE->getMinusSCEV(Src, DA_K); - Src = zeroCoefficient(Src, CurLoop); - LLVM_DEBUG(dbgs() << "\t\tnew Src is " << *Src << "\n"); - LLVM_DEBUG(dbgs() << "\t\tDst is " << *Dst << "\n"); - Dst = addToCoefficient(Dst, CurLoop, SE->getNegativeSCEV(A_K)); - LLVM_DEBUG(dbgs() << "\t\tnew Dst is " << *Dst << "\n"); - if (!findCoefficient(Dst, CurLoop)->isZero()) - Consistent = false; - return true; -} -// Attempt to propagate a line -// constraint into a subscript pair (Src and Dst). -// Return true if some simplification occurs. 
-// If the simplification isn't exact (that is, if it is conservative -// in terms of dependence), set consistent to false. -bool DependenceInfo::propagateLine(const SCEV *&Src, const SCEV *&Dst, - Constraint &CurConstraint, - bool &Consistent) { - const Loop *CurLoop = CurConstraint.getAssociatedLoop(); - const SCEV *A = CurConstraint.getA(); - const SCEV *B = CurConstraint.getB(); - const SCEV *C = CurConstraint.getC(); - LLVM_DEBUG(dbgs() << "\t\tA = " << *A << ", B = " << *B << ", C = " << *C - << "\n"); - LLVM_DEBUG(dbgs() << "\t\tSrc = " << *Src << "\n"); - LLVM_DEBUG(dbgs() << "\t\tDst = " << *Dst << "\n"); - if (A->isZero()) { - const SCEVConstant *Bconst = dyn_cast(B); - const SCEVConstant *Cconst = dyn_cast(C); - if (!Bconst || !Cconst) - return false; - APInt Beta = Bconst->getAPInt(); - APInt Charlie = Cconst->getAPInt(); - APInt CdivB = Charlie.sdiv(Beta); - assert(Charlie.srem(Beta) == 0 && "C should be evenly divisible by B"); - const SCEV *AP_K = findCoefficient(Dst, CurLoop); - Src = SE->getMinusSCEV(Src, SE->getMulExpr(AP_K, SE->getConstant(CdivB))); - Dst = zeroCoefficient(Dst, CurLoop); - if (!findCoefficient(Src, CurLoop)->isZero()) - Consistent = false; - } else if (B->isZero()) { - const SCEVConstant *Aconst = dyn_cast(A); - const SCEVConstant *Cconst = dyn_cast(C); - if (!Aconst || !Cconst) - return false; - APInt Alpha = Aconst->getAPInt(); - APInt Charlie = Cconst->getAPInt(); - APInt CdivA = Charlie.sdiv(Alpha); - assert(Charlie.srem(Alpha) == 0 && "C should be evenly divisible by A"); - const SCEV *A_K = findCoefficient(Src, CurLoop); - Src = SE->getAddExpr(Src, SE->getMulExpr(A_K, SE->getConstant(CdivA))); - Src = zeroCoefficient(Src, CurLoop); - if (!findCoefficient(Dst, CurLoop)->isZero()) - Consistent = false; - } else if (isKnownPredicate(CmpInst::ICMP_EQ, A, B)) { - const SCEVConstant *Aconst = dyn_cast(A); - const SCEVConstant *Cconst = dyn_cast(C); - if (!Aconst || !Cconst) - return false; - APInt Alpha = Aconst->getAPInt(); - 
APInt Charlie = Cconst->getAPInt(); - APInt CdivA = Charlie.sdiv(Alpha); - assert(Charlie.srem(Alpha) == 0 && "C should be evenly divisible by A"); - const SCEV *A_K = findCoefficient(Src, CurLoop); - Src = SE->getAddExpr(Src, SE->getMulExpr(A_K, SE->getConstant(CdivA))); - Src = zeroCoefficient(Src, CurLoop); - Dst = addToCoefficient(Dst, CurLoop, A_K); - if (!findCoefficient(Dst, CurLoop)->isZero()) - Consistent = false; - } else { - // paper is incorrect here, or perhaps just misleading - const SCEV *A_K = findCoefficient(Src, CurLoop); - Src = SE->getMulExpr(Src, A); - Dst = SE->getMulExpr(Dst, A); - Src = SE->getAddExpr(Src, SE->getMulExpr(A_K, C)); - Src = zeroCoefficient(Src, CurLoop); - Dst = addToCoefficient(Dst, CurLoop, SE->getMulExpr(A_K, B)); - if (!findCoefficient(Dst, CurLoop)->isZero()) - Consistent = false; - } - LLVM_DEBUG(dbgs() << "\t\tnew Src = " << *Src << "\n"); - LLVM_DEBUG(dbgs() << "\t\tnew Dst = " << *Dst << "\n"); - return true; -} - -// Attempt to propagate a point -// constraint into a subscript pair (Src and Dst). -// Return true if some simplification occurs. -bool DependenceInfo::propagatePoint(const SCEV *&Src, const SCEV *&Dst, - Constraint &CurConstraint) { - const Loop *CurLoop = CurConstraint.getAssociatedLoop(); - const SCEV *A_K = findCoefficient(Src, CurLoop); - const SCEV *AP_K = findCoefficient(Dst, CurLoop); - const SCEV *XA_K = SE->getMulExpr(A_K, CurConstraint.getX()); - const SCEV *YAP_K = SE->getMulExpr(AP_K, CurConstraint.getY()); - LLVM_DEBUG(dbgs() << "\t\tSrc is " << *Src << "\n"); - Src = SE->getAddExpr(Src, SE->getMinusSCEV(XA_K, YAP_K)); - Src = zeroCoefficient(Src, CurLoop); - LLVM_DEBUG(dbgs() << "\t\tnew Src is " << *Src << "\n"); - LLVM_DEBUG(dbgs() << "\t\tDst is " << *Dst << "\n"); - Dst = zeroCoefficient(Dst, CurLoop); - LLVM_DEBUG(dbgs() << "\t\tnew Dst is " << *Dst << "\n"); - return true; -} // Update direction vector entry based on the current constraint. 
void DependenceInfo::updateDirection(Dependence::DVEntry &Level, @@ -3579,8 +3432,6 @@ SCEVUnionPredicate DependenceInfo::getRuntimeAssumptions() const { // Goff, Kennedy, Tseng // PLDI 1991 // -// Care is required to keep the routine below, getSplitIteration(), -// up to date with respect to this routine. std::unique_ptr DependenceInfo::depends(Instruction *Src, Instruction *Dst, bool UnderRuntimeAssumptions) { @@ -3726,68 +3577,11 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst, LLVM_DEBUG(dumpSmallBitVector(Pair[P].Loops)); } - SmallBitVector Separable(Pairs); - SmallBitVector Coupled(Pairs); - - // Partition subscripts into separable and minimally-coupled groups - // Algorithm in paper is algorithmically better; - // this may be faster in practice. Check someday. - // - // Here's an example of how it works. Consider this code: - // - // for (i = ...) { - // for (j = ...) { - // for (k = ...) { - // for (l = ...) { - // for (m = ...) { - // A[i][j][k][m] = ...; - // ... = A[0][j][l][i + j]; - // } - // } - // } - // } - // } - // - // There are 4 subscripts here: - // 0 [i] and [0] - // 1 [j] and [j] - // 2 [k] and [l] - // 3 [m] and [i + j] - // - // We've already classified each subscript pair as ZIV, SIV, etc., - // and collected all the loops mentioned by pair P in Pair[P].Loops. - // In addition, we've initialized Pair[P].GroupLoops to Pair[P].Loops - // and set Pair[P].Group = {P}. - // - // Src Dst Classification Loops GroupLoops Group - // 0 [i] [0] SIV {1} {1} {0} - // 1 [j] [j] SIV {2} {2} {1} - // 2 [k] [l] RDIV {3,4} {3,4} {2} - // 3 [m] [i + j] MIV {1,2,5} {1,2,5} {3} - // - // For each subscript SI 0 .. 3, we consider each remaining subscript, SJ. - // So, 0 is compared against 1, 2, and 3; 1 is compared against 2 and 3, etc. - // - // We begin by comparing 0 and 1. The intersection of the GroupLoops is empty. - // Next, 0 and 2. Again, the intersection of their GroupLoops is empty. - // Next 0 and 3. 
The intersection of their GroupLoop = {1}, not empty, - // so Pair[3].Group = {0,3} and Done = false (that is, 0 will not be added - // to either Separable or Coupled). - // - // Next, we consider 1 and 2. The intersection of the GroupLoops is empty. - // Next, 1 and 3. The intersection of their GroupLoops = {2}, not empty, - // so Pair[3].Group = {0, 1, 3} and Done = false. - // - // Next, we compare 2 against 3. The intersection of the GroupLoops is empty. - // Since Done remains true, we add 2 to the set of Separable pairs. - // - // Finally, we consider 3. There's nothing to compare it with, - // so Done remains true and we add it to the Coupled set. - // Pair[3].Group = {0, 1, 3} and GroupLoops = {1, 2, 5}. - // - // In the end, we've got 1 separable subscript and 1 coupled group. + // Test each subscript individually for (unsigned SI = 0; SI < Pairs; ++SI) { - if (Pair[SI].Classification == Subscript::NonLinear) { + LLVM_DEBUG(dbgs() << "testing subscript " << SI); + switch (Pair[SI].Classification) { + case Subscript::NonLinear: // ignore these, but collect loops for later ++NonlinearSubscriptPairs; collectCommonLoops(Pair[SI].Src, LI->getLoopFor(Src->getParent()), @@ -3795,47 +3589,7 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst, collectCommonLoops(Pair[SI].Dst, LI->getLoopFor(Dst->getParent()), Pair[SI].Loops); Result.Consistent = false; - } else if (Pair[SI].Classification == Subscript::ZIV) { - // always separable - Separable.set(SI); - } else { - // SIV, RDIV, or MIV, so check for coupled group - bool Done = true; - for (unsigned SJ = SI + 1; SJ < Pairs; ++SJ) { - SmallBitVector Intersection = Pair[SI].GroupLoops; - Intersection &= Pair[SJ].GroupLoops; - if (Intersection.any()) { - // accumulate set of all the loops in group - Pair[SJ].GroupLoops |= Pair[SI].GroupLoops; - // accumulate set of all subscripts in group - Pair[SJ].Group |= Pair[SI].Group; - Done = false; - } - } - if (Done) { - if (Pair[SI].Group.count() == 1) { - 
Separable.set(SI); - ++SeparableSubscriptPairs; - } else { - Coupled.set(SI); - ++CoupledSubscriptPairs; - } - } - } - } - - LLVM_DEBUG(dbgs() << " Separable = "); - LLVM_DEBUG(dumpSmallBitVector(Separable)); - LLVM_DEBUG(dbgs() << " Coupled = "); - LLVM_DEBUG(dumpSmallBitVector(Coupled)); - - Constraint NewConstraint; - NewConstraint.setAny(SE); - - // test separable subscripts - for (unsigned SI : Separable.set_bits()) { - LLVM_DEBUG(dbgs() << "testing subscript " << SI); - switch (Pair[SI].Classification) { + break; case Subscript::ZIV: LLVM_DEBUG(dbgs() << ", ZIV\n"); if (testZIV(Pair[SI].Src, Pair[SI].Dst, Result)) @@ -3844,9 +3598,9 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst, case Subscript::SIV: { LLVM_DEBUG(dbgs() << ", SIV\n"); unsigned Level; - const SCEV *SplitIter = nullptr; - if (testSIV(Pair[SI].Src, Pair[SI].Dst, Level, Result, NewConstraint, - SplitIter)) + Constraint NewConstraint; + NewConstraint.setAny(SE); + if (testSIV(Pair[SI].Src, Pair[SI].Dst, Level, Result, NewConstraint)) return nullptr; break; } @@ -3860,125 +3614,6 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst, if (testMIV(Pair[SI].Src, Pair[SI].Dst, Pair[SI].Loops, Result)) return nullptr; break; - default: - llvm_unreachable("subscript has unexpected classification"); - } - } - - if (Coupled.count()) { - // test coupled subscript groups - LLVM_DEBUG(dbgs() << "starting on coupled subscripts\n"); - LLVM_DEBUG(dbgs() << "MaxLevels + 1 = " << MaxLevels + 1 << "\n"); - SmallVector Constraints(MaxLevels + 1); - for (unsigned II = 0; II <= MaxLevels; ++II) - Constraints[II].setAny(SE); - for (unsigned SI : Coupled.set_bits()) { - LLVM_DEBUG(dbgs() << "testing subscript group " << SI << " { "); - SmallBitVector Group(Pair[SI].Group); - SmallBitVector Sivs(Pairs); - SmallBitVector Mivs(Pairs); - SmallBitVector ConstrainedLevels(MaxLevels + 1); - SmallVector PairsInGroup; - for (unsigned SJ : Group.set_bits()) { - LLVM_DEBUG(dbgs() << SJ << " "); - if 
(Pair[SJ].Classification == Subscript::SIV) - Sivs.set(SJ); - else - Mivs.set(SJ); - PairsInGroup.push_back(&Pair[SJ]); - } - unifySubscriptType(PairsInGroup); - LLVM_DEBUG(dbgs() << "}\n"); - while (Sivs.any()) { - bool Changed = false; - for (unsigned SJ : Sivs.set_bits()) { - LLVM_DEBUG(dbgs() << "testing subscript " << SJ << ", SIV\n"); - // SJ is an SIV subscript that's part of the current coupled group - unsigned Level; - const SCEV *SplitIter = nullptr; - LLVM_DEBUG(dbgs() << "SIV\n"); - if (testSIV(Pair[SJ].Src, Pair[SJ].Dst, Level, Result, NewConstraint, - SplitIter)) - return nullptr; - ConstrainedLevels.set(Level); - if (intersectConstraints(&Constraints[Level], &NewConstraint)) { - if (Constraints[Level].isEmpty()) { - ++DeltaIndependence; - return nullptr; - } - Changed = true; - } - Sivs.reset(SJ); - } - if (Changed) { - // propagate, possibly creating new SIVs and ZIVs - LLVM_DEBUG(dbgs() << " propagating\n"); - LLVM_DEBUG(dbgs() << "\tMivs = "); - LLVM_DEBUG(dumpSmallBitVector(Mivs)); - for (unsigned SJ : Mivs.set_bits()) { - // SJ is an MIV subscript that's part of the current coupled group - LLVM_DEBUG(dbgs() << "\tSJ = " << SJ << "\n"); - if (propagate(Pair[SJ].Src, Pair[SJ].Dst, Pair[SJ].Loops, - Constraints, Result.Consistent)) { - LLVM_DEBUG(dbgs() << "\t Changed\n"); - ++DeltaPropagations; - Pair[SJ].Classification = classifyPair( - Pair[SJ].Src, LI->getLoopFor(Src->getParent()), Pair[SJ].Dst, - LI->getLoopFor(Dst->getParent()), Pair[SJ].Loops); - switch (Pair[SJ].Classification) { - case Subscript::ZIV: - LLVM_DEBUG(dbgs() << "ZIV\n"); - if (testZIV(Pair[SJ].Src, Pair[SJ].Dst, Result)) - return nullptr; - Mivs.reset(SJ); - break; - case Subscript::SIV: - Sivs.set(SJ); - Mivs.reset(SJ); - break; - case Subscript::RDIV: - case Subscript::MIV: - break; - default: - llvm_unreachable("bad subscript classification"); - } - } - } - } - } - - // test & propagate remaining RDIVs - for (unsigned SJ : Mivs.set_bits()) { - if (Pair[SJ].Classification == 
Subscript::RDIV) { - LLVM_DEBUG(dbgs() << "RDIV test\n"); - if (testRDIV(Pair[SJ].Src, Pair[SJ].Dst, Result)) - return nullptr; - // I don't yet understand how to propagate RDIV results - Mivs.reset(SJ); - } - } - - // test remaining MIVs - // This code is temporary. - // Better to somehow test all remaining subscripts simultaneously. - for (unsigned SJ : Mivs.set_bits()) { - if (Pair[SJ].Classification == Subscript::MIV) { - LLVM_DEBUG(dbgs() << "MIV test\n"); - if (testMIV(Pair[SJ].Src, Pair[SJ].Dst, Pair[SJ].Loops, Result)) - return nullptr; - } else - llvm_unreachable("expected only MIV subscripts at this point"); - } - - // update Result.DV from constraint vector - LLVM_DEBUG(dbgs() << " updating\n"); - for (unsigned SJ : ConstrainedLevels.set_bits()) { - if (SJ > CommonLevels) - break; - updateDirection(Result.DV[SJ - 1], Constraints[SJ]); - if (Result.DV[SJ - 1].Direction == Dependence::DVEntry::NONE) - return nullptr; - } } } @@ -4038,223 +3673,3 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst, return std::make_unique(std::move(Result)); } - -//===----------------------------------------------------------------------===// -// getSplitIteration - -// Rather than spend rarely-used space recording the splitting iteration -// during the Weak-Crossing SIV test, we re-compute it on demand. -// The re-computation is basically a repeat of the entire dependence test, -// though simplified since we know that the dependence exists. -// It's tedious, since we must go through all propagations, etc. -// -// Care is required to keep this code up to date with respect to the routine -// above, depends(). -// -// Generally, the dependence analyzer will be used to build -// a dependence graph for a function (basically a map from instructions -// to dependences). Looking for cycles in the graph shows us loops -// that cannot be trivially vectorized/parallelized. 
-// -// We can try to improve the situation by examining all the dependences -// that make up the cycle, looking for ones we can break. -// Sometimes, peeling the first or last iteration of a loop will break -// dependences, and we've got flags for those possibilities. -// Sometimes, splitting a loop at some other iteration will do the trick, -// and we've got a flag for that case. Rather than waste the space to -// record the exact iteration (since we rarely know), we provide -// a method that calculates the iteration. It's a drag that it must work -// from scratch, but wonderful in that it's possible. -// -// Here's an example: -// -// for (i = 0; i < 10; i++) -// A[i] = ... -// ... = A[11 - i] -// -// There's a loop-carried flow dependence from the store to the load, -// found by the weak-crossing SIV test. The dependence will have a flag, -// indicating that the dependence can be broken by splitting the loop. -// Calling getSplitIteration will return 5. -// Splitting the loop breaks the dependence, like so: -// -// for (i = 0; i <= 5; i++) -// A[i] = ... -// ... = A[11 - i] -// for (i = 6; i < 10; i++) -// A[i] = ... -// ... = A[11 - i] -// -// breaks the dependence and allows us to vectorize/parallelize -// both loops. 
-const SCEV *DependenceInfo::getSplitIteration(const Dependence &Dep, - unsigned SplitLevel) { - assert(Dep.isSplitable(SplitLevel) && - "Dep should be splitable at SplitLevel"); - Instruction *Src = Dep.getSrc(); - Instruction *Dst = Dep.getDst(); - assert(Src->mayReadFromMemory() || Src->mayWriteToMemory()); - assert(Dst->mayReadFromMemory() || Dst->mayWriteToMemory()); - assert(isLoadOrStore(Src)); - assert(isLoadOrStore(Dst)); - Value *SrcPtr = getLoadStorePointerOperand(Src); - Value *DstPtr = getLoadStorePointerOperand(Dst); - assert(underlyingObjectsAlias( - AA, F->getDataLayout(), MemoryLocation::get(Dst), - MemoryLocation::get(Src)) == AliasResult::MustAlias); - - // establish loop nesting levels - establishNestingLevels(Src, Dst); - - FullDependence Result(Src, Dst, Dep.Assumptions, false, CommonLevels); - - unsigned Pairs = 1; - SmallVector Pair(Pairs); - const SCEV *SrcSCEV = SE->getSCEV(SrcPtr); - const SCEV *DstSCEV = SE->getSCEV(DstPtr); - Pair[0].Src = SrcSCEV; - Pair[0].Dst = DstSCEV; - - if (Delinearize) { - if (tryDelinearize(Src, Dst, Pair)) { - LLVM_DEBUG(dbgs() << " delinearized\n"); - Pairs = Pair.size(); - } - } - - for (unsigned P = 0; P < Pairs; ++P) { - Pair[P].Loops.resize(MaxLevels + 1); - Pair[P].GroupLoops.resize(MaxLevels + 1); - Pair[P].Group.resize(Pairs); - removeMatchingExtensions(&Pair[P]); - Pair[P].Classification = - classifyPair(Pair[P].Src, LI->getLoopFor(Src->getParent()), Pair[P].Dst, - LI->getLoopFor(Dst->getParent()), Pair[P].Loops); - Pair[P].GroupLoops = Pair[P].Loops; - Pair[P].Group.set(P); - } - - SmallBitVector Separable(Pairs); - SmallBitVector Coupled(Pairs); - - // partition subscripts into separable and minimally-coupled groups - for (unsigned SI = 0; SI < Pairs; ++SI) { - if (Pair[SI].Classification == Subscript::NonLinear) { - // ignore these, but collect loops for later - collectCommonLoops(Pair[SI].Src, LI->getLoopFor(Src->getParent()), - Pair[SI].Loops); - collectCommonLoops(Pair[SI].Dst, 
LI->getLoopFor(Dst->getParent()), - Pair[SI].Loops); - Result.Consistent = false; - } else if (Pair[SI].Classification == Subscript::ZIV) - Separable.set(SI); - else { - // SIV, RDIV, or MIV, so check for coupled group - bool Done = true; - for (unsigned SJ = SI + 1; SJ < Pairs; ++SJ) { - SmallBitVector Intersection = Pair[SI].GroupLoops; - Intersection &= Pair[SJ].GroupLoops; - if (Intersection.any()) { - // accumulate set of all the loops in group - Pair[SJ].GroupLoops |= Pair[SI].GroupLoops; - // accumulate set of all subscripts in group - Pair[SJ].Group |= Pair[SI].Group; - Done = false; - } - } - if (Done) { - if (Pair[SI].Group.count() == 1) - Separable.set(SI); - else - Coupled.set(SI); - } - } - } - - Constraint NewConstraint; - NewConstraint.setAny(SE); - - // test separable subscripts - for (unsigned SI : Separable.set_bits()) { - switch (Pair[SI].Classification) { - case Subscript::SIV: { - unsigned Level; - const SCEV *SplitIter = nullptr; - (void)testSIV(Pair[SI].Src, Pair[SI].Dst, Level, Result, NewConstraint, - SplitIter); - if (Level == SplitLevel) { - assert(SplitIter != nullptr); - return SplitIter; - } - break; - } - case Subscript::ZIV: - case Subscript::RDIV: - case Subscript::MIV: - break; - default: - llvm_unreachable("subscript has unexpected classification"); - } - } - - assert(!Coupled.empty() && "coupled expected non-empty"); - - // test coupled subscript groups - SmallVector Constraints(MaxLevels + 1); - for (unsigned II = 0; II <= MaxLevels; ++II) - Constraints[II].setAny(SE); - for (unsigned SI : Coupled.set_bits()) { - SmallBitVector Group(Pair[SI].Group); - SmallBitVector Sivs(Pairs); - SmallBitVector Mivs(Pairs); - SmallBitVector ConstrainedLevels(MaxLevels + 1); - for (unsigned SJ : Group.set_bits()) { - if (Pair[SJ].Classification == Subscript::SIV) - Sivs.set(SJ); - else - Mivs.set(SJ); - } - while (Sivs.any()) { - bool Changed = false; - for (unsigned SJ : Sivs.set_bits()) { - // SJ is an SIV subscript that's part of the current 
coupled group - unsigned Level; - const SCEV *SplitIter = nullptr; - (void)testSIV(Pair[SJ].Src, Pair[SJ].Dst, Level, Result, NewConstraint, - SplitIter); - if (Level == SplitLevel && SplitIter) - return SplitIter; - ConstrainedLevels.set(Level); - if (intersectConstraints(&Constraints[Level], &NewConstraint)) - Changed = true; - Sivs.reset(SJ); - } - if (!Changed) - continue; - // propagate, possibly creating new SIVs and ZIVs - for (unsigned SJ : Mivs.set_bits()) { - // SJ is an MIV subscript that's part of the current coupled group - if (!propagate(Pair[SJ].Src, Pair[SJ].Dst, Pair[SJ].Loops, Constraints, - Result.Consistent)) - continue; - Pair[SJ].Classification = classifyPair( - Pair[SJ].Src, LI->getLoopFor(Src->getParent()), Pair[SJ].Dst, - LI->getLoopFor(Dst->getParent()), Pair[SJ].Loops); - switch (Pair[SJ].Classification) { - case Subscript::ZIV: - Mivs.reset(SJ); - break; - case Subscript::SIV: - Sivs.set(SJ); - Mivs.reset(SJ); - break; - case Subscript::RDIV: - case Subscript::MIV: - break; - default: - llvm_unreachable("bad subscript classification"); - } - } - } - } - llvm_unreachable("somehow reached end of routine"); -} diff --git a/llvm/test/Analysis/DependenceAnalysis/Banerjee.ll b/llvm/test/Analysis/DependenceAnalysis/Banerjee.ll index e0def901d1759..d6d312e7ec116 100644 --- a/llvm/test/Analysis/DependenceAnalysis/Banerjee.ll +++ b/llvm/test/Analysis/DependenceAnalysis/Banerjee.ll @@ -1,9 +1,9 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 5 -; RUN: opt < %s -disable-output -da-delinearize=false "-passes=print" \ +; RUN: opt < %s -disable-output -da-delinearize=false "-passes=print" -da-enable-banerjee-miv-test \ ; RUN: -aa-pipeline=basic-aa 2>&1 | FileCheck %s -; RUN: opt < %s -disable-output -da-delinearize=false -passes='print' \ +; RUN: opt < %s -disable-output -da-delinearize=false -passes='print' -da-enable-banerjee-miv-test \ ; RUN: -aa-pipeline=basic-aa 2>&1 | FileCheck %s 
-check-prefix=NORMALIZE -; RUN: opt < %s -disable-output "-passes=print" -aa-pipeline=basic-aa 2>&1 \ +; RUN: opt < %s -disable-output "-passes=print" -da-enable-banerjee-miv-test -aa-pipeline=basic-aa 2>&1 \ ; RUN: | FileCheck %s -check-prefix=DELIN target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" diff --git a/llvm/test/Analysis/DependenceAnalysis/Coupled.ll b/llvm/test/Analysis/DependenceAnalysis/Coupled.ll deleted file mode 100644 index 1d4513429a83c..0000000000000 --- a/llvm/test/Analysis/DependenceAnalysis/Coupled.ll +++ /dev/null @@ -1,766 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 5 -; RUN: opt < %s -disable-output "-passes=print" -aa-pipeline=basic-aa 2>&1 \ -; RUN: | FileCheck %s - -target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" -target triple = "x86_64-apple-macosx10.6.0" - - -;; for (long int i = 0; i < 50; i++) { -;; A[i][i] = i; -;; *B++ = A[i + 10][i + 9]; - -define void @couple0(ptr %A, ptr %B, i32 %n) nounwind uwtable ssp { -; CHECK-LABEL: 'couple0' -; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx1, align 4 --> Dst: store i32 %conv, ptr %arrayidx1, align 4 -; CHECK-NEXT: da analyze - none! -; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx1, align 4 --> Dst: %0 = load i32, ptr %arrayidx4, align 4 -; CHECK-NEXT: da analyze - none! -; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx1, align 4 --> Dst: store i32 %0, ptr %B.addr.01, align 4 -; CHECK-NEXT: da analyze - confused! -; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx4, align 4 --> Dst: %0 = load i32, ptr %arrayidx4, align 4 -; CHECK-NEXT: da analyze - none! 
-; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx4, align 4 --> Dst: store i32 %0, ptr %B.addr.01, align 4 -; CHECK-NEXT: da analyze - confused! -; CHECK-NEXT: Src: store i32 %0, ptr %B.addr.01, align 4 --> Dst: store i32 %0, ptr %B.addr.01, align 4 -; CHECK-NEXT: da analyze - none! -; -entry: - br label %for.body - -for.body: ; preds = %entry, %for.body - %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ] - %B.addr.01 = phi ptr [ %B, %entry ], [ %incdec.ptr, %for.body ] - %conv = trunc i64 %i.02 to i32 - %arrayidx1 = getelementptr inbounds [100 x i32], ptr %A, i64 %i.02, i64 %i.02 - store i32 %conv, ptr %arrayidx1, align 4 - %add = add nsw i64 %i.02, 9 - %add2 = add nsw i64 %i.02, 10 - %arrayidx4 = getelementptr inbounds [100 x i32], ptr %A, i64 %add2, i64 %add - %0 = load i32, ptr %arrayidx4, align 4 - %incdec.ptr = getelementptr inbounds i32, ptr %B.addr.01, i64 1 - store i32 %0, ptr %B.addr.01, align 4 - %inc = add nsw i64 %i.02, 1 - %exitcond = icmp ne i64 %inc, 50 - br i1 %exitcond, label %for.body, label %for.end - -for.end: ; preds = %for.body - ret void -} - - -;; for (long int i = 0; i < 50; i++) { -;; A[i][i] = i; -;; *B++ = A[i + 9][i + 9]; - -define void @couple1(ptr %A, ptr %B, i32 %n) nounwind uwtable ssp { -; CHECK-LABEL: 'couple1' -; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx1, align 4 --> Dst: store i32 %conv, ptr %arrayidx1, align 4 -; CHECK-NEXT: da analyze - none! -; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx1, align 4 --> Dst: %0 = load i32, ptr %arrayidx4, align 4 -; CHECK-NEXT: da analyze - consistent flow [-9]! -; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx1, align 4 --> Dst: store i32 %0, ptr %B.addr.01, align 4 -; CHECK-NEXT: da analyze - confused! -; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx4, align 4 --> Dst: %0 = load i32, ptr %arrayidx4, align 4 -; CHECK-NEXT: da analyze - none! 
-; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx4, align 4 --> Dst: store i32 %0, ptr %B.addr.01, align 4 -; CHECK-NEXT: da analyze - confused! -; CHECK-NEXT: Src: store i32 %0, ptr %B.addr.01, align 4 --> Dst: store i32 %0, ptr %B.addr.01, align 4 -; CHECK-NEXT: da analyze - none! -; -entry: - br label %for.body - -for.body: ; preds = %entry, %for.body - %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ] - %B.addr.01 = phi ptr [ %B, %entry ], [ %incdec.ptr, %for.body ] - %conv = trunc i64 %i.02 to i32 - %arrayidx1 = getelementptr inbounds [100 x i32], ptr %A, i64 %i.02, i64 %i.02 - store i32 %conv, ptr %arrayidx1, align 4 - %add = add nsw i64 %i.02, 9 - %add2 = add nsw i64 %i.02, 9 - %arrayidx4 = getelementptr inbounds [100 x i32], ptr %A, i64 %add2, i64 %add - %0 = load i32, ptr %arrayidx4, align 4 - %incdec.ptr = getelementptr inbounds i32, ptr %B.addr.01, i64 1 - store i32 %0, ptr %B.addr.01, align 4 - %inc = add nsw i64 %i.02, 1 - %exitcond = icmp ne i64 %inc, 50 - br i1 %exitcond, label %for.body, label %for.end - -for.end: ; preds = %for.body - ret void -} - - -;; for (long int i = 0; i < 50; i++) { -;; A[3*i - 6][3*i - 6] = i; -;; *B++ = A[i][i]; - -define void @couple2(ptr %A, ptr %B, i32 %n) nounwind uwtable ssp { -; CHECK-LABEL: 'couple2' -; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx3, align 4 --> Dst: store i32 %conv, ptr %arrayidx3, align 4 -; CHECK-NEXT: da analyze - none! -; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx3, align 4 --> Dst: %0 = load i32, ptr %arrayidx5, align 4 -; CHECK-NEXT: da analyze - flow [*|<]! -; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx3, align 4 --> Dst: store i32 %0, ptr %B.addr.01, align 4 -; CHECK-NEXT: da analyze - confused! -; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx5, align 4 --> Dst: %0 = load i32, ptr %arrayidx5, align 4 -; CHECK-NEXT: da analyze - none! -; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx5, align 4 --> Dst: store i32 %0, ptr %B.addr.01, align 4 -; CHECK-NEXT: da analyze - confused! 
-; CHECK-NEXT: Src: store i32 %0, ptr %B.addr.01, align 4 --> Dst: store i32 %0, ptr %B.addr.01, align 4 -; CHECK-NEXT: da analyze - none! -; -entry: - br label %for.body - -for.body: ; preds = %entry, %for.body - %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ] - %B.addr.01 = phi ptr [ %B, %entry ], [ %incdec.ptr, %for.body ] - %conv = trunc i64 %i.02 to i32 - %mul = mul nsw i64 %i.02, 3 - %sub = add nsw i64 %mul, -6 - %mul1 = mul nsw i64 %i.02, 3 - %sub2 = add nsw i64 %mul1, -6 - %arrayidx3 = getelementptr inbounds [100 x i32], ptr %A, i64 %sub2, i64 %sub - store i32 %conv, ptr %arrayidx3, align 4 - %arrayidx5 = getelementptr inbounds [100 x i32], ptr %A, i64 %i.02, i64 %i.02 - %0 = load i32, ptr %arrayidx5, align 4 - %incdec.ptr = getelementptr inbounds i32, ptr %B.addr.01, i64 1 - store i32 %0, ptr %B.addr.01, align 4 - %inc = add nsw i64 %i.02, 1 - %exitcond = icmp ne i64 %inc, 50 - br i1 %exitcond, label %for.body, label %for.end - -for.end: ; preds = %for.body - ret void -} - - -;; for (long int i = 0; i < 50; i++) { -;; A[3*i - 6][3*i - 5] = i; -;; *B++ = A[i][i]; - -define void @couple3(ptr %A, ptr %B, i32 %n) nounwind uwtable ssp { -; CHECK-LABEL: 'couple3' -; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx3, align 4 --> Dst: store i32 %conv, ptr %arrayidx3, align 4 -; CHECK-NEXT: da analyze - none! -; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx3, align 4 --> Dst: %0 = load i32, ptr %arrayidx5, align 4 -; CHECK-NEXT: da analyze - none! -; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx3, align 4 --> Dst: store i32 %0, ptr %B.addr.01, align 4 -; CHECK-NEXT: da analyze - confused! -; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx5, align 4 --> Dst: %0 = load i32, ptr %arrayidx5, align 4 -; CHECK-NEXT: da analyze - none! -; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx5, align 4 --> Dst: store i32 %0, ptr %B.addr.01, align 4 -; CHECK-NEXT: da analyze - confused! 
-; CHECK-NEXT: Src: store i32 %0, ptr %B.addr.01, align 4 --> Dst: store i32 %0, ptr %B.addr.01, align 4 -; CHECK-NEXT: da analyze - none! -; -entry: - br label %for.body - -for.body: ; preds = %entry, %for.body - %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ] - %B.addr.01 = phi ptr [ %B, %entry ], [ %incdec.ptr, %for.body ] - %conv = trunc i64 %i.02 to i32 - %mul = mul nsw i64 %i.02, 3 - %sub = add nsw i64 %mul, -5 - %mul1 = mul nsw i64 %i.02, 3 - %sub2 = add nsw i64 %mul1, -6 - %arrayidx3 = getelementptr inbounds [100 x i32], ptr %A, i64 %sub2, i64 %sub - store i32 %conv, ptr %arrayidx3, align 4 - %arrayidx5 = getelementptr inbounds [100 x i32], ptr %A, i64 %i.02, i64 %i.02 - %0 = load i32, ptr %arrayidx5, align 4 - %incdec.ptr = getelementptr inbounds i32, ptr %B.addr.01, i64 1 - store i32 %0, ptr %B.addr.01, align 4 - %inc = add nsw i64 %i.02, 1 - %exitcond = icmp ne i64 %inc, 50 - br i1 %exitcond, label %for.body, label %for.end - -for.end: ; preds = %for.body - ret void -} - - -;; for (long int i = 0; i < 50; i++) { -;; A[3*i - 6][3*i - n] = i; -;; *B++ = A[i][i]; - -define void @couple4(ptr %A, ptr %B, i32 %n) nounwind uwtable ssp { -; CHECK-LABEL: 'couple4' -; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx4, align 4 --> Dst: store i32 %conv, ptr %arrayidx4, align 4 -; CHECK-NEXT: da analyze - none! -; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx4, align 4 --> Dst: %0 = load i32, ptr %arrayidx6, align 4 -; CHECK-NEXT: da analyze - flow [*|<]! -; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx4, align 4 --> Dst: store i32 %0, ptr %B.addr.01, align 4 -; CHECK-NEXT: da analyze - confused! -; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx6, align 4 --> Dst: %0 = load i32, ptr %arrayidx6, align 4 -; CHECK-NEXT: da analyze - none! -; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx6, align 4 --> Dst: store i32 %0, ptr %B.addr.01, align 4 -; CHECK-NEXT: da analyze - confused! 
-; CHECK-NEXT: Src: store i32 %0, ptr %B.addr.01, align 4 --> Dst: store i32 %0, ptr %B.addr.01, align 4 -; CHECK-NEXT: da analyze - none! -; -entry: - br label %for.body - -for.body: ; preds = %entry, %for.body - %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ] - %B.addr.01 = phi ptr [ %B, %entry ], [ %incdec.ptr, %for.body ] - %conv = trunc i64 %i.02 to i32 - %mul = mul nsw i64 %i.02, 3 - %conv1 = sext i32 %n to i64 - %sub = sub nsw i64 %mul, %conv1 - %mul2 = mul nsw i64 %i.02, 3 - %sub3 = add nsw i64 %mul2, -6 - %arrayidx4 = getelementptr inbounds [100 x i32], ptr %A, i64 %sub3, i64 %sub - store i32 %conv, ptr %arrayidx4, align 4 - %arrayidx6 = getelementptr inbounds [100 x i32], ptr %A, i64 %i.02, i64 %i.02 - %0 = load i32, ptr %arrayidx6, align 4 - %incdec.ptr = getelementptr inbounds i32, ptr %B.addr.01, i64 1 - store i32 %0, ptr %B.addr.01, align 4 - %inc = add nsw i64 %i.02, 1 - %exitcond = icmp ne i64 %inc, 50 - br i1 %exitcond, label %for.body, label %for.end - -for.end: ; preds = %for.body - ret void -} - - -;; for (long int i = 0; i < 50; i++) { -;; A[3*i - n + 1][3*i - n] = i; -;; *B++ = A[i][i]; - -define void @couple5(ptr %A, ptr %B, i32 %n) nounwind uwtable ssp { -; CHECK-LABEL: 'couple5' -; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx5, align 4 --> Dst: store i32 %conv, ptr %arrayidx5, align 4 -; CHECK-NEXT: da analyze - none! -; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx5, align 4 --> Dst: %0 = load i32, ptr %arrayidx7, align 4 -; CHECK-NEXT: da analyze - none! -; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx5, align 4 --> Dst: store i32 %0, ptr %B.addr.01, align 4 -; CHECK-NEXT: da analyze - confused! -; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx7, align 4 --> Dst: %0 = load i32, ptr %arrayidx7, align 4 -; CHECK-NEXT: da analyze - none! -; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx7, align 4 --> Dst: store i32 %0, ptr %B.addr.01, align 4 -; CHECK-NEXT: da analyze - confused! 
-; CHECK-NEXT: Src: store i32 %0, ptr %B.addr.01, align 4 --> Dst: store i32 %0, ptr %B.addr.01, align 4 -; CHECK-NEXT: da analyze - none! -; -entry: - br label %for.body - -for.body: ; preds = %entry, %for.body - %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ] - %B.addr.01 = phi ptr [ %B, %entry ], [ %incdec.ptr, %for.body ] - %conv = trunc i64 %i.02 to i32 - %mul = mul nsw i64 %i.02, 3 - %conv1 = sext i32 %n to i64 - %sub = sub nsw i64 %mul, %conv1 - %mul2 = mul nsw i64 %i.02, 3 - %conv3 = sext i32 %n to i64 - %sub4 = sub nsw i64 %mul2, %conv3 - %add = add nsw i64 %sub4, 1 - %arrayidx5 = getelementptr inbounds [100 x i32], ptr %A, i64 %add, i64 %sub - store i32 %conv, ptr %arrayidx5, align 4 - %arrayidx7 = getelementptr inbounds [100 x i32], ptr %A, i64 %i.02, i64 %i.02 - %0 = load i32, ptr %arrayidx7, align 4 - %incdec.ptr = getelementptr inbounds i32, ptr %B.addr.01, i64 1 - store i32 %0, ptr %B.addr.01, align 4 - %inc = add nsw i64 %i.02, 1 - %exitcond = icmp ne i64 %inc, 50 - br i1 %exitcond, label %for.body, label %for.end - -for.end: ; preds = %for.body - ret void -} - - -;; for (long int i = 0; i < 50; i++) { -;; A[i][3*i - 6] = i; -;; *B++ = A[i][i]; - -define void @couple6(ptr %A, ptr %B, i32 %n) nounwind uwtable ssp { -; CHECK-LABEL: 'couple6' -; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx1, align 4 --> Dst: store i32 %conv, ptr %arrayidx1, align 4 -; CHECK-NEXT: da analyze - none! -; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx1, align 4 --> Dst: %0 = load i32, ptr %arrayidx3, align 4 -; CHECK-NEXT: da analyze - flow [0|<]! -; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx1, align 4 --> Dst: store i32 %0, ptr %B.addr.01, align 4 -; CHECK-NEXT: da analyze - confused! -; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx3, align 4 --> Dst: %0 = load i32, ptr %arrayidx3, align 4 -; CHECK-NEXT: da analyze - none! 
-; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx3, align 4 --> Dst: store i32 %0, ptr %B.addr.01, align 4 -; CHECK-NEXT: da analyze - confused! -; CHECK-NEXT: Src: store i32 %0, ptr %B.addr.01, align 4 --> Dst: store i32 %0, ptr %B.addr.01, align 4 -; CHECK-NEXT: da analyze - none! -; -entry: - br label %for.body - -for.body: ; preds = %entry, %for.body - %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ] - %B.addr.01 = phi ptr [ %B, %entry ], [ %incdec.ptr, %for.body ] - %conv = trunc i64 %i.02 to i32 - %mul = mul nsw i64 %i.02, 3 - %sub = add nsw i64 %mul, -6 - %arrayidx1 = getelementptr inbounds [100 x i32], ptr %A, i64 %i.02, i64 %sub - store i32 %conv, ptr %arrayidx1, align 4 - %arrayidx3 = getelementptr inbounds [100 x i32], ptr %A, i64 %i.02, i64 %i.02 - %0 = load i32, ptr %arrayidx3, align 4 - %incdec.ptr = getelementptr inbounds i32, ptr %B.addr.01, i64 1 - store i32 %0, ptr %B.addr.01, align 4 - %inc = add nsw i64 %i.02, 1 - %exitcond = icmp ne i64 %inc, 50 - br i1 %exitcond, label %for.body, label %for.end - -for.end: ; preds = %for.body - ret void -} - - -;; for (long int i = 0; i < 50; i++) { -;; A[i][3*i - 5] = i; -;; *B++ = A[i][i]; - -define void @couple7(ptr %A, ptr %B, i32 %n) nounwind uwtable ssp { -; CHECK-LABEL: 'couple7' -; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx1, align 4 --> Dst: store i32 %conv, ptr %arrayidx1, align 4 -; CHECK-NEXT: da analyze - none! -; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx1, align 4 --> Dst: %0 = load i32, ptr %arrayidx3, align 4 -; CHECK-NEXT: da analyze - none! -; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx1, align 4 --> Dst: store i32 %0, ptr %B.addr.01, align 4 -; CHECK-NEXT: da analyze - confused! -; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx3, align 4 --> Dst: %0 = load i32, ptr %arrayidx3, align 4 -; CHECK-NEXT: da analyze - none! -; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx3, align 4 --> Dst: store i32 %0, ptr %B.addr.01, align 4 -; CHECK-NEXT: da analyze - confused! 
-; CHECK-NEXT: Src: store i32 %0, ptr %B.addr.01, align 4 --> Dst: store i32 %0, ptr %B.addr.01, align 4 -; CHECK-NEXT: da analyze - none! -; -entry: - br label %for.body - -for.body: ; preds = %entry, %for.body - %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ] - %B.addr.01 = phi ptr [ %B, %entry ], [ %incdec.ptr, %for.body ] - %conv = trunc i64 %i.02 to i32 - %mul = mul nsw i64 %i.02, 3 - %sub = add nsw i64 %mul, -5 - %arrayidx1 = getelementptr inbounds [100 x i32], ptr %A, i64 %i.02, i64 %sub - store i32 %conv, ptr %arrayidx1, align 4 - %arrayidx3 = getelementptr inbounds [100 x i32], ptr %A, i64 %i.02, i64 %i.02 - %0 = load i32, ptr %arrayidx3, align 4 - %incdec.ptr = getelementptr inbounds i32, ptr %B.addr.01, i64 1 - store i32 %0, ptr %B.addr.01, align 4 - %inc = add nsw i64 %i.02, 1 - %exitcond = icmp ne i64 %inc, 50 - br i1 %exitcond, label %for.body, label %for.end - -for.end: ; preds = %for.body - ret void -} - - -;; for (long int i = 0; i <= 15; i++) { -;; A[3*i - 18][3 - i] = i; -;; *B++ = A[i][i]; - -define void @couple8(ptr %A, ptr %B, i32 %n) nounwind uwtable ssp { -; CHECK-LABEL: 'couple8' -; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx2, align 4 --> Dst: store i32 %conv, ptr %arrayidx2, align 4 -; CHECK-NEXT: da analyze - none! -; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx2, align 4 --> Dst: %0 = load i32, ptr %arrayidx4, align 4 -; CHECK-NEXT: da analyze - none! -; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx2, align 4 --> Dst: store i32 %0, ptr %B.addr.01, align 4 -; CHECK-NEXT: da analyze - confused! -; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx4, align 4 --> Dst: %0 = load i32, ptr %arrayidx4, align 4 -; CHECK-NEXT: da analyze - none! -; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx4, align 4 --> Dst: store i32 %0, ptr %B.addr.01, align 4 -; CHECK-NEXT: da analyze - confused! -; CHECK-NEXT: Src: store i32 %0, ptr %B.addr.01, align 4 --> Dst: store i32 %0, ptr %B.addr.01, align 4 -; CHECK-NEXT: da analyze - none! 
-; -entry: - br label %for.body - -for.body: ; preds = %entry, %for.body - %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ] - %B.addr.01 = phi ptr [ %B, %entry ], [ %incdec.ptr, %for.body ] - %conv = trunc i64 %i.02 to i32 - %sub = sub nsw i64 3, %i.02 - %mul = mul nsw i64 %i.02, 3 - %sub1 = add nsw i64 %mul, -18 - %arrayidx2 = getelementptr inbounds [100 x i32], ptr %A, i64 %sub1, i64 %sub - store i32 %conv, ptr %arrayidx2, align 4 - %arrayidx4 = getelementptr inbounds [100 x i32], ptr %A, i64 %i.02, i64 %i.02 - %0 = load i32, ptr %arrayidx4, align 4 - %incdec.ptr = getelementptr inbounds i32, ptr %B.addr.01, i64 1 - store i32 %0, ptr %B.addr.01, align 4 - %inc = add nsw i64 %i.02, 1 - %exitcond = icmp ne i64 %inc, 16 - br i1 %exitcond, label %for.body, label %for.end - -for.end: ; preds = %for.body - ret void -} - - -;; for (long int i = 0; i <= 15; i++) { -;; A[3*i - 18][2 - i] = i; -;; *B++ = A[i][i]; - -define void @couple9(ptr %A, ptr %B, i32 %n) nounwind uwtable ssp { -; CHECK-LABEL: 'couple9' -; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx2, align 4 --> Dst: store i32 %conv, ptr %arrayidx2, align 4 -; CHECK-NEXT: da analyze - none! -; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx2, align 4 --> Dst: %0 = load i32, ptr %arrayidx4, align 4 -; CHECK-NEXT: da analyze - none! -; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx2, align 4 --> Dst: store i32 %0, ptr %B.addr.01, align 4 -; CHECK-NEXT: da analyze - confused! -; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx4, align 4 --> Dst: %0 = load i32, ptr %arrayidx4, align 4 -; CHECK-NEXT: da analyze - none! -; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx4, align 4 --> Dst: store i32 %0, ptr %B.addr.01, align 4 -; CHECK-NEXT: da analyze - confused! -; CHECK-NEXT: Src: store i32 %0, ptr %B.addr.01, align 4 --> Dst: store i32 %0, ptr %B.addr.01, align 4 -; CHECK-NEXT: da analyze - none! 
-; -entry: - br label %for.body - -for.body: ; preds = %entry, %for.body - %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ] - %B.addr.01 = phi ptr [ %B, %entry ], [ %incdec.ptr, %for.body ] - %conv = trunc i64 %i.02 to i32 - %sub = sub nsw i64 2, %i.02 - %mul = mul nsw i64 %i.02, 3 - %sub1 = add nsw i64 %mul, -18 - %arrayidx2 = getelementptr inbounds [100 x i32], ptr %A, i64 %sub1, i64 %sub - store i32 %conv, ptr %arrayidx2, align 4 - %arrayidx4 = getelementptr inbounds [100 x i32], ptr %A, i64 %i.02, i64 %i.02 - %0 = load i32, ptr %arrayidx4, align 4 - %incdec.ptr = getelementptr inbounds i32, ptr %B.addr.01, i64 1 - store i32 %0, ptr %B.addr.01, align 4 - %inc = add nsw i64 %i.02, 1 - %exitcond = icmp ne i64 %inc, 16 - br i1 %exitcond, label %for.body, label %for.end - -for.end: ; preds = %for.body - ret void -} - - -;; for (long int i = 0; i <= 15; i++) { -;; A[3*i - 18][6 - i] = i; -;; *B++ = A[i][i]; - -define void @couple10(ptr %A, ptr %B, i32 %n) nounwind uwtable ssp { -; CHECK-LABEL: 'couple10' -; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx2, align 4 --> Dst: store i32 %conv, ptr %arrayidx2, align 4 -; CHECK-NEXT: da analyze - none! -; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx2, align 4 --> Dst: %0 = load i32, ptr %arrayidx4, align 4 -; CHECK-NEXT: da analyze - flow [>]! -; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx2, align 4 --> Dst: store i32 %0, ptr %B.addr.01, align 4 -; CHECK-NEXT: da analyze - confused! -; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx4, align 4 --> Dst: %0 = load i32, ptr %arrayidx4, align 4 -; CHECK-NEXT: da analyze - none! -; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx4, align 4 --> Dst: store i32 %0, ptr %B.addr.01, align 4 -; CHECK-NEXT: da analyze - confused! -; CHECK-NEXT: Src: store i32 %0, ptr %B.addr.01, align 4 --> Dst: store i32 %0, ptr %B.addr.01, align 4 -; CHECK-NEXT: da analyze - none! 
-; -entry: - br label %for.body - -for.body: ; preds = %entry, %for.body - %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ] - %B.addr.01 = phi ptr [ %B, %entry ], [ %incdec.ptr, %for.body ] - %conv = trunc i64 %i.02 to i32 - %sub = sub nsw i64 6, %i.02 - %mul = mul nsw i64 %i.02, 3 - %sub1 = add nsw i64 %mul, -18 - %arrayidx2 = getelementptr inbounds [100 x i32], ptr %A, i64 %sub1, i64 %sub - store i32 %conv, ptr %arrayidx2, align 4 - %arrayidx4 = getelementptr inbounds [100 x i32], ptr %A, i64 %i.02, i64 %i.02 - %0 = load i32, ptr %arrayidx4, align 4 - %incdec.ptr = getelementptr inbounds i32, ptr %B.addr.01, i64 1 - store i32 %0, ptr %B.addr.01, align 4 - %inc = add nsw i64 %i.02, 1 - %exitcond = icmp ne i64 %inc, 16 - br i1 %exitcond, label %for.body, label %for.end - -for.end: ; preds = %for.body - ret void -} - - -;; for (long int i = 0; i <= 15; i++) { -;; A[3*i - 18][18 - i] = i; -;; *B++ = A[i][i]; - -define void @couple11(ptr %A, ptr %B, i32 %n) nounwind uwtable ssp { -; CHECK-LABEL: 'couple11' -; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx2, align 4 --> Dst: store i32 %conv, ptr %arrayidx2, align 4 -; CHECK-NEXT: da analyze - none! -; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx2, align 4 --> Dst: %0 = load i32, ptr %arrayidx4, align 4 -; CHECK-NEXT: da analyze - flow [0|<] splitable! -; CHECK-NEXT: da analyze - split level = 1, iteration = 9! -; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx2, align 4 --> Dst: store i32 %0, ptr %B.addr.01, align 4 -; CHECK-NEXT: da analyze - confused! -; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx4, align 4 --> Dst: %0 = load i32, ptr %arrayidx4, align 4 -; CHECK-NEXT: da analyze - none! -; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx4, align 4 --> Dst: store i32 %0, ptr %B.addr.01, align 4 -; CHECK-NEXT: da analyze - confused! -; CHECK-NEXT: Src: store i32 %0, ptr %B.addr.01, align 4 --> Dst: store i32 %0, ptr %B.addr.01, align 4 -; CHECK-NEXT: da analyze - none! 
-; -entry: - br label %for.body - -for.body: ; preds = %entry, %for.body - %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ] - %B.addr.01 = phi ptr [ %B, %entry ], [ %incdec.ptr, %for.body ] - %conv = trunc i64 %i.02 to i32 - %sub = sub nsw i64 18, %i.02 - %mul = mul nsw i64 %i.02, 3 - %sub1 = add nsw i64 %mul, -18 - %arrayidx2 = getelementptr inbounds [100 x i32], ptr %A, i64 %sub1, i64 %sub - store i32 %conv, ptr %arrayidx2, align 4 - %arrayidx4 = getelementptr inbounds [100 x i32], ptr %A, i64 %i.02, i64 %i.02 - %0 = load i32, ptr %arrayidx4, align 4 - %incdec.ptr = getelementptr inbounds i32, ptr %B.addr.01, i64 1 - store i32 %0, ptr %B.addr.01, align 4 - %inc = add nsw i64 %i.02, 1 - %exitcond = icmp ne i64 %inc, 16 - br i1 %exitcond, label %for.body, label %for.end - -for.end: ; preds = %for.body - ret void -} - - -;; for (long int i = 0; i <= 12; i++) { -;; A[3*i - 18][22 - i] = i; -;; *B++ = A[i][i]; - -define void @couple12(ptr %A, ptr %B, i32 %n) nounwind uwtable ssp { -; CHECK-LABEL: 'couple12' -; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx2, align 4 --> Dst: store i32 %conv, ptr %arrayidx2, align 4 -; CHECK-NEXT: da analyze - none! -; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx2, align 4 --> Dst: %0 = load i32, ptr %arrayidx4, align 4 -; CHECK-NEXT: da analyze - flow [<] splitable! -; CHECK-NEXT: da analyze - split level = 1, iteration = 11! -; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx2, align 4 --> Dst: store i32 %0, ptr %B.addr.01, align 4 -; CHECK-NEXT: da analyze - confused! -; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx4, align 4 --> Dst: %0 = load i32, ptr %arrayidx4, align 4 -; CHECK-NEXT: da analyze - none! -; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx4, align 4 --> Dst: store i32 %0, ptr %B.addr.01, align 4 -; CHECK-NEXT: da analyze - confused! -; CHECK-NEXT: Src: store i32 %0, ptr %B.addr.01, align 4 --> Dst: store i32 %0, ptr %B.addr.01, align 4 -; CHECK-NEXT: da analyze - none! 
-; -entry: - br label %for.body - -for.body: ; preds = %entry, %for.body - %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ] - %B.addr.01 = phi ptr [ %B, %entry ], [ %incdec.ptr, %for.body ] - %conv = trunc i64 %i.02 to i32 - %sub = sub nsw i64 22, %i.02 - %mul = mul nsw i64 %i.02, 3 - %sub1 = add nsw i64 %mul, -18 - %arrayidx2 = getelementptr inbounds [100 x i32], ptr %A, i64 %sub1, i64 %sub - store i32 %conv, ptr %arrayidx2, align 4 - %arrayidx4 = getelementptr inbounds [100 x i32], ptr %A, i64 %i.02, i64 %i.02 - %0 = load i32, ptr %arrayidx4, align 4 - %incdec.ptr = getelementptr inbounds i32, ptr %B.addr.01, i64 1 - store i32 %0, ptr %B.addr.01, align 4 - %inc = add nsw i64 %i.02, 1 - %exitcond = icmp ne i64 %inc, 13 - br i1 %exitcond, label %for.body, label %for.end - -for.end: ; preds = %for.body - ret void -} - - -;; for (long int i = 0; i < 12; i++) { -;; A[3*i - 18][22 - i] = i; -;; *B++ = A[i][i]; - -define void @couple13(ptr %A, ptr %B, i32 %n) nounwind uwtable ssp { -; CHECK-LABEL: 'couple13' -; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx2, align 4 --> Dst: store i32 %conv, ptr %arrayidx2, align 4 -; CHECK-NEXT: da analyze - none! -; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx2, align 4 --> Dst: %0 = load i32, ptr %arrayidx4, align 4 -; CHECK-NEXT: da analyze - none! -; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx2, align 4 --> Dst: store i32 %0, ptr %B.addr.01, align 4 -; CHECK-NEXT: da analyze - confused! -; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx4, align 4 --> Dst: %0 = load i32, ptr %arrayidx4, align 4 -; CHECK-NEXT: da analyze - none! -; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx4, align 4 --> Dst: store i32 %0, ptr %B.addr.01, align 4 -; CHECK-NEXT: da analyze - confused! -; CHECK-NEXT: Src: store i32 %0, ptr %B.addr.01, align 4 --> Dst: store i32 %0, ptr %B.addr.01, align 4 -; CHECK-NEXT: da analyze - none! 
-; -entry: - br label %for.body - -for.body: ; preds = %entry, %for.body - %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ] - %B.addr.01 = phi ptr [ %B, %entry ], [ %incdec.ptr, %for.body ] - %conv = trunc i64 %i.02 to i32 - %sub = sub nsw i64 22, %i.02 - %mul = mul nsw i64 %i.02, 3 - %sub1 = add nsw i64 %mul, -18 - %arrayidx2 = getelementptr inbounds [100 x i32], ptr %A, i64 %sub1, i64 %sub - store i32 %conv, ptr %arrayidx2, align 4 - %arrayidx4 = getelementptr inbounds [100 x i32], ptr %A, i64 %i.02, i64 %i.02 - %0 = load i32, ptr %arrayidx4, align 4 - %incdec.ptr = getelementptr inbounds i32, ptr %B.addr.01, i64 1 - store i32 %0, ptr %B.addr.01, align 4 - %inc = add nsw i64 %i.02, 1 - %exitcond = icmp ne i64 %inc, 12 - br i1 %exitcond, label %for.body, label %for.end - -for.end: ; preds = %for.body - ret void -} - -;; for (long int i = 0; i < 100; i++) { -;; A[3*i - 18][18 - i][i] = i; -;; *B++ = A[i][i][i]; - -define void @couple14(ptr %A, ptr %B, i32 %n) nounwind uwtable ssp { -; CHECK-LABEL: 'couple14' -; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx3, align 4 --> Dst: store i32 %conv, ptr %arrayidx3, align 4 -; CHECK-NEXT: da analyze - none! -; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx3, align 4 --> Dst: %0 = load i32, ptr %arrayidx6, align 4 -; CHECK-NEXT: da analyze - flow [0|<]! -; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx3, align 4 --> Dst: store i32 %0, ptr %B.addr.01, align 4 -; CHECK-NEXT: da analyze - confused! -; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx6, align 4 --> Dst: %0 = load i32, ptr %arrayidx6, align 4 -; CHECK-NEXT: da analyze - none! -; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx6, align 4 --> Dst: store i32 %0, ptr %B.addr.01, align 4 -; CHECK-NEXT: da analyze - confused! -; CHECK-NEXT: Src: store i32 %0, ptr %B.addr.01, align 4 --> Dst: store i32 %0, ptr %B.addr.01, align 4 -; CHECK-NEXT: da analyze - none! 
-; -entry: - br label %for.body - -for.body: ; preds = %entry, %for.body - %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ] - %B.addr.01 = phi ptr [ %B, %entry ], [ %incdec.ptr, %for.body ] - %conv = trunc i64 %i.02 to i32 - %sub = sub nsw i64 18, %i.02 - %mul = mul nsw i64 %i.02, 3 - %sub1 = add nsw i64 %mul, -18 - %arrayidx3 = getelementptr inbounds [100 x [100 x i32]], ptr %A, i64 %sub1, i64 %sub, i64 %i.02 - store i32 %conv, ptr %arrayidx3, align 4 - %arrayidx6 = getelementptr inbounds [100 x [100 x i32]], ptr %A, i64 %i.02, i64 %i.02, i64 %i.02 - %0 = load i32, ptr %arrayidx6, align 4 - %incdec.ptr = getelementptr inbounds i32, ptr %B.addr.01, i64 1 - store i32 %0, ptr %B.addr.01, align 4 - %inc = add nsw i64 %i.02, 1 - %exitcond = icmp ne i64 %inc, 100 - br i1 %exitcond, label %for.body, label %for.end - -for.end: ; preds = %for.body - ret void -} - - -;; for (long int i = 0; i < 100; i++) { -;; A[3*i - 18][22 - i][i] = i; -;; *B++ = A[i][i][i]; - -define void @couple15(ptr %A, ptr %B, i32 %n) nounwind uwtable ssp { -; CHECK-LABEL: 'couple15' -; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx3, align 4 --> Dst: store i32 %conv, ptr %arrayidx3, align 4 -; CHECK-NEXT: da analyze - none! -; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx3, align 4 --> Dst: %0 = load i32, ptr %arrayidx6, align 4 -; CHECK-NEXT: da analyze - none! -; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx3, align 4 --> Dst: store i32 %0, ptr %B.addr.01, align 4 -; CHECK-NEXT: da analyze - confused! -; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx6, align 4 --> Dst: %0 = load i32, ptr %arrayidx6, align 4 -; CHECK-NEXT: da analyze - none! -; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx6, align 4 --> Dst: store i32 %0, ptr %B.addr.01, align 4 -; CHECK-NEXT: da analyze - confused! -; CHECK-NEXT: Src: store i32 %0, ptr %B.addr.01, align 4 --> Dst: store i32 %0, ptr %B.addr.01, align 4 -; CHECK-NEXT: da analyze - none! 
-; -entry: - br label %for.body - -for.body: ; preds = %entry, %for.body - %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ] - %B.addr.01 = phi ptr [ %B, %entry ], [ %incdec.ptr, %for.body ] - %conv = trunc i64 %i.02 to i32 - %sub = sub nsw i64 22, %i.02 - %mul = mul nsw i64 %i.02, 3 - %sub1 = add nsw i64 %mul, -18 - %arrayidx3 = getelementptr inbounds [100 x [100 x i32]], ptr %A, i64 %sub1, i64 %sub, i64 %i.02 - store i32 %conv, ptr %arrayidx3, align 4 - %arrayidx6 = getelementptr inbounds [100 x [100 x i32]], ptr %A, i64 %i.02, i64 %i.02, i64 %i.02 - %0 = load i32, ptr %arrayidx6, align 4 - %incdec.ptr = getelementptr inbounds i32, ptr %B.addr.01, i64 1 - store i32 %0, ptr %B.addr.01, align 4 - %inc = add nsw i64 %i.02, 1 - %exitcond = icmp ne i64 %inc, 100 - br i1 %exitcond, label %for.body, label %for.end - -for.end: ; preds = %for.body - ret void -} - -;; for(int i = 0; i < N; i+=1) { -;; A[M*N*i] = 1; -;; for(int j = 0; j < M; j+=1) -;; A[M*N + M*i + j] = 2; - -; FIXME: Currently failing to infer %M being positive. - -define void @couple_weakzerosiv(ptr noalias nocapture %A, i64 %N, i64 %M) { -; CHECK-LABEL: 'couple_weakzerosiv' -; CHECK-NEXT: Src: store i32 1, ptr %arrayidx.us, align 4 --> Dst: store i32 1, ptr %arrayidx.us, align 4 -; CHECK-NEXT: da analyze - none! -; CHECK-NEXT: Src: store i32 1, ptr %arrayidx.us, align 4 --> Dst: store i32 2, ptr %arrayidx9.us, align 4 -; CHECK-NEXT: da analyze - output [*|<]! -; CHECK-NEXT: Src: store i32 2, ptr %arrayidx9.us, align 4 --> Dst: store i32 2, ptr %arrayidx9.us, align 4 -; CHECK-NEXT: da analyze - none! 
-; -entry: - %cmp29 = icmp sgt i64 %N, 0 - br i1 %cmp29, label %for.body.lr.ph, label %for.cond.cleanup - -for.body.lr.ph: ; preds = %entry - %mul = mul nsw i64 %M, %N - br label %for.body.us - -for.body.us: ; preds = %for.body.lr.ph, %for.cond.cleanup4.loopexit.us - %i.030.us = phi i64 [ %add12.us, %for.cond.cleanup4.loopexit.us ], [ 0, %for.body.lr.ph ] - %mul1.us = mul nsw i64 %i.030.us, %mul - %arrayidx.us = getelementptr inbounds i32, ptr %A, i64 %mul1.us - store i32 1, ptr %arrayidx.us, align 4 - %mul6.us = mul nsw i64 %i.030.us, %M - %add.us = add i64 %mul6.us, %mul - br label %for.body5.us - -for.body5.us: ; preds = %for.body5.us, %for.body.us - %j.028.us = phi i64 [ 0, %for.body.us ], [ %add10.us, %for.body5.us ] - %add8.us = add i64 %add.us, %j.028.us - %arrayidx9.us = getelementptr inbounds i32, ptr %A, i64 %add8.us - store i32 2, ptr %arrayidx9.us, align 4 - %add10.us = add nuw nsw i64 %j.028.us, 1 - %exitcond.us = icmp eq i64 %add10.us, %M - br i1 %exitcond.us, label %for.cond.cleanup4.loopexit.us, label %for.body5.us - -for.cond.cleanup4.loopexit.us: ; preds = %for.body5.us - %add12.us = add nuw nsw i64 %i.030.us, 1 - %exitcond31.us = icmp eq i64 %add12.us, %N - br i1 %exitcond31.us, label %for.cond.cleanup, label %for.body.us - -for.cond.cleanup: ; preds = %for.cond.cleanup4.loopexit.us, %entry - ret void -} diff --git a/llvm/test/Analysis/DependenceAnalysis/ExactRDIV.ll b/llvm/test/Analysis/DependenceAnalysis/ExactRDIV.ll index b5ece14121686..4832417f62e28 100644 --- a/llvm/test/Analysis/DependenceAnalysis/ExactRDIV.ll +++ b/llvm/test/Analysis/DependenceAnalysis/ExactRDIV.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 5 -; RUN: opt < %s -disable-output "-passes=print" -aa-pipeline=basic-aa 2>&1 \ +; RUN: opt < %s -disable-output "-passes=print" -da-enable-banerjee-miv-test -aa-pipeline=basic-aa 2>&1 \ ; RUN: | FileCheck %s ; ModuleID = 'ExactRDIV.bc' diff --git 
a/llvm/test/Analysis/DependenceAnalysis/GCD.ll b/llvm/test/Analysis/DependenceAnalysis/GCD.ll index 03343e7a98211..59812ef8c5391 100644 --- a/llvm/test/Analysis/DependenceAnalysis/GCD.ll +++ b/llvm/test/Analysis/DependenceAnalysis/GCD.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 5 -; RUN: opt < %s -disable-output "-passes=print" -aa-pipeline=basic-aa 2>&1 \ +; RUN: opt < %s -disable-output "-passes=print" -da-enable-banerjee-miv-test -aa-pipeline=basic-aa 2>&1 \ ; RUN: | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" diff --git a/llvm/test/Analysis/DependenceAnalysis/NonCanonicalizedSubscript.ll b/llvm/test/Analysis/DependenceAnalysis/NonCanonicalizedSubscript.ll deleted file mode 100644 index e5d5d21e365a1..0000000000000 --- a/llvm/test/Analysis/DependenceAnalysis/NonCanonicalizedSubscript.ll +++ /dev/null @@ -1,108 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 5 -; RUN: opt < %s -disable-output "-passes=print" -aa-pipeline=basic-aa 2>&1 \ -; RUN: | FileCheck %s - -target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" -target triple = "x86_64-apple-macosx10.6.0" - -; for (int i = 0; i < 100; ++i) { -; int t0 = a[i][i]; -; int t1 = t0 + 1; -; a[i][5] = t1; -; } -; The subscript 5 in a[i][5] is deliberately an i32, mismatching the types of -; other subscript. DependenceAnalysis before the fix crashed due to this -; mismatch. -define void @i32_subscript(ptr %a, ptr %b) { -; CHECK-LABEL: 'i32_subscript' -; CHECK-NEXT: Src: %0 = load i32, ptr %a.addr, align 4 --> Dst: %0 = load i32, ptr %a.addr, align 4 -; CHECK-NEXT: da analyze - none! 
-; CHECK-NEXT: Src: %0 = load i32, ptr %a.addr, align 4 --> Dst: store i32 %1, ptr %a.addr.2, align 4 -; CHECK-NEXT: da analyze - anti [0|<]! -; CHECK-NEXT: Src: store i32 %1, ptr %a.addr.2, align 4 --> Dst: store i32 %1, ptr %a.addr.2, align 4 -; CHECK-NEXT: da analyze - none! -; -entry: - br label %for.body - -for.body: - %i = phi i64 [ 0, %entry ], [ %i.inc, %for.body ] - %a.addr = getelementptr [100 x [100 x i32]], ptr %a, i64 0, i64 %i, i64 %i - %a.addr.2 = getelementptr [100 x [100 x i32]], ptr %a, i64 0, i64 %i, i32 5 - %0 = load i32, ptr %a.addr, align 4 - %1 = add i32 %0, 1 - store i32 %1, ptr %a.addr.2, align 4 - %i.inc = add nsw i64 %i, 1 - %exitcond = icmp ne i64 %i.inc, 100 - br i1 %exitcond, label %for.body, label %for.end - -for.end: - ret void -} - -; unsigned i, j; -; for (i = 1; i < SIZE; i++) { -; for (j = i; j < SIZE; j++) { -; a[i][j] = a[i+1][j-1] + 2; -; } -; } -; Extends the previous example to coupled MIV subscripts. - - -@a = global [10004 x [10004 x i32]] zeroinitializer, align 16 - -; Function Attrs: nounwind uwtable -define void @coupled_miv_type_mismatch(i32 %n) #0 { -; CHECK-LABEL: 'coupled_miv_type_mismatch' -; CHECK-NEXT: Src: %2 = load i32, ptr %arrayidx5, align 4 --> Dst: %2 = load i32, ptr %arrayidx5, align 4 -; CHECK-NEXT: da analyze - none! -; CHECK-NEXT: Src: %2 = load i32, ptr %arrayidx5, align 4 --> Dst: store i32 %add6, ptr %arrayidx10, align 4 -; CHECK-NEXT: da analyze - consistent anti [1 -2]! -; CHECK-NEXT: Src: store i32 %add6, ptr %arrayidx10, align 4 --> Dst: store i32 %add6, ptr %arrayidx10, align 4 -; CHECK-NEXT: da analyze - none! 
-; -entry: - br label %for.cond - -for.cond: ; preds = %for.inc11, %entry - %indvars.iv11 = phi i64 [ %indvars.iv.next12, %for.inc11 ], [ 1, %entry ] - %exitcond14 = icmp ne i64 %indvars.iv11, 10000 - br i1 %exitcond14, label %for.cond1.preheader, label %for.end13 - -for.cond1.preheader: ; preds = %for.cond - %0 = trunc i64 %indvars.iv11 to i32 - br label %for.cond1 - -for.cond1: ; preds = %for.cond1.preheader, %for.body3 - %indvars.iv8 = phi i64 [ %indvars.iv11, %for.cond1.preheader ], [ %indvars.iv.next9, %for.body3 ] - %j.0 = phi i32 [ %inc, %for.body3 ], [ %0, %for.cond1.preheader ] - %lftr.wideiv = trunc i64 %indvars.iv8 to i32 - %exitcond = icmp ne i32 %lftr.wideiv, 10000 - br i1 %exitcond, label %for.body3, label %for.inc11 - -for.body3: ; preds = %for.cond1 - %sub = add nsw i32 %j.0, -1 - %idxprom = zext i32 %sub to i64 - %1 = add nuw nsw i64 %indvars.iv11, 1 - %arrayidx5 = getelementptr inbounds [10004 x [10004 x i32]], ptr @a, i64 0, i64 %1, i64 %idxprom - %2 = load i32, ptr %arrayidx5, align 4 - %add6 = add nsw i32 %2, 2 - %arrayidx10 = getelementptr inbounds [10004 x [10004 x i32]], ptr @a, i64 0, i64 %indvars.iv11, i64 %indvars.iv8 - store i32 %add6, ptr %arrayidx10, align 4 - %indvars.iv.next9 = add nuw nsw i64 %indvars.iv8, 1 - %inc = add nuw nsw i32 %j.0, 1 - br label %for.cond1 - -for.inc11: ; preds = %for.cond1 - %indvars.iv.next12 = add nuw nsw i64 %indvars.iv11, 1 - br label %for.cond - -for.end13: ; preds = %for.cond - ret void -} - -attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" } - -!llvm.ident = !{!0} - -!0 = !{!"clang version 3.7.0"} diff --git a/llvm/test/Analysis/DependenceAnalysis/Propagating.ll b/llvm/test/Analysis/DependenceAnalysis/Propagating.ll index 866f515baeafb..0713887291a3e 100644 --- 
a/llvm/test/Analysis/DependenceAnalysis/Propagating.ll +++ b/llvm/test/Analysis/DependenceAnalysis/Propagating.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 5 -; RUN: opt < %s -disable-output "-passes=print" -aa-pipeline=basic-aa 2>&1 \ +; RUN: opt < %s -disable-output "-passes=print" -da-enable-banerjee-miv-test -aa-pipeline=basic-aa 2>&1 \ ; RUN: | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" diff --git a/llvm/test/Analysis/DependenceAnalysis/Separability.ll b/llvm/test/Analysis/DependenceAnalysis/Separability.ll deleted file mode 100644 index 2ed9cca4d1fc0..0000000000000 --- a/llvm/test/Analysis/DependenceAnalysis/Separability.ll +++ /dev/null @@ -1,334 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 5 -; RUN: opt < %s -disable-output "-passes=print" -aa-pipeline=basic-aa 2>&1 \ -; RUN: | FileCheck %s - -target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" -target triple = "x86_64-apple-macosx10.6.0" - - -;; for (long int i = 0; i < 50; i++) -;; for (long int j = 0; j < 50; j++) -;; for (long int k = 0; k < 50; k++) -;; for (long int l = 0; l < 50; l++) { -;; A[n][i][j + k] = i; -;; *B++ = A[10][i + 10][2*j - l]; - -define void @sep0(ptr %A, ptr %B, i32 %n) nounwind uwtable ssp { -; CHECK-LABEL: 'sep0' -; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx11, align 4 --> Dst: store i32 %conv, ptr %arrayidx11, align 4 -; CHECK-NEXT: da analyze - output [0 * * S]! -; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx11, align 4 --> Dst: %0 = load i32, ptr %arrayidx15, align 4 -; CHECK-NEXT: da analyze - flow [* * * *|<]! 
-; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx11, align 4 --> Dst: store i32 %0, ptr %B.addr.31, align 4 -; CHECK-NEXT: da analyze - confused! -; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx15, align 4 --> Dst: %0 = load i32, ptr %arrayidx15, align 4 -; CHECK-NEXT: da analyze - input [* * S *]! -; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx15, align 4 --> Dst: store i32 %0, ptr %B.addr.31, align 4 -; CHECK-NEXT: da analyze - confused! -; CHECK-NEXT: Src: store i32 %0, ptr %B.addr.31, align 4 --> Dst: store i32 %0, ptr %B.addr.31, align 4 -; CHECK-NEXT: da analyze - none! -; -entry: - br label %for.cond1.preheader - -for.cond1.preheader: ; preds = %entry, %for.inc22 - %B.addr.08 = phi ptr [ %B, %entry ], [ %scevgep11, %for.inc22 ] - %i.07 = phi i64 [ 0, %entry ], [ %inc23, %for.inc22 ] - br label %for.cond4.preheader - -for.cond4.preheader: ; preds = %for.cond1.preheader, %for.inc19 - %B.addr.16 = phi ptr [ %B.addr.08, %for.cond1.preheader ], [ %scevgep9, %for.inc19 ] - %j.05 = phi i64 [ 0, %for.cond1.preheader ], [ %inc20, %for.inc19 ] - br label %for.cond7.preheader - -for.cond7.preheader: ; preds = %for.cond4.preheader, %for.inc16 - %B.addr.24 = phi ptr [ %B.addr.16, %for.cond4.preheader ], [ %scevgep, %for.inc16 ] - %k.03 = phi i64 [ 0, %for.cond4.preheader ], [ %inc17, %for.inc16 ] - br label %for.body9 - -for.body9: ; preds = %for.cond7.preheader, %for.body9 - %l.02 = phi i64 [ 0, %for.cond7.preheader ], [ %inc, %for.body9 ] - %B.addr.31 = phi ptr [ %B.addr.24, %for.cond7.preheader ], [ %incdec.ptr, %for.body9 ] - %conv = trunc i64 %i.07 to i32 - %add = add nsw i64 %j.05, %k.03 - %idxprom = sext i32 %n to i64 - %arrayidx11 = getelementptr inbounds [100 x [100 x i32]], ptr %A, i64 %idxprom, i64 %i.07, i64 %add - store i32 %conv, ptr %arrayidx11, align 4 - %mul = shl nsw i64 %j.05, 1 - %sub = sub nsw i64 %mul, %l.02 - %add12 = add nsw i64 %i.07, 10 - %arrayidx15 = getelementptr inbounds [100 x [100 x i32]], ptr %A, i64 10, i64 %add12, i64 %sub - %0 = load 
i32, ptr %arrayidx15, align 4 - %incdec.ptr = getelementptr inbounds i32, ptr %B.addr.31, i64 1 - store i32 %0, ptr %B.addr.31, align 4 - %inc = add nsw i64 %l.02, 1 - %exitcond = icmp ne i64 %inc, 50 - br i1 %exitcond, label %for.body9, label %for.inc16 - -for.inc16: ; preds = %for.body9 - %scevgep = getelementptr i32, ptr %B.addr.24, i64 50 - %inc17 = add nsw i64 %k.03, 1 - %exitcond10 = icmp ne i64 %inc17, 50 - br i1 %exitcond10, label %for.cond7.preheader, label %for.inc19 - -for.inc19: ; preds = %for.inc16 - %scevgep9 = getelementptr i32, ptr %B.addr.16, i64 2500 - %inc20 = add nsw i64 %j.05, 1 - %exitcond12 = icmp ne i64 %inc20, 50 - br i1 %exitcond12, label %for.cond4.preheader, label %for.inc22 - -for.inc22: ; preds = %for.inc19 - %scevgep11 = getelementptr i32, ptr %B.addr.08, i64 125000 - %inc23 = add nsw i64 %i.07, 1 - %exitcond13 = icmp ne i64 %inc23, 50 - br i1 %exitcond13, label %for.cond1.preheader, label %for.end24 - -for.end24: ; preds = %for.inc22 - ret void -} - - -;; for (long int i = 0; i < 50; i++) -;; for (long int j = 0; j < 50; j++) -;; for (long int k = 0; k < 50; k++) -;; for (long int l = 0; l < 50; l++) { -;; A[i][i][j + k] = i; -;; *B++ = A[10][i + 10][2*j - l]; - -define void @sep1(ptr %A, ptr %B, i32 %n) nounwind uwtable ssp { -; CHECK-LABEL: 'sep1' -; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx11, align 4 --> Dst: store i32 %conv, ptr %arrayidx11, align 4 -; CHECK-NEXT: da analyze - output [0 * * S]! -; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx11, align 4 --> Dst: %0 = load i32, ptr %arrayidx15, align 4 -; CHECK-NEXT: da analyze - flow [* * * *|<]! -; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx11, align 4 --> Dst: store i32 %0, ptr %B.addr.31, align 4 -; CHECK-NEXT: da analyze - confused! -; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx15, align 4 --> Dst: %0 = load i32, ptr %arrayidx15, align 4 -; CHECK-NEXT: da analyze - input [* * S *]! 
-; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx15, align 4 --> Dst: store i32 %0, ptr %B.addr.31, align 4 -; CHECK-NEXT: da analyze - confused! -; CHECK-NEXT: Src: store i32 %0, ptr %B.addr.31, align 4 --> Dst: store i32 %0, ptr %B.addr.31, align 4 -; CHECK-NEXT: da analyze - none! -; -entry: - br label %for.cond1.preheader - -for.cond1.preheader: ; preds = %entry, %for.inc22 - %B.addr.08 = phi ptr [ %B, %entry ], [ %scevgep11, %for.inc22 ] - %i.07 = phi i64 [ 0, %entry ], [ %inc23, %for.inc22 ] - br label %for.cond4.preheader - -for.cond4.preheader: ; preds = %for.cond1.preheader, %for.inc19 - %B.addr.16 = phi ptr [ %B.addr.08, %for.cond1.preheader ], [ %scevgep9, %for.inc19 ] - %j.05 = phi i64 [ 0, %for.cond1.preheader ], [ %inc20, %for.inc19 ] - br label %for.cond7.preheader - -for.cond7.preheader: ; preds = %for.cond4.preheader, %for.inc16 - %B.addr.24 = phi ptr [ %B.addr.16, %for.cond4.preheader ], [ %scevgep, %for.inc16 ] - %k.03 = phi i64 [ 0, %for.cond4.preheader ], [ %inc17, %for.inc16 ] - br label %for.body9 - -for.body9: ; preds = %for.cond7.preheader, %for.body9 - %l.02 = phi i64 [ 0, %for.cond7.preheader ], [ %inc, %for.body9 ] - %B.addr.31 = phi ptr [ %B.addr.24, %for.cond7.preheader ], [ %incdec.ptr, %for.body9 ] - %conv = trunc i64 %i.07 to i32 - %add = add nsw i64 %j.05, %k.03 - %arrayidx11 = getelementptr inbounds [100 x [100 x i32]], ptr %A, i64 %i.07, i64 %i.07, i64 %add - store i32 %conv, ptr %arrayidx11, align 4 - %mul = shl nsw i64 %j.05, 1 - %sub = sub nsw i64 %mul, %l.02 - %add12 = add nsw i64 %i.07, 10 - %arrayidx15 = getelementptr inbounds [100 x [100 x i32]], ptr %A, i64 10, i64 %add12, i64 %sub - %0 = load i32, ptr %arrayidx15, align 4 - %incdec.ptr = getelementptr inbounds i32, ptr %B.addr.31, i64 1 - store i32 %0, ptr %B.addr.31, align 4 - %inc = add nsw i64 %l.02, 1 - %exitcond = icmp ne i64 %inc, 50 - br i1 %exitcond, label %for.body9, label %for.inc16 - -for.inc16: ; preds = %for.body9 - %scevgep = getelementptr i32, ptr 
%B.addr.24, i64 50 - %inc17 = add nsw i64 %k.03, 1 - %exitcond10 = icmp ne i64 %inc17, 50 - br i1 %exitcond10, label %for.cond7.preheader, label %for.inc19 - -for.inc19: ; preds = %for.inc16 - %scevgep9 = getelementptr i32, ptr %B.addr.16, i64 2500 - %inc20 = add nsw i64 %j.05, 1 - %exitcond12 = icmp ne i64 %inc20, 50 - br i1 %exitcond12, label %for.cond4.preheader, label %for.inc22 - -for.inc22: ; preds = %for.inc19 - %scevgep11 = getelementptr i32, ptr %B.addr.08, i64 125000 - %inc23 = add nsw i64 %i.07, 1 - %exitcond13 = icmp ne i64 %inc23, 50 - br i1 %exitcond13, label %for.cond1.preheader, label %for.end24 - -for.end24: ; preds = %for.inc22 - ret void -} - - -;; for (long int i = 0; i < 50; i++) -;; for (long int j = 0; j < 50; j++) -;; for (long int k = 0; k < 50; k++) -;; for (long int l = 0; l ptr -10]! -;; for (long int l = 0; l < 50; l++) { -;; A[i][i][i + k][l] = i; -;; *B++ = A[10][i + 10][j + k][l + 10]; - -define void @sep2(ptr %A, ptr %B, i32 %n) nounwind uwtable ssp { -; CHECK-LABEL: 'sep2' -; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx12, align 4 --> Dst: store i32 %conv, ptr %arrayidx12, align 4 -; CHECK-NEXT: da analyze - consistent output [0 S 0 0]! -; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx12, align 4 --> Dst: %0 = load i32, ptr %arrayidx19, align 4 -; CHECK-NEXT: da analyze - flow [> * * -10]! -; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx12, align 4 --> Dst: store i32 %0, ptr %B.addr.31, align 4 -; CHECK-NEXT: da analyze - confused! -; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx19, align 4 --> Dst: %0 = load i32, ptr %arrayidx19, align 4 -; CHECK-NEXT: da analyze - input [0 * * 0]! -; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx19, align 4 --> Dst: store i32 %0, ptr %B.addr.31, align 4 -; CHECK-NEXT: da analyze - confused! -; CHECK-NEXT: Src: store i32 %0, ptr %B.addr.31, align 4 --> Dst: store i32 %0, ptr %B.addr.31, align 4 -; CHECK-NEXT: da analyze - none! 
-; -entry: - br label %for.cond1.preheader - -for.cond1.preheader: ; preds = %entry, %for.inc26 - %B.addr.08 = phi ptr [ %B, %entry ], [ %scevgep11, %for.inc26 ] - %i.07 = phi i64 [ 0, %entry ], [ %inc27, %for.inc26 ] - br label %for.cond4.preheader - -for.cond4.preheader: ; preds = %for.cond1.preheader, %for.inc23 - %B.addr.16 = phi ptr [ %B.addr.08, %for.cond1.preheader ], [ %scevgep9, %for.inc23 ] - %j.05 = phi i64 [ 0, %for.cond1.preheader ], [ %inc24, %for.inc23 ] - br label %for.cond7.preheader - -for.cond7.preheader: ; preds = %for.cond4.preheader, %for.inc20 - %B.addr.24 = phi ptr [ %B.addr.16, %for.cond4.preheader ], [ %scevgep, %for.inc20 ] - %k.03 = phi i64 [ 0, %for.cond4.preheader ], [ %inc21, %for.inc20 ] - br label %for.body9 - -for.body9: ; preds = %for.cond7.preheader, %for.body9 - %l.02 = phi i64 [ 0, %for.cond7.preheader ], [ %inc, %for.body9 ] - %B.addr.31 = phi ptr [ %B.addr.24, %for.cond7.preheader ], [ %incdec.ptr, %for.body9 ] - %conv = trunc i64 %i.07 to i32 - %add = add nsw i64 %i.07, %k.03 - %arrayidx12 = getelementptr inbounds [100 x [100 x [100 x i32]]], ptr %A, i64 %i.07, i64 %i.07, i64 %add, i64 %l.02 - store i32 %conv, ptr %arrayidx12, align 4 - %add13 = add nsw i64 %l.02, 10 - %add14 = add nsw i64 %j.05, %k.03 - %add15 = add nsw i64 %i.07, 10 - %arrayidx19 = getelementptr inbounds [100 x [100 x [100 x i32]]], ptr %A, i64 10, i64 %add15, i64 %add14, i64 %add13 - %0 = load i32, ptr %arrayidx19, align 4 - %incdec.ptr = getelementptr inbounds i32, ptr %B.addr.31, i64 1 - store i32 %0, ptr %B.addr.31, align 4 - %inc = add nsw i64 %l.02, 1 - %exitcond = icmp ne i64 %inc, 50 - br i1 %exitcond, label %for.body9, label %for.inc20 - -for.inc20: ; preds = %for.body9 - %scevgep = getelementptr i32, ptr %B.addr.24, i64 50 - %inc21 = add nsw i64 %k.03, 1 - %exitcond10 = icmp ne i64 %inc21, 50 - br i1 %exitcond10, label %for.cond7.preheader, label %for.inc23 - -for.inc23: ; preds = %for.inc20 - %scevgep9 = getelementptr i32, ptr %B.addr.16, i64 
2500 - %inc24 = add nsw i64 %j.05, 1 - %exitcond12 = icmp ne i64 %inc24, 50 - br i1 %exitcond12, label %for.cond4.preheader, label %for.inc26 - -for.inc26: ; preds = %for.inc23 - %scevgep11 = getelementptr i32, ptr %B.addr.08, i64 125000 - %inc27 = add nsw i64 %i.07, 1 - %exitcond13 = icmp ne i64 %inc27, 50 - br i1 %exitcond13, label %for.cond1.preheader, label %for.end28 - -for.end28: ; preds = %for.inc26 - ret void -} - - -;; for (long int i = 0; i < 50; i++) -;; for (long int j = 0; j < 50; j++) -;; for (long int k = 0; k < 50; k++) -;; for (long int l = 0; l ptr]! -;; for (long int l = 0; l < 50; l++) { -;; A[i][i][i + k][l + k] = i; -;; *B++ = A[10][i + 10][j + k][l + 10]; - -define void @sep3(ptr %A, ptr %B, i32 %n) nounwind uwtable ssp { -; CHECK-LABEL: 'sep3' -; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx13, align 4 --> Dst: store i32 %conv, ptr %arrayidx13, align 4 -; CHECK-NEXT: da analyze - consistent output [0 S 0 0]! -; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx13, align 4 --> Dst: %0 = load i32, ptr %arrayidx20, align 4 -; CHECK-NEXT: da analyze - flow [> * * *]! -; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx13, align 4 --> Dst: store i32 %0, ptr %B.addr.31, align 4 -; CHECK-NEXT: da analyze - confused! -; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx20, align 4 --> Dst: %0 = load i32, ptr %arrayidx20, align 4 -; CHECK-NEXT: da analyze - input [0 * * 0]! -; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx20, align 4 --> Dst: store i32 %0, ptr %B.addr.31, align 4 -; CHECK-NEXT: da analyze - confused! -; CHECK-NEXT: Src: store i32 %0, ptr %B.addr.31, align 4 --> Dst: store i32 %0, ptr %B.addr.31, align 4 -; CHECK-NEXT: da analyze - none! 
-; -entry: - br label %for.cond1.preheader - -for.cond1.preheader: ; preds = %entry, %for.inc27 - %B.addr.08 = phi ptr [ %B, %entry ], [ %scevgep11, %for.inc27 ] - %i.07 = phi i64 [ 0, %entry ], [ %inc28, %for.inc27 ] - br label %for.cond4.preheader - -for.cond4.preheader: ; preds = %for.cond1.preheader, %for.inc24 - %B.addr.16 = phi ptr [ %B.addr.08, %for.cond1.preheader ], [ %scevgep9, %for.inc24 ] - %j.05 = phi i64 [ 0, %for.cond1.preheader ], [ %inc25, %for.inc24 ] - br label %for.cond7.preheader - -for.cond7.preheader: ; preds = %for.cond4.preheader, %for.inc21 - %B.addr.24 = phi ptr [ %B.addr.16, %for.cond4.preheader ], [ %scevgep, %for.inc21 ] - %k.03 = phi i64 [ 0, %for.cond4.preheader ], [ %inc22, %for.inc21 ] - br label %for.body9 - -for.body9: ; preds = %for.cond7.preheader, %for.body9 - %l.02 = phi i64 [ 0, %for.cond7.preheader ], [ %inc, %for.body9 ] - %B.addr.31 = phi ptr [ %B.addr.24, %for.cond7.preheader ], [ %incdec.ptr, %for.body9 ] - %conv = trunc i64 %i.07 to i32 - %add = add nsw i64 %l.02, %k.03 - %add10 = add nsw i64 %i.07, %k.03 - %arrayidx13 = getelementptr inbounds [100 x [100 x [100 x i32]]], ptr %A, i64 %i.07, i64 %i.07, i64 %add10, i64 %add - store i32 %conv, ptr %arrayidx13, align 4 - %add14 = add nsw i64 %l.02, 10 - %add15 = add nsw i64 %j.05, %k.03 - %add16 = add nsw i64 %i.07, 10 - %arrayidx20 = getelementptr inbounds [100 x [100 x [100 x i32]]], ptr %A, i64 10, i64 %add16, i64 %add15, i64 %add14 - %0 = load i32, ptr %arrayidx20, align 4 - %incdec.ptr = getelementptr inbounds i32, ptr %B.addr.31, i64 1 - store i32 %0, ptr %B.addr.31, align 4 - %inc = add nsw i64 %l.02, 1 - %exitcond = icmp ne i64 %inc, 50 - br i1 %exitcond, label %for.body9, label %for.inc21 - -for.inc21: ; preds = %for.body9 - %scevgep = getelementptr i32, ptr %B.addr.24, i64 50 - %inc22 = add nsw i64 %k.03, 1 - %exitcond10 = icmp ne i64 %inc22, 50 - br i1 %exitcond10, label %for.cond7.preheader, label %for.inc24 - -for.inc24: ; preds = %for.inc21 - %scevgep9 = 
getelementptr i32, ptr %B.addr.16, i64 2500 - %inc25 = add nsw i64 %j.05, 1 - %exitcond12 = icmp ne i64 %inc25, 50 - br i1 %exitcond12, label %for.cond4.preheader, label %for.inc27 - -for.inc27: ; preds = %for.inc24 - %scevgep11 = getelementptr i32, ptr %B.addr.08, i64 125000 - %inc28 = add nsw i64 %i.07, 1 - %exitcond13 = icmp ne i64 %inc28, 50 - br i1 %exitcond13, label %for.cond1.preheader, label %for.end29 - -for.end29: ; preds = %for.inc27 - ret void -} diff --git a/llvm/test/Analysis/DependenceAnalysis/SymbolicSIV.ll b/llvm/test/Analysis/DependenceAnalysis/SymbolicSIV.ll index cdfaec76fa892..22bf1c2a980d2 100644 --- a/llvm/test/Analysis/DependenceAnalysis/SymbolicSIV.ll +++ b/llvm/test/Analysis/DependenceAnalysis/SymbolicSIV.ll @@ -333,7 +333,7 @@ define void @weaktest(ptr %A, ptr %B, i64 %n) nounwind uwtable ssp { ; CHECK-NEXT: da analyze - none! ; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: %0 = load i32, ptr %arrayidx2, align 4 ; CHECK-NEXT: da analyze - flow [*|<] splitable! -; CHECK-NEXT: da analyze - split level = 1, iteration = ((0 smax (-4 + (-4 * %n))) /u 8)! +; CHECK-NEXT: da analyze - split level = 1! ; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: store i32 %0, ptr %B.addr.02, align 4 ; CHECK-NEXT: da analyze - confused! ; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx2, align 4 --> Dst: %0 = load i32, ptr %arrayidx2, align 4 diff --git a/llvm/test/Analysis/DependenceAnalysis/WeakCrossingSIV.ll b/llvm/test/Analysis/DependenceAnalysis/WeakCrossingSIV.ll index cd044032e34f1..18e8c39f43f86 100644 --- a/llvm/test/Analysis/DependenceAnalysis/WeakCrossingSIV.ll +++ b/llvm/test/Analysis/DependenceAnalysis/WeakCrossingSIV.ll @@ -69,7 +69,7 @@ define void @weakcrossing1(ptr %A, ptr %B, i64 %n) nounwind uwtable ssp { ; CHECK-NEXT: da analyze - none! ; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: %0 = load i32, ptr %arrayidx2, align 4 ; CHECK-NEXT: da analyze - flow [<>] splitable! 
-; CHECK-NEXT: da analyze - split level = 1, iteration = 0! +; CHECK-NEXT: da analyze - split level = 1! ; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: store i32 %0, ptr %B.addr.02, align 4 ; CHECK-NEXT: da analyze - confused! ; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx2, align 4 --> Dst: %0 = load i32, ptr %arrayidx2, align 4 @@ -298,7 +298,7 @@ define void @weakcrossing6(ptr %A, ptr %B, i64 %n) nounwind uwtable ssp { ; CHECK-NEXT: da analyze - none! ; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: %0 = load i32, ptr %arrayidx1, align 4 ; CHECK-NEXT: da analyze - flow [<>] splitable! -; CHECK-NEXT: da analyze - split level = 1, iteration = 2! +; CHECK-NEXT: da analyze - split level = 1! ; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: store i32 %0, ptr %B.addr.01, align 4 ; CHECK-NEXT: da analyze - confused! ; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx1, align 4 --> Dst: %0 = load i32, ptr %arrayidx1, align 4 diff --git a/llvm/test/Transforms/LoopInterchange/phi-ordering.ll b/llvm/test/Transforms/LoopInterchange/phi-ordering.ll index 74709f2eb7575..7fb1dfe87c99e 100644 --- a/llvm/test/Transforms/LoopInterchange/phi-ordering.ll +++ b/llvm/test/Transforms/LoopInterchange/phi-ordering.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -passes=loop-interchange -cache-line-size=64 -verify-dom-info -verify-loop-info -verify-scev -verify-loop-lcssa -loop-interchange-threshold=0 -S 2>&1 | FileCheck %s +; RUN: opt < %s -passes=loop-interchange -cache-line-size=64 -verify-dom-info -verify-loop-info -verify-scev -verify-loop-lcssa -loop-interchange-threshold=0 -da-enable-banerjee-miv-test -S 2>&1 | FileCheck %s ;; Checks the order of the inner phi nodes does not cause havoc. ;; The inner loop has a reduction into c. The IV is not the first phi. 
diff --git a/llvm/test/Transforms/LoopInterchange/pr45743-move-from-inner-preheader.ll b/llvm/test/Transforms/LoopInterchange/pr45743-move-from-inner-preheader.ll index e7c7bf2a71819..78c394c662642 100644 --- a/llvm/test/Transforms/LoopInterchange/pr45743-move-from-inner-preheader.ll +++ b/llvm/test/Transforms/LoopInterchange/pr45743-move-from-inner-preheader.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -passes=loop-interchange -cache-line-size=64 -S %s | FileCheck %s +; RUN: opt -passes=loop-interchange -cache-line-size=64 -da-enable-banerjee-miv-test -S %s | FileCheck %s @global = external local_unnamed_addr global [400 x [400 x i32]], align 16