diff --git a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
index 405d4a742f37b..a84ead26f1d9d 100644
--- a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
+++ b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
@@ -272,6 +272,10 @@ class LoopVectorizationLegality {
   /// induction descriptor.
   using InductionList = MapVector<PHINode *, InductionDescriptor>;
 
+  /// MonotonicPHIList saves monotonic phi variables and maps them to the
+  /// monotonic phi descriptor.
+  using MonotonicPHIList = MapVector<PHINode *, MonotonicDescriptor>;
+
   /// RecurrenceSet contains the phi nodes that are recurrences other than
   /// inductions and reductions.
   using RecurrenceSet = SmallPtrSet<const PHINode *, 8>;
@@ -315,6 +319,11 @@ class LoopVectorizationLegality {
   /// Returns the induction variables found in the loop.
   const InductionList &getInductionVars() const { return Inductions; }
 
+  /// Returns the monotonic phi variables found in the loop.
+  const MonotonicPHIList &getMonotonicPHIs() const { return MonotonicPHIs; }
+
+  bool hasMonotonicPHIs() const { return !MonotonicPHIs.empty(); }
+
   /// Return the fixed-order recurrences found in the loop.
   RecurrenceSet &getFixedOrderRecurrences() { return FixedOrderRecurrences; }
 
@@ -372,6 +381,12 @@ class LoopVectorizationLegality {
   /// loop. Do not use after invoking 'createVectorizedLoopSkeleton' (PR34965).
   int isConsecutivePtr(Type *AccessTy, Value *Ptr) const;
 
+  /// Returns true if \p Phi is a monotonic variable.
+  bool isMonotonicPHI(PHINode *Phi) const;
+
+  /// Check if a memory access becomes compressed when vectorizing.
+  bool isCompressedPtr(Type *AccessTy, Value *Ptr, BasicBlock *BB) const;
+
   /// Returns true if \p V is invariant across all loop iterations according to
   /// SCEV.
   bool isInvariant(Value *V) const;
@@ -677,6 +692,9 @@ class LoopVectorizationLegality {
   /// variables can be pointers.
   InductionList Inductions;
 
+  /// Holds all of the monotonic phi variables that we found in the loop.
+  MonotonicPHIList MonotonicPHIs;
+
   /// Holds all the casts that participate in the update chain of the induction
   /// variables, and that have been proven to be redundant (possibly under a
   /// runtime guard). These casts can be ignored when creating the vectorized
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
index 03112c67dda7b..464ec496909e2 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
@@ -45,6 +45,10 @@ AllowStridedPointerIVs("lv-strided-pointer-ivs", cl::init(false), cl::Hidden,
                        cl::desc("Enable recognition of non-constant strided "
                                 "pointer induction variables."));
 
+static cl::opt<bool> EnableMonotonicPatterns(
+    "lv-monotonic-patterns", cl::init(true), cl::Hidden,
+    cl::desc("Enable recognition of monotonic patterns."));
+
 static cl::opt<bool>
     HintsAllowReordering("hints-allow-reordering", cl::init(true), cl::Hidden,
                          cl::desc("Allow enabling loop hints to reorder "
@@ -470,6 +474,30 @@ int LoopVectorizationLegality::isConsecutivePtr(Type *AccessTy,
   return 0;
 }
 
+bool LoopVectorizationLegality::isMonotonicPHI(PHINode *Phi) const {
+  return MonotonicPHIs.count(Phi);
+}
+
+bool LoopVectorizationLegality::isCompressedPtr(Type *AccessTy, Value *Ptr,
+                                                BasicBlock *BB) const {
+  MonotonicDescriptor Desc;
+  if (!MonotonicDescriptor::isMonotonicVal(Ptr, TheLoop, Desc, *PSE.getSE()))
+    return false;
+
+  // Check if the memory operation will use the same mask as the monotonic phi.
+  // TODO: relax the restrictions of the current implementation.
+  if (Desc.getPredicateEdge() !=
+      MonotonicDescriptor::Edge(BB, BB->getUniqueSuccessor()))
+    return false;
+
+  // Check if the pointer step equals the access size.
+  auto *Step =
+      dyn_cast<SCEVConstant>(Desc.getExpr()->getStepRecurrence(*PSE.getSE()));
+  if (!Step)
+    return false;
+  return Step->getAPInt() == BB->getDataLayout().getTypeAllocSize(AccessTy);
+}
+
 bool LoopVectorizationLegality::isInvariant(Value *V) const {
   return LAI->isInvariant(V);
 }
@@ -916,6 +944,13 @@ bool LoopVectorizationLegality::canVectorizeInstr(Instruction &I) {
       return true;
     }
 
+    MonotonicDescriptor MD;
+    if (EnableMonotonicPatterns &&
+        MonotonicDescriptor::isMonotonicPHI(Phi, TheLoop, MD, *PSE.getSE())) {
+      MonotonicPHIs[Phi] = MD;
+      return true;
+    }
+
     if (RecurrenceDescriptor::isFixedOrderRecurrence(Phi, TheLoop, DT)) {
       AllowedExit.insert(Phi);
       FixedOrderRecurrences.insert(Phi);
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 6565c8c036ca0..e1fd1593654e5 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -1241,9 +1241,9 @@ class LoopVectorizationCostModel {
   getDivRemSpeculationCost(Instruction *I, ElementCount VF) const;
 
-  /// Returns widening decision (CM_Widen or CM_Widen_Reverse) if \p I is a
-  /// memory instruction with consecutive access that can be widened, or
-  /// CM_Unknown otherwise.
+  /// Returns widening decision (CM_Widen, CM_Widen_Reverse or CM_Compressed) if
+  /// \p I is a memory instruction with consecutive access that can be widened,
+  /// or CM_Unknown otherwise.
   InstWidening memoryInstructionCanBeWidened(Instruction *I, ElementCount VF);
 
   /// Returns true if \p I is a memory instruction in an interleaved-group
@@ -3000,6 +3000,9 @@ LoopVectorizationCostModel::memoryInstructionCanBeWidened(Instruction *I,
   auto *Ptr = getLoadStorePointerOperand(I);
   auto *ScalarTy = getLoadStoreType(I);
 
+  if (Legal->isCompressedPtr(ScalarTy, Ptr, I->getParent()))
+    return CM_Compressed;
+
   // In order to be widened, the pointer should be consecutive, first of all.
   auto Stride = Legal->isConsecutivePtr(ScalarTy, Ptr);
   if (!Stride)
@@ -3257,6 +3260,39 @@ void LoopVectorizationCostModel::collectLoopUniforms(ElementCount VF) {
     AddToWorklistIfAllowed(IndUpdate);
   }
 
+  // Handle monotonic phis (similarly to induction variables).
+  for (const auto &MonotonicPHI : Legal->getMonotonicPHIs()) {
+    auto *Phi = MonotonicPHI.first;
+    auto *PhiUpdate = cast<Instruction>(Phi->getIncomingValueForBlock(Latch));
+    const auto &Desc = MonotonicPHI.second;
+
+    auto UniformPhi = llvm::all_of(Phi->users(), [&](User *U) -> bool {
+      auto *I = cast<Instruction>(U);
+      if (I == Desc.getStepInst())
+        return true;
+      if (auto *PN = dyn_cast<PHINode>(I); PN && Desc.getChain().contains(PN))
+        return true;
+      return !TheLoop->contains(I) || Worklist.count(I) ||
+             IsVectorizedMemAccessUse(I, Phi);
+    });
+    if (!UniformPhi)
+      continue;
+
+    auto UniformPhiUpdate =
+        llvm::all_of(PhiUpdate->users(), [&](User *U) -> bool {
+          auto *I = cast<Instruction>(U);
+          if (I == Phi)
+            return true;
+          return !TheLoop->contains(I) || Worklist.count(I) ||
+                 IsVectorizedMemAccessUse(I, Phi);
+        });
+    if (!UniformPhiUpdate)
+      continue;
+
+    AddToWorklistIfAllowed(Phi);
+    AddToWorklistIfAllowed(PhiUpdate);
+  }
+
   Uniforms[VF].insert_range(Worklist);
 }
 
@@ -4561,6 +4597,10 @@ LoopVectorizationPlanner::selectInterleaveCount(VPlan &Plan, ElementCount VF,
   if (Plan.hasEarlyExit())
     return 1;
 
+  // Monotonic vars don't support interleaving.
+  if (Legal->hasMonotonicPHIs())
+    return 1;
+
   const bool HasReductions =
       any_of(Plan.getVectorLoopRegion()->getEntryBasicBlock()->phis(),
              IsaPred<VPReductionPHIRecipe>);
@@ -8074,12 +8114,19 @@ VPRecipeBase *VPRecipeBuilder::tryToCreateWidenRecipe(VPSingleDefRecipe *R,
     if ((Recipe = tryToOptimizeInductionPHI(Phi, Operands, Range)))
       return Recipe;
 
-    VPHeaderPHIRecipe *PhiRecipe = nullptr;
-    assert((Legal->isReductionVariable(Phi) ||
+    VPSingleDefRecipe *PhiRecipe = nullptr;
+    assert((Legal->isMonotonicPHI(Phi) || Legal->isReductionVariable(Phi) ||
             Legal->isFixedOrderRecurrence(Phi)) &&
-           "can only widen reductions and fixed-order recurrences here");
+           "can only widen monotonic phis, reductions and fixed-order "
+           "recurrences here");
     VPValue *StartV = Operands[0];
-    if (Legal->isReductionVariable(Phi)) {
+    Value *IncomingVal =
+        Phi->getIncomingValueForBlock(OrigLoop->getLoopPreheader());
+    if (Legal->isMonotonicPHI(Phi)) {
+      PhiRecipe = new VPPhi({StartV}, Phi->getDebugLoc(),
+                            Phi->getName() + ".monotonic");
+      PhiRecipe->setUnderlyingValue(Phi);
+    } else if (Legal->isReductionVariable(Phi)) {
       const RecurrenceDescriptor &RdxDesc = Legal->getRecurrenceDescriptor(Phi);
       assert(RdxDesc.getRecurrenceStartValue() ==
              Phi->getIncomingValueForBlock(OrigLoop->getLoopPreheader()));
@@ -8430,6 +8477,55 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
   // bring the VPlan to its final state.
   // ---------------------------------------------------------------------------
 
+  // Adjust the recipes for any monotonic phis.
+  auto &MonotonicPHIs = Legal->getMonotonicPHIs();
+  for (VPRecipeBase &R : HeaderVPBB->phis()) {
+    auto *MonotonicPhi = dyn_cast<VPPhi>(&R);
+    if (!MonotonicPhi)
+      continue;
+    assert(MonotonicPhi->getNumIncoming() == 2 &&
+           MonotonicPhi->getIncomingBlock(0) == Plan->getVectorPreheader());
+
+    auto It =
+        MonotonicPHIs.find(cast<PHINode>(MonotonicPhi->getUnderlyingValue()));
+    if (It == MonotonicPHIs.end())
+      continue;
+    auto &Desc = It->second;
+
+    // Prohibit scalarization of monotonic phis.
+    if (!all_of(Range, [&](ElementCount VF) {
+          return CM.isUniformAfterVectorization(
+              MonotonicPhi->getUnderlyingInstr(), VF);
+        }))
+      return nullptr;
+
+    // Obtain the mask value for the predicate edge from the last VPBlendRecipe
+    // in the chain.
+    VPValue *Chain = MonotonicPhi->getIncomingValue(1);
+    VPValue *Mask = nullptr;
+    while (auto *BlendR = dyn_cast<VPBlendRecipe>(Chain))
+      for (unsigned I = 0, E = BlendR->getNumIncomingValues(); I != E; ++I)
+        if (auto *IncomingVal = BlendR->getIncomingValue(I);
+            IncomingVal != MonotonicPhi) {
+          Chain = IncomingVal;
+          Mask = BlendR->getMask(I);
+          break;
+        }
+    assert(Mask);
+
+    auto &SE = *PSE.getSE();
+    auto *Step = vputils::getOrCreateVPValueForSCEVExpr(
+        *Plan, Desc.getExpr()->getStepRecurrence(SE));
+
+    auto *MonotonicI =
+        new VPInstruction(VPInstruction::ComputeMonotonicResult,
+                          {MonotonicPhi, Mask, Step}, *Desc.getStepInst());
+    auto *BackedgeVal = MonotonicPhi->getIncomingValue(1);
+    auto *InsertBlock = BackedgeVal->getDefiningRecipe()->getParent();
+    InsertBlock->insert(MonotonicI, InsertBlock->getFirstNonPhi());
+    BackedgeVal->replaceAllUsesWith(MonotonicI);
+  }
+
   // Adjust the recipes for any inloop reductions.
   adjustRecipesForReductions(Plan, RecipeBuilder, Range.Start);
@@ -9892,6 +9988,15 @@ bool LoopVectorizePass::processLoop(Loop *L) {
     IC = LVP.selectInterleaveCount(LVP.getPlanFor(VF.Width), VF.Width, VF.Cost);
   unsigned SelectedIC = std::max(IC, UserIC);
+
+  if (LVL.hasMonotonicPHIs() && SelectedIC > 1) {
+    reportVectorizationFailure(
+        "Interleaving of loop with monotonic vars",
+        "Interleaving of loops with monotonic vars is not supported",
+        "CantInterleaveWithMonotonicVars", ORE, L);
+    return false;
+  }
+
   // Optimistically generate runtime checks if they are needed. Drop them if
   // they turn out to not be profitable.
   if (VF.Width.isVector() || SelectedIC > 1) {
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 26256951a9c6c..a763e82725d9f 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -1014,6 +1014,7 @@ class LLVM_ABI_FOR_TEST VPInstruction : public VPRecipeWithIRFlags,
     ComputeAnyOfResult,
     ComputeFindIVResult,
     ComputeReductionResult,
+    ComputeMonotonicResult,
     // Extracts the last lane from its operand if it is a vector, or the last
     // part if scalar. In the latter case, the recipe will be removed during
     // unrolling.
diff --git a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
index 80a2e4bc3f754..94b4d5fe7e499 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
@@ -97,6 +97,11 @@ Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPInstruction *R) {
   case VPInstruction::ComputeReductionResult: {
     return inferScalarType(R->getOperand(0));
   }
+  case VPInstruction::ComputeMonotonicResult: {
+    auto *PhiR = cast<VPPhi>(R->getOperand(0));
+    auto *OrigPhi = cast<PHINode>(PhiR->getUnderlyingValue());
+    return OrigPhi->getType();
+  }
   case VPInstruction::ExplicitVectorLength:
     return Type::getIntNTy(Ctx, 32);
   case Instruction::PHI:
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 0b0bd63ee2b28..132e9a03b8132 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -550,6 +550,7 @@ unsigned VPInstruction::getNumOperandsForOpcode(unsigned Opcode) {
   case VPInstruction::ActiveLaneMask:
   case VPInstruction::ComputeAnyOfResult:
   case VPInstruction::ReductionStartVector:
+  case VPInstruction::ComputeMonotonicResult:
     return 3;
   case VPInstruction::ComputeFindIVResult:
     return 4;
@@ -900,6 +901,34 @@ Value *VPInstruction::generate(VPTransformState &State) {
     return ReducedPartRdx;
   }
+  case VPInstruction::ComputeMonotonicResult: {
+    assert(getParent()->getPlan()->getUF() == 1 &&
+           "Expected unroll factor of 1.");
+
+    auto *Phi = State.get(getOperand(0), /*IsScalar*/ true);
+    auto *PhiTy = Phi->getType();
+    Value *Mask = State.get(getOperand(1), 0);
+    auto *MaskTy = Mask->getType();
+    assert(isa<VectorType>(MaskTy) &&
+           cast<VectorType>(MaskTy)->getElementType()->isIntegerTy(1) &&
+           "Mask type should be <N x i1>");
+
+    const auto &DL = State.CFG.PrevBB->getDataLayout();
+    auto *IntTy = PhiTy->isIntegerTy() ? PhiTy : DL.getIndexType(PhiTy);
+
+    auto *Step = State.get(getOperand(2), /*IsScalar*/ true);
+
+    auto &Builder = State.Builder;
+    auto *NumElems = Builder.CreateAddReduce(
+        Builder.CreateZExt(Mask, MaskTy->getWithNewType(IntTy)));
+    auto *Offset = Builder.CreateMul(NumElems, Step);
+
+    return PhiTy->isPointerTy()
+               ? Builder.CreatePtrAdd(Phi, Offset, "monotonic.add",
+                                      getGEPNoWrapFlags())
+               : Builder.CreateAdd(Phi, Offset, "monotonic.add",
+                                   hasNoUnsignedWrap(), hasNoSignedWrap());
+  }
   case VPInstruction::ExtractLastLanePerPart:
   case VPInstruction::ExtractLastElement:
   case VPInstruction::ExtractPenultimateElement: {
@@ -1169,6 +1198,12 @@ InstructionCost VPInstruction::computeCost(ElementCount VF,
                                   I32Ty, {Arg0Ty, I32Ty, I1Ty});
     return Ctx.TTI.getIntrinsicInstrCost(Attrs, Ctx.CostKind);
   }
+  case VPInstruction::ComputeMonotonicResult: {
+    Type *ElementTy = Ctx.Types.inferScalarType(getOperand(0));
+    auto *VectorTy = cast<VectorType>(toVectorTy(ElementTy, VF));
+    return Ctx.TTI.getArithmeticReductionCost(Instruction::Add, VectorTy,
+                                              std::nullopt, Ctx.CostKind);
+  }
   case VPInstruction::ExtractLastElement: {
     // Add on the cost of extracting the element.
     auto *VecTy = toVectorTy(Ctx.Types.inferScalarType(getOperand(0)), VF);
@@ -1182,8 +1217,8 @@ InstructionCost VPInstruction::computeCost(ElementCount VF,
   default:
     // TODO: Compute cost other VPInstructions once the legacy cost model has
     // been retired.
-    assert(!getUnderlyingValue() &&
-           "unexpected VPInstruction witht underlying value");
+    assert((getOpcode() == Instruction::PHI || !getUnderlyingValue()) &&
+           "unexpected VPInstruction with underlying value");
     return 0;
   }
 }
@@ -1198,6 +1233,7 @@ bool VPInstruction::isVectorToScalar() const {
          getOpcode() == VPInstruction::ComputeAnyOfResult ||
          getOpcode() == VPInstruction::ComputeFindIVResult ||
          getOpcode() == VPInstruction::ComputeReductionResult ||
+         getOpcode() == VPInstruction::ComputeMonotonicResult ||
          getOpcode() == VPInstruction::AnyOf;
 }
 
@@ -1421,6 +1457,9 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent,
   case VPInstruction::ComputeReductionResult:
     O << "compute-reduction-result";
     break;
+  case VPInstruction::ComputeMonotonicResult:
+    O << "compute-monotonic-result";
+    break;
   case VPInstruction::LogicalAnd:
     O << "logical-and";
     break;
@@ -2043,7 +2082,9 @@ bool VPIRFlags::flagsValidForOpcode(unsigned Opcode) const {
   case OperationType::OverflowingBinOp:
     return Opcode == Instruction::Add || Opcode == Instruction::Sub ||
            Opcode == Instruction::Mul ||
-           Opcode == VPInstruction::VPInstruction::CanonicalIVIncrementForPart;
+           Opcode ==
+               VPInstruction::VPInstruction::CanonicalIVIncrementForPart ||
+           Opcode == VPInstruction::ComputeMonotonicResult;
   case OperationType::Trunc:
     return Opcode == Instruction::Trunc;
   case OperationType::DisjointOp:
@@ -2053,7 +2094,8 @@ bool VPIRFlags::flagsValidForOpcode(unsigned Opcode) const {
   case OperationType::GEPOp:
     return Opcode == Instruction::GetElementPtr ||
            Opcode == VPInstruction::PtrAdd ||
-           Opcode == VPInstruction::WidePtrAdd;
+           Opcode == VPInstruction::WidePtrAdd ||
+           Opcode == VPInstruction::ComputeMonotonicResult;
   case OperationType::FPMathOp:
     return Opcode == Instruction::FAdd || Opcode == Instruction::FMul ||
            Opcode == Instruction::FSub || Opcode == Instruction::FNeg ||
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index cdfbc531ebfa6..bc305f5fbff65 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -4448,8 +4448,7 @@ void VPlanTransforms::addScalarResumePhis(
     // TODO: Extract final value from induction recipe initially, optimize to
     // pre-computed end value together in optimizeInductionExitUsers.
-    auto *VectorPhiR =
-        cast<VPHeaderPHIRecipe>(Builder.getRecipe(&ScalarPhiIRI->getIRPhi()));
+    VPRecipeBase *VectorPhiR = Builder.getRecipe(&ScalarPhiIRI->getIRPhi());
     if (auto *WideIVR = dyn_cast<VPWidenInductionRecipe>(VectorPhiR)) {
       if (VPInstruction *ResumePhi = addResumePhiRecipeForInduction(
               WideIVR, VectorPHBuilder, ScalarPHBuilder, TypeInfo,
@@ -4471,7 +4470,8 @@ void VPlanTransforms::addScalarResumePhis(
     // which for FORs is a vector whose last element needs to be extracted. The
     // start value provides the value if the loop is bypassed.
     bool IsFOR = isa<VPFirstOrderRecurrencePHIRecipe>(VectorPhiR);
-    auto *ResumeFromVectorLoop = VectorPhiR->getBackedgeValue();
+    auto *PhiAccessor = cast<VPPhiAccessors>(VectorPhiR);
+    auto *ResumeFromVectorLoop = PhiAccessor->getIncomingValue(1);
     assert(VectorRegion->getSingleSuccessor() == Plan.getMiddleBlock() &&
            "Cannot handle loops with uncountable early exits");
     if (IsFOR)
@@ -4480,7 +4480,7 @@ void VPlanTransforms::addScalarResumePhis(
                                      "vector.recur.extract");
     StringRef Name = IsFOR ? "scalar.recur.init" : "bc.merge.rdx";
     auto *ResumePhiR = ScalarPHBuilder.createScalarPhi(
-        {ResumeFromVectorLoop, VectorPhiR->getStartValue()}, {}, Name);
+        {ResumeFromVectorLoop, PhiAccessor->getIncomingValue(0)}, {}, Name);
     ScalarPhiIRI->addOperand(ResumePhiR);
   }
 }
diff --git a/llvm/test/Transforms/LoopVectorize/compress-idioms.ll b/llvm/test/Transforms/LoopVectorize/compress-idioms.ll
new file mode 100644
index 0000000000000..a5e21dfdfdab5
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/compress-idioms.ll
@@ -0,0 +1,581 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes=loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -S 2>&1 | FileCheck %s + +define void @test_store_with_pointer(ptr writeonly noalias %dst, ptr readonly %src, i32 %c, i32 %n) { +; CHECK-LABEL: define void @test_store_with_pointer( +; CHECK-SAME: ptr noalias writeonly [[DST:%.*]], ptr readonly [[SRC:%.*]], i32 [[C:%.*]], i32 [[N:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[CMP8:%.*]] = icmp sgt i32 [[N]], 0 +; CHECK-NEXT: br i1 [[CMP8]], label %[[FOR_BODY_PREHEADER:.*]], label %[[FOR_COND_CLEANUP:.*]] +; CHECK: [[FOR_BODY_PREHEADER]]: +; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext nneg i32 [[N]] to i64 +; CHECK-NEXT: br label %[[FOR_BODY:.*]] +; CHECK: [[FOR_COND_CLEANUP_LOOPEXIT:.*]]: +; CHECK-NEXT: br label %[[FOR_COND_CLEANUP]] +; CHECK: [[FOR_COND_CLEANUP]]: +; CHECK-NEXT: ret void +; CHECK: [[FOR_BODY]]: +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[FOR_BODY_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_INC:.*]] ] +; CHECK-NEXT: [[DST_ADDR_09:%.*]] = phi ptr [ [[DST]], %[[FOR_BODY_PREHEADER]] ], [ [[DST_ADDR_1:%.*]], %[[FOR_INC]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[INDVARS_IV]] +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP0]], [[C]] +; CHECK-NEXT: br i1 [[CMP1]], label %[[IF_THEN:.*]], label %[[FOR_INC]] +; CHECK: [[IF_THEN]]: +; CHECK-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i8, ptr [[DST_ADDR_09]], i64 4 +; CHECK-NEXT: store i32 [[TMP0]], ptr [[DST_ADDR_09]], align 4 +; CHECK-NEXT: br label %[[FOR_INC]] +; CHECK: [[FOR_INC]]: +; CHECK-NEXT: [[DST_ADDR_1]] = phi ptr [ [[INCDEC_PTR]], %[[IF_THEN]] ], [ [[DST_ADDR_09]], %[[FOR_BODY]] ] +; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]] +;
CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_COND_CLEANUP_LOOPEXIT]], label %[[FOR_BODY]] +; +entry: + %cmp8 = icmp sgt i32 %n, 0 + br i1 %cmp8, label %for.body.preheader, label %for.cond.cleanup + +for.body.preheader: + %wide.trip.count = zext nneg i32 %n to i64 + br label %for.body + +for.cond.cleanup.loopexit: + br label %for.cond.cleanup + +for.cond.cleanup: + ret void + +for.body: + %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.inc ] + %dst.addr.09 = phi ptr [ %dst, %for.body.preheader ], [ %dst.addr.1, %for.inc ] + %arrayidx = getelementptr inbounds i32, ptr %src, i64 %indvars.iv + %0 = load i32, ptr %arrayidx, align 4 + %cmp1 = icmp slt i32 %0, %c + br i1 %cmp1, label %if.then, label %for.inc + +if.then: + %incdec.ptr = getelementptr inbounds i8, ptr %dst.addr.09, i64 4 + store i32 %0, ptr %dst.addr.09, align 4 + br label %for.inc + +for.inc: + %dst.addr.1 = phi ptr [ %incdec.ptr, %if.then ], [ %dst.addr.09, %for.body ] + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count + br i1 %exitcond.not, label %for.cond.cleanup.loopexit, label %for.body +} + +define void @test_store_with_index(ptr writeonly noalias %dst, ptr readonly %src, i32 %c, i32 %n) { +; CHECK-LABEL: define void @test_store_with_index( +; CHECK-SAME: ptr noalias writeonly [[DST:%.*]], ptr readonly [[SRC:%.*]], i32 [[C:%.*]], i32 [[N:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[CMP11:%.*]] = icmp sgt i32 [[N]], 0 +; CHECK-NEXT: br i1 [[CMP11]], label %[[FOR_BODY_PREHEADER:.*]], label %[[FOR_COND_CLEANUP:.*]] +; CHECK: [[FOR_BODY_PREHEADER]]: +; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext nneg i32 [[N]] to i64 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 4 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], 4 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]] +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[C]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[TMP20:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[MONOTONIC_IV:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[MONOTONIC_ADD:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[TMP20]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4 +; CHECK-NEXT: [[TMP3:%.*]] = icmp slt <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP11:%.*]] = sext i32 [[MONOTONIC_IV]] to i64 +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[DST]], i64 [[TMP11]] +; CHECK-NEXT: call void @llvm.masked.compressstore.v4i32(<4 x i32> [[WIDE_LOAD]], ptr align 4 [[TMP12]], <4 x i1> [[TMP3]]) +; CHECK-NEXT: [[TMP8:%.*]] = zext <4 x i1> [[TMP3]] to <4 x i32> +; CHECK-NEXT: [[TMP16:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP8]]) +; CHECK-NEXT: [[TMP17:%.*]] = mul i32 [[TMP16]], 1 +; CHECK-NEXT: [[MONOTONIC_ADD]] = add nsw i32 [[MONOTONIC_IV]], [[TMP17]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP20]], 4 +; CHECK-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], 
label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label %[[FOR_COND_CLEANUP_LOOPEXIT:.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[MONOTONIC_ADD]], %[[MIDDLE_BLOCK]] ], [ 0, %[[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: br label %[[FOR_BODY:.*]] +; CHECK: [[FOR_COND_CLEANUP_LOOPEXIT]]: +; CHECK-NEXT: br label %[[FOR_COND_CLEANUP]] +; CHECK: [[FOR_COND_CLEANUP]]: +; CHECK-NEXT: ret void +; CHECK: [[FOR_BODY]]: +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_INC:.*]] ] +; CHECK-NEXT: [[IDX_012:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[IDX_1:%.*]], %[[FOR_INC]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[INDVARS_IV]] +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP0]], [[C]] +; CHECK-NEXT: br i1 [[CMP1]], label %[[IF_THEN:.*]], label %[[FOR_INC]] +; CHECK: [[IF_THEN]]: +; CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[IDX_012]], 1 +; CHECK-NEXT: [[IDXPROM4:%.*]] = sext i32 [[IDX_012]] to i64 +; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[IDXPROM4]] +; CHECK-NEXT: store i32 [[TMP0]], ptr [[ARRAYIDX5]], align 4 +; CHECK-NEXT: br label %[[FOR_INC]] +; CHECK: [[FOR_INC]]: +; CHECK-NEXT: [[IDX_1]] = phi i32 [ [[INC]], %[[IF_THEN]] ], [ [[IDX_012]], %[[FOR_BODY]] ] +; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_COND_CLEANUP_LOOPEXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; +entry: + %cmp11 = icmp sgt i32 %n, 0 + br i1 %cmp11, label %for.body.preheader, label %for.cond.cleanup + +for.body.preheader: + %wide.trip.count = zext nneg i32 %n to i64 + br label %for.body + +for.cond.cleanup.loopexit: + br label %for.cond.cleanup + +for.cond.cleanup: + ret void + +for.body: + %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.inc ] + %idx.012 = phi i32 [ 0, %for.body.preheader ], [ %idx.1, %for.inc ] + %arrayidx = getelementptr inbounds i32, ptr %src, i64 %indvars.iv + %0 = load i32, ptr %arrayidx, align 4 + %cmp1 = icmp slt i32 %0, %c + br i1 %cmp1, label %if.then, label %for.inc + +if.then: + %inc = add nsw i32 %idx.012, 1 + %idxprom4 = sext i32 %idx.012 to i64 + %arrayidx5 = getelementptr inbounds i32, ptr %dst, i64 %idxprom4 + store i32 %0, ptr %arrayidx5, align 4 + br label %for.inc + +for.inc: + %idx.1 = phi i32 [ %inc, %if.then ], [ %idx.012, %for.body ] + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count + br i1 %exitcond.not, label %for.cond.cleanup.loopexit, label %for.body +} + +define void @test_load_with_pointer(ptr noalias %dst, ptr readonly %src, i32 %c, i32 %n) { +; CHECK-LABEL: define void @test_load_with_pointer( +; CHECK-SAME: ptr noalias [[DST:%.*]], ptr readonly [[SRC:%.*]], i32 [[C:%.*]], i32 [[N:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[CMP8:%.*]] = icmp sgt i32 [[N]], 0 +; CHECK-NEXT: br i1 [[CMP8]], label %[[FOR_BODY_PREHEADER:.*]], label %[[FOR_COND_CLEANUP:.*]] +; CHECK: 
[[FOR_BODY_PREHEADER]]: +; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext nneg i32 [[N]] to i64 +; CHECK-NEXT: br label %[[FOR_BODY:.*]] +; CHECK: [[FOR_COND_CLEANUP_LOOPEXIT:.*]]: +; CHECK-NEXT: br label %[[FOR_COND_CLEANUP]] +; CHECK: [[FOR_COND_CLEANUP]]: +; CHECK-NEXT: ret void +; CHECK: [[FOR_BODY]]: +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[FOR_BODY_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_INC:.*]] ] +; CHECK-NEXT: [[SRC_ADDR_09:%.*]] = phi ptr [ [[SRC]], %[[FOR_BODY_PREHEADER]] ], [ [[SRC_ADDR_1:%.*]], %[[FOR_INC]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[INDVARS_IV]] +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP0]], [[C]] +; CHECK-NEXT: br i1 [[CMP1]], label %[[IF_THEN:.*]], label %[[FOR_INC]] +; CHECK: [[IF_THEN]]: +; CHECK-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i8, ptr [[SRC_ADDR_09]], i64 4 +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[SRC_ADDR_09]], align 4 +; CHECK-NEXT: store i32 [[TMP1]], ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: br label %[[FOR_INC]] +; CHECK: [[FOR_INC]]: +; CHECK-NEXT: [[SRC_ADDR_1]] = phi ptr [ [[INCDEC_PTR]], %[[IF_THEN]] ], [ [[SRC_ADDR_09]], %[[FOR_BODY]] ] +; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_COND_CLEANUP_LOOPEXIT]], label %[[FOR_BODY]] +; +entry: + %cmp8 = icmp sgt i32 %n, 0 + br i1 %cmp8, label %for.body.preheader, label %for.cond.cleanup + +for.body.preheader: + %wide.trip.count = zext nneg i32 %n to i64 + br label %for.body + +for.cond.cleanup.loopexit: + br label %for.cond.cleanup + +for.cond.cleanup: + ret void + +for.body: + %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.inc ] + %src.addr.09 = phi ptr [ %src, %for.body.preheader ], [ %src.addr.1, %for.inc ] + %arrayidx = getelementptr inbounds i32, ptr %dst, i64 %indvars.iv + %0 = load i32, ptr %arrayidx, align 4 + %cmp1 = icmp slt i32 %0, %c + br i1 %cmp1, label %if.then, label %for.inc + +if.then: + %incdec.ptr = getelementptr inbounds i8, ptr %src.addr.09, i64 4 + %1 = load i32, ptr %src.addr.09, align 4 + store i32 %1, ptr %arrayidx, align 4 + br label %for.inc + +for.inc: + %src.addr.1 = phi ptr [ %incdec.ptr, %if.then ], [ %src.addr.09, %for.body ] + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count + br i1 %exitcond.not, label %for.cond.cleanup.loopexit, label %for.body +} + +define void @test_load_with_index(ptr noalias %dst, ptr readonly %src, i32 %c, i32 %n) { +; CHECK-LABEL: define void @test_load_with_index( +; CHECK-SAME: ptr noalias [[DST:%.*]], ptr readonly [[SRC:%.*]], i32 [[C:%.*]], i32 [[N:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[CMP11:%.*]] = icmp sgt i32 [[N]], 0 +; CHECK-NEXT: br i1 [[CMP11]], label %[[FOR_BODY_PREHEADER:.*]], label %[[FOR_COND_CLEANUP:.*]] +; CHECK: [[FOR_BODY_PREHEADER]]: +; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext nneg i32 [[N]] to i64 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 4 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], 4 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]] +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x 
i32> poison, i32 [[C]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[MONOTONIC_IV:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[MONOTONIC_ADD:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP36:%.*]] = getelementptr i32, ptr [[DST]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP36]], align 4 +; CHECK-NEXT: [[TMP3:%.*]] = icmp slt <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP11:%.*]] = sext i32 [[MONOTONIC_IV]] to i64 +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[SRC]], i64 [[TMP11]] +; CHECK-NEXT: [[WIDE_MASKED_EXPAND_LOAD:%.*]] = call <4 x i32> @llvm.masked.expandload.v4i32(ptr align 4 [[TMP12]], <4 x i1> [[TMP3]], <4 x i32> poison) +; CHECK-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> [[WIDE_MASKED_EXPAND_LOAD]], ptr align 4 [[TMP36]], <4 x i1> [[TMP3]]) +; CHECK-NEXT: [[TMP29:%.*]] = zext <4 x i1> [[TMP3]] to <4 x i32> +; CHECK-NEXT: [[TMP31:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP29]]) +; CHECK-NEXT: [[TMP32:%.*]] = mul i32 [[TMP31]], 1 +; CHECK-NEXT: [[MONOTONIC_ADD]] = add nsw i32 [[MONOTONIC_IV]], [[TMP32]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-NEXT: [[TMP34:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP34]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label %[[FOR_COND_CLEANUP_LOOPEXIT:.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[MONOTONIC_ADD]], %[[MIDDLE_BLOCK]] ], [ 0, %[[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: br label %[[FOR_BODY:.*]] +; CHECK: [[FOR_COND_CLEANUP_LOOPEXIT]]: +; CHECK-NEXT: br label %[[FOR_COND_CLEANUP]] +; CHECK: [[FOR_COND_CLEANUP]]: +; CHECK-NEXT: ret void +; CHECK: [[FOR_BODY]]: +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_INC:.*]] ] +; CHECK-NEXT: [[IDX_012:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[IDX_1:%.*]], %[[FOR_INC]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[INDVARS_IV]] +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP0]], [[C]] +; CHECK-NEXT: br i1 [[CMP1]], label %[[IF_THEN:.*]], label %[[FOR_INC]] +; CHECK: [[IF_THEN]]: +; CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[IDX_012]], 1 +; CHECK-NEXT: [[IDXPROM2:%.*]] = sext i32 [[IDX_012]] to i64 +; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[IDXPROM2]] +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX3]], align 4 +; CHECK-NEXT: store i32 [[TMP1]], ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: br label %[[FOR_INC]] +; CHECK: [[FOR_INC]]: +; CHECK-NEXT: [[IDX_1]] = phi i32 [ [[INC]], %[[IF_THEN]] ], [ [[IDX_012]], %[[FOR_BODY]] ] +; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label 
%[[FOR_COND_CLEANUP_LOOPEXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; +entry: + %cmp11 = icmp sgt i32 %n, 0 + br i1 %cmp11, label %for.body.preheader, label %for.cond.cleanup + +for.body.preheader: + %wide.trip.count = zext nneg i32 %n to i64 + br label %for.body + +for.cond.cleanup.loopexit: + br label %for.cond.cleanup + +for.cond.cleanup: + ret void + +for.body: + %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.inc ] + %idx.012 = phi i32 [ 0, %for.body.preheader ], [ %idx.1, %for.inc ] + %arrayidx = getelementptr inbounds i32, ptr %dst, i64 %indvars.iv + %0 = load i32, ptr %arrayidx, align 4 + %cmp1 = icmp slt i32 %0, %c + br i1 %cmp1, label %if.then, label %for.inc + +if.then: + %inc = add nsw i32 %idx.012, 1 + %idxprom2 = sext i32 %idx.012 to i64 + %arrayidx3 = getelementptr inbounds i32, ptr %src, i64 %idxprom2 + %1 = load i32, ptr %arrayidx3, align 4 + store i32 %1, ptr %arrayidx, align 4 + br label %for.inc + +for.inc: + %idx.1 = phi i32 [ %inc, %if.then ], [ %idx.012, %for.body ] + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count + br i1 %exitcond.not, label %for.cond.cleanup.loopexit, label %for.body +} + +define void @test_store_value(ptr writeonly noalias %dst, ptr readonly %src, i32 %c, i32 %n) { +; CHECK-LABEL: define void @test_store_value( +; CHECK-SAME: ptr noalias writeonly [[DST:%.*]], ptr readonly [[SRC:%.*]], i32 [[C:%.*]], i32 [[N:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[N]], 0 +; CHECK-NEXT: br i1 [[CMP5]], label %[[FOR_BODY_PREHEADER:.*]], label %[[FOR_COND_CLEANUP:.*]] +; CHECK: [[FOR_BODY_PREHEADER]]: +; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext nneg i32 [[N]] to i64 +; CHECK-NEXT: br label %[[FOR_BODY:.*]] +; CHECK: [[FOR_COND_CLEANUP_LOOPEXIT:.*]]: +; CHECK-NEXT: br label %[[FOR_COND_CLEANUP]] +; CHECK: [[FOR_COND_CLEANUP]]: +; CHECK-NEXT: ret void +; CHECK: [[FOR_BODY]]: +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[FOR_BODY_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_INC:.*]] ] +; CHECK-NEXT: [[IDX_06:%.*]] = phi i32 [ 0, %[[FOR_BODY_PREHEADER]] ], [ [[IDX_1:%.*]], %[[FOR_INC]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[INDVARS_IV]] +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP0]], [[C]] +; CHECK-NEXT: br i1 [[CMP1]], label %[[IF_THEN:.*]], label %[[FOR_INC]] +; CHECK: [[IF_THEN]]: +; CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[IDX_06]], 1 +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[INDVARS_IV]] +; CHECK-NEXT: store i32 [[IDX_06]], ptr [[ARRAYIDX2]], align 4 +; CHECK-NEXT: br label %[[FOR_INC]] +; CHECK: [[FOR_INC]]: +; CHECK-NEXT: [[IDX_1]] = phi i32 [ [[INC]], %[[IF_THEN]] ], [ [[IDX_06]], %[[FOR_BODY]] ] +; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_COND_CLEANUP_LOOPEXIT]], label %[[FOR_BODY]] +; +entry: + %cmp5 = icmp sgt i32 %n, 0 + br i1 %cmp5, label %for.body.preheader, label %for.cond.cleanup + +for.body.preheader: + %wide.trip.count = zext nneg i32 %n to i64 + br label %for.body + +for.cond.cleanup.loopexit: + br label %for.cond.cleanup + +for.cond.cleanup: + ret void + +for.body: + %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.inc ] + %idx.06 = 
phi i32 [ 0, %for.body.preheader ], [ %idx.1, %for.inc ] + %arrayidx = getelementptr inbounds i32, ptr %src, i64 %indvars.iv + %0 = load i32, ptr %arrayidx, align 4 + %cmp1 = icmp slt i32 %0, %c + br i1 %cmp1, label %if.then, label %for.inc + +if.then: + %inc = add nsw i32 %idx.06, 1 + %arrayidx2 = getelementptr inbounds i32, ptr %dst, i64 %indvars.iv + store i32 %idx.06, ptr %arrayidx2, align 4 + br label %for.inc + +for.inc: + %idx.1 = phi i32 [ %inc, %if.then ], [ %idx.06, %for.body ] + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count + br i1 %exitcond.not, label %for.cond.cleanup.loopexit, label %for.body +} + +define i32 @test_multiple_uses(ptr writeonly noalias %dst, ptr readonly %src, i32 %c, i32 %n) { +; CHECK-LABEL: define i32 @test_multiple_uses( +; CHECK-SAME: ptr noalias writeonly [[DST:%.*]], ptr readonly [[SRC:%.*]], i32 [[C:%.*]], i32 [[N:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[CMP12:%.*]] = icmp sgt i32 [[N]], 0 +; CHECK-NEXT: br i1 [[CMP12]], label %[[FOR_BODY_PREHEADER:.*]], label %[[FOR_COND_CLEANUP:.*]] +; CHECK: [[FOR_BODY_PREHEADER]]: +; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext nneg i32 [[N]] to i64 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 4 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], 4 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]] +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[C]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[TMP1:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[MONOTONIC_IV:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[MONOTONIC_ADD:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[TMP1]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4 +; CHECK-NEXT: [[TMP4:%.*]] = icmp slt <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP5:%.*]] = sext i32 [[MONOTONIC_IV]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[DST]], i64 [[TMP5]] +; CHECK-NEXT: call void @llvm.masked.compressstore.v4i32(<4 x i32> [[WIDE_LOAD]], ptr align 4 [[TMP6]], <4 x i1> [[TMP4]]) +; CHECK-NEXT: [[TMP8:%.*]] = zext <4 x i1> [[TMP4]] to <4 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP8]]) +; CHECK-NEXT: [[TMP11:%.*]] = mul i32 [[TMP10]], 1 +; CHECK-NEXT: [[MONOTONIC_ADD]] = add nsw i32 [[MONOTONIC_IV]], [[TMP11]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP1]], 4 +; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label %[[FOR_COND_CLEANUP_LOOPEXIT:.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[MONOTONIC_ADD]], %[[MIDDLE_BLOCK]] ], [ 0, 
%[[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: br label %[[FOR_BODY:.*]] +; CHECK: [[FOR_COND_CLEANUP_LOOPEXIT]]: +; CHECK-NEXT: [[IDX_1_LCSSA:%.*]] = phi i32 [ [[IDX_1:%.*]], %[[FOR_INC:.*]] ], [ [[MONOTONIC_ADD]], %[[MIDDLE_BLOCK]] ] +; CHECK-NEXT: br label %[[FOR_COND_CLEANUP]] +; CHECK: [[FOR_COND_CLEANUP]]: +; CHECK-NEXT: [[IDX_0_LCSSA:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IDX_1_LCSSA]], %[[FOR_COND_CLEANUP_LOOPEXIT]] ] +; CHECK-NEXT: ret i32 [[IDX_0_LCSSA]] +; CHECK: [[FOR_BODY]]: +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_INC]] ] +; CHECK-NEXT: [[IDX_013:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[IDX_1]], %[[FOR_INC]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[INDVARS_IV]] +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP0]], [[C]] +; CHECK-NEXT: br i1 [[CMP1]], label %[[IF_THEN:.*]], label %[[FOR_INC]] +; CHECK: [[IF_THEN]]: +; CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[IDX_013]], 1 +; CHECK-NEXT: [[IDXPROM4:%.*]] = sext i32 [[IDX_013]] to i64 +; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[IDXPROM4]] +; CHECK-NEXT: store i32 [[TMP0]], ptr [[ARRAYIDX5]], align 4 +; CHECK-NEXT: br label %[[FOR_INC]] +; CHECK: [[FOR_INC]]: +; CHECK-NEXT: [[IDX_1]] = phi i32 [ [[INC]], %[[IF_THEN]] ], [ [[IDX_013]], %[[FOR_BODY]] ] +; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_COND_CLEANUP_LOOPEXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] +; +entry: + %cmp12 = icmp sgt i32 %n, 0 + br i1 %cmp12, label %for.body.preheader, label %for.cond.cleanup + +for.body.preheader: + %wide.trip.count = zext nneg i32 %n to i64 + br label %for.body + +for.cond.cleanup.loopexit: + %idx.1.lcssa = phi i32 [ %idx.1, %for.inc ] + br label %for.cond.cleanup + +for.cond.cleanup: + %idx.0.lcssa = phi i32 [ 0, %entry ], [ %idx.1.lcssa, %for.cond.cleanup.loopexit ] + ret i32 %idx.0.lcssa + +for.body: + %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.inc ] + %idx.013 = phi i32 [ 0, %for.body.preheader ], [ %idx.1, %for.inc ] + %arrayidx = getelementptr inbounds i32, ptr %src, i64 %indvars.iv + %0 = load i32, ptr %arrayidx, align 4 + %cmp1 = icmp slt i32 %0, %c + br i1 %cmp1, label %if.then, label %for.inc + +if.then: + %inc = add nsw i32 %idx.013, 1 + %idxprom4 = sext i32 %idx.013 to i64 + %arrayidx5 = getelementptr inbounds i32, ptr %dst, i64 %idxprom4 + store i32 %0, ptr %arrayidx5, align 4 + br label %for.inc + +for.inc: + %idx.1 = phi i32 [ %inc, %if.then ], [ %idx.013, %for.body ] + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count + br i1 %exitcond.not, label %for.cond.cleanup.loopexit, label %for.body +} + +define i32 @test_pre_increment(ptr writeonly noalias %dst, ptr readonly %src, i32 %c, i32 %n) { +; CHECK-LABEL: define i32 @test_pre_increment( +; CHECK-SAME: ptr noalias writeonly [[DST:%.*]], ptr readonly [[SRC:%.*]], i32 [[C:%.*]], i32 [[N:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[CMP12:%.*]] = icmp sgt i32 [[N]], 0 +; CHECK-NEXT: br i1 [[CMP12]], label %[[FOR_BODY_PREHEADER:.*]], label %[[FOR_COND_CLEANUP:.*]] +; CHECK: [[FOR_BODY_PREHEADER]]: +; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext nneg i32 [[N]] to 
i64 +; CHECK-NEXT: br label %[[FOR_BODY:.*]] +; CHECK: [[FOR_COND_CLEANUP_LOOPEXIT:.*]]: +; CHECK-NEXT: [[IDX_1_LCSSA:%.*]] = phi i32 [ [[IDX_1:%.*]], %[[FOR_INC:.*]] ] +; CHECK-NEXT: br label %[[FOR_COND_CLEANUP]] +; CHECK: [[FOR_COND_CLEANUP]]: +; CHECK-NEXT: [[IDX_0_LCSSA:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IDX_1_LCSSA]], %[[FOR_COND_CLEANUP_LOOPEXIT]] ] +; CHECK-NEXT: ret i32 [[IDX_0_LCSSA]] +; CHECK: [[FOR_BODY]]: +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[FOR_BODY_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_INC]] ] +; CHECK-NEXT: [[IDX_013:%.*]] = phi i32 [ 0, %[[FOR_BODY_PREHEADER]] ], [ [[IDX_1]], %[[FOR_INC]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[INDVARS_IV]] +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP0]], [[C]] +; CHECK-NEXT: br i1 [[CMP1]], label %[[IF_THEN:.*]], label %[[FOR_INC]] +; CHECK: [[IF_THEN]]: +; CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[IDX_013]], 1 +; CHECK-NEXT: [[IDXPROM4:%.*]] = sext i32 [[INC]] to i64 +; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[IDXPROM4]] +; CHECK-NEXT: store i32 [[TMP0]], ptr [[ARRAYIDX5]], align 4 +; CHECK-NEXT: br label %[[FOR_INC]] +; CHECK: [[FOR_INC]]: +; CHECK-NEXT: [[IDX_1]] = phi i32 [ [[INC]], %[[IF_THEN]] ], [ [[IDX_013]], %[[FOR_BODY]] ] +; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_COND_CLEANUP_LOOPEXIT]], label %[[FOR_BODY]] +; +entry: + %cmp12 = icmp sgt i32 %n, 0 + br i1 %cmp12, label %for.body.preheader, label %for.cond.cleanup + +for.body.preheader: + %wide.trip.count = zext nneg i32 %n to i64 + br label %for.body + +for.cond.cleanup.loopexit: + %idx.1.lcssa = phi i32 [ %idx.1, %for.inc ] + br label %for.cond.cleanup + +for.cond.cleanup: + %idx.0.lcssa = phi i32 [ 0, %entry ], [ %idx.1.lcssa, %for.cond.cleanup.loopexit ] + ret i32 %idx.0.lcssa + +for.body: + %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.inc ] + %idx.013 = phi i32 [ 0, %for.body.preheader ], [ %idx.1, %for.inc ] + %arrayidx = getelementptr inbounds i32, ptr %src, i64 %indvars.iv + %0 = load i32, ptr %arrayidx, align 4 + %cmp1 = icmp slt i32 %0, %c + br i1 %cmp1, label %if.then, label %for.inc + +if.then: + %inc = add nsw i32 %idx.013, 1 + %idxprom4 = sext i32 %inc to i64 + %arrayidx5 = getelementptr inbounds i32, ptr %dst, i64 %idxprom4 + store i32 %0, ptr %arrayidx5, align 4 + br label %for.inc + +for.inc: + %idx.1 = phi i32 [ %inc, %if.then ], [ %idx.013, %for.body ] + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count + br i1 %exitcond.not, label %for.cond.cleanup.loopexit, label %for.body +} +;. +; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]} +; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1} +; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"} +; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]} +; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]} +; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META2]], [[META1]]} +; CHECK: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]], [[META2]]} +; CHECK: [[LOOP7]] = distinct !{[[LOOP7]], [[META2]], [[META1]]} +;. 
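For reference, every function in compress-idioms.ll above exercises the same scalar shape, sketched here as a minimal C++ illustration (not taken from the patch; names are made up). The destination index or pointer only advances on iterations where the predicate holds, which is what the new monotonic-phi recognition targets and what llvm.masked.compressstore / llvm.masked.expandload implement after vectorization.

    // Minimal sketch of the compress-store idiom (cf. test_store_with_index).
    void compressStore(int *Dst, const int *Src, int C, int N) {
      int Idx = 0;                 // monotonic phi: only grows when the mask is true
      for (int I = 0; I < N; ++I) {
        int V = Src[I];
        if (V < C)
          Dst[Idx++] = V;          // vectorized as a masked compress-store
      }
    }
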
diff --git a/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp b/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp
index c1791dfa5b761..dba607a5061a7 100644
--- a/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp
+++ b/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp
@@ -1132,7 +1132,7 @@ TEST_F(VPRecipeTest, CastVPWidenMemoryRecipeToVPUserAndVPDef) {
       new LoadInst(Int32, PoisonValue::get(Int32Ptr), "", false, Align(1));
   VPValue *Addr = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 1));
   VPValue *Mask = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 2));
-  VPWidenLoadRecipe Recipe(*Load, Addr, Mask, true, false, {}, {});
+  VPWidenLoadRecipe Recipe(*Load, Addr, Mask, true, false, false, {}, {});
   EXPECT_TRUE(isa<VPUser>(&Recipe));
   VPRecipeBase *BaseR = &Recipe;
   EXPECT_TRUE(isa<VPUser>(BaseR));
@@ -1249,7 +1249,7 @@ TEST_F(VPRecipeTest, MayHaveSideEffectsAndMayReadWriteMemory) {
        new LoadInst(Int32, PoisonValue::get(Int32Ptr), "", false, Align(1));
    VPValue *Mask = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 1));
    VPValue *Addr = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 2));
-    VPWidenLoadRecipe Recipe(*Load, Addr, Mask, true, false, {}, {});
+    VPWidenLoadRecipe Recipe(*Load, Addr, Mask, true, false, false, {}, {});
    EXPECT_FALSE(Recipe.mayHaveSideEffects());
    EXPECT_TRUE(Recipe.mayReadFromMemory());
    EXPECT_FALSE(Recipe.mayWriteToMemory());
@@ -1263,8 +1263,8 @@ TEST_F(VPRecipeTest, MayHaveSideEffectsAndMayReadWriteMemory) {
    VPValue *Mask = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 1));
    VPValue *Addr = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 2));
    VPValue *StoredV = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 3));
-    VPWidenStoreRecipe Recipe(*Store, Addr, StoredV, Mask, false, false, {},
-                              {});
+    VPWidenStoreRecipe Recipe(*Store, Addr, StoredV, Mask, false, false, false,
+                              {}, {});
    EXPECT_TRUE(Recipe.mayHaveSideEffects());
    EXPECT_FALSE(Recipe.mayReadFromMemory());
    EXPECT_TRUE(Recipe.mayWriteToMemory());
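For reference, the scalar effect of the compute-monotonic-result step generated above can be modeled roughly as follows (a minimal C++ sketch, assuming UF of 1 and a fixed VF of 4; names are made up). Per vector iteration the monotonic value advances by the number of active mask lanes times the step, matching the zext + vector.reduce.add + mul + add (or ptradd) sequence emitted in VPInstruction::generate.

    #include <array>
    #include <cstdint>

    // Models one vector iteration's update of the monotonic running value.
    int64_t advanceMonotonic(int64_t Phi, const std::array<bool, 4> &Mask,
                             int64_t Step) {
      int64_t Active = 0;
      for (bool M : Mask)           // models zext <4 x i1> + llvm.vector.reduce.add
        Active += M ? 1 : 0;
      return Phi + Active * Step;   // models the final mul + add / ptradd
    }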