diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 62a81ba01a5b6..7a2c92302bb4a 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -1228,6 +1228,30 @@ class LoopVectorizationCostModel { /// Superset of instructions that return true for isScalarWithPredication. bool isPredicatedInst(Instruction *I) const; + /// A helper function that returns how much we should divide the cost of a + /// predicated block by. Typically this is the reciprocal of the block + /// probability, i.e. if we return X we are assuming the predicated block will + /// execute once for every X iterations of the loop header so the block should + /// only contribute 1/X of its cost to the total cost calculation, but when + /// optimizing for code size it will just be 1 as code size costs don't depend + /// on execution probabilities. + /// + /// TODO: We should use actual block probability here, if available. + /// Currently, we always assume predicated blocks have a 50% chance of + /// executing, apart from blocks that are only predicated due to tail folding. + inline unsigned + getPredBlockCostDivisor(TargetTransformInfo::TargetCostKind CostKind, + BasicBlock *BB) const { + // If a block wasn't originally predicated but was predicated due to + // e.g. tail folding, don't divide the cost. Tail folded loops may still be + // predicated in the final vector loop iteration, but for most loops that + // don't have low trip counts we can expect their probability to be close to + // zero. + if (!Legal->blockNeedsPredication(BB)) + return 1; + return CostKind == TTI::TCK_CodeSize ? 1 : 2; + } + /// Return the costs for our two available strategies for lowering a /// div/rem operation which requires speculating at least one lane. /// First result is for scalarization (will be invalid for scalable @@ -2883,7 +2907,8 @@ LoopVectorizationCostModel::getDivRemSpeculationCost(Instruction *I, // Scale the cost by the probability of executing the predicated blocks. // This assumes the predicated block for each vector lane is equally // likely. - ScalarizationCost = ScalarizationCost / getPredBlockCostDivisor(CostKind); + ScalarizationCost = + ScalarizationCost / getPredBlockCostDivisor(CostKind, I->getParent()); } InstructionCost SafeDivisorCost = 0; @@ -5015,7 +5040,7 @@ InstructionCost LoopVectorizationCostModel::computePredInstDiscount( } // Scale the total scalar cost by block probability. - ScalarCost /= getPredBlockCostDivisor(CostKind); + ScalarCost /= getPredBlockCostDivisor(CostKind, I->getParent()); // Compute the discount. A non-negative discount means the vector version // of the instruction costs more, and scalarizing would be beneficial. @@ -5065,10 +5090,11 @@ InstructionCost LoopVectorizationCostModel::expectedCost(ElementCount VF) { // stores and instructions that may divide by zero) will now be // unconditionally executed. For the scalar case, we may not always execute // the predicated block, if it is an if-else block. Thus, scale the block's - // cost by the probability of executing it. blockNeedsPredication from - // Legal is used so as to not include all blocks in tail folded loops. - if (VF.isScalar() && Legal->blockNeedsPredication(BB)) - BlockCost /= getPredBlockCostDivisor(CostKind); + // cost by the probability of executing it. + // getPredBlockCostDivisor won't include blocks that are only predicated due + // to tail folded loops + if (VF.isScalar()) + BlockCost /= getPredBlockCostDivisor(CostKind, BB); Cost += BlockCost; } @@ -5147,7 +5173,7 @@ LoopVectorizationCostModel::getMemInstScalarizationCost(Instruction *I, // conditional branches, but may not be executed for each vector lane. Scale // the cost by the probability of executing the predicated block. if (isPredicatedInst(I)) { - Cost /= getPredBlockCostDivisor(CostKind); + Cost /= getPredBlockCostDivisor(CostKind, I->getParent()); // Add the cost of an i1 extract and a branch auto *VecI1Ty = @@ -6710,6 +6736,11 @@ bool VPCostContext::skipCostComputation(Instruction *UI, bool IsVector) const { SkipCostComputation.contains(UI); } +unsigned VPCostContext::getPredBlockCostDivisor( + TargetTransformInfo::TargetCostKind CostKind, BasicBlock *BB) const { + return CM.getPredBlockCostDivisor(CostKind, BB); +} + InstructionCost LoopVectorizationPlanner::precomputeCosts(VPlan &Plan, ElementCount VF, VPCostContext &CostCtx) const { diff --git a/llvm/lib/Transforms/Vectorize/VPlanHelpers.h b/llvm/lib/Transforms/Vectorize/VPlanHelpers.h index 1580a3be3180a..f0c2e5e4303db 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanHelpers.h +++ b/llvm/lib/Transforms/Vectorize/VPlanHelpers.h @@ -50,21 +50,6 @@ Value *getRuntimeVF(IRBuilderBase &B, Type *Ty, ElementCount VF); Value *createStepForVF(IRBuilderBase &B, Type *Ty, ElementCount VF, int64_t Step); -/// A helper function that returns how much we should divide the cost of a -/// predicated block by. Typically this is the reciprocal of the block -/// probability, i.e. if we return X we are assuming the predicated block will -/// execute once for every X iterations of the loop header so the block should -/// only contribute 1/X of its cost to the total cost calculation, but when -/// optimizing for code size it will just be 1 as code size costs don't depend -/// on execution probabilities. -/// -/// TODO: We should use actual block probability here, if available. Currently, -/// we always assume predicated blocks have a 50% chance of executing. -inline unsigned -getPredBlockCostDivisor(TargetTransformInfo::TargetCostKind CostKind) { - return CostKind == TTI::TCK_CodeSize ? 1 : 2; -} - /// A range of powers-of-2 vectorization factors with fixed start and /// adjustable end. The range includes start and excludes end, e.g.,: /// [1, 16) = {1, 2, 4, 8} @@ -366,6 +351,11 @@ struct VPCostContext { /// has already been pre-computed. bool skipCostComputation(Instruction *UI, bool IsVector) const; + /// \returns how much the cost of a predicated block should be divided by. + /// Forwards to LoopVectorizationCostModel::getPredBlockCostDivisor. + unsigned getPredBlockCostDivisor(TargetTransformInfo::TargetCostKind CostKind, + BasicBlock *BB) const; + /// Returns the OperandInfo for \p V, if it is a live-in. TargetTransformInfo::OperandValueInfo getOperandInfo(VPValue *V) const; diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index 775837f84e2ef..4237965f234ad 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -3277,7 +3277,7 @@ InstructionCost VPReplicateRecipe::computeCost(ElementCount VF, // Scale the cost by the probability of executing the predicated blocks. // This assumes the predicated block for each vector lane is equally // likely. - ScalarCost /= getPredBlockCostDivisor(Ctx.CostKind); + ScalarCost /= Ctx.getPredBlockCostDivisor(Ctx.CostKind, UI->getParent()); return ScalarCost; } case Instruction::Load: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll index 6cf11be0e11f7..67d2ac7d45a9b 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll @@ -613,63 +613,18 @@ exit: define void @low_trip_count_fold_tail_scalarized_store(ptr %dst) { ; COMMON-LABEL: define void @low_trip_count_fold_tail_scalarized_store( ; COMMON-SAME: ptr [[DST:%.*]]) { -; COMMON-NEXT: [[ENTRY:.*:]] -; COMMON-NEXT: br label %[[VECTOR_PH:.*]] -; COMMON: [[VECTOR_PH]]: -; COMMON-NEXT: br label %[[VECTOR_BODY:.*]] -; COMMON: [[VECTOR_BODY]]: -; COMMON-NEXT: br i1 true, label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] -; COMMON: [[PRED_STORE_IF]]: -; COMMON-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[DST]], i64 0 -; COMMON-NEXT: store i8 0, ptr [[TMP0]], align 1 -; COMMON-NEXT: br label %[[PRED_STORE_CONTINUE]] -; COMMON: [[PRED_STORE_CONTINUE]]: -; COMMON-NEXT: br i1 true, label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2:.*]] -; COMMON: [[PRED_STORE_IF1]]: -; COMMON-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[DST]], i64 1 -; COMMON-NEXT: store i8 1, ptr [[TMP1]], align 1 -; COMMON-NEXT: br label %[[PRED_STORE_CONTINUE2]] -; COMMON: [[PRED_STORE_CONTINUE2]]: -; COMMON-NEXT: br i1 true, label %[[PRED_STORE_IF3:.*]], label %[[PRED_STORE_CONTINUE4:.*]] -; COMMON: [[PRED_STORE_IF3]]: -; COMMON-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[DST]], i64 2 -; COMMON-NEXT: store i8 2, ptr [[TMP2]], align 1 -; COMMON-NEXT: br label %[[PRED_STORE_CONTINUE4]] -; COMMON: [[PRED_STORE_CONTINUE4]]: -; COMMON-NEXT: br i1 true, label %[[PRED_STORE_IF5:.*]], label %[[PRED_STORE_CONTINUE6:.*]] -; COMMON: [[PRED_STORE_IF5]]: -; COMMON-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[DST]], i64 3 -; COMMON-NEXT: store i8 3, ptr [[TMP3]], align 1 -; COMMON-NEXT: br label %[[PRED_STORE_CONTINUE6]] -; COMMON: [[PRED_STORE_CONTINUE6]]: -; COMMON-NEXT: br i1 true, label %[[PRED_STORE_IF7:.*]], label %[[PRED_STORE_CONTINUE8:.*]] -; COMMON: [[PRED_STORE_IF7]]: -; COMMON-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[DST]], i64 4 -; COMMON-NEXT: store i8 4, ptr [[TMP4]], align 1 -; COMMON-NEXT: br label %[[PRED_STORE_CONTINUE8]] -; COMMON: [[PRED_STORE_CONTINUE8]]: -; COMMON-NEXT: br i1 true, label %[[PRED_STORE_IF9:.*]], label %[[PRED_STORE_CONTINUE10:.*]] -; COMMON: [[PRED_STORE_IF9]]: -; COMMON-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[DST]], i64 5 -; COMMON-NEXT: store i8 5, ptr [[TMP5]], align 1 -; COMMON-NEXT: br label %[[PRED_STORE_CONTINUE10]] -; COMMON: [[PRED_STORE_CONTINUE10]]: -; COMMON-NEXT: br i1 true, label %[[PRED_STORE_IF11:.*]], label %[[PRED_STORE_CONTINUE12:.*]] -; COMMON: [[PRED_STORE_IF11]]: -; COMMON-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[DST]], i64 6 -; COMMON-NEXT: store i8 6, ptr [[TMP6]], align 1 -; COMMON-NEXT: br label %[[PRED_STORE_CONTINUE12]] -; COMMON: [[PRED_STORE_CONTINUE12]]: -; COMMON-NEXT: br i1 false, label %[[PRED_STORE_IF13:.*]], label %[[EXIT1:.*]] -; COMMON: [[PRED_STORE_IF13]]: -; COMMON-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[DST]], i64 7 -; COMMON-NEXT: store i8 7, ptr [[TMP7]], align 1 -; COMMON-NEXT: br label %[[EXIT1]] -; COMMON: [[EXIT1]]: -; COMMON-NEXT: br label %[[SCALAR_PH1:.*]] -; COMMON: [[SCALAR_PH1]]: -; COMMON-NEXT: br [[EXIT:label %.*]] -; COMMON: [[SCALAR_PH:.*:]] +; COMMON-NEXT: [[ENTRY:.*]]: +; COMMON-NEXT: br label %[[EXIT:.*]] +; COMMON: [[EXIT]]: +; COMMON-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[EXIT]] ] +; COMMON-NEXT: [[IV_TRUNC:%.*]] = trunc i64 [[IV]] to i8 +; COMMON-NEXT: [[GEP:%.*]] = getelementptr i8, ptr [[DST]], i64 [[IV]] +; COMMON-NEXT: store i8 [[IV_TRUNC]], ptr [[GEP]], align 1 +; COMMON-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 +; COMMON-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 7 +; COMMON-NEXT: br i1 [[EC]], label %[[SCALAR_PH:.*]], label %[[EXIT]] +; COMMON: [[SCALAR_PH]]: +; COMMON-NEXT: ret void ; entry: br label %loop diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll b/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll index fd6e275d098ca..3b9d86a39ed77 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll @@ -271,69 +271,11 @@ define void @iv_trunc(i32 %x, ptr %dst, i64 %N) #0 { ; ; PRED-LABEL: define void @iv_trunc( ; PRED-SAME: i32 [[X:%.*]], ptr [[DST:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { -; PRED-NEXT: [[ENTRY:.*:]] +; PRED-NEXT: [[ENTRY:.*]]: ; PRED-NEXT: [[MUL_X:%.*]] = add i32 [[X]], 1 -; PRED-NEXT: [[TMP0:%.*]] = add i64 [[N]], 1 -; PRED-NEXT: br label %[[VECTOR_SCEVCHECK:.*]] -; PRED: [[VECTOR_SCEVCHECK]]: -; PRED-NEXT: [[TMP1:%.*]] = sub i32 -1, [[X]] -; PRED-NEXT: [[TMP2:%.*]] = icmp slt i32 [[MUL_X]], 0 -; PRED-NEXT: [[TMP3:%.*]] = select i1 [[TMP2]], i32 [[TMP1]], i32 [[MUL_X]] -; PRED-NEXT: [[TMP4:%.*]] = trunc i64 [[N]] to i32 -; PRED-NEXT: [[MUL:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 [[TMP3]], i32 [[TMP4]]) -; PRED-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i32, i1 } [[MUL]], 0 -; PRED-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i32, i1 } [[MUL]], 1 -; PRED-NEXT: [[TMP5:%.*]] = sub i32 0, [[MUL_RESULT]] -; PRED-NEXT: [[TMP6:%.*]] = icmp ugt i32 [[TMP5]], 0 -; PRED-NEXT: [[TMP7:%.*]] = select i1 [[TMP2]], i1 [[TMP6]], i1 false -; PRED-NEXT: [[TMP8:%.*]] = or i1 [[TMP7]], [[MUL_OVERFLOW]] -; PRED-NEXT: [[TMP9:%.*]] = icmp ugt i64 [[N]], 4294967295 -; PRED-NEXT: [[TMP10:%.*]] = icmp ne i32 [[MUL_X]], 0 -; PRED-NEXT: [[TMP11:%.*]] = and i1 [[TMP9]], [[TMP10]] -; PRED-NEXT: [[TMP12:%.*]] = or i1 [[TMP8]], [[TMP11]] -; PRED-NEXT: br i1 [[TMP12]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] -; PRED: [[VECTOR_PH]]: -; PRED-NEXT: [[TMP13:%.*]] = sub i64 [[TMP0]], 2 -; PRED-NEXT: [[TMP14:%.*]] = icmp ugt i64 [[TMP0]], 2 -; PRED-NEXT: [[TMP15:%.*]] = select i1 [[TMP14]], i64 [[TMP13]], i64 0 -; PRED-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i64(i64 0, i64 [[TMP0]]) -; PRED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[MUL_X]], i64 0 -; PRED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer -; PRED-NEXT: br label %[[VECTOR_BODY:.*]] -; PRED: [[VECTOR_BODY]]: -; PRED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE2:.*]] ] -; PRED-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <2 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], %[[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %[[PRED_STORE_CONTINUE2]] ] -; PRED-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE2]] ] -; PRED-NEXT: [[TMP16:%.*]] = mul <2 x i32> [[BROADCAST_SPLAT]], [[VEC_IND]] -; PRED-NEXT: [[TMP17:%.*]] = zext <2 x i32> [[TMP16]] to <2 x i64> -; PRED-NEXT: [[TMP18:%.*]] = extractelement <2 x i1> [[ACTIVE_LANE_MASK]], i32 0 -; PRED-NEXT: br i1 [[TMP18]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] -; PRED: [[PRED_STORE_IF]]: -; PRED-NEXT: [[TMP19:%.*]] = extractelement <2 x i64> [[TMP17]], i32 0 -; PRED-NEXT: [[TMP20:%.*]] = getelementptr i32, ptr [[DST]], i64 [[TMP19]] -; PRED-NEXT: store i32 1, ptr [[TMP20]], align 4 -; PRED-NEXT: br label %[[PRED_STORE_CONTINUE]] -; PRED: [[PRED_STORE_CONTINUE]]: -; PRED-NEXT: [[TMP21:%.*]] = extractelement <2 x i1> [[ACTIVE_LANE_MASK]], i32 1 -; PRED-NEXT: br i1 [[TMP21]], label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2]] -; PRED: [[PRED_STORE_IF1]]: -; PRED-NEXT: [[TMP22:%.*]] = extractelement <2 x i64> [[TMP17]], i32 1 -; PRED-NEXT: [[TMP23:%.*]] = getelementptr i32, ptr [[DST]], i64 [[TMP22]] -; PRED-NEXT: store i32 1, ptr [[TMP23]], align 4 -; PRED-NEXT: br label %[[PRED_STORE_CONTINUE2]] -; PRED: [[PRED_STORE_CONTINUE2]]: -; PRED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 2 -; PRED-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i64(i64 [[INDEX]], i64 [[TMP15]]) -; PRED-NEXT: [[TMP24:%.*]] = extractelement <2 x i1> [[ACTIVE_LANE_MASK_NEXT]], i32 0 -; PRED-NEXT: [[TMP25:%.*]] = xor i1 [[TMP24]], true -; PRED-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2) -; PRED-NEXT: br i1 [[TMP25]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] -; PRED: [[MIDDLE_BLOCK]]: -; PRED-NEXT: br label %[[EXIT:.*]] -; PRED: [[SCALAR_PH]]: ; PRED-NEXT: br label %[[FOR_BODY:.*]] ; PRED: [[FOR_BODY]]: -; PRED-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ] +; PRED-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ] ; PRED-NEXT: [[TRUNC_IV:%.*]] = trunc i64 [[IV]] to i32 ; PRED-NEXT: [[ADD_I:%.*]] = mul i32 [[MUL_X]], [[TRUNC_IV]] ; PRED-NEXT: [[IV_MUL:%.*]] = zext i32 [[ADD_I]] to i64 @@ -341,7 +283,7 @@ define void @iv_trunc(i32 %x, ptr %dst, i64 %N) #0 { ; PRED-NEXT: store i32 1, ptr [[GEP]], align 4 ; PRED-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 ; PRED-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], [[N]] -; PRED-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; PRED-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[FOR_BODY]] ; PRED: [[EXIT]]: ; PRED-NEXT: ret void ; @@ -437,101 +379,21 @@ define void @trunc_ivs_and_store(i32 %x, ptr %dst, i64 %N) #0 { ; ; PRED-LABEL: define void @trunc_ivs_and_store( ; PRED-SAME: i32 [[X:%.*]], ptr [[DST:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { -; PRED-NEXT: [[ENTRY:.*:]] -; PRED-NEXT: [[MUL:%.*]] = mul i32 [[X]], [[X]] -; PRED-NEXT: [[TMP0:%.*]] = add i64 [[N]], 1 -; PRED-NEXT: br label %[[VECTOR_SCEVCHECK:.*]] -; PRED: [[VECTOR_SCEVCHECK]]: +; PRED-NEXT: [[ENTRY:.*]]: ; PRED-NEXT: [[TMP1:%.*]] = mul i32 [[X]], [[X]] -; PRED-NEXT: [[TMP2:%.*]] = sub i32 0, [[TMP1]] -; PRED-NEXT: [[TMP3:%.*]] = icmp slt i32 [[MUL]], 0 -; PRED-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i32 [[TMP2]], i32 [[MUL]] -; PRED-NEXT: [[TMP5:%.*]] = trunc i64 [[N]] to i32 -; PRED-NEXT: [[MUL1:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 [[TMP4]], i32 [[TMP5]]) -; PRED-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i32, i1 } [[MUL1]], 0 -; PRED-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i32, i1 } [[MUL1]], 1 -; PRED-NEXT: [[TMP6:%.*]] = sub i32 0, [[MUL_RESULT]] -; PRED-NEXT: [[TMP7:%.*]] = icmp ugt i32 [[TMP6]], 0 -; PRED-NEXT: [[TMP8:%.*]] = select i1 [[TMP3]], i1 [[TMP7]], i1 false -; PRED-NEXT: [[TMP9:%.*]] = or i1 [[TMP8]], [[MUL_OVERFLOW]] -; PRED-NEXT: [[TMP10:%.*]] = icmp ugt i64 [[N]], 4294967295 -; PRED-NEXT: [[TMP11:%.*]] = icmp ne i32 [[MUL]], 0 -; PRED-NEXT: [[TMP12:%.*]] = and i1 [[TMP10]], [[TMP11]] -; PRED-NEXT: [[TMP13:%.*]] = or i1 [[TMP9]], [[TMP12]] -; PRED-NEXT: br i1 [[TMP13]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] -; PRED: [[VECTOR_PH]]: -; PRED-NEXT: [[TMP14:%.*]] = sub i64 [[TMP0]], 4 -; PRED-NEXT: [[TMP15:%.*]] = icmp ugt i64 [[TMP0]], 4 -; PRED-NEXT: [[TMP16:%.*]] = select i1 [[TMP15]], i64 [[TMP14]], i64 0 -; PRED-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 0, i64 [[TMP0]]) -; PRED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[MUL]], i64 0 -; PRED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer -; PRED-NEXT: br label %[[VECTOR_BODY:.*]] -; PRED: [[VECTOR_BODY]]: -; PRED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE7:.*]] ] -; PRED-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <4 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], %[[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %[[PRED_STORE_CONTINUE7]] ] -; PRED-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE7]] ] -; PRED-NEXT: [[OFFSET_IDX:%.*]] = trunc i64 [[INDEX]] to i32 -; PRED-NEXT: [[TMP17:%.*]] = mul <4 x i32> [[BROADCAST_SPLAT]], [[VEC_IND]] -; PRED-NEXT: [[TMP18:%.*]] = zext <4 x i32> [[TMP17]] to <4 x i64> -; PRED-NEXT: [[TMP19:%.*]] = extractelement <4 x i1> [[ACTIVE_LANE_MASK]], i32 0 -; PRED-NEXT: br i1 [[TMP19]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] -; PRED: [[PRED_STORE_IF]]: -; PRED-NEXT: [[TMP20:%.*]] = extractelement <4 x i64> [[TMP18]], i32 0 -; PRED-NEXT: [[TMP21:%.*]] = getelementptr i32, ptr [[DST]], i64 [[TMP20]] -; PRED-NEXT: [[TMP22:%.*]] = add i32 [[OFFSET_IDX]], 0 -; PRED-NEXT: store i32 [[TMP22]], ptr [[TMP21]], align 4 -; PRED-NEXT: br label %[[PRED_STORE_CONTINUE]] -; PRED: [[PRED_STORE_CONTINUE]]: -; PRED-NEXT: [[TMP23:%.*]] = extractelement <4 x i1> [[ACTIVE_LANE_MASK]], i32 1 -; PRED-NEXT: br i1 [[TMP23]], label %[[PRED_STORE_IF2:.*]], label %[[PRED_STORE_CONTINUE3:.*]] -; PRED: [[PRED_STORE_IF2]]: -; PRED-NEXT: [[TMP24:%.*]] = extractelement <4 x i64> [[TMP18]], i32 1 -; PRED-NEXT: [[TMP25:%.*]] = getelementptr i32, ptr [[DST]], i64 [[TMP24]] -; PRED-NEXT: [[TMP26:%.*]] = add i32 [[OFFSET_IDX]], 1 -; PRED-NEXT: store i32 [[TMP26]], ptr [[TMP25]], align 4 -; PRED-NEXT: br label %[[PRED_STORE_CONTINUE3]] -; PRED: [[PRED_STORE_CONTINUE3]]: -; PRED-NEXT: [[TMP27:%.*]] = extractelement <4 x i1> [[ACTIVE_LANE_MASK]], i32 2 -; PRED-NEXT: br i1 [[TMP27]], label %[[PRED_STORE_IF4:.*]], label %[[PRED_STORE_CONTINUE5:.*]] -; PRED: [[PRED_STORE_IF4]]: -; PRED-NEXT: [[TMP28:%.*]] = extractelement <4 x i64> [[TMP18]], i32 2 -; PRED-NEXT: [[TMP29:%.*]] = getelementptr i32, ptr [[DST]], i64 [[TMP28]] -; PRED-NEXT: [[TMP30:%.*]] = add i32 [[OFFSET_IDX]], 2 -; PRED-NEXT: store i32 [[TMP30]], ptr [[TMP29]], align 4 -; PRED-NEXT: br label %[[PRED_STORE_CONTINUE5]] -; PRED: [[PRED_STORE_CONTINUE5]]: -; PRED-NEXT: [[TMP31:%.*]] = extractelement <4 x i1> [[ACTIVE_LANE_MASK]], i32 3 -; PRED-NEXT: br i1 [[TMP31]], label %[[PRED_STORE_IF6:.*]], label %[[PRED_STORE_CONTINUE7]] -; PRED: [[PRED_STORE_IF6]]: -; PRED-NEXT: [[TMP32:%.*]] = extractelement <4 x i64> [[TMP18]], i32 3 -; PRED-NEXT: [[TMP33:%.*]] = getelementptr i32, ptr [[DST]], i64 [[TMP32]] -; PRED-NEXT: [[TMP34:%.*]] = add i32 [[OFFSET_IDX]], 3 -; PRED-NEXT: store i32 [[TMP34]], ptr [[TMP33]], align 4 -; PRED-NEXT: br label %[[PRED_STORE_CONTINUE7]] -; PRED: [[PRED_STORE_CONTINUE7]]: -; PRED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 -; PRED-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 [[INDEX]], i64 [[TMP16]]) -; PRED-NEXT: [[TMP35:%.*]] = extractelement <4 x i1> [[ACTIVE_LANE_MASK_NEXT]], i32 0 -; PRED-NEXT: [[TMP36:%.*]] = xor i1 [[TMP35]], true -; PRED-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) -; PRED-NEXT: br i1 [[TMP36]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] -; PRED: [[MIDDLE_BLOCK]]: -; PRED-NEXT: br label %[[EXIT:.*]] -; PRED: [[SCALAR_PH]]: ; PRED-NEXT: br label %[[LOOP:.*]] ; PRED: [[LOOP]]: -; PRED-NEXT: [[IV_1:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_1_NEXT:%.*]], %[[LOOP]] ] -; PRED-NEXT: [[IV_2:%.*]] = phi i32 [ 0, %[[SCALAR_PH]] ], [ [[IV_2_NEXT:%.*]], %[[LOOP]] ] +; PRED-NEXT: [[IV_1:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_1_NEXT:%.*]], %[[LOOP]] ] +; PRED-NEXT: [[IV_2:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IV_2_NEXT:%.*]], %[[LOOP]] ] ; PRED-NEXT: [[IV_1_TRUNC:%.*]] = trunc i64 [[IV_1]] to i32 -; PRED-NEXT: [[IV_1_MUL:%.*]] = mul i32 [[MUL]], [[IV_1_TRUNC]] +; PRED-NEXT: [[IV_1_MUL:%.*]] = mul i32 [[TMP1]], [[IV_1_TRUNC]] ; PRED-NEXT: [[IV_2_NEXT]] = add i32 [[IV_2]], 1 ; PRED-NEXT: [[MUL_EXT:%.*]] = zext i32 [[IV_1_MUL]] to i64 ; PRED-NEXT: [[GEP:%.*]] = getelementptr i32, ptr [[DST]], i64 [[MUL_EXT]] ; PRED-NEXT: store i32 [[IV_2]], ptr [[GEP]], align 4 ; PRED-NEXT: [[IV_1_NEXT]] = add i64 [[IV_1]], 1 ; PRED-NEXT: [[EXITCOND_3_NOT:%.*]] = icmp eq i64 [[IV_1]], [[N]] -; PRED-NEXT: br i1 [[EXITCOND_3_NOT]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP7:![0-9]+]] +; PRED-NEXT: br i1 [[EXITCOND_3_NOT]], label %[[EXIT:.*]], label %[[LOOP]] ; PRED: [[EXIT]]: ; PRED-NEXT: ret void ; @@ -627,91 +489,12 @@ define void @ivs_trunc_and_ext(i32 %x, ptr %dst, i64 %N) #0 { ; ; PRED-LABEL: define void @ivs_trunc_and_ext( ; PRED-SAME: i32 [[X:%.*]], ptr [[DST:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { -; PRED-NEXT: [[ENTRY:.*:]] +; PRED-NEXT: [[ENTRY:.*]]: ; PRED-NEXT: [[ADD:%.*]] = add i32 [[X]], 1 -; PRED-NEXT: [[TMP0:%.*]] = add i64 [[N]], 1 -; PRED-NEXT: br label %[[VECTOR_SCEVCHECK:.*]] -; PRED: [[VECTOR_SCEVCHECK]]: -; PRED-NEXT: [[TMP1:%.*]] = sub i32 -1, [[X]] -; PRED-NEXT: [[TMP2:%.*]] = icmp slt i32 [[ADD]], 0 -; PRED-NEXT: [[TMP3:%.*]] = select i1 [[TMP2]], i32 [[TMP1]], i32 [[ADD]] -; PRED-NEXT: [[TMP4:%.*]] = trunc i64 [[N]] to i32 -; PRED-NEXT: [[MUL:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 [[TMP3]], i32 [[TMP4]]) -; PRED-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i32, i1 } [[MUL]], 0 -; PRED-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i32, i1 } [[MUL]], 1 -; PRED-NEXT: [[TMP5:%.*]] = sub i32 0, [[MUL_RESULT]] -; PRED-NEXT: [[TMP6:%.*]] = icmp ugt i32 [[TMP5]], 0 -; PRED-NEXT: [[TMP7:%.*]] = select i1 [[TMP2]], i1 [[TMP6]], i1 false -; PRED-NEXT: [[TMP8:%.*]] = or i1 [[TMP7]], [[MUL_OVERFLOW]] -; PRED-NEXT: [[TMP9:%.*]] = icmp ugt i64 [[N]], 4294967295 -; PRED-NEXT: [[TMP10:%.*]] = icmp ne i32 [[ADD]], 0 -; PRED-NEXT: [[TMP11:%.*]] = and i1 [[TMP9]], [[TMP10]] -; PRED-NEXT: [[TMP12:%.*]] = or i1 [[TMP8]], [[TMP11]] -; PRED-NEXT: br i1 [[TMP12]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] -; PRED: [[VECTOR_PH]]: -; PRED-NEXT: [[TMP13:%.*]] = sub i64 [[TMP0]], 4 -; PRED-NEXT: [[TMP14:%.*]] = icmp ugt i64 [[TMP0]], 4 -; PRED-NEXT: [[TMP15:%.*]] = select i1 [[TMP14]], i64 [[TMP13]], i64 0 -; PRED-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 0, i64 [[TMP0]]) -; PRED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[ADD]], i64 0 -; PRED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer -; PRED-NEXT: br label %[[VECTOR_BODY:.*]] -; PRED: [[VECTOR_BODY]]: -; PRED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE6:.*]] ] -; PRED-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <4 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], %[[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %[[PRED_STORE_CONTINUE6]] ] -; PRED-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE6]] ] -; PRED-NEXT: [[OFFSET_IDX:%.*]] = trunc i64 [[INDEX]] to i32 -; PRED-NEXT: [[TMP16:%.*]] = mul <4 x i32> [[BROADCAST_SPLAT]], [[VEC_IND]] -; PRED-NEXT: [[TMP17:%.*]] = zext <4 x i32> [[TMP16]] to <4 x i64> -; PRED-NEXT: [[TMP18:%.*]] = extractelement <4 x i1> [[ACTIVE_LANE_MASK]], i32 0 -; PRED-NEXT: br i1 [[TMP18]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] -; PRED: [[PRED_STORE_IF]]: -; PRED-NEXT: [[TMP19:%.*]] = extractelement <4 x i64> [[TMP17]], i32 0 -; PRED-NEXT: [[TMP20:%.*]] = getelementptr i32, ptr [[DST]], i64 [[TMP19]] -; PRED-NEXT: [[TMP21:%.*]] = add i32 [[OFFSET_IDX]], 0 -; PRED-NEXT: store i32 [[TMP21]], ptr [[TMP20]], align 4 -; PRED-NEXT: br label %[[PRED_STORE_CONTINUE]] -; PRED: [[PRED_STORE_CONTINUE]]: -; PRED-NEXT: [[TMP22:%.*]] = extractelement <4 x i1> [[ACTIVE_LANE_MASK]], i32 1 -; PRED-NEXT: br i1 [[TMP22]], label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2:.*]] -; PRED: [[PRED_STORE_IF1]]: -; PRED-NEXT: [[TMP23:%.*]] = extractelement <4 x i64> [[TMP17]], i32 1 -; PRED-NEXT: [[TMP24:%.*]] = getelementptr i32, ptr [[DST]], i64 [[TMP23]] -; PRED-NEXT: [[TMP25:%.*]] = add i32 [[OFFSET_IDX]], 1 -; PRED-NEXT: store i32 [[TMP25]], ptr [[TMP24]], align 4 -; PRED-NEXT: br label %[[PRED_STORE_CONTINUE2]] -; PRED: [[PRED_STORE_CONTINUE2]]: -; PRED-NEXT: [[TMP26:%.*]] = extractelement <4 x i1> [[ACTIVE_LANE_MASK]], i32 2 -; PRED-NEXT: br i1 [[TMP26]], label %[[PRED_STORE_IF3:.*]], label %[[PRED_STORE_CONTINUE4:.*]] -; PRED: [[PRED_STORE_IF3]]: -; PRED-NEXT: [[TMP27:%.*]] = extractelement <4 x i64> [[TMP17]], i32 2 -; PRED-NEXT: [[TMP28:%.*]] = getelementptr i32, ptr [[DST]], i64 [[TMP27]] -; PRED-NEXT: [[TMP29:%.*]] = add i32 [[OFFSET_IDX]], 2 -; PRED-NEXT: store i32 [[TMP29]], ptr [[TMP28]], align 4 -; PRED-NEXT: br label %[[PRED_STORE_CONTINUE4]] -; PRED: [[PRED_STORE_CONTINUE4]]: -; PRED-NEXT: [[TMP30:%.*]] = extractelement <4 x i1> [[ACTIVE_LANE_MASK]], i32 3 -; PRED-NEXT: br i1 [[TMP30]], label %[[PRED_STORE_IF5:.*]], label %[[PRED_STORE_CONTINUE6]] -; PRED: [[PRED_STORE_IF5]]: -; PRED-NEXT: [[TMP31:%.*]] = extractelement <4 x i64> [[TMP17]], i32 3 -; PRED-NEXT: [[TMP32:%.*]] = getelementptr i32, ptr [[DST]], i64 [[TMP31]] -; PRED-NEXT: [[TMP33:%.*]] = add i32 [[OFFSET_IDX]], 3 -; PRED-NEXT: store i32 [[TMP33]], ptr [[TMP32]], align 4 -; PRED-NEXT: br label %[[PRED_STORE_CONTINUE6]] -; PRED: [[PRED_STORE_CONTINUE6]]: -; PRED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 -; PRED-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 [[INDEX]], i64 [[TMP15]]) -; PRED-NEXT: [[TMP34:%.*]] = extractelement <4 x i1> [[ACTIVE_LANE_MASK_NEXT]], i32 0 -; PRED-NEXT: [[TMP35:%.*]] = xor i1 [[TMP34]], true -; PRED-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) -; PRED-NEXT: br i1 [[TMP35]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] -; PRED: [[MIDDLE_BLOCK]]: -; PRED-NEXT: br label %[[EXIT:.*]] -; PRED: [[SCALAR_PH]]: ; PRED-NEXT: br label %[[LOOP:.*]] ; PRED: [[LOOP]]: -; PRED-NEXT: [[IV_1:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_1_NEXT:%.*]], %[[LOOP]] ] -; PRED-NEXT: [[IV_2:%.*]] = phi i32 [ 0, %[[SCALAR_PH]] ], [ [[IV_2_NEXT:%.*]], %[[LOOP]] ] +; PRED-NEXT: [[IV_1:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_1_NEXT:%.*]], %[[LOOP]] ] +; PRED-NEXT: [[IV_2:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IV_2_NEXT:%.*]], %[[LOOP]] ] ; PRED-NEXT: [[IV_TRUNC:%.*]] = trunc i64 [[IV_1]] to i32 ; PRED-NEXT: [[IV_MUL:%.*]] = mul i32 [[ADD]], [[IV_TRUNC]] ; PRED-NEXT: [[IV_2_NEXT]] = add i32 [[IV_2]], 1 @@ -720,7 +503,7 @@ define void @ivs_trunc_and_ext(i32 %x, ptr %dst, i64 %N) #0 { ; PRED-NEXT: store i32 [[IV_2]], ptr [[GEP]], align 4 ; PRED-NEXT: [[IV_1_NEXT]] = add i64 [[IV_1]], 1 ; PRED-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_1]], [[N]] -; PRED-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP9:![0-9]+]] +; PRED-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]] ; PRED: [[EXIT]]: ; PRED-NEXT: ret void ; @@ -842,7 +625,7 @@ define void @exit_cond_zext_iv(ptr %dst, i64 %N) { ; PRED: [[PRED_STORE_CONTINUE5]]: ; PRED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 2 ; PRED-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; PRED-NEXT: br i1 [[TMP13]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +; PRED-NEXT: br i1 [[TMP13]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; PRED: [[MIDDLE_BLOCK]]: ; PRED-NEXT: br label %[[EXIT:.*]] ; PRED: [[SCALAR_PH]]: @@ -855,7 +638,7 @@ define void @exit_cond_zext_iv(ptr %dst, i64 %N) { ; PRED-NEXT: [[IV_1_NEXT]] = add i32 [[IV_1]], 1 ; PRED-NEXT: [[IV_EXT]] = zext i32 [[IV_1_NEXT]] to i64 ; PRED-NEXT: [[C:%.*]] = icmp ult i64 [[IV_EXT]], [[N]] -; PRED-NEXT: br i1 [[C]], label %[[LOOP]], label %[[EXIT]], !llvm.loop [[LOOP11:![0-9]+]] +; PRED-NEXT: br i1 [[C]], label %[[LOOP]], label %[[EXIT]], !llvm.loop [[LOOP5:![0-9]+]] ; PRED: [[EXIT]]: ; PRED-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/ARM/optsize_minsize.ll b/llvm/test/Transforms/LoopVectorize/ARM/optsize_minsize.ll index 6ea075f76aed4..83be0708774f1 100644 --- a/llvm/test/Transforms/LoopVectorize/ARM/optsize_minsize.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/optsize_minsize.ll @@ -181,178 +181,23 @@ for.cond.cleanup: define void @tail_predicate_without_optsize(ptr %p, i8 %a, i8 %b, i8 %c, i32 %n) { ; DEFAULT-LABEL: define void @tail_predicate_without_optsize( ; DEFAULT-SAME: ptr [[P:%.*]], i8 [[A:%.*]], i8 [[B:%.*]], i8 [[C:%.*]], i32 [[N:%.*]]) { -; DEFAULT-NEXT: [[ENTRY:.*:]] -; DEFAULT-NEXT: br label %[[VECTOR_PH:.*]] -; DEFAULT: [[VECTOR_PH]]: -; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[A]], i64 0 -; DEFAULT-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i8> [[BROADCAST_SPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer -; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <16 x i8> poison, i8 [[B]], i64 0 -; DEFAULT-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <16 x i8> [[BROADCAST_SPLATINSERT3]], <16 x i8> poison, <16 x i32> zeroinitializer -; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT5:%.*]] = insertelement <16 x i8> poison, i8 [[C]], i64 0 -; DEFAULT-NEXT: [[BROADCAST_SPLAT6:%.*]] = shufflevector <16 x i8> [[BROADCAST_SPLATINSERT5]], <16 x i8> poison, <16 x i32> zeroinitializer -; DEFAULT-NEXT: br label %[[VECTOR_BODY:.*]] -; DEFAULT: [[VECTOR_BODY]]: -; DEFAULT-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE35:.*]] ] -; DEFAULT-NEXT: [[VEC_IND:%.*]] = phi <16 x i8> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE35]] ] -; DEFAULT-NEXT: [[VEC_IND1:%.*]] = phi <16 x i8> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], %[[PRED_STORE_CONTINUE35]] ] -; DEFAULT-NEXT: [[TMP0:%.*]] = icmp ule <16 x i8> [[VEC_IND]], splat (i8 14) -; DEFAULT-NEXT: [[TMP1:%.*]] = mul <16 x i8> [[BROADCAST_SPLAT]], [[VEC_IND1]] -; DEFAULT-NEXT: [[TMP2:%.*]] = lshr <16 x i8> [[VEC_IND1]], splat (i8 1) -; DEFAULT-NEXT: [[TMP3:%.*]] = mul <16 x i8> [[TMP2]], [[BROADCAST_SPLAT4]] -; DEFAULT-NEXT: [[TMP4:%.*]] = add <16 x i8> [[TMP3]], [[TMP1]] -; DEFAULT-NEXT: [[TMP5:%.*]] = lshr <16 x i8> [[VEC_IND1]], splat (i8 2) -; DEFAULT-NEXT: [[TMP6:%.*]] = mul <16 x i8> [[TMP5]], [[BROADCAST_SPLAT6]] -; DEFAULT-NEXT: [[TMP7:%.*]] = add <16 x i8> [[TMP4]], [[TMP6]] -; DEFAULT-NEXT: [[TMP8:%.*]] = extractelement <16 x i1> [[TMP0]], i32 0 -; DEFAULT-NEXT: br i1 [[TMP8]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] -; DEFAULT: [[PRED_STORE_IF]]: -; DEFAULT-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 0 -; DEFAULT-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP9]] -; DEFAULT-NEXT: [[TMP11:%.*]] = extractelement <16 x i8> [[TMP7]], i32 0 -; DEFAULT-NEXT: store i8 [[TMP11]], ptr [[TMP10]], align 1 -; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE]] -; DEFAULT: [[PRED_STORE_CONTINUE]]: -; DEFAULT-NEXT: [[TMP12:%.*]] = extractelement <16 x i1> [[TMP0]], i32 1 -; DEFAULT-NEXT: br i1 [[TMP12]], label %[[PRED_STORE_IF6:.*]], label %[[PRED_STORE_CONTINUE7:.*]] -; DEFAULT: [[PRED_STORE_IF6]]: -; DEFAULT-NEXT: [[TMP13:%.*]] = add i64 [[INDEX]], 1 -; DEFAULT-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP13]] -; DEFAULT-NEXT: [[TMP15:%.*]] = extractelement <16 x i8> [[TMP7]], i32 1 -; DEFAULT-NEXT: store i8 [[TMP15]], ptr [[TMP14]], align 1 -; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE7]] -; DEFAULT: [[PRED_STORE_CONTINUE7]]: -; DEFAULT-NEXT: [[TMP16:%.*]] = extractelement <16 x i1> [[TMP0]], i32 2 -; DEFAULT-NEXT: br i1 [[TMP16]], label %[[PRED_STORE_IF8:.*]], label %[[PRED_STORE_CONTINUE9:.*]] -; DEFAULT: [[PRED_STORE_IF8]]: -; DEFAULT-NEXT: [[TMP17:%.*]] = add i64 [[INDEX]], 2 -; DEFAULT-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP17]] -; DEFAULT-NEXT: [[TMP19:%.*]] = extractelement <16 x i8> [[TMP7]], i32 2 -; DEFAULT-NEXT: store i8 [[TMP19]], ptr [[TMP18]], align 1 -; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE9]] -; DEFAULT: [[PRED_STORE_CONTINUE9]]: -; DEFAULT-NEXT: [[TMP20:%.*]] = extractelement <16 x i1> [[TMP0]], i32 3 -; DEFAULT-NEXT: br i1 [[TMP20]], label %[[PRED_STORE_IF10:.*]], label %[[PRED_STORE_CONTINUE11:.*]] -; DEFAULT: [[PRED_STORE_IF10]]: -; DEFAULT-NEXT: [[TMP21:%.*]] = add i64 [[INDEX]], 3 -; DEFAULT-NEXT: [[TMP22:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP21]] -; DEFAULT-NEXT: [[TMP23:%.*]] = extractelement <16 x i8> [[TMP7]], i32 3 -; DEFAULT-NEXT: store i8 [[TMP23]], ptr [[TMP22]], align 1 -; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE11]] -; DEFAULT: [[PRED_STORE_CONTINUE11]]: -; DEFAULT-NEXT: [[TMP24:%.*]] = extractelement <16 x i1> [[TMP0]], i32 4 -; DEFAULT-NEXT: br i1 [[TMP24]], label %[[PRED_STORE_IF12:.*]], label %[[PRED_STORE_CONTINUE13:.*]] -; DEFAULT: [[PRED_STORE_IF12]]: -; DEFAULT-NEXT: [[TMP25:%.*]] = add i64 [[INDEX]], 4 -; DEFAULT-NEXT: [[TMP26:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP25]] -; DEFAULT-NEXT: [[TMP27:%.*]] = extractelement <16 x i8> [[TMP7]], i32 4 -; DEFAULT-NEXT: store i8 [[TMP27]], ptr [[TMP26]], align 1 -; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE13]] -; DEFAULT: [[PRED_STORE_CONTINUE13]]: -; DEFAULT-NEXT: [[TMP28:%.*]] = extractelement <16 x i1> [[TMP0]], i32 5 -; DEFAULT-NEXT: br i1 [[TMP28]], label %[[PRED_STORE_IF14:.*]], label %[[PRED_STORE_CONTINUE15:.*]] -; DEFAULT: [[PRED_STORE_IF14]]: -; DEFAULT-NEXT: [[TMP29:%.*]] = add i64 [[INDEX]], 5 -; DEFAULT-NEXT: [[TMP30:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP29]] -; DEFAULT-NEXT: [[TMP31:%.*]] = extractelement <16 x i8> [[TMP7]], i32 5 -; DEFAULT-NEXT: store i8 [[TMP31]], ptr [[TMP30]], align 1 -; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE15]] -; DEFAULT: [[PRED_STORE_CONTINUE15]]: -; DEFAULT-NEXT: [[TMP32:%.*]] = extractelement <16 x i1> [[TMP0]], i32 6 -; DEFAULT-NEXT: br i1 [[TMP32]], label %[[PRED_STORE_IF16:.*]], label %[[PRED_STORE_CONTINUE17:.*]] -; DEFAULT: [[PRED_STORE_IF16]]: -; DEFAULT-NEXT: [[TMP33:%.*]] = add i64 [[INDEX]], 6 -; DEFAULT-NEXT: [[TMP34:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP33]] -; DEFAULT-NEXT: [[TMP35:%.*]] = extractelement <16 x i8> [[TMP7]], i32 6 -; DEFAULT-NEXT: store i8 [[TMP35]], ptr [[TMP34]], align 1 -; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE17]] -; DEFAULT: [[PRED_STORE_CONTINUE17]]: -; DEFAULT-NEXT: [[TMP36:%.*]] = extractelement <16 x i1> [[TMP0]], i32 7 -; DEFAULT-NEXT: br i1 [[TMP36]], label %[[PRED_STORE_IF18:.*]], label %[[PRED_STORE_CONTINUE19:.*]] -; DEFAULT: [[PRED_STORE_IF18]]: -; DEFAULT-NEXT: [[TMP37:%.*]] = add i64 [[INDEX]], 7 -; DEFAULT-NEXT: [[TMP38:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP37]] -; DEFAULT-NEXT: [[TMP39:%.*]] = extractelement <16 x i8> [[TMP7]], i32 7 -; DEFAULT-NEXT: store i8 [[TMP39]], ptr [[TMP38]], align 1 -; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE19]] -; DEFAULT: [[PRED_STORE_CONTINUE19]]: -; DEFAULT-NEXT: [[TMP40:%.*]] = extractelement <16 x i1> [[TMP0]], i32 8 -; DEFAULT-NEXT: br i1 [[TMP40]], label %[[PRED_STORE_IF20:.*]], label %[[PRED_STORE_CONTINUE21:.*]] -; DEFAULT: [[PRED_STORE_IF20]]: -; DEFAULT-NEXT: [[TMP41:%.*]] = add i64 [[INDEX]], 8 -; DEFAULT-NEXT: [[TMP42:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP41]] -; DEFAULT-NEXT: [[TMP43:%.*]] = extractelement <16 x i8> [[TMP7]], i32 8 -; DEFAULT-NEXT: store i8 [[TMP43]], ptr [[TMP42]], align 1 -; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE21]] -; DEFAULT: [[PRED_STORE_CONTINUE21]]: -; DEFAULT-NEXT: [[TMP44:%.*]] = extractelement <16 x i1> [[TMP0]], i32 9 -; DEFAULT-NEXT: br i1 [[TMP44]], label %[[PRED_STORE_IF22:.*]], label %[[PRED_STORE_CONTINUE23:.*]] -; DEFAULT: [[PRED_STORE_IF22]]: -; DEFAULT-NEXT: [[TMP45:%.*]] = add i64 [[INDEX]], 9 -; DEFAULT-NEXT: [[TMP46:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP45]] -; DEFAULT-NEXT: [[TMP47:%.*]] = extractelement <16 x i8> [[TMP7]], i32 9 -; DEFAULT-NEXT: store i8 [[TMP47]], ptr [[TMP46]], align 1 -; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE23]] -; DEFAULT: [[PRED_STORE_CONTINUE23]]: -; DEFAULT-NEXT: [[TMP48:%.*]] = extractelement <16 x i1> [[TMP0]], i32 10 -; DEFAULT-NEXT: br i1 [[TMP48]], label %[[PRED_STORE_IF24:.*]], label %[[PRED_STORE_CONTINUE25:.*]] -; DEFAULT: [[PRED_STORE_IF24]]: -; DEFAULT-NEXT: [[TMP49:%.*]] = add i64 [[INDEX]], 10 -; DEFAULT-NEXT: [[TMP50:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP49]] -; DEFAULT-NEXT: [[TMP51:%.*]] = extractelement <16 x i8> [[TMP7]], i32 10 -; DEFAULT-NEXT: store i8 [[TMP51]], ptr [[TMP50]], align 1 -; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE25]] -; DEFAULT: [[PRED_STORE_CONTINUE25]]: -; DEFAULT-NEXT: [[TMP52:%.*]] = extractelement <16 x i1> [[TMP0]], i32 11 -; DEFAULT-NEXT: br i1 [[TMP52]], label %[[PRED_STORE_IF26:.*]], label %[[PRED_STORE_CONTINUE27:.*]] -; DEFAULT: [[PRED_STORE_IF26]]: -; DEFAULT-NEXT: [[TMP53:%.*]] = add i64 [[INDEX]], 11 -; DEFAULT-NEXT: [[TMP54:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP53]] -; DEFAULT-NEXT: [[TMP55:%.*]] = extractelement <16 x i8> [[TMP7]], i32 11 -; DEFAULT-NEXT: store i8 [[TMP55]], ptr [[TMP54]], align 1 -; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE27]] -; DEFAULT: [[PRED_STORE_CONTINUE27]]: -; DEFAULT-NEXT: [[TMP56:%.*]] = extractelement <16 x i1> [[TMP0]], i32 12 -; DEFAULT-NEXT: br i1 [[TMP56]], label %[[PRED_STORE_IF28:.*]], label %[[PRED_STORE_CONTINUE29:.*]] -; DEFAULT: [[PRED_STORE_IF28]]: -; DEFAULT-NEXT: [[TMP57:%.*]] = add i64 [[INDEX]], 12 -; DEFAULT-NEXT: [[TMP58:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP57]] -; DEFAULT-NEXT: [[TMP59:%.*]] = extractelement <16 x i8> [[TMP7]], i32 12 -; DEFAULT-NEXT: store i8 [[TMP59]], ptr [[TMP58]], align 1 -; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE29]] -; DEFAULT: [[PRED_STORE_CONTINUE29]]: -; DEFAULT-NEXT: [[TMP60:%.*]] = extractelement <16 x i1> [[TMP0]], i32 13 -; DEFAULT-NEXT: br i1 [[TMP60]], label %[[PRED_STORE_IF30:.*]], label %[[PRED_STORE_CONTINUE31:.*]] -; DEFAULT: [[PRED_STORE_IF30]]: -; DEFAULT-NEXT: [[TMP61:%.*]] = add i64 [[INDEX]], 13 -; DEFAULT-NEXT: [[TMP62:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP61]] -; DEFAULT-NEXT: [[TMP63:%.*]] = extractelement <16 x i8> [[TMP7]], i32 13 -; DEFAULT-NEXT: store i8 [[TMP63]], ptr [[TMP62]], align 1 -; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE31]] -; DEFAULT: [[PRED_STORE_CONTINUE31]]: -; DEFAULT-NEXT: [[TMP64:%.*]] = extractelement <16 x i1> [[TMP0]], i32 14 -; DEFAULT-NEXT: br i1 [[TMP64]], label %[[PRED_STORE_IF32:.*]], label %[[PRED_STORE_CONTINUE33:.*]] -; DEFAULT: [[PRED_STORE_IF32]]: -; DEFAULT-NEXT: [[TMP65:%.*]] = add i64 [[INDEX]], 14 -; DEFAULT-NEXT: [[TMP66:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP65]] -; DEFAULT-NEXT: [[TMP67:%.*]] = extractelement <16 x i8> [[TMP7]], i32 14 -; DEFAULT-NEXT: store i8 [[TMP67]], ptr [[TMP66]], align 1 -; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE33]] -; DEFAULT: [[PRED_STORE_CONTINUE33]]: -; DEFAULT-NEXT: [[TMP68:%.*]] = extractelement <16 x i1> [[TMP0]], i32 15 -; DEFAULT-NEXT: br i1 [[TMP68]], label %[[PRED_STORE_IF34:.*]], label %[[PRED_STORE_CONTINUE35]] -; DEFAULT: [[PRED_STORE_IF34]]: -; DEFAULT-NEXT: [[TMP69:%.*]] = add i64 [[INDEX]], 15 +; DEFAULT-NEXT: [[ENTRY:.*]]: +; DEFAULT-NEXT: br label %[[FOR_BODY:.*]] +; DEFAULT: [[FOR_BODY]]: +; DEFAULT-NEXT: [[TMP69:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ] +; DEFAULT-NEXT: [[TMP0:%.*]] = trunc nuw nsw i64 [[TMP69]] to i8 +; DEFAULT-NEXT: [[MUL:%.*]] = mul i8 [[A]], [[TMP0]] +; DEFAULT-NEXT: [[SHR:%.*]] = lshr i8 [[TMP0]], 1 +; DEFAULT-NEXT: [[MUL5:%.*]] = mul i8 [[SHR]], [[B]] +; DEFAULT-NEXT: [[ADD:%.*]] = add i8 [[MUL5]], [[MUL]] +; DEFAULT-NEXT: [[SHR7:%.*]] = lshr i8 [[TMP0]], 2 +; DEFAULT-NEXT: [[MUL9:%.*]] = mul i8 [[SHR7]], [[C]] +; DEFAULT-NEXT: [[TMP71:%.*]] = add i8 [[ADD]], [[MUL9]] ; DEFAULT-NEXT: [[TMP70:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP69]] -; DEFAULT-NEXT: [[TMP71:%.*]] = extractelement <16 x i8> [[TMP7]], i32 15 ; DEFAULT-NEXT: store i8 [[TMP71]], ptr [[TMP70]], align 1 -; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE35]] -; DEFAULT: [[PRED_STORE_CONTINUE35]]: -; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 -; DEFAULT-NEXT: [[VEC_IND_NEXT]] = add <16 x i8> [[VEC_IND]], splat (i8 16) -; DEFAULT-NEXT: [[VEC_IND_NEXT2]] = add <16 x i8> [[VEC_IND1]], splat (i8 16) -; DEFAULT-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] -; DEFAULT: [[MIDDLE_BLOCK]]: -; DEFAULT-NEXT: br label %[[FOR_COND_CLEANUP:.*]] +; DEFAULT-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[TMP69]], 1 +; DEFAULT-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 15 +; DEFAULT-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_COND_CLEANUP:.*]], label %[[FOR_BODY]] ; DEFAULT: [[FOR_COND_CLEANUP]]: ; DEFAULT-NEXT: ret void ; @@ -449,7 +294,7 @@ define void @dont_vectorize_with_minsize() { ; DEFAULT-NEXT: store <4 x i16> [[TMP11]], ptr [[TMP9]], align 2 ; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP0]], 4 ; DEFAULT-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 64 -; DEFAULT-NEXT: br i1 [[TMP16]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; DEFAULT-NEXT: br i1 [[TMP16]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; DEFAULT: [[MIDDLE_BLOCK]]: ; DEFAULT-NEXT: br label %[[FOR_COND_CLEANUP:.*]] ; DEFAULT: [[FOR_COND_CLEANUP]]: @@ -555,7 +400,7 @@ define void @vectorization_forced() { ; DEFAULT-NEXT: store <4 x i16> [[TMP11]], ptr [[TMP9]], align 2 ; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP0]], 4 ; DEFAULT-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 64 -; DEFAULT-NEXT: br i1 [[TMP16]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; DEFAULT-NEXT: br i1 [[TMP16]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; DEFAULT: [[MIDDLE_BLOCK]]: ; DEFAULT-NEXT: br label %[[FOR_COND_CLEANUP:.*]] ; DEFAULT: [[FOR_COND_CLEANUP]]: diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/masked-interleaved-store-i16.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/masked-interleaved-store-i16.ll index a286df9bc2fc7..c2c04ce6f5ff5 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/masked-interleaved-store-i16.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/masked-interleaved-store-i16.ll @@ -85,13 +85,13 @@ define void @test2(ptr noalias nocapture %points, i32 %numPoints, ptr noalias no ; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %2, ptr %arrayidx7, align 2 ; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %0, ptr %arrayidx2, align 2 ; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %2, ptr %arrayidx7, align 2 -; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 5 for VF 2 For instruction: store i16 %0, ptr %arrayidx2, align 2 +; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 8 for VF 2 For instruction: store i16 %0, ptr %arrayidx2, align 2 ; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 3000000 for VF 2 For instruction: store i16 %2, ptr %arrayidx7, align 2 -; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 10 for VF 4 For instruction: store i16 %0, ptr %arrayidx2, align 2 +; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 17 for VF 4 For instruction: store i16 %0, ptr %arrayidx2, align 2 ; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 3000000 for VF 4 For instruction: store i16 %2, ptr %arrayidx7, align 2 -; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 21 for VF 8 For instruction: store i16 %0, ptr %arrayidx2, align 2 +; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 35 for VF 8 For instruction: store i16 %0, ptr %arrayidx2, align 2 ; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 3000000 for VF 8 For instruction: store i16 %2, ptr %arrayidx7, align 2 -; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 43 for VF 16 For instruction: store i16 %0, ptr %arrayidx2, align 2 +; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 71 for VF 16 For instruction: store i16 %0, ptr %arrayidx2, align 2 ; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 3000000 for VF 16 For instruction: store i16 %2, ptr %arrayidx7, align 2 ; ; ENABLED_MASKED_STRIDED-LABEL: 'test2' @@ -99,8 +99,8 @@ define void @test2(ptr noalias nocapture %points, i32 %numPoints, ptr noalias no ; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %2, ptr %arrayidx7, align 2 ; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %0, ptr %arrayidx2, align 2 ; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %2, ptr %arrayidx7, align 2 -; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 5 for VF 2 For instruction: store i16 %0, ptr %arrayidx2, align 2 -; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 5 for VF 2 For instruction: store i16 %2, ptr %arrayidx7, align 2 +; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 0 for VF 2 For instruction: store i16 %0, ptr %arrayidx2, align 2 +; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 13 for VF 2 For instruction: store i16 %2, ptr %arrayidx7, align 2 ; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 0 for VF 4 For instruction: store i16 %0, ptr %arrayidx2, align 2 ; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 14 for VF 4 For instruction: store i16 %2, ptr %arrayidx7, align 2 ; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 0 for VF 8 For instruction: store i16 %0, ptr %arrayidx2, align 2 diff --git a/llvm/test/Transforms/LoopVectorize/X86/fixed-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/X86/fixed-order-recurrence.ll index cc84fabd00ecc..002d811d46992 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/fixed-order-recurrence.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/fixed-order-recurrence.ll @@ -435,67 +435,16 @@ define void @test_first_order_recurrence_tried_to_scalarized(ptr %dst, i1 %c, i3 ; CHECK-LABEL: @test_first_order_recurrence_tried_to_scalarized( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[N:%.*]] = select i1 [[C:%.*]], i32 8, i32 9 -; CHECK-NEXT: br label [[VECTOR_PH:%.*]] -; CHECK: vector.ph: -; CHECK-NEXT: [[N_RND_UP:%.*]] = add i32 [[N]], 3 -; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N_RND_UP]], 4 -; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[N_RND_UP]], [[N_MOD_VF]] -; CHECK-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i32 [[N]], 1 -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TRIP_COUNT_MINUS_1]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] -; CHECK: vector.body: -; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE6:%.*]] ] -; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE6]] ] -; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND]], [[PRED_STORE_CONTINUE6]] ] -; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[VEC_IND]], <4 x i32> -; CHECK-NEXT: [[TMP1:%.*]] = icmp ule <4 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i1> [[TMP1]], i32 0 -; CHECK-NEXT: br i1 [[TMP2]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] -; CHECK: pred.store.if: -; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[INDEX]], 0 -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw i32, ptr [[DST:%.*]], i32 [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i32> [[TMP0]], i32 0 -; CHECK-NEXT: [[TMP6:%.*]] = sub nsw i32 10, [[TMP5]] -; CHECK-NEXT: store i32 [[TMP6]], ptr [[TMP4]], align 4 -; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]] -; CHECK: pred.store.continue: -; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i1> [[TMP1]], i32 1 -; CHECK-NEXT: br i1 [[TMP7]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2:%.*]] -; CHECK: pred.store.if1: -; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[INDEX]], 1 -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw i32, ptr [[DST]], i32 [[TMP8]] -; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i32> [[TMP0]], i32 1 -; CHECK-NEXT: [[TMP11:%.*]] = sub nsw i32 10, [[TMP10]] -; CHECK-NEXT: store i32 [[TMP11]], ptr [[TMP9]], align 4 -; CHECK-NEXT: br label [[PRED_STORE_CONTINUE2]] -; CHECK: pred.store.continue2: -; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i1> [[TMP1]], i32 2 -; CHECK-NEXT: br i1 [[TMP12]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4:%.*]] -; CHECK: pred.store.if3: -; CHECK-NEXT: [[TMP13:%.*]] = add i32 [[INDEX]], 2 -; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds nuw i32, ptr [[DST]], i32 [[TMP13]] -; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x i32> [[TMP0]], i32 2 +; CHECK: loop: +; CHECK-NEXT: [[TMP18:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP15:%.*]] = phi i32 [ 4, [[ENTRY]] ], [ [[TMP18]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[TMP18]], 1 ; CHECK-NEXT: [[TMP16:%.*]] = sub nsw i32 10, [[TMP15]] -; CHECK-NEXT: store i32 [[TMP16]], ptr [[TMP14]], align 4 -; CHECK-NEXT: br label [[PRED_STORE_CONTINUE4]] -; CHECK: pred.store.continue4: -; CHECK-NEXT: [[TMP17:%.*]] = extractelement <4 x i1> [[TMP1]], i32 3 -; CHECK-NEXT: br i1 [[TMP17]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6]] -; CHECK: pred.store.if5: -; CHECK-NEXT: [[TMP18:%.*]] = add i32 [[INDEX]], 3 -; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds nuw i32, ptr [[DST]], i32 [[TMP18]] -; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x i32> [[TMP0]], i32 3 -; CHECK-NEXT: [[TMP21:%.*]] = sub nsw i32 10, [[TMP20]] -; CHECK-NEXT: store i32 [[TMP21]], ptr [[TMP19]], align 4 -; CHECK-NEXT: br label [[PRED_STORE_CONTINUE6]] -; CHECK: pred.store.continue6: -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) -; CHECK-NEXT: [[TMP22:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] -; CHECK: middle.block: -; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds nuw i32, ptr [[DST:%.*]], i32 [[TMP18]] +; CHECK-NEXT: store i32 [[TMP16]], ptr [[TMP19]], align 4 +; CHECK-NEXT: [[EC:%.*]] = icmp eq i32 [[IV_NEXT]], [[N]] +; CHECK-NEXT: br i1 [[EC]], label [[EXIT:%.*]], label [[VECTOR_BODY]] ; CHECK: exit: ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll index 9deab9063d710..89aebb931244c 100644 --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll @@ -49,6 +49,8 @@ define void @sink_replicate_region_1(i32 %x, ptr %ptr, ptr noalias %dst) optsize ; CHECK-NEXT: loop.0: ; CHECK-NEXT: WIDEN-CAST ir<%conv> = sext vp<[[PRED1]]> to i32 ; CHECK-NEXT: EMIT vp<[[SPLICE:%.+]]> = first-order splice ir<%0>, ir<%conv> +; CHECK-NEXT: WIDEN ir<%rem> = srem vp<[[SPLICE]]>, ir<%x> +; CHECK-NEXT: WIDEN ir<%add> = add ir<%conv>, ir<%rem> ; CHECK-NEXT: Successor(s): pred.store ; CHECK-EMPTY: ; CHECK-NEXT: pred.store: { @@ -57,9 +59,7 @@ define void @sink_replicate_region_1(i32 %x, ptr %ptr, ptr noalias %dst) optsize ; CHECK-NEXT: Successor(s): pred.store.if, pred.store.continue ; CHECK-EMPTY: ; CHECK-NEXT: pred.store.if: -; CHECK-NEXT: REPLICATE ir<%rem> = srem vp<[[SPLICE]]>, ir<%x> ; CHECK-NEXT: REPLICATE ir<%gep.dst> = getelementptr ir<%dst>, vp<[[STEPS]]> -; CHECK-NEXT: REPLICATE ir<%add> = add ir<%conv>, ir<%rem> ; CHECK-NEXT: REPLICATE store ir<%add>, ir<%gep.dst> ; CHECK-NEXT: Successor(s): pred.store.continue ; CHECK-EMPTY: @@ -125,6 +125,8 @@ define void @sink_replicate_region_2(i32 %x, i8 %y, ptr %ptr) optsize { ; CHECK-NEXT: ir<%iv> = WIDEN-INDUCTION ir<0>, ir<1>, vp<[[VF]]> ; CHECK-NEXT: EMIT vp<[[MASK:%.+]]> = icmp ule ir<%iv>, vp<[[BTC]]> ; CHECK-NEXT: EMIT vp<[[SPLICE:%.+]]> = first-order splice ir<%recur>, ir<%recur.next> +; CHECK-NEXT: WIDEN ir<%rem> = srem vp<[[SPLICE]]>, ir<%x> +; CHECK-NEXT: WIDEN ir<%add> = add ir<%rem>, ir<%recur.next> ; CHECK-NEXT: Successor(s): pred.store ; CHECK-EMPTY: ; CHECK-NEXT: pred.store: { @@ -134,9 +136,7 @@ define void @sink_replicate_region_2(i32 %x, i8 %y, ptr %ptr) optsize { ; CHECK-EMPTY: ; CHECK-NEXT: pred.store.if: ; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1> -; CHECK-NEXT: REPLICATE ir<%rem> = srem vp<[[SPLICE]]>, ir<%x> ; CHECK-NEXT: REPLICATE ir<%gep> = getelementptr ir<%ptr>, vp<[[STEPS]]> -; CHECK-NEXT: REPLICATE ir<%add> = add ir<%rem>, ir<%recur.next> ; CHECK-NEXT: REPLICATE store ir<%add>, ir<%gep> ; CHECK-NEXT: Successor(s): pred.store.continue ; CHECK-EMPTY: @@ -284,27 +284,44 @@ define void @sink_replicate_region_4_requires_split_at_end_of_block(i32 %x, ptr ; CHECK-NEXT: loop.0: ; CHECK-NEXT: WIDEN-CAST ir<%conv> = sext vp<[[PRED]]> to i32 ; CHECK-NEXT: EMIT vp<[[SPLICE:%.+]]> = first-order splice ir<%0>, ir<%conv> -; CHECK-NEXT: Successor(s): pred.store +; CHECK-NEXT: WIDEN ir<%rem> = srem vp<[[SPLICE]]>, ir<%x> +; CHECK-NEXT: Successor(s): pred.load ; CHECK-EMPTY: -; CHECK: pred.store: { -; CHECK-NEXT: pred.store.entry: +; CHECK: pred.load: { +; CHECK-NEXT: pred.load.entry: ; CHECK-NEXT: BRANCH-ON-MASK vp<[[MASK]]> -; CHECK-NEXT: Successor(s): pred.store.if, pred.store.continue +; CHECK-NEXT: Successor(s): pred.load.if, pred.load.continue ; CHECK-EMPTY: -; CHECK: pred.store.if: -; CHECK-NEXT: REPLICATE ir<%lv.2> = load ir<%gep> -; CHECK-NEXT: REPLICATE ir<%rem> = srem vp<[[SPLICE]]>, ir<%x> -; CHECK-NEXT: REPLICATE ir<%conv.lv.2> = sext ir<%lv.2> -; CHECK-NEXT: REPLICATE ir<%add.1> = add ir<%conv>, ir<%rem> -; CHECK-NEXT: REPLICATE ir<%gep.dst> = getelementptr ir<%dst>, vp<[[STEPS]]> -; CHECK-NEXT: REPLICATE ir<%add> = add ir<%add.1>, ir<%conv.lv.2> -; CHECK-NEXT: REPLICATE store ir<%add>, ir<%gep.dst> -; CHECK-NEXT: Successor(s): pred.store.continue +; CHECK: pred.load.if: +; CHECK-NEXT: REPLICATE ir<%lv.2> = load ir<%gep> (S->V) +; CHECK-NEXT: Successor(s): pred.load.continue ; CHECK-EMPTY: -; CHECK: pred.store.continue: +; CHECK: pred.load.continue: +; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<%9> = ir<%lv.2> ; CHECK-NEXT: No successors ; CHECK-NEXT: } -; CHECK-NEXT: Successor(s): loop.2 +; CHECK-NEXT: Successor(s): loop.1 +; CHECK-EMPTY: +; CHECK-NEXT: loop.1: +; CHECK-NEXT: WIDEN ir<%add.1> = add ir<%conv>, ir<%rem> +; CHECK-NEXT: WIDEN-CAST ir<%conv.lv.2> = sext vp<%9> to i32 +; CHECK-NEXT: WIDEN ir<%add> = add ir<%add.1>, ir<%conv.lv.2> +; CHECK-NEXT: Successor(s): pred.store +; CHECK-EMPTY: +; CHECK-NEXT: pred.store: { +; CHECK-NEXT: pred.store.entry: +; CHECK-NEXT: BRANCH-ON-MASK vp<[[MASK]]> +; CHECK-NEXT: Successor(s): pred.store.if, pred.store.continue +; CHECK-EMPTY: +; CHECK-NEXT: pred.store.if: +; CHECK-NEXT: REPLICATE ir<%gep.dst> = getelementptr ir<%dst>, vp<[[STEPS]]> +; CHECK-NEXT: REPLICATE store ir<%add>, ir<%gep.dst> +; CHECK-NEXT: Successor(s): pred.store.continue +; CHECK-EMPTY: +; CHECK-NEXT: pred.store.continue: +; CHECK-NEXT: No successors +; CHECK-NEXT: } +; CHECK-NEXT: Successor(s): loop.2 ; CHECK-EMPTY: ; CHECK: loop.2: ; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]> @@ -368,6 +385,7 @@ define void @sink_replicate_region_after_replicate_region(ptr %ptr, ptr noalias ; CHECK-NEXT: ir<%iv> = WIDEN-INDUCTION ir<0>, ir<1>, vp<[[VF]]> ; CHECK-NEXT: EMIT vp<[[MASK:%.+]]> = icmp ule ir<%iv>, vp<[[BTC]]> ; CHECK-NEXT: EMIT vp<[[SPLICE:%.+]]> = first-order splice ir<%recur>, ir<%recur.next> +; CHECK-NEXT: WIDEN ir<%rem> = srem vp<[[SPLICE]]>, ir<%x> ; CHECK-NEXT: Successor(s): pred.store ; CHECK-EMPTY: ; CHECK-NEXT: pred.store: { @@ -377,7 +395,6 @@ define void @sink_replicate_region_after_replicate_region(ptr %ptr, ptr noalias ; CHECK-EMPTY: ; CHECK-NEXT: pred.store.if: ; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1> -; CHECK-NEXT: REPLICATE ir<%rem> = srem vp<[[SPLICE]]>, ir<%x> ; CHECK-NEXT: REPLICATE ir<%rem.div> = sdiv ir<20>, ir<%rem> ; CHECK-NEXT: REPLICATE ir<%gep> = getelementptr ir<%ptr>, vp<[[STEPS]]> ; CHECK-NEXT: REPLICATE store ir<%rem.div>, ir<%gep> @@ -448,6 +465,7 @@ define void @need_new_block_after_sinking_pr56146(i32 %x, ptr %src, ptr noalias ; CHECK-NEXT: EMIT vp<[[CMP:%.+]]> = icmp ule vp<[[WIDE_IV]]>, vp<[[BTC]]> ; CHECK-NEXT: CLONE ir<[[L]]> = load ir<%src> ; CHECK-NEXT: EMIT vp<[[SPLICE:%.+]]> = first-order splice ir<%.pn>, ir<[[L]]> +; CHECK-NEXT: WIDEN ir<%val> = sdiv vp<[[SPLICE]]>, ir<%x> ; CHECK-NEXT: Successor(s): pred.store ; CHECK-EMPTY: ; CHECK-NEXT: pred.store: { @@ -458,7 +476,6 @@ define void @need_new_block_after_sinking_pr56146(i32 %x, ptr %src, ptr noalias ; CHECK-NEXT: pred.store.if: ; CHECK-NEXT: vp<[[SCALAR_STEPS:%.+]]> = SCALAR-STEPS vp<[[DERIVED_IV]]>, ir<1>, vp<[[VF]]> ; CHECK-NEXT: REPLICATE ir<%gep.dst> = getelementptr ir<%dst>, vp<[[SCALAR_STEPS]]> -; CHECK-NEXT: REPLICATE ir<%val> = sdiv vp<[[SPLICE]]>, ir<%x> ; CHECK-NEXT: REPLICATE store ir<%val>, ir<%gep.dst> ; CHECK-NEXT: Successor(s): pred.store.continue ; CHECK-EMPTY: diff --git a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll index 994e9c1ce64fa..ab85cdc27d43b 100644 --- a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll @@ -1,6 +1,6 @@ ; REQUIRES: asserts -; RUN: opt -passes=loop-vectorize -force-vector-interleave=1 -force-vector-width=2 -debug -disable-output %s 2>&1 | FileCheck %s +; RUN: opt -passes=loop-vectorize -force-vector-interleave=1 -force-vector-width=2 -force-widen-divrem-via-safe-divisor=0 -debug -disable-output %s 2>&1 | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" @@ -40,10 +40,8 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3 ; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1> ; CHECK-NEXT: REPLICATE ir<%gep.b> = getelementptr inbounds ir<@b>, ir<0>, vp<[[STEPS]]> ; CHECK-NEXT: REPLICATE ir<%lv.b> = load ir<%gep.b> -; CHECK-NEXT: REPLICATE ir<%add> = add ir<%lv.b>, ir<10> ; CHECK-NEXT: REPLICATE ir<%gep.a> = getelementptr inbounds ir<@a>, ir<0>, vp<[[STEPS]] -; CHECK-NEXT: REPLICATE ir<%mul> = mul ir<2>, ir<%add> -; CHECK-NEXT: REPLICATE store ir<%mul>, ir<%gep.a> +; CHECK-NEXT: REPLICATE store ir<%lv.b>, ir<%gep.a> ; CHECK-NEXT: Successor(s): pred.store.continue ; CHECK: pred.store.continue: @@ -64,10 +62,8 @@ loop: %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ] %gep.b = getelementptr inbounds [2048 x i32], ptr @b, i32 0, i32 %iv %lv.b = load i32, ptr %gep.b, align 4 - %add = add i32 %lv.b, 10 - %mul = mul i32 2, %add %gep.a = getelementptr inbounds [2048 x i32], ptr @a, i32 0, i32 %iv - store i32 %mul, ptr %gep.a, align 4 + store i32 %lv.b, ptr %gep.a, align 4 %iv.next = add i32 %iv, 1 %large = icmp sge i32 %iv, 8 %exitcond = icmp eq i32 %iv, %k