diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp index b08399b381f34..0f56b43089997 100644 --- a/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/llvm/lib/Analysis/ScalarEvolution.cpp @@ -15850,12 +15850,17 @@ void ScalarEvolution::LoopGuards::collectFromBlock( To = SE.getUMaxExpr(FromRewritten, RHS); if (auto *UMin = dyn_cast(FromRewritten)) EnqueueOperands(UMin); + if (RHS->isOne()) + ExprsToRewrite.push_back(From); break; case CmpInst::ICMP_SGT: case CmpInst::ICMP_SGE: To = SE.getSMaxExpr(FromRewritten, RHS); - if (auto *SMin = dyn_cast(FromRewritten)) + if (auto *SMin = dyn_cast(FromRewritten)) { EnqueueOperands(SMin); + } + if (RHS->isOne()) + ExprsToRewrite.push_back(From); break; case CmpInst::ICMP_EQ: if (isa(RHS)) @@ -15986,7 +15991,22 @@ void ScalarEvolution::LoopGuards::collectFromBlock( for (const SCEV *Expr : ExprsToRewrite) { const SCEV *RewriteTo = Guards.RewriteMap[Expr]; Guards.RewriteMap.erase(Expr); - Guards.RewriteMap.insert({Expr, Guards.rewrite(RewriteTo)}); + const SCEV *Rewritten = Guards.rewrite(RewriteTo); + + // Try to strengthen divisibility of SMax/UMax expressions coming from >= + // 1 conditions. + auto *Max = dyn_cast(Rewritten); + if (Max && isa(Rewritten) && + Rewritten->getType()->isIntegerTy() && Max->getOperand(0)->isOne()) { + APInt CommonMultiple = SE.getConstantMultiple(Max->getOperand(1)); + for (const SCEV *Op : drop_begin(Max->operands(), 2)) { + CommonMultiple = APIntOps::GreatestCommonDivisor( + CommonMultiple, SE.getConstantMultiple(Op)); + } + SmallVector Ops = {SE.getConstant(CommonMultiple), Max}; + Rewritten = SE.getMinMaxExpr(Max->getSCEVType(), Ops); + } + Guards.RewriteMap.insert({Expr, Rewritten}); } } } diff --git a/llvm/test/Analysis/ScalarEvolution/trip-count-minmax.ll b/llvm/test/Analysis/ScalarEvolution/trip-count-minmax.ll index 8d091a00ed4b9..d38010403dad7 100644 --- a/llvm/test/Analysis/ScalarEvolution/trip-count-minmax.ll +++ b/llvm/test/Analysis/ScalarEvolution/trip-count-minmax.ll @@ -61,7 +61,7 @@ define void @umin(i32 noundef %a, i32 noundef %b) { ; CHECK-NEXT: Loop %for.body: backedge-taken count is (-1 + ((2 * %a) umin (4 * %b))) ; CHECK-NEXT: Loop %for.body: constant max backedge-taken count is i32 2147483646 ; CHECK-NEXT: Loop %for.body: symbolic max backedge-taken count is (-1 + ((2 * %a) umin (4 * %b))) -; CHECK-NEXT: Loop %for.body: Trip multiple is 1 +; CHECK-NEXT: Loop %for.body: Trip multiple is 2 ; ; void umin(unsigned a, unsigned b) { ; a *= 2; @@ -157,7 +157,7 @@ define void @smin(i32 noundef %a, i32 noundef %b) { ; CHECK-NEXT: Loop %for.body: backedge-taken count is (-1 + ((2 * %a) smin (4 * %b))) ; CHECK-NEXT: Loop %for.body: constant max backedge-taken count is i32 2147483646 ; CHECK-NEXT: Loop %for.body: symbolic max backedge-taken count is (-1 + ((2 * %a) smin (4 * %b))) -; CHECK-NEXT: Loop %for.body: Trip multiple is 1 +; CHECK-NEXT: Loop %for.body: Trip multiple is 2 ; ; void smin(signed a, signed b) { ; a *= 2; diff --git a/llvm/test/Transforms/LoopVectorize/single_early_exit.ll b/llvm/test/Transforms/LoopVectorize/single_early_exit.ll index 04f04a8a08fc2..fce947af47be3 100644 --- a/llvm/test/Transforms/LoopVectorize/single_early_exit.ll +++ b/llvm/test/Transforms/LoopVectorize/single_early_exit.ll @@ -583,19 +583,50 @@ define i64 @loop_guards_needed_to_prove_deref_multiple(i32 %x, i1 %c, ptr derefe ; CHECK-NEXT: call void @llvm.assume(i1 [[PRE_2]]) ; CHECK-NEXT: [[N:%.*]] = add i32 [[SEL]], -1 ; CHECK-NEXT: [[N_EXT:%.*]] = zext i32 [[N]] to i64 +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[SEL]], -2 +; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 2 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 4 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 4 +; CHECK-NEXT: [[IV_NEXT:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]] ; CHECK-NEXT: br label [[LOOP_HEADER:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[LOOP_HEADER]] ] +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP3]], align 1 +; CHECK-NEXT: [[TMP4:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], zeroinitializer +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-NEXT: [[TMP5:%.*]] = freeze <4 x i1> [[TMP4]] +; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP5]]) +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[IV_NEXT]] +; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]] +; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_SPLIT:%.*]], label [[LOOP_HEADER]], !llvm.loop [[LOOP11:![0-9]+]] +; CHECK: middle.split: +; CHECK-NEXT: br i1 [[TMP6]], label [[VECTOR_EARLY_EXIT:%.*]], label [[LOOP_LATCH:%.*]] +; CHECK: middle.block: +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[IV_NEXT]] +; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT_LOOPEXIT:%.*]], label [[SCALAR_PH]] +; CHECK: vector.early.exit: +; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP4]], i1 true) +; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], [[TMP9]] +; CHECK-NEXT: br label [[EXIT_LOOPEXIT]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT]], [[LOOP_LATCH]] ], [ 0, [[PH]] ] +; CHECK-NEXT: br label [[LOOP_HEADER1:%.*]] ; CHECK: loop.header: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ], [ 0, [[PH]] ] -; CHECK-NEXT: [[GEP_SRC_I:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[IV]] +; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ [[IV_NEXT1:%.*]], [[LOOP_LATCH1:%.*]] ], [ [[IV]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[GEP_SRC_I:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[IV1]] ; CHECK-NEXT: [[L:%.*]] = load i8, ptr [[GEP_SRC_I]], align 1 ; CHECK-NEXT: [[C_1:%.*]] = icmp eq i8 [[L]], 0 -; CHECK-NEXT: br i1 [[C_1]], label [[EXIT_LOOPEXIT:%.*]], label [[LOOP_LATCH]] +; CHECK-NEXT: br i1 [[C_1]], label [[EXIT_LOOPEXIT]], label [[LOOP_LATCH1]] ; CHECK: loop.latch: -; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 -; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], [[N_EXT]] -; CHECK-NEXT: br i1 [[EC]], label [[EXIT_LOOPEXIT]], label [[LOOP_HEADER]] +; CHECK-NEXT: [[IV_NEXT1]] = add i64 [[IV1]], 1 +; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV1]], [[N_EXT]] +; CHECK-NEXT: br i1 [[EC]], label [[EXIT_LOOPEXIT]], label [[LOOP_HEADER1]], !llvm.loop [[LOOP12:![0-9]+]] ; CHECK: exit.loopexit: -; CHECK-NEXT: [[RES_PH:%.*]] = phi i64 [ [[IV]], [[LOOP_HEADER]] ], [ 0, [[LOOP_LATCH]] ] +; CHECK-NEXT: [[RES_PH:%.*]] = phi i64 [ [[IV1]], [[LOOP_HEADER1]] ], [ 0, [[LOOP_LATCH1]] ], [ 0, [[LOOP_LATCH]] ], [ [[TMP10]], [[VECTOR_EARLY_EXIT]] ] ; CHECK-NEXT: br label [[EXIT]] ; CHECK: exit: ; CHECK-NEXT: [[RES:%.*]] = phi i64 [ -1, [[ENTRY:%.*]] ], [ -2, [[THEN]] ], [ [[RES_PH]], [[EXIT_LOOPEXIT]] ] @@ -646,4 +677,6 @@ exit: ; CHECK: [[LOOP8]] = distinct !{[[LOOP8]], [[META2]], [[META1]]} ; CHECK: [[LOOP9]] = distinct !{[[LOOP9]], [[META1]], [[META2]]} ; CHECK: [[LOOP10]] = distinct !{[[LOOP10]], [[META2]], [[META1]]} +; CHECK: [[LOOP11]] = distinct !{[[LOOP11]], [[META1]], [[META2]]} +; CHECK: [[LOOP12]] = distinct !{[[LOOP12]], [[META2]], [[META1]]} ;.