-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[LLVM][SCEV] udiv (mul nuw a, vscale), (mul nuw b, vscale) -> udiv a, b #157836
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
381cc37
to
e11d52f
Compare
e11d52f
to
7284143
Compare
@llvm/pr-subscribers-llvm-transforms @llvm/pr-subscribers-llvm-analysis Author: Paul Walker (paulwalker-arm) Changeshttps://alive2.llvm.org/ce/z/btfzcw Patch is 27.47 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/157836.diff 4 Files Affected:
diff --git a/llvm/include/llvm/Analysis/ScalarEvolutionPatternMatch.h b/llvm/include/llvm/Analysis/ScalarEvolutionPatternMatch.h
index 7a45ae93b185b..164b46b54890b 100644
--- a/llvm/include/llvm/Analysis/ScalarEvolutionPatternMatch.h
+++ b/llvm/include/llvm/Analysis/ScalarEvolutionPatternMatch.h
@@ -184,6 +184,7 @@ m_scev_PtrToInt(const Op0_t &Op0) {
/// Match a binary SCEV.
template <typename SCEVTy, typename Op0_t, typename Op1_t,
+ SCEV::NoWrapFlags WrapFlags = SCEV::FlagAnyWrap,
bool Commutable = false>
struct SCEVBinaryExpr_match {
Op0_t Op0;
@@ -192,6 +193,10 @@ struct SCEVBinaryExpr_match {
SCEVBinaryExpr_match(Op0_t Op0, Op1_t Op1) : Op0(Op0), Op1(Op1) {}
bool match(const SCEV *S) const {
+ if (auto WrappingS = dyn_cast<SCEVNAryExpr>(S))
+ if (WrappingS->getNoWrapFlags(WrapFlags) != WrapFlags)
+ return false;
+
auto *E = dyn_cast<SCEVTy>(S);
return E && E->getNumOperands() == 2 &&
((Op0.match(E->getOperand(0)) && Op1.match(E->getOperand(1))) ||
@@ -201,10 +206,12 @@ struct SCEVBinaryExpr_match {
};
template <typename SCEVTy, typename Op0_t, typename Op1_t,
+ SCEV::NoWrapFlags WrapFlags = SCEV::FlagAnyWrap,
bool Commutable = false>
-inline SCEVBinaryExpr_match<SCEVTy, Op0_t, Op1_t, Commutable>
+inline SCEVBinaryExpr_match<SCEVTy, Op0_t, Op1_t, WrapFlags, Commutable>
m_scev_Binary(const Op0_t &Op0, const Op1_t &Op1) {
- return SCEVBinaryExpr_match<SCEVTy, Op0_t, Op1_t, Commutable>(Op0, Op1);
+ return SCEVBinaryExpr_match<SCEVTy, Op0_t, Op1_t, WrapFlags, Commutable>(Op0,
+ Op1);
}
template <typename Op0_t, typename Op1_t>
@@ -220,9 +227,17 @@ m_scev_Mul(const Op0_t &Op0, const Op1_t &Op1) {
}
template <typename Op0_t, typename Op1_t>
-inline SCEVBinaryExpr_match<SCEVMulExpr, Op0_t, Op1_t, true>
+inline SCEVBinaryExpr_match<SCEVMulExpr, Op0_t, Op1_t, SCEV::FlagAnyWrap, true>
m_scev_c_Mul(const Op0_t &Op0, const Op1_t &Op1) {
- return m_scev_Binary<SCEVMulExpr, Op0_t, Op1_t, true>(Op0, Op1);
+ return m_scev_Binary<SCEVMulExpr, Op0_t, Op1_t, SCEV::FlagAnyWrap, true>(Op0,
+ Op1);
+}
+
+template <typename Op0_t, typename Op1_t>
+inline SCEVBinaryExpr_match<SCEVMulExpr, Op0_t, Op1_t, SCEV::FlagNUW, true>
+m_scev_c_NUWMul(const Op0_t &Op0, const Op1_t &Op1) {
+ return m_scev_Binary<SCEVMulExpr, Op0_t, Op1_t, SCEV::FlagNUW, true>(Op0,
+ Op1);
}
template <typename Op0_t, typename Op1_t>
diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
index b08399b381f34..694fa479e9972 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -3598,6 +3598,12 @@ const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS,
}
}
+ // udiv (mul nuw a, vscale), (mul nuw b, vscale) --> udiv a, b
+ const SCEV *NewLHS, *NewRHS;
+ if (match(LHS, m_scev_c_NUWMul(m_SCEV(NewLHS), m_SCEVVScale())) &&
+ match(RHS, m_scev_c_NUWMul(m_SCEV(NewRHS), m_SCEVVScale())))
+ return getUDivExpr(NewLHS, NewRHS);
+
// The Insertion Point (IP) might be invalid by now (due to UniqueSCEVs
// changes). Make sure we get a new one.
IP = nullptr;
diff --git a/llvm/test/Analysis/ScalarEvolution/mul-udiv-folds.ll b/llvm/test/Analysis/ScalarEvolution/mul-udiv-folds.ll
index 1e21fbf08a92f..e1c62309142d0 100644
--- a/llvm/test/Analysis/ScalarEvolution/mul-udiv-folds.ll
+++ b/llvm/test/Analysis/ScalarEvolution/mul-udiv-folds.ll
@@ -188,3 +188,43 @@ loop:
exit:
ret void
}
+
+define noundef i64 @udiv_mul_common_vscale_factor(i64 %a, i64 %b) {
+; CHECK-LABEL: 'udiv_mul_common_vscale_factor'
+; CHECK-NEXT: Classifying expressions for: @udiv_mul_common_vscale_factor
+; CHECK-NEXT: %vs = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: --> vscale U: [1,0) S: [1,0)
+; CHECK-NEXT: %a.vs = mul i64 %a, %vs
+; CHECK-NEXT: --> (vscale * %a) U: full-set S: full-set
+; CHECK-NEXT: %b.vs = mul i64 %b, %vs
+; CHECK-NEXT: --> (vscale * %b) U: full-set S: full-set
+; CHECK-NEXT: %div = udiv i64 %a.vs, %b.vs
+; CHECK-NEXT: --> ((vscale * %a) /u (vscale * %b)) U: full-set S: full-set
+; CHECK-NEXT: Determining loop execution counts for: @udiv_mul_common_vscale_factor
+;
+ %vs = call i64 @llvm.vscale()
+ %a.vs = mul i64 %a, %vs
+ %b.vs = mul i64 %b, %vs
+ %div = udiv i64 %a.vs, %b.vs
+ ret i64 %div
+}
+
+define noundef i64 @udiv_mul_nuw_common_vscale_factor(i64 %a, i64 %b) {
+; CHECK-LABEL: 'udiv_mul_nuw_common_vscale_factor'
+; CHECK-NEXT: Classifying expressions for: @udiv_mul_nuw_common_vscale_factor
+; CHECK-NEXT: %vs = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: --> vscale U: [1,0) S: [1,0)
+; CHECK-NEXT: %a.vs = mul nuw i64 %a, %vs
+; CHECK-NEXT: --> (vscale * %a)<nuw> U: full-set S: full-set
+; CHECK-NEXT: %b.vs = mul nuw i64 %b, %vs
+; CHECK-NEXT: --> (vscale * %b)<nuw> U: full-set S: full-set
+; CHECK-NEXT: %div = udiv i64 %a.vs, %b.vs
+; CHECK-NEXT: --> (%a /u %b) U: full-set S: full-set
+; CHECK-NEXT: Determining loop execution counts for: @udiv_mul_nuw_common_vscale_factor
+;
+ %vs = call i64 @llvm.vscale()
+ %a.vs = mul nuw i64 %a, %vs
+ %b.vs = mul nuw i64 %b, %vs
+ %div = udiv i64 %a.vs, %b.vs
+ ret i64 %div
+}
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll
index 8d33ccbf38861..cff15299cefa7 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll
@@ -49,7 +49,7 @@ define void @main_vf_vscale_x_16(ptr %A) #0 {
; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
; CHECK: vec.epilog.iter.check:
; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 8
-; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
+; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF3:![0-9]+]]
; CHECK: vec.epilog.ph:
; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
@@ -59,7 +59,7 @@ define void @main_vf_vscale_x_16(ptr %A) #0 {
; CHECK-NEXT: store <8 x i8> splat (i8 1), ptr [[TMP9]], align 1
; CHECK-NEXT: [[INDEX_NEXT2]] = add nuw i64 [[INDEX1]], 8
; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT2]], 1024
-; CHECK-NEXT: br i1 [[TMP10]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP10]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK: vec.epilog.middle.block:
; CHECK-NEXT: br i1 true, label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]]
; CHECK: vec.epilog.scalar.ph:
@@ -97,7 +97,7 @@ define void @main_vf_vscale_x_16(ptr %A) #0 {
; CHECK-VF8-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
; CHECK-VF8: vec.epilog.iter.check:
; CHECK-VF8-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 8
-; CHECK-VF8-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
+; CHECK-VF8-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF3:![0-9]+]]
; CHECK-VF8: vec.epilog.ph:
; CHECK-VF8-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
; CHECK-VF8-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
@@ -107,7 +107,7 @@ define void @main_vf_vscale_x_16(ptr %A) #0 {
; CHECK-VF8-NEXT: store <8 x i8> splat (i8 1), ptr [[TMP9]], align 1
; CHECK-VF8-NEXT: [[INDEX_NEXT2]] = add nuw i64 [[INDEX1]], 8
; CHECK-VF8-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT2]], 1024
-; CHECK-VF8-NEXT: br i1 [[TMP10]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK-VF8-NEXT: br i1 [[TMP10]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK-VF8: vec.epilog.middle.block:
; CHECK-VF8-NEXT: br i1 true, label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]]
; CHECK-VF8: vec.epilog.scalar.ph:
@@ -150,7 +150,7 @@ define void @main_vf_vscale_x_2_no_epi_iteration(ptr %A) #0 vscale_range(8, 8) {
; CHECK-NEXT: store <vscale x 2 x i64> splat (i64 1), ptr [[TMP5]], align 1
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP1]]
; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH:%.*]]
@@ -182,13 +182,13 @@ define void @main_vf_vscale_x_2_no_epi_iteration(ptr %A) #0 vscale_range(8, 8) {
; CHECK-VF8-NEXT: store <vscale x 2 x i64> splat (i64 1), ptr [[TMP7]], align 1
; CHECK-VF8-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
; CHECK-VF8-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-VF8-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
+; CHECK-VF8-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; CHECK-VF8: middle.block:
; CHECK-VF8-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
; CHECK-VF8-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
; CHECK-VF8: vec.epilog.iter.check:
; CHECK-VF8-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 8
-; CHECK-VF8-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
+; CHECK-VF8-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF3]]
; CHECK-VF8: vec.epilog.ph:
; CHECK-VF8-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
; CHECK-VF8-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
@@ -198,7 +198,7 @@ define void @main_vf_vscale_x_2_no_epi_iteration(ptr %A) #0 vscale_range(8, 8) {
; CHECK-VF8-NEXT: store <8 x i64> splat (i64 1), ptr [[TMP9]], align 1
; CHECK-VF8-NEXT: [[INDEX_NEXT2]] = add nuw i64 [[INDEX1]], 8
; CHECK-VF8-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT2]], 1024
-; CHECK-VF8-NEXT: br i1 [[TMP10]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
+; CHECK-VF8-NEXT: br i1 [[TMP10]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
; CHECK-VF8: vec.epilog.middle.block:
; CHECK-VF8-NEXT: br i1 true, label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]]
; CHECK-VF8: vec.epilog.scalar.ph:
@@ -261,13 +261,13 @@ define void @main_vf_vscale_x_2(ptr %A, i64 %n) #0 vscale_range(8, 8) {
; CHECK-NEXT: store <vscale x 2 x i64> splat (i64 1), ptr [[TMP7]], align 1
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
; CHECK: vec.epilog.iter.check:
; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 8
-; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
+; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF3]]
; CHECK: vec.epilog.ph:
; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
; CHECK-NEXT: [[N_MOD_VF2:%.*]] = urem i64 [[N]], 8
@@ -279,7 +279,7 @@ define void @main_vf_vscale_x_2(ptr %A, i64 %n) #0 vscale_range(8, 8) {
; CHECK-NEXT: store <8 x i64> splat (i64 1), ptr [[TMP9]], align 1
; CHECK-NEXT: [[INDEX_NEXT5]] = add nuw i64 [[INDEX4]], 8
; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT5]], [[N_VEC3]]
-; CHECK-NEXT: br i1 [[TMP10]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP10]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
; CHECK: vec.epilog.middle.block:
; CHECK-NEXT: [[CMP_N6:%.*]] = icmp eq i64 [[N]], [[N_VEC3]]
; CHECK-NEXT: br i1 [[CMP_N6]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]]
@@ -313,13 +313,13 @@ define void @main_vf_vscale_x_2(ptr %A, i64 %n) #0 vscale_range(8, 8) {
; CHECK-VF8-NEXT: store <vscale x 2 x i64> splat (i64 1), ptr [[TMP7]], align 1
; CHECK-VF8-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
; CHECK-VF8-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-VF8-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
+; CHECK-VF8-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
; CHECK-VF8: middle.block:
; CHECK-VF8-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-VF8-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
; CHECK-VF8: vec.epilog.iter.check:
; CHECK-VF8-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 8
-; CHECK-VF8-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
+; CHECK-VF8-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF3]]
; CHECK-VF8: vec.epilog.ph:
; CHECK-VF8-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
; CHECK-VF8-NEXT: [[N_MOD_VF2:%.*]] = urem i64 [[N]], 8
@@ -331,7 +331,7 @@ define void @main_vf_vscale_x_2(ptr %A, i64 %n) #0 vscale_range(8, 8) {
; CHECK-VF8-NEXT: store <8 x i64> splat (i64 1), ptr [[TMP9]], align 1
; CHECK-VF8-NEXT: [[INDEX_NEXT5]] = add nuw i64 [[INDEX4]], 8
; CHECK-VF8-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT5]], [[N_VEC3]]
-; CHECK-VF8-NEXT: br i1 [[TMP10]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
+; CHECK-VF8-NEXT: br i1 [[TMP10]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
; CHECK-VF8: vec.epilog.middle.block:
; CHECK-VF8-NEXT: [[CMP_N6:%.*]] = icmp eq i64 [[N]], [[N_VEC3]]
; CHECK-VF8-NEXT: br i1 [[CMP_N6]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]]
@@ -382,14 +382,14 @@ define void @test_pr57912_pointer_induction(ptr %start) #0 {
; CHECK-NEXT: store <vscale x 16 x i8> zeroinitializer, ptr [[TMP6]], align 1
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 10000, [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
; CHECK: vec.epilog.iter.check:
; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[START]], i64 [[N_VEC]]
; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 8
-; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
+; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF3]]
; CHECK: vec.epilog.ph:
; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[START]], i64 10000
@@ -400,7 +400,7 @@ define void @test_pr57912_pointer_induction(ptr %start) #0 {
; CHECK-NEXT: store <8 x i8> zeroinitializer, ptr [[NEXT_GEP2]], align 1
; CHECK-NEXT: [[INDEX_NEXT3]] = add nuw i64 [[INDEX1]], 8
; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 10000
-; CHECK-NEXT: br i1 [[TMP9]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP9]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
; CHECK: vec.epilog.middle.block:
; CHECK-NEXT: br i1 true, label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]]
; CHECK: vec.epilog.scalar.ph:
@@ -433,14 +433,14 @@ define void @test_pr57912_pointer_induction(ptr %start) #0 {
; CHECK-VF8-NEXT: store <vscale x 16 x i8> zeroinitializer, ptr [[TMP6]], align 1
; CHECK-VF8-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
; CHECK-VF8-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-VF8-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
+; CHECK-VF8-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
; CHECK-VF8: middle.block:
; CHECK-VF8-NEXT: [[CMP_N:%.*]] = icmp eq i64 10000, [[N_VEC]]
; CHECK-VF8-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
; CHECK-VF8: vec.epilog.iter.check:
; CHECK-VF8-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[START]], i64 [[N_VEC]]
; CHECK-VF8-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 8
-; CHECK-VF8-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
+; CHECK-VF8-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF3]]
; CHECK-VF8: vec.epilog.ph:
; CHECK-VF8-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
; CHECK-VF8-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[START]], i64 10000
@@ -451,7 +451,7 @@ define void @test_pr57912_pointer_induction(ptr %start) #0 {
; CHECK-VF8-NEXT: store <8 x i8> zeroinitializer, ptr [[NEXT_GEP2]], align 1
; CHECK-VF8-NEXT: [[INDEX_NEXT3]] = add nuw i64 [[INDEX1]], 8
; CHECK-VF8-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 10000
-; CHECK-VF8-NEXT: br i1 [[TMP9]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
+; CHECK-VF8-NEXT: br i1 [[TMP9]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
; CHECK-VF8: vec.epilog.middle.block:
; CHECK-VF8-NEXT: br i1 true, label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]]
; CHECK-VF8: vec.epilog.scalar.ph:
@@ -514,13 +514,13 @@ define void @trip_count_vscale(ptr noalias %a, ptr noalias %b) vscale_range(1, 1
; CHECK-NEXT: store <vscale x 4 x float> [[TMP13]], ptr [[TMP11]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]...
[truncated]
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM. This is likely worth generalizing to the general case, but I'm ok with landing this first.
} | ||
|
||
; Loop with vscale-based trip count vscale x 1024. | ||
; TODO: No epilogue vectorizations should remain when choosing VF = vscale x 4. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Remove TODO?
} | ||
} | ||
|
||
// udiv (mul nuw a, vscale), (mul nuw b, vscale) --> udiv a, b |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
// TODO: Generalize to handle any common factors.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I plan to circle back, but am currently blocked by what I thought would be a trivial change to report vscale as being known positive that somehow causes loop-strength-reduce to generate worse code.
Sorry, GitHub is being buggy (again!), and I'm not able to see your changes, but from what I remember, this LGTM. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM, thanks
https://alive2.llvm.org/ce/z/btfzcw