Skip to content

Commit

Permalink
[LV] Remove unnecessary getRuntimeVF call when computing vector TC.
Browse files Browse the repository at this point in the history
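Step already holds the runtime value of VF * UF, so there is no need to
compute it again with getRuntimeVF; recomputing it may require multiple
instructions for scalable VFs (an extra llvm.vscale call and a multiply).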
  • Loading branch information
fhahn committed Jun 12, 2024
1 parent 87374a8 commit c46a6e6
Show file tree
Hide file tree
Showing 25 changed files with 76 additions and 227 deletions.
5 changes: 2 additions & 3 deletions llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2759,9 +2759,8 @@ InnerLoopVectorizer::getOrCreateVectorTripCount(BasicBlock *InsertBlock) {
   if (Cost->foldTailByMasking()) {
     assert(isPowerOf2_32(VF.getKnownMinValue() * UF) &&
            "VF*UF must be a power of 2 when folding tail by masking");
-    Value *NumLanes = getRuntimeVF(Builder, Ty, VF * UF);
-    TC = Builder.CreateAdd(
-        TC, Builder.CreateSub(NumLanes, ConstantInt::get(Ty, 1)), "n.rnd.up");
+    TC = Builder.CreateAdd(TC, Builder.CreateSub(Step, ConstantInt::get(Ty, 1)),
+                           "n.rnd.up");
   }

// Now we need to generate the expression for the part of the loop that the
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,7 @@ define void @clamped_tc_8(ptr nocapture %dst, i32 %n, i64 %val) vscale_range(1,1
; CHECK: vector.ph:
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4
; CHECK-NEXT: [[TMP4:%.*]] = sub i64 [[TMP3]], 1
; CHECK-NEXT: [[TMP4:%.*]] = sub i64 [[TMP1]], 1
; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 8, [[TMP4]]
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
Expand Down Expand Up @@ -102,9 +100,7 @@ define void @clamped_tc_max_8(ptr nocapture %dst, i32 %n, i64 %val) vscale_range
; CHECK: vector.ph:
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4
; CHECK-NEXT: [[TMP4:%.*]] = sub i64 [[TMP3]], 1
; CHECK-NEXT: [[TMP4:%.*]] = sub i64 [[TMP1]], 1
; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[WIDE_TRIP_COUNT]], [[TMP4]]
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -781,9 +781,7 @@ define i32 @header_mask_and_invariant_compare(ptr %A, ptr %B, ptr %C, ptr %D, pt
; PRED: vector.ph:
; PRED-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
; PRED-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 4
; PRED-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
; PRED-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 4
; PRED-NEXT: [[TMP7:%.*]] = sub i64 [[TMP6]], 1
; PRED-NEXT: [[TMP7:%.*]] = sub i64 [[TMP4]], 1
; PRED-NEXT: [[N_RND_UP:%.*]] = add i64 [[TMP0]], [[TMP7]]
; PRED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP4]]
; PRED-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -157,9 +157,7 @@ define void @iv_casts(ptr %dst, ptr %src, i32 %x, i64 %N) #0 {
; PRED: vector.ph:
; PRED-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; PRED-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 8
; PRED-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
; PRED-NEXT: [[TMP7:%.*]] = mul i64 [[TMP6]], 8
; PRED-NEXT: [[TMP8:%.*]] = sub i64 [[TMP7]], 1
; PRED-NEXT: [[TMP8:%.*]] = sub i64 [[TMP5]], 1
; PRED-NEXT: [[N_RND_UP:%.*]] = add i64 [[TMP0]], [[TMP8]]
; PRED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]]
; PRED-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
Expand Down
48 changes: 12 additions & 36 deletions llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll
Original file line number Diff line number Diff line change
Expand Up @@ -55,9 +55,7 @@ define void @test_widen(ptr noalias %a, ptr readnone %b) #4 {
; TFCOMMON-NEXT: entry:
; TFCOMMON-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; TFCOMMON-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2
; TFCOMMON-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; TFCOMMON-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2
; TFCOMMON-NEXT: [[TMP4:%.*]] = sub i64 [[TMP3]], 1
; TFCOMMON-NEXT: [[TMP4:%.*]] = sub i64 [[TMP1]], 1
; TFCOMMON-NEXT: [[N_RND_UP:%.*]] = add i64 1025, [[TMP4]]
; TFCOMMON-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
; TFCOMMON-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
Expand Down Expand Up @@ -85,9 +83,7 @@ define void @test_widen(ptr noalias %a, ptr readnone %b) #4 {
; TFA_INTERLEAVE-NEXT: entry:
; TFA_INTERLEAVE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; TFA_INTERLEAVE-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4
; TFA_INTERLEAVE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; TFA_INTERLEAVE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4
; TFA_INTERLEAVE-NEXT: [[TMP4:%.*]] = sub i64 [[TMP3]], 1
; TFA_INTERLEAVE-NEXT: [[TMP4:%.*]] = sub i64 [[TMP1]], 1
; TFA_INTERLEAVE-NEXT: [[N_RND_UP:%.*]] = add i64 1025, [[TMP4]]
; TFA_INTERLEAVE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
; TFA_INTERLEAVE-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
Expand Down Expand Up @@ -203,9 +199,7 @@ define void @test_if_then(ptr noalias %a, ptr readnone %b) #4 {
; TFCOMMON-NEXT: entry:
; TFCOMMON-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; TFCOMMON-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2
; TFCOMMON-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; TFCOMMON-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2
; TFCOMMON-NEXT: [[TMP4:%.*]] = sub i64 [[TMP3]], 1
; TFCOMMON-NEXT: [[TMP4:%.*]] = sub i64 [[TMP1]], 1
; TFCOMMON-NEXT: [[N_RND_UP:%.*]] = add i64 1025, [[TMP4]]
; TFCOMMON-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
; TFCOMMON-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
Expand Down Expand Up @@ -238,9 +232,7 @@ define void @test_if_then(ptr noalias %a, ptr readnone %b) #4 {
; TFA_INTERLEAVE-NEXT: entry:
; TFA_INTERLEAVE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; TFA_INTERLEAVE-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4
; TFA_INTERLEAVE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; TFA_INTERLEAVE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4
; TFA_INTERLEAVE-NEXT: [[TMP4:%.*]] = sub i64 [[TMP3]], 1
; TFA_INTERLEAVE-NEXT: [[TMP4:%.*]] = sub i64 [[TMP1]], 1
; TFA_INTERLEAVE-NEXT: [[N_RND_UP:%.*]] = add i64 1025, [[TMP4]]
; TFA_INTERLEAVE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
; TFA_INTERLEAVE-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
Expand Down Expand Up @@ -381,9 +373,7 @@ define void @test_widen_if_then_else(ptr noalias %a, ptr readnone %b) #4 {
; TFCOMMON-NEXT: entry:
; TFCOMMON-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; TFCOMMON-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2
; TFCOMMON-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; TFCOMMON-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2
; TFCOMMON-NEXT: [[TMP4:%.*]] = sub i64 [[TMP3]], 1
; TFCOMMON-NEXT: [[TMP4:%.*]] = sub i64 [[TMP1]], 1
; TFCOMMON-NEXT: [[N_RND_UP:%.*]] = add i64 1025, [[TMP4]]
; TFCOMMON-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
; TFCOMMON-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
Expand Down Expand Up @@ -417,9 +407,7 @@ define void @test_widen_if_then_else(ptr noalias %a, ptr readnone %b) #4 {
; TFA_INTERLEAVE-NEXT: entry:
; TFA_INTERLEAVE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; TFA_INTERLEAVE-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4
; TFA_INTERLEAVE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; TFA_INTERLEAVE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4
; TFA_INTERLEAVE-NEXT: [[TMP4:%.*]] = sub i64 [[TMP3]], 1
; TFA_INTERLEAVE-NEXT: [[TMP4:%.*]] = sub i64 [[TMP1]], 1
; TFA_INTERLEAVE-NEXT: [[N_RND_UP:%.*]] = add i64 1025, [[TMP4]]
; TFA_INTERLEAVE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
; TFA_INTERLEAVE-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
Expand Down Expand Up @@ -688,9 +676,7 @@ define void @test_widen_optmask(ptr noalias %a, ptr readnone %b) #4 {
; TFALWAYS-NEXT: entry:
; TFALWAYS-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; TFALWAYS-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2
; TFALWAYS-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; TFALWAYS-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2
; TFALWAYS-NEXT: [[TMP4:%.*]] = sub i64 [[TMP3]], 1
; TFALWAYS-NEXT: [[TMP4:%.*]] = sub i64 [[TMP1]], 1
; TFALWAYS-NEXT: [[N_RND_UP:%.*]] = add i64 1025, [[TMP4]]
; TFALWAYS-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
; TFALWAYS-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
Expand Down Expand Up @@ -718,9 +704,7 @@ define void @test_widen_optmask(ptr noalias %a, ptr readnone %b) #4 {
; TFFALLBACK-NEXT: entry:
; TFFALLBACK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; TFFALLBACK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2
; TFFALLBACK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; TFFALLBACK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2
; TFFALLBACK-NEXT: [[TMP4:%.*]] = sub i64 [[TMP3]], 1
; TFFALLBACK-NEXT: [[TMP4:%.*]] = sub i64 [[TMP1]], 1
; TFFALLBACK-NEXT: [[N_RND_UP:%.*]] = add i64 1025, [[TMP4]]
; TFFALLBACK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
; TFFALLBACK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
Expand Down Expand Up @@ -748,9 +732,7 @@ define void @test_widen_optmask(ptr noalias %a, ptr readnone %b) #4 {
; TFA_INTERLEAVE-NEXT: entry:
; TFA_INTERLEAVE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; TFA_INTERLEAVE-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4
; TFA_INTERLEAVE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; TFA_INTERLEAVE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4
; TFA_INTERLEAVE-NEXT: [[TMP4:%.*]] = sub i64 [[TMP3]], 1
; TFA_INTERLEAVE-NEXT: [[TMP4:%.*]] = sub i64 [[TMP1]], 1
; TFA_INTERLEAVE-NEXT: [[N_RND_UP:%.*]] = add i64 1025, [[TMP4]]
; TFA_INTERLEAVE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
; TFA_INTERLEAVE-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
Expand Down Expand Up @@ -870,9 +852,7 @@ define double @test_widen_fmuladd_and_call(ptr noalias %a, ptr readnone %b, doub
; TFALWAYS-NEXT: entry:
; TFALWAYS-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; TFALWAYS-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2
; TFALWAYS-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; TFALWAYS-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2
; TFALWAYS-NEXT: [[TMP4:%.*]] = sub i64 [[TMP3]], 1
; TFALWAYS-NEXT: [[TMP4:%.*]] = sub i64 [[TMP1]], 1
; TFALWAYS-NEXT: [[N_RND_UP:%.*]] = add i64 1025, [[TMP4]]
; TFALWAYS-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
; TFALWAYS-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
Expand Down Expand Up @@ -907,9 +887,7 @@ define double @test_widen_fmuladd_and_call(ptr noalias %a, ptr readnone %b, doub
; TFFALLBACK-NEXT: entry:
; TFFALLBACK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; TFFALLBACK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2
; TFFALLBACK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; TFFALLBACK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2
; TFFALLBACK-NEXT: [[TMP4:%.*]] = sub i64 [[TMP3]], 1
; TFFALLBACK-NEXT: [[TMP4:%.*]] = sub i64 [[TMP1]], 1
; TFFALLBACK-NEXT: [[N_RND_UP:%.*]] = add i64 1025, [[TMP4]]
; TFFALLBACK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
; TFFALLBACK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
Expand Down Expand Up @@ -944,9 +922,7 @@ define double @test_widen_fmuladd_and_call(ptr noalias %a, ptr readnone %b, doub
; TFA_INTERLEAVE-NEXT: entry:
; TFA_INTERLEAVE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; TFA_INTERLEAVE-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4
; TFA_INTERLEAVE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; TFA_INTERLEAVE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4
; TFA_INTERLEAVE-NEXT: [[TMP4:%.*]] = sub i64 [[TMP3]], 1
; TFA_INTERLEAVE-NEXT: [[TMP4:%.*]] = sub i64 [[TMP1]], 1
; TFA_INTERLEAVE-NEXT: [[N_RND_UP:%.*]] = add i64 1025, [[TMP4]]
; TFA_INTERLEAVE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
; TFA_INTERLEAVE-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -159,9 +159,7 @@ define i32 @chained_recurrences(i32 %x, i64 %y, ptr %src.1, i32 %z, ptr %src.2)
; PRED: vector.ph:
; PRED-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
; PRED-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 4
; PRED-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
; PRED-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 4
; PRED-NEXT: [[TMP5:%.*]] = sub i64 [[TMP4]], 1
; PRED-NEXT: [[TMP5:%.*]] = sub i64 [[TMP2]], 1
; PRED-NEXT: [[N_RND_UP:%.*]] = add i64 [[TMP0]], [[TMP5]]
; PRED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP2]]
; PRED-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
Expand Down Expand Up @@ -391,9 +389,7 @@ define i16 @reduce_udiv(ptr %src, i16 %x, i64 %N) #0 {
; PRED: vector.ph:
; PRED-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
; PRED-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 8
; PRED-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
; PRED-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 8
; PRED-NEXT: [[TMP5:%.*]] = sub i64 [[TMP4]], 1
; PRED-NEXT: [[TMP5:%.*]] = sub i64 [[TMP2]], 1
; PRED-NEXT: [[N_RND_UP:%.*]] = add i64 [[TMP0]], [[TMP5]]
; PRED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP2]]
; PRED-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
Expand Down
28 changes: 7 additions & 21 deletions llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll
Original file line number Diff line number Diff line change
Expand Up @@ -130,9 +130,7 @@ define float @fadd_strict(ptr noalias nocapture readonly %a, i64 %n) #0 {
; CHECK-ORDERED-TF: vector.ph:
; CHECK-ORDERED-TF-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-ORDERED-TF-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 8
; CHECK-ORDERED-TF-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-ORDERED-TF-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 8
; CHECK-ORDERED-TF-NEXT: [[TMP4:%.*]] = sub i64 [[TMP3]], 1
; CHECK-ORDERED-TF-NEXT: [[TMP4:%.*]] = sub i64 [[TMP1]], 1
; CHECK-ORDERED-TF-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP4]]
; CHECK-ORDERED-TF-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
; CHECK-ORDERED-TF-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
Expand Down Expand Up @@ -390,9 +388,7 @@ define float @fadd_strict_unroll(ptr noalias nocapture readonly %a, i64 %n) #0 {
; CHECK-ORDERED-TF: vector.ph:
; CHECK-ORDERED-TF-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-ORDERED-TF-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 32
; CHECK-ORDERED-TF-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-ORDERED-TF-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 32
; CHECK-ORDERED-TF-NEXT: [[TMP4:%.*]] = sub i64 [[TMP3]], 1
; CHECK-ORDERED-TF-NEXT: [[TMP4:%.*]] = sub i64 [[TMP1]], 1
; CHECK-ORDERED-TF-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP4]]
; CHECK-ORDERED-TF-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
; CHECK-ORDERED-TF-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
Expand Down Expand Up @@ -705,9 +701,7 @@ define void @fadd_strict_interleave(ptr noalias nocapture readonly %a, ptr noali
; CHECK-ORDERED-TF: vector.ph:
; CHECK-ORDERED-TF-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-ORDERED-TF-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 4
; CHECK-ORDERED-TF-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-ORDERED-TF-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 4
; CHECK-ORDERED-TF-NEXT: [[TMP7:%.*]] = sub i64 [[TMP6]], 1
; CHECK-ORDERED-TF-NEXT: [[TMP7:%.*]] = sub i64 [[TMP4]], 1
; CHECK-ORDERED-TF-NEXT: [[N_RND_UP:%.*]] = add i64 [[TMP2]], [[TMP7]]
; CHECK-ORDERED-TF-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP4]]
; CHECK-ORDERED-TF-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
Expand Down Expand Up @@ -967,9 +961,7 @@ define float @fadd_of_sum(ptr noalias nocapture readonly %a, ptr noalias nocaptu
; CHECK-ORDERED-TF: vector.ph:
; CHECK-ORDERED-TF-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-ORDERED-TF-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 4
; CHECK-ORDERED-TF-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-ORDERED-TF-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 4
; CHECK-ORDERED-TF-NEXT: [[TMP5:%.*]] = sub i64 [[TMP4]], 1
; CHECK-ORDERED-TF-NEXT: [[TMP5:%.*]] = sub i64 [[TMP2]], 1
; CHECK-ORDERED-TF-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP5]]
; CHECK-ORDERED-TF-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP2]]
; CHECK-ORDERED-TF-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
Expand Down Expand Up @@ -1208,9 +1200,7 @@ define float @fadd_conditional(ptr noalias nocapture readonly %a, ptr noalias no
; CHECK-ORDERED-TF: vector.ph:
; CHECK-ORDERED-TF-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-ORDERED-TF-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4
; CHECK-ORDERED-TF-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-ORDERED-TF-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4
; CHECK-ORDERED-TF-NEXT: [[TMP4:%.*]] = sub i64 [[TMP3]], 1
; CHECK-ORDERED-TF-NEXT: [[TMP4:%.*]] = sub i64 [[TMP1]], 1
; CHECK-ORDERED-TF-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP4]]
; CHECK-ORDERED-TF-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
; CHECK-ORDERED-TF-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
Expand Down Expand Up @@ -1683,9 +1673,7 @@ define float @fmuladd_strict(ptr %a, ptr %b, i64 %n) #0 {
; CHECK-ORDERED-TF: vector.ph:
; CHECK-ORDERED-TF-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-ORDERED-TF-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 32
; CHECK-ORDERED-TF-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-ORDERED-TF-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 32
; CHECK-ORDERED-TF-NEXT: [[TMP4:%.*]] = sub i64 [[TMP3]], 1
; CHECK-ORDERED-TF-NEXT: [[TMP4:%.*]] = sub i64 [[TMP1]], 1
; CHECK-ORDERED-TF-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP4]]
; CHECK-ORDERED-TF-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
; CHECK-ORDERED-TF-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
Expand Down Expand Up @@ -2079,9 +2067,7 @@ define float @fmuladd_strict_fmf(ptr %a, ptr %b, i64 %n) #0 {
; CHECK-ORDERED-TF: vector.ph:
; CHECK-ORDERED-TF-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-ORDERED-TF-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 32
; CHECK-ORDERED-TF-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-ORDERED-TF-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 32
; CHECK-ORDERED-TF-NEXT: [[TMP4:%.*]] = sub i64 [[TMP3]], 1
; CHECK-ORDERED-TF-NEXT: [[TMP4:%.*]] = sub i64 [[TMP1]], 1
; CHECK-ORDERED-TF-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP4]]
; CHECK-ORDERED-TF-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
; CHECK-ORDERED-TF-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -85,9 +85,7 @@ define void @cost_store_i8(ptr %dst) #0 {
; PRED: vector.ph:
; PRED-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; PRED-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 16
; PRED-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; PRED-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 16
; PRED-NEXT: [[TMP4:%.*]] = sub i64 [[TMP3]], 1
; PRED-NEXT: [[TMP4:%.*]] = sub i64 [[TMP1]], 1
; PRED-NEXT: [[N_RND_UP:%.*]] = add i64 101, [[TMP4]]
; PRED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
; PRED-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,6 @@ define void @trip7_i64(ptr noalias nocapture noundef %dst, ptr noalias nocapture
; CHECK-LABEL: @trip7_i64(
; CHECK: = call i64 @llvm.vscale.i64()
; CHECK-NEXT: = mul i64
; CHECK: = call i64 @llvm.vscale.i64()
; CHECK-NEXT: = mul i64
; CHECK: [[VSCALE:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[VF:%.*]] = mul i64 [[VSCALE]], 2
; CHECK: vector.body:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,9 +47,7 @@ define void @simple_memset(i32 %val, ptr %ptr, i64 %n) #0 {
; CHECK: vector.ph:
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4
; CHECK-NEXT: [[TMP4:%.*]] = sub i64 [[TMP3]], 1
; CHECK-NEXT: [[TMP4:%.*]] = sub i64 [[TMP1]], 1
; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[UMAX]], [[TMP4]]
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
Expand Down
Loading

0 comments on commit c46a6e6

Please sign in to comment.