Skip to content

Conversation

hassnaaHamdi
Copy link
Member

  • In sve-epilog-vscale-fixed.ll file, it tests the preference of fixed-width epilogue VF vs scalable when costs are equal. This NFC patch is changing the TC in the test case to be unknown to avoid folding the epilogue in future LV changes.

- In sve-epilog-vscale-fixed.ll file, it tests the preference of
  fixed-width epilogue VF vs scalable when costs are equal.
  This NFC patch is changing the TC in the test case to be unknown to
  avoid folding the epilogue in future LV changes.
@llvmbot
Copy link
Member

llvmbot commented Sep 8, 2025

@llvm/pr-subscribers-llvm-transforms

Author: Hassnaa Hamdi (hassnaaHamdi)

Changes
  • In sve-epilog-vscale-fixed.ll file, it tests the preference of fixed-width epilogue VF vs scalable when costs are equal. This NFC patch is changing the TC in the test case to be unknown to avoid folding the epilogue in future LV changes.

Full diff: https://github.com/llvm/llvm-project/pull/157512.diff

1 Files Affected:

  • (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vscale-fixed.ll (+28-24)
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vscale-fixed.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vscale-fixed.ll
index 761c0b62e98a3..5742b3ad45749 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vscale-fixed.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vscale-fixed.ll
@@ -24,20 +24,21 @@ target triple = "aarch64-linux-gnu"
 ; DEBUG-EPILOG-PREFER-SCALABLE: Create Skeleton for epilogue vectorized loop (first pass)
 ; DEBUG-EPILOG-PREFER-SCALABLE: Main Loop VF:vscale x 16, Main Loop UF:2, Epilogue Loop VF:vscale x 8, Epilogue Loop UF:1
 
-define void @main_vf_vscale_x_16(ptr %A) #0 {
+define void @main_vf_vscale_x_16(ptr %A, i64 %n) #0 {
 ; CHECK-LABEL: @main_vf_vscale_x_16(
 ; CHECK-NEXT:  iter.check:
-; CHECK-NEXT:    br i1 false, label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
+; CHECK-NEXT:    [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[N:%.*]], 8
+; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
 ; CHECK:       vector.main.loop.iter.check:
 ; CHECK-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT:    [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 5
-; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]]
+; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]]
 ; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK:       vector.ph:
 ; CHECK-NEXT:    [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT:    [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 32
-; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
-; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
+; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
+; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -51,26 +52,29 @@ define void @main_vf_vscale_x_16(ptr %A) #0 {
 ; CHECK-NEXT:    [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK:       middle.block:
-; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
+; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
 ; CHECK:       vec.epilog.iter.check:
-; CHECK-NEXT:    [[N_VEC_REMAINING:%.*]] = sub i64 1024, [[N_VEC]]
+; CHECK-NEXT:    [[N_VEC_REMAINING:%.*]] = sub i64 [[N]], [[N_VEC]]
 ; CHECK-NEXT:    [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], 8
-; CHECK-NEXT:    br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
+; CHECK-NEXT:    br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF3:![0-9]+]]
 ; CHECK:       vec.epilog.ph:
 ; CHECK-NEXT:    [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
+; CHECK-NEXT:    [[N_MOD_VF2:%.*]] = urem i64 [[N]], 8
+; CHECK-NEXT:    [[N_VEC3:%.*]] = sub i64 [[N]], [[N_MOD_VF2]]
 ; CHECK-NEXT:    br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
 ; CHECK:       vec.epilog.vector.body:
 ; CHECK-NEXT:    [[INDEX1:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT2:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[INDEX1]]
 ; CHECK-NEXT:    store <8 x i8> splat (i8 1), ptr [[TMP9]], align 1
 ; CHECK-NEXT:    [[INDEX_NEXT2]] = add nuw i64 [[INDEX1]], 8
-; CHECK-NEXT:    [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT2]], 1024
-; CHECK-NEXT:    br i1 [[TMP10]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK-NEXT:    [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT2]], [[N_VEC3]]
+; CHECK-NEXT:    br i1 [[TMP10]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
 ; CHECK:       vec.epilog.middle.block:
-; CHECK-NEXT:    br i1 true, label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]]
+; CHECK-NEXT:    [[CMP_N6:%.*]] = icmp eq i64 [[N]], [[N_VEC3]]
+; CHECK-NEXT:    br i1 [[CMP_N6]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]]
 ; CHECK:       vec.epilog.scalar.ph:
-; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ]
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC3]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ]
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
 ;
@@ -78,18 +82,18 @@ define void @main_vf_vscale_x_16(ptr %A) #0 {
 ; CHECK-EPILOG-PREFER-SCALABLE-NEXT:  iter.check:
 ; CHECK-EPILOG-PREFER-SCALABLE-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-EPILOG-PREFER-SCALABLE-NEXT:    [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 3
-; CHECK-EPILOG-PREFER-SCALABLE-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]]
+; CHECK-EPILOG-PREFER-SCALABLE-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], [[TMP1]]
 ; CHECK-EPILOG-PREFER-SCALABLE-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
 ; CHECK-EPILOG-PREFER-SCALABLE:       vector.main.loop.iter.check:
 ; CHECK-EPILOG-PREFER-SCALABLE-NEXT:    [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-EPILOG-PREFER-SCALABLE-NEXT:    [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 5
-; CHECK-EPILOG-PREFER-SCALABLE-NEXT:    [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 1024, [[TMP3]]
+; CHECK-EPILOG-PREFER-SCALABLE-NEXT:    [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[N]], [[TMP3]]
 ; CHECK-EPILOG-PREFER-SCALABLE-NEXT:    br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK-EPILOG-PREFER-SCALABLE:       vector.ph:
 ; CHECK-EPILOG-PREFER-SCALABLE-NEXT:    [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-EPILOG-PREFER-SCALABLE-NEXT:    [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 32
-; CHECK-EPILOG-PREFER-SCALABLE-NEXT:    [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP5]]
-; CHECK-EPILOG-PREFER-SCALABLE-NEXT:    [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
+; CHECK-EPILOG-PREFER-SCALABLE-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP5]]
+; CHECK-EPILOG-PREFER-SCALABLE-NEXT:    [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
 ; CHECK-EPILOG-PREFER-SCALABLE-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK-EPILOG-PREFER-SCALABLE:       vector.body:
 ; CHECK-EPILOG-PREFER-SCALABLE-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -103,20 +107,20 @@ define void @main_vf_vscale_x_16(ptr %A) #0 {
 ; CHECK-EPILOG-PREFER-SCALABLE-NEXT:    [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-EPILOG-PREFER-SCALABLE-NEXT:    br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK-EPILOG-PREFER-SCALABLE:       middle.block:
-; CHECK-EPILOG-PREFER-SCALABLE-NEXT:    [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
+; CHECK-EPILOG-PREFER-SCALABLE-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
 ; CHECK-EPILOG-PREFER-SCALABLE-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
 ; CHECK-EPILOG-PREFER-SCALABLE:       vec.epilog.iter.check:
-; CHECK-EPILOG-PREFER-SCALABLE-NEXT:    [[N_VEC_REMAINING:%.*]] = sub i64 1024, [[N_VEC]]
+; CHECK-EPILOG-PREFER-SCALABLE-NEXT:    [[N_VEC_REMAINING:%.*]] = sub i64 [[N]], [[N_VEC]]
 ; CHECK-EPILOG-PREFER-SCALABLE-NEXT:    [[TMP11:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-EPILOG-PREFER-SCALABLE-NEXT:    [[TMP12:%.*]] = shl nuw i64 [[TMP11]], 3
 ; CHECK-EPILOG-PREFER-SCALABLE-NEXT:    [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], [[TMP12]]
-; CHECK-EPILOG-PREFER-SCALABLE-NEXT:    br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
+; CHECK-EPILOG-PREFER-SCALABLE-NEXT:    br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF3:![0-9]+]]
 ; CHECK-EPILOG-PREFER-SCALABLE:       vec.epilog.ph:
 ; CHECK-EPILOG-PREFER-SCALABLE-NEXT:    [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
 ; CHECK-EPILOG-PREFER-SCALABLE-NEXT:    [[TMP13:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-EPILOG-PREFER-SCALABLE-NEXT:    [[TMP14:%.*]] = mul nuw i64 [[TMP13]], 8
-; CHECK-EPILOG-PREFER-SCALABLE-NEXT:    [[N_MOD_VF2:%.*]] = urem i64 1024, [[TMP14]]
-; CHECK-EPILOG-PREFER-SCALABLE-NEXT:    [[N_VEC3:%.*]] = sub i64 1024, [[N_MOD_VF2]]
+; CHECK-EPILOG-PREFER-SCALABLE-NEXT:    [[N_MOD_VF2:%.*]] = urem i64 [[N]], [[TMP14]]
+; CHECK-EPILOG-PREFER-SCALABLE-NEXT:    [[N_VEC3:%.*]] = sub i64 [[N]], [[N_MOD_VF2]]
 ; CHECK-EPILOG-PREFER-SCALABLE-NEXT:    br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
 ; CHECK-EPILOG-PREFER-SCALABLE:       vec.epilog.vector.body:
 ; CHECK-EPILOG-PREFER-SCALABLE-NEXT:    [[INDEX4:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT5:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
@@ -124,9 +128,9 @@ define void @main_vf_vscale_x_16(ptr %A) #0 {
 ; CHECK-EPILOG-PREFER-SCALABLE-NEXT:    store <vscale x 8 x i8> splat (i8 1), ptr [[TMP15]], align 1
 ; CHECK-EPILOG-PREFER-SCALABLE-NEXT:    [[INDEX_NEXT5]] = add nuw i64 [[INDEX4]], [[TMP14]]
 ; CHECK-EPILOG-PREFER-SCALABLE-NEXT:    [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT5]], [[N_VEC3]]
-; CHECK-EPILOG-PREFER-SCALABLE-NEXT:    br i1 [[TMP16]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK-EPILOG-PREFER-SCALABLE-NEXT:    br i1 [[TMP16]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
 ; CHECK-EPILOG-PREFER-SCALABLE:       vec.epilog.middle.block:
-; CHECK-EPILOG-PREFER-SCALABLE-NEXT:    [[CMP_N6:%.*]] = icmp eq i64 1024, [[N_VEC3]]
+; CHECK-EPILOG-PREFER-SCALABLE-NEXT:    [[CMP_N6:%.*]] = icmp eq i64 [[N]], [[N_VEC3]]
 ; CHECK-EPILOG-PREFER-SCALABLE-NEXT:    br i1 [[CMP_N6]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]]
 ; CHECK-EPILOG-PREFER-SCALABLE:       vec.epilog.scalar.ph:
 ; CHECK-EPILOG-PREFER-SCALABLE-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC3]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ]
@@ -141,7 +145,7 @@ for.body:
   %arrayidx = getelementptr inbounds i8, ptr %A, i64 %iv
   store i8 1, ptr %arrayidx, align 1
   %iv.next = add nuw nsw i64 %iv, 1
-  %exitcond = icmp ne i64 %iv.next, 1024
+  %exitcond = icmp ne i64 %iv.next, %n
   br i1 %exitcond, label %for.body, label %exit
 
 exit:

Copy link
Contributor

@fhahn fhahn left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

do we have other tests covering the same case with constant trip counts or should we keep the original test and add a new one with variable trip count?

@david-arm
Copy link
Contributor

do we have other tests covering the same case with constant trip counts or should we keep the original test and add a new one with variable trip count?

Yeah I think it probably does make sense to keep the old test and create a new variant of the same test with a variable trip count. With the old test we know that a trip count of 1024 is guaranteed to be a multiple of any VF we choose because vscale is a power of 2 and has a maximum value of 16.

@hassnaaHamdi Can you add a vscale_range(1,16) attribute to the functions as well?

Copy link
Contributor

@david-arm david-arm left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM! I realised now that @main_vf_vscale_x_16 already exists in sve-epilog-vect.ll for the default case where we prefer the fixed-width epilogue, so it seems fine to simply change this test to use a variable trip count.

Copy link
Contributor

@fhahn fhahn left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM! I realised now that @main_vf_vscale_x_16 already exists in sve-epilog-vect.ll for the default case where we prefer the fixed-width epilogue, so it seems fine to simply change this test to use a variable trip count.

Sounds good to me, LG!

@hassnaaHamdi hassnaaHamdi enabled auto-merge (squash) September 10, 2025 11:38
@hassnaaHamdi hassnaaHamdi merged commit 5739142 into llvm:main Sep 10, 2025
10 of 11 checks passed
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

4 participants