-
Notifications
You must be signed in to change notification settings - Fork 15.1k
[LV] Add coverage for fixing-up scalar resume values #160492
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
Increase coverage of the routine fixScalarResumeValuesFromBypass in the case where the original scalar resume value is zero. Co-authored-by: Florian Hahn <flo@fhahn.com>
@llvm/pr-subscribers-llvm-transforms Author: Ramkumar Ramachandra (artagnon) ChangesIncrease coverage of the routine fixScalarResumeValuesFromBypass in the case where the original scalar resume value is zero. Full diff: https://github.com/llvm/llvm-project/pull/160492.diff 1 Files Affected:
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/epilogue-vectorization-fix-scalar-resume-values.ll b/llvm/test/Transforms/LoopVectorize/AArch64/epilogue-vectorization-fix-scalar-resume-values.ll
new file mode 100644
index 0000000000000..f5e3b55c42a72
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/epilogue-vectorization-fix-scalar-resume-values.ll
@@ -0,0 +1,76 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 6
+; RUN: opt -passes=loop-vectorize -S %s | FileCheck %s
+
+; This tests exercises the out-of-band fixing-up of scalar resume values.
+
+target triple = "aarch64"
+
+define void @epilogue_vectorization_fix_scalar_resume_values(ptr %dst, i64 %n) {
+; CHECK-LABEL: define void @epilogue_vectorization_fix_scalar_resume_values(
+; CHECK-SAME: ptr [[DST:%.*]], i64 [[N:%.*]]) {
+; CHECK-NEXT: [[ITER_CHECK:.*]]:
+; CHECK-NEXT: [[REM:%.*]] = urem i64 [[N]], 3
+; CHECK-NEXT: br i1 true, label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]]
+; CHECK: [[VECTOR_MAIN_LOOP_ITER_CHECK]]:
+; CHECK-NEXT: br i1 true, label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[REM]], 32
+; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[REM]], [[N_MOD_VF]]
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[DST]], i64 [[INDEX]]
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 16
+; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr [[TMP0]], align 1
+; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr [[TMP1]], align 1
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32
+; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP2]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[REM]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]]
+; CHECK: [[VEC_EPILOG_ITER_CHECK]]:
+; CHECK-NEXT: [[N_VEC_REMAINING:%.*]] = sub i64 [[REM]], [[N_VEC]]
+; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], 8
+; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF3:![0-9]+]]
+; CHECK: [[VEC_EPILOG_PH]]:
+; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
+; CHECK-NEXT: br label %[[VEC_EPILOG_VECTOR_BODY:.*]]
+; CHECK: [[VEC_EPILOG_VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], %[[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT2:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[DST]], i64 [[INDEX1]]
+; CHECK-NEXT: store <8 x i8> zeroinitializer, ptr [[TMP3]], align 1
+; CHECK-NEXT: [[INDEX_NEXT2]] = add nuw i64 [[INDEX1]], 8
+; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT2]], 0
+; CHECK-NEXT: br i1 [[TMP4]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK: [[VEC_EPILOG_MIDDLE_BLOCK]]:
+; CHECK-NEXT: [[CMP_N3:%.*]] = icmp eq i64 [[REM]], 0
+; CHECK-NEXT: br i1 [[CMP_N3]], label %[[EXIT]], label %[[VEC_EPILOG_SCALAR_PH]]
+; CHECK: [[VEC_EPILOG_SCALAR_PH]]:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ]
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[GEP_DST_IV:%.*]] = getelementptr i8, ptr [[DST]], i64 [[IV]]
+; CHECK-NEXT: store i8 0, ptr [[GEP_DST_IV]], align 1
+; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
+; CHECK-NEXT: [[EXIT_COND:%.*]] = icmp eq i64 [[IV_NEXT]], [[REM]]
+; CHECK-NEXT: br i1 [[EXIT_COND]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP5:![0-9]+]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: ret void
+;
+entry:
+ %rem = urem i64 %n, 3
+ br label %loop
+
+loop:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+ %gep.dst.iv = getelementptr i8, ptr %dst, i64 %iv
+ store i8 0, ptr %gep.dst.iv, align 1
+ %iv.next = add i64 %iv, 1
+ %exit.cond = icmp eq i64 %iv.next, %rem
+ br i1 %exit.cond, label %exit, label %loop
+
+exit:
+ ret void
+}
|
llvm/test/Transforms/LoopVectorize/AArch64/epilogue-vectorization-fix-scalar-resume-values.ll
Show resolved
Hide resolved
llvm/test/Transforms/LoopVectorize/AArch64/epilogue-vectorization-fix-scalar-resume-values.ll
Show resolved
Hide resolved
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM, thanks
llvm/test/Transforms/LoopVectorize/AArch64/epilogue-vectorization-fix-scalar-resume-values.ll
Show resolved
Hide resolved
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/137/builds/26164 Here is the relevant piece of the build log for the reference
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/185/builds/26007 Here is the relevant piece of the build log for the reference
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/175/builds/26009 Here is the relevant piece of the build log for the reference
|
Ugh, my bad yet again! LV is moving so fast! |
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/190/builds/28178 Here is the relevant piece of the build log for the reference
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/33/builds/23692 Here is the relevant piece of the build log for the reference
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/174/builds/25051 Here is the relevant piece of the build log for the reference
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/11/builds/24723 Here is the relevant piece of the build log for the reference
|
* main: (502 commits) GlobalISel: Adjust insert point when expanding G_[SU]DIVREM (llvm#160683) [LV] Add coverage for fixing-up scalar resume values (llvm#160492) AMDGPU: Convert wave_any test to use update_mc_test_checks [LV] Add partial reduction tests multiplying extend with constants. Revert "[MLIR] Implement remark emitting policies in MLIR" (llvm#160681) [NFC][InstSimplify] Refactor fminmax-folds.ll test (llvm#160504) [LoongArch] Pre-commit tests for [x]vldi instructions with special constant splats (llvm#159228) [BOLT] Fix dwarf5-dwoid-no-dwoname.s (llvm#160676) [lldb][test] Refactor and expand TestMemoryRegionDirtyPages.py (llvm#156035) [gn build] Port 833d5f0 AMDGPU: Ensure both wavesize features are not set (llvm#159234) [LoopInterchange] Bail out when finding a dependency with all `*` elements (llvm#149049) [libc++] Avoid constructing additional objects when using map::at (llvm#157866) [lldb][test] Make hex prefix optional in DWARF union types test [X86] Add missing prefixes to trunc-sat tests (llvm#160662) [AMDGPU] Fix vector legalization for bf16 valu ops (llvm#158439) [LoongArch][NFC] Pre-commit tests for `[x]vadda.{b/h/w/d}` [mlir][tosa] Relax constraint on matmul verifier requiring equal operand types (llvm#155799) [clang][Sema] Accept gnu format attributes (llvm#160255) [LoongArch][NFC] Add tests for element extraction from binary add operation (llvm#159725) ...
Increase coverage of the routine fixScalarResumeValuesFromBypass in the case where the original scalar resume value is zero.