-
Notifications
You must be signed in to change notification settings - Fork 15k
[VPlan] Set flags when constructing zexts using VPWidenCastRecipe #164198
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
createWidenCast doesn't set the flag type, so when we simplify trunc (zext nneg x) -> zext x we would hit an assertion in CSE that the flag types don't match with other VPWidenCastRecipes that weren't simplified. This fixes it the same way trunc flags are handled too. As an aside I think it should be correct to preserve the nneg flag in this case since the input operand is still non-negative after the transform. But that's left to another PR.
|
@llvm/pr-subscribers-llvm-transforms @llvm/pr-subscribers-vectorizers Author: Luke Lau (lukel97) ChangescreateWidenCast doesn't set the flag type, so when we simplify trunc (zext nneg x) -> zext x we would hit an assertion in CSE that the flag types don't match with other VPWidenCastRecipes that weren't simplified. This fixes it the same way trunc flags are handled too. As an aside I think it should be correct to preserve the nneg flag in this case since the input operand is still non-negative after the transform. But that's left to another PR. Full diff: https://github.com/llvm/llvm-project/pull/164198.diff 2 Files Affected:
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
index 7651ba16b35cf..3fed003282f2b 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
@@ -325,6 +325,8 @@ class VPBuilder {
VPIRFlags Flags;
if (Opcode == Instruction::Trunc)
Flags = VPIRFlags::TruncFlagsTy(false, false);
+ else if (Opcode == Instruction::ZExt)
+ Flags = VPIRFlags::NonNegFlagsTy(false);
return tryInsertInstruction(
new VPWidenCastRecipe(Opcode, Op, ResultTy, Flags));
}
diff --git a/llvm/test/Transforms/LoopVectorize/cse-casts.ll b/llvm/test/Transforms/LoopVectorize/cse-casts.ll
index e923560bb77e8..b8ce35dc704d2 100644
--- a/llvm/test/Transforms/LoopVectorize/cse-casts.ll
+++ b/llvm/test/Transforms/LoopVectorize/cse-casts.ll
@@ -319,8 +319,9 @@ define void @preserve_flags_narrowing_extends_and_truncs(ptr noalias %A, ptr noa
; CHECK: [[PRED_STORE_CONTINUE60]]:
; CHECK-NEXT: br label %[[MIDDLE_BLOCK:.*]]
; CHECK: [[MIDDLE_BLOCK]]:
-; CHECK-NEXT: br [[EXIT:label %.*]]
-; CHECK: [[SCALAR_PH:.*:]]
+; CHECK-NEXT: br label %[[EXIT:.*]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: ret void
;
entry:
br label %loop
@@ -349,3 +350,61 @@ loop:
exit:
ret void
}
+
+define void @simplified_cast_preserves_irflag_type(ptr %p, ptr %q, ptr %r) {
+; CHECK-LABEL: define void @simplified_cast_preserves_irflag_type(
+; CHECK-SAME: ptr [[P:%.*]], ptr [[Q:%.*]], ptr [[R:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: br label %[[VECTOR_MEMCHECK:.*]]
+; CHECK: [[VECTOR_MEMCHECK]]:
+; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[Q]], i64 2
+; CHECK-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[R]], i64 2
+; CHECK-NEXT: [[SCEVGEP2:%.*]] = getelementptr i8, ptr [[P]], i64 1
+; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[Q]], [[SCEVGEP1]]
+; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[R]], [[SCEVGEP]]
+; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
+; CHECK-NEXT: [[BOUND03:%.*]] = icmp ult ptr [[Q]], [[SCEVGEP2]]
+; CHECK-NEXT: [[BOUND14:%.*]] = icmp ult ptr [[P]], [[SCEVGEP]]
+; CHECK-NEXT: [[FOUND_CONFLICT5:%.*]] = and i1 [[BOUND03]], [[BOUND14]]
+; CHECK-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT5]]
+; CHECK-NEXT: [[BOUND06:%.*]] = icmp ult ptr [[R]], [[SCEVGEP2]]
+; CHECK-NEXT: [[BOUND17:%.*]] = icmp ult ptr [[P]], [[SCEVGEP1]]
+; CHECK-NEXT: [[FOUND_CONFLICT8:%.*]] = and i1 [[BOUND06]], [[BOUND17]]
+; CHECK-NEXT: [[CONFLICT_RDX9:%.*]] = or i1 [[CONFLICT_RDX]], [[FOUND_CONFLICT8]]
+; CHECK-NEXT: br i1 [[CONFLICT_RDX9]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[P]], align 1, !alias.scope [[META4:![0-9]+]]
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i8> poison, i8 [[TMP0]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i8> [[BROADCAST_SPLATINSERT]], <4 x i8> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP1:%.*]] = zext <4 x i8> [[BROADCAST_SPLAT]] to <4 x i16>
+; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i16> [[TMP1]], i32 3
+; CHECK-NEXT: store i16 [[TMP2]], ptr [[Q]], align 2, !alias.scope [[META7:![0-9]+]], !noalias [[META9:![0-9]+]]
+; CHECK-NEXT: store i16 [[TMP2]], ptr [[R]], align 2, !alias.scope [[META11:![0-9]+]], !noalias [[META4]]
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
+; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], -9223372036854775808
+; CHECK-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: br [[EXIT:label %.*]]
+; CHECK: [[SCALAR_PH]]:
+;
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+ %x = load i8, ptr %p
+ %x.i32 = zext i8 %x to i32
+ %trunc = trunc i32 %x.i32 to i16
+ store i16 %trunc, ptr %q
+ %x.i16 = zext i8 %x to i16
+ store i16 %x.i16, ptr %r
+ %iv.next = add i64 %iv, 2
+ %done = icmp eq i64 %iv.next, 0
+ br i1 %done, label %exit, label %loop
+
+exit:
+ ret void
+}
|
| %x.i16 = zext i8 %x to i16 | ||
| store i16 %x.i16, ptr %r | ||
| %iv.next = add i64 %iv, 2 | ||
| %done = icmp eq i64 %iv.next, 0 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can use a value where we don’t need a scev runtime check?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It doesn't look like it, we don't get the crash with %iv.next = add i64 %iv, 1 if that's what you're asking
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
| %done = icmp eq i64 %iv.next, 0 | |
| %ec = icmp eq i64 %iv.next, 100 |
or something like that (needs to be a multiple of 2, that should get rid of the runtime check
| @@ -349,3 +350,61 @@ loop: | |||
| exit: | |||
| ret void | |||
| } | |||
|
|
|||
| define void @simplified_cast_preserves_irflag_type(ptr %p, ptr %q, ptr %r) { | |||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Could add noalias to avoid men checks I think
| %x.i16 = zext i8 %x to i16 | ||
| store i16 %x.i16, ptr %r | ||
| %iv.next = add i64 %iv, 2 | ||
| %done = icmp eq i64 %iv.next, 0 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
| %done = icmp eq i64 %iv.next, 0 | |
| %exit.cond = icmp eq i64 %iv.next, 0 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Looks like the memchecks and scev-checks are gone, so this LGTM, thanks!
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM, thanks
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/144/builds/38200 Here is the relevant piece of the build log for the reference |
createWidenCast doesn't set the flag type, so when we simplify trunc (zext nneg x) -> zext x we would hit an assertion in CSE that the flag types don't match with other VPWidenCastRecipes that weren't simplified.
This fixes it the same way trunc flags are handled too.
As an aside I think it should be correct to preserve the nneg flag in this case since the input operand is still non-negative after the transform. But that's left to another PR.
Fixes #164171