-
Notifications
You must be signed in to change notification settings - Fork 11.2k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[VPlan] Add non-poison propagating LogicalAnd VPInstruction opcode. #91897
Conversation
Add a new opcode to mode non-poison propagating logical AND operations used when generating edge masks. This follows the similar decision to model Not as dedicated opcode as well, to improve clarity. This also helps to simplify the matchers for llvm#89386.
@llvm/pr-subscribers-llvm-transforms Author: Florian Hahn (fhahn) ChangesAdd a new opcode to mode non-poison propagating logical AND operations used when generating edge masks. This follows the similar decision to model Not as dedicated opcode as well, to improve clarity. This also helps to simplify the matchers for Full diff: https://github.com/llvm/llvm-project/pull/91897.diff 6 Files Affected:
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
index ece2a34f180cb..c03c278fcebe7 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
@@ -179,6 +179,12 @@ class VPBuilder {
VPRecipeWithIRFlags::DisjointFlagsTy(false), DL, Name));
}
+ VPValue *createLogicalAnd(VPValue *LHS, VPValue *RHS, DebugLoc DL = {},
+ const Twine &Name = "") {
+ return tryInsertInstruction(
+ new VPInstruction(VPInstruction::LogicalAnd, {LHS, RHS}, DL, Name));
+ }
+
VPValue *createSelect(VPValue *Cond, VPValue *TrueVal, VPValue *FalseVal,
DebugLoc DL = {}, const Twine &Name = "",
std::optional<FastMathFlags> FMFs = std::nullopt) {
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 261933966b74b..ad106f41c32cd 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -8011,14 +8011,10 @@ VPValue *VPRecipeBuilder::createEdgeMask(BasicBlock *Src, BasicBlock *Dst) {
EdgeMask = Builder.createNot(EdgeMask, BI->getDebugLoc());
if (SrcMask) { // Otherwise block in-mask is all-one, no need to AND.
- // The condition is 'SrcMask && EdgeMask', which is equivalent to
- // 'select i1 SrcMask, i1 EdgeMask, i1 false'.
- // The select version does not introduce new UB if SrcMask is false and
- // EdgeMask is poison. Using 'and' here introduces undefined behavior.
- VPValue *False = Plan.getOrAddLiveIn(
- ConstantInt::getFalse(BI->getCondition()->getType()));
- EdgeMask =
- Builder.createSelect(SrcMask, EdgeMask, False, BI->getDebugLoc());
+ // Use LogicalAnd as it does not propagate poison, i.e. does not introduce
+ // new UB if SrcMask is false and EdgeMask is poison. Using 'and' here
+ // introduces undefined behavior.
+ EdgeMask = Builder.createLogicalAnd(SrcMask, EdgeMask);
}
return EdgeMaskCache[Edge] = EdgeMask;
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 0784665efd14b..4b3cb15b5e1e6 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -1177,6 +1177,7 @@ class VPInstruction : public VPRecipeWithIRFlags {
BranchOnCount,
BranchOnCond,
ComputeReductionResult,
+ LogicalAnd, // Non-poison propagating logical And.
// Add an offset in bytes (second operand) to a base pointer (first
// operand). Only generates scalar values (either for the first lane only or
// for all lanes, depending on its uses).
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 140516e08e795..e5237a35f42f2 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -137,6 +137,7 @@ bool VPRecipeBase::mayHaveSideEffects() const {
case VPInstruction::Not:
case VPInstruction::CalculateTripCountMinusVF:
case VPInstruction::CanonicalIVIncrementForPart:
+ case VPInstruction::LogicalAnd:
case VPInstruction::PtrAdd:
return false;
default:
@@ -557,6 +558,12 @@ Value *VPInstruction::generatePerPart(VPTransformState &State, unsigned Part) {
return ReducedPartRdx;
}
+ case VPInstruction::LogicalAnd: {
+ Value *A = State.get(getOperand(0), Part);
+ Value *B = State.get(getOperand(1), Part);
+ return Builder.CreateSelect(A, B, ConstantInt::getFalse(A->getType()),
+ Name);
+ }
case VPInstruction::PtrAdd: {
assert(vputils::onlyFirstLaneUsed(this) &&
"can only generate first lane for PtrAdd");
@@ -689,6 +696,9 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent,
case VPInstruction::ComputeReductionResult:
O << "compute-reduction-result";
break;
+ case VPInstruction::LogicalAnd:
+ O << "logical-and";
+ break;
case VPInstruction::PtrAdd:
O << "ptradd";
break;
diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing.ll
index 7056bbe6ba1b7..dad45d473dc02 100644
--- a/llvm/test/Transforms/LoopVectorize/vplan-printing.ll
+++ b/llvm/test/Transforms/LoopVectorize/vplan-printing.ll
@@ -432,7 +432,7 @@ define void @debug_loc_vpinstruction(ptr nocapture %asd, ptr nocapture %bsd) !db
; CHECK-NEXT: WIDEN ir<%cmp1> = icmp slt ir<%lsd>, ir<100>
; CHECK-NEXT: EMIT vp<[[NOT1:%.+]]> = not ir<%cmp1>, !dbg /tmp/s.c:5:3
; CHECK-NEXT: WIDEN ir<%cmp2> = icmp sge ir<%lsd>, ir<200>
-; CHECK-NEXT: EMIT vp<[[SEL1:%.+]]> = select vp<[[NOT1]]>, ir<%cmp2>, ir<false>, !dbg /tmp/s.c:5:21
+; CHECK-NEXT: EMIT vp<[[SEL1:%.+]]> = logical-and vp<[[NOT1]]>, ir<%cmp2>, !dbg /tmp/s.c:5:21
; CHECK-NEXT: EMIT vp<[[OR1:%.+]]> = or vp<[[SEL1]]>, ir<%cmp1>
; CHECK-NEXT: Successor(s): pred.sdiv
; CHECK-EMPTY:
diff --git a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll
index 108b78a70fa1a..1e60e57a5409d 100644
--- a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll
+++ b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll
@@ -269,7 +269,7 @@ define void @uniform_gep(i64 %k, ptr noalias %A, ptr noalias %B) {
; CHECK-NEXT: CLONE ir<%lv> = load ir<%gep.A.uniform>
; CHECK-NEXT: WIDEN ir<%cmp> = icmp ult ir<%iv>, ir<%k>
; CHECK-NEXT: EMIT vp<[[NOT2:%.+]]> = not ir<%cmp>
-; CHECK-NEXT: EMIT vp<[[MASK2:%.+]]> = select vp<[[MASK]]>, vp<[[NOT2]]>, ir<false>
+; CHECK-NEXT: EMIT vp<[[MASK2:%.+]]> = logical-and vp<[[MASK]]>, vp<[[NOT2]]>
; CHECK-NEXT: Successor(s): pred.store
; CHECK-EMPTY:
; CHECK-NEXT: <xVFxUF> pred.store: {
@@ -340,7 +340,7 @@ define void @pred_cfg1(i32 %k, i32 %j) {
; CHECK-NEXT: EMIT vp<[[MASK1:%.+]]> = icmp ule ir<%iv>, vp<[[BTC]]>
; CHECK-NEXT: WIDEN ir<%c.1> = icmp ult ir<%iv>, ir<%j>
; CHECK-NEXT: WIDEN ir<%mul> = mul ir<%iv>, ir<10>
-; CHECK-NEXT: EMIT vp<[[MASK2:%.+]]> = select vp<[[MASK1]]>, ir<%c.1>, ir<false>
+; CHECK-NEXT: EMIT vp<[[MASK2:%.+]]> = logical-and vp<[[MASK1]]>, ir<%c.1>
; CHECK-NEXT: Successor(s): pred.load
; CHECK-EMPTY:
; CHECK-NEXT: <xVFxUF> pred.load: {
@@ -362,7 +362,7 @@ define void @pred_cfg1(i32 %k, i32 %j) {
; CHECK-EMPTY:
; CHECK-NEXT: then.0.0:
; CHECK-NEXT: EMIT vp<[[NOT:%.+]]> = not ir<%c.1>
-; CHECK-NEXT: EMIT vp<[[MASK3:%.+]]> = select vp<[[MASK1]]>, vp<[[NOT]]>, ir<false>
+; CHECK-NEXT: EMIT vp<[[MASK3:%.+]]> = logical-and vp<[[MASK1]]>, vp<[[NOT]]>
; CHECK-NEXT: EMIT vp<[[OR:%.+]]> = or vp<[[MASK2]]>, vp<[[MASK3]]>
; CHECK-NEXT: BLEND ir<%p> = ir<0> vp<[[PRED]]>/vp<[[MASK2]]>
; CHECK-NEXT: Successor(s): pred.store
@@ -441,7 +441,7 @@ define void @pred_cfg2(i32 %k, i32 %j) {
; CHECK-NEXT: WIDEN ir<%mul> = mul ir<%iv>, ir<10>
; CHECK-NEXT: WIDEN ir<%c.0> = icmp ult ir<%iv>, ir<%j>
; CHECK-NEXT: WIDEN ir<%c.1> = icmp ugt ir<%iv>, ir<%j>
-; CHECK-NEXT: EMIT vp<[[MASK2:%.+]]> = select vp<[[MASK1]]>, ir<%c.0>, ir<false>
+; CHECK-NEXT: EMIT vp<[[MASK2:%.+]]> = logical-and vp<[[MASK1]]>, ir<%c.0>
; CHECK-NEXT: Successor(s): pred.load
; CHECK-EMPTY:
; CHECK-NEXT: <xVFxUF> pred.load: {
@@ -463,10 +463,10 @@ define void @pred_cfg2(i32 %k, i32 %j) {
; CHECK-EMPTY:
; CHECK-NEXT: then.0.0:
; CHECK-NEXT: EMIT vp<[[NOT:%.+]]> = not ir<%c.0>
-; CHECK-NEXT: EMIT vp<[[MASK3:%.+]]> = select vp<[[MASK1]]>, vp<[[NOT]]>, ir<false>
+; CHECK-NEXT: EMIT vp<[[MASK3:%.+]]> = logical-and vp<[[MASK1]]>, vp<[[NOT]]>
; CHECK-NEXT: EMIT vp<[[OR:%.+]]> = or vp<[[MASK2]]>, vp<[[MASK3]]>
; CHECK-NEXT: BLEND ir<%p> = ir<0> vp<[[PRED]]>/vp<[[MASK2]]>
-; CHECK-NEXT: EMIT vp<[[MASK4:%.+]]> = select vp<[[OR]]>, ir<%c.1>, ir<false>
+; CHECK-NEXT: EMIT vp<[[MASK4:%.+]]> = logical-and vp<[[OR]]>, ir<%c.1>
; CHECK-NEXT: Successor(s): pred.store
; CHECK-EMPTY:
; CHECK-NEXT: <xVFxUF> pred.store: {
@@ -549,7 +549,7 @@ define void @pred_cfg3(i32 %k, i32 %j) {
; CHECK-NEXT: EMIT vp<[[MASK1:%.+]]> = icmp ule ir<%iv>, vp<[[BTC]]>
; CHECK-NEXT: WIDEN ir<%mul> = mul ir<%iv>, ir<10>
; CHECK-NEXT: WIDEN ir<%c.0> = icmp ult ir<%iv>, ir<%j>
-; CHECK-NEXT: EMIT vp<[[MASK2:%.+]]> = select vp<[[MASK1:%.+]]>, ir<%c.0>, ir<false>
+; CHECK-NEXT: EMIT vp<[[MASK2:%.+]]> = logical-and vp<[[MASK1:%.+]]>, ir<%c.0>
; CHECK-NEXT: Successor(s): pred.load
; CHECK-EMPTY:
; CHECK-NEXT: <xVFxUF> pred.load: {
@@ -571,10 +571,10 @@ define void @pred_cfg3(i32 %k, i32 %j) {
; CHECK-EMPTY:
; CHECK-NEXT: then.0.0:
; CHECK-NEXT: EMIT vp<[[NOT:%.+]]> = not ir<%c.0>
-; CHECK-NEXT: EMIT vp<[[MASK3:%.+]]> = select vp<[[MASK1]]>, vp<[[NOT]]>, ir<false>
+; CHECK-NEXT: EMIT vp<[[MASK3:%.+]]> = logical-and vp<[[MASK1]]>, vp<[[NOT]]>
; CHECK-NEXT: EMIT vp<[[MASK4:%.+]]> = or vp<[[MASK2]]>, vp<[[MASK3]]>
; CHECK-NEXT: BLEND ir<%p> = ir<0> vp<[[PRED]]>/vp<[[MASK2]]>
-; CHECK-NEXT: EMIT vp<[[MASK5:%.+]]> = select vp<[[MASK4]]>, ir<%c.0>, ir<false>
+; CHECK-NEXT: EMIT vp<[[MASK5:%.+]]> = logical-and vp<[[MASK4]]>, ir<%c.0>
; CHECK-NEXT: Successor(s): pred.store
; CHECK-EMPTY:
; CHECK-NEXT: <xVFxUF> pred.store: {
@@ -683,7 +683,7 @@ define void @merge_3_replicate_region(i32 %k, i32 %j) {
; CHECK-EMPTY:
; CHECK-NEXT: loop.3:
; CHECK-NEXT: WIDEN ir<%c.0> = icmp ult ir<%iv>, ir<%j>
-; CHECK-NEXT: EMIT vp<[[MASK2:%.+]]> = select vp<[[MASK]]>, ir<%c.0>, ir<false>
+; CHECK-NEXT: EMIT vp<[[MASK2:%.+]]> = logical-and vp<[[MASK]]>, ir<%c.0>
; CHECK-NEXT: WIDEN ir<%mul> = mul vp<[[PRED1]]>, vp<[[PRED2]]>
; CHECK-NEXT: Successor(s): pred.store
; CHECK-EMPTY:
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM, thanks! Adding a couple of minor comments.
// Use LogicalAnd as it does not propagate poison, i.e. does not introduce | ||
// new UB if SrcMask is false and EdgeMask is poison. Using 'and' here | ||
// introduces undefined behavior. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
// Use LogicalAnd as it does not propagate poison, i.e. does not introduce | |
// new UB if SrcMask is false and EdgeMask is poison. Using 'and' here | |
// introduces undefined behavior. | |
// The bitwise 'And' of SrcMask and EdgeMask introduces new UB if SrcMask | |
// is false and EdgeMask is poison. Avoid that by using 'LogicalAnd' | |
// instead which generates 'select i1 SrcMask, i1 EdgeMask, i1 false'. |
(BTW, the converse - false EdgeMask and poison SrcMask - is irrelevant, because EdgeMask is conditional on SrcMask, right?)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes IIUC
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
(nit, post-commit): above suggestion was a rephrasing of the initial comment, meant to replace it rather than augment it.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ah yes, my bad! Should be fixed in b1e99a6
return Builder.CreateSelect(A, B, ConstantInt::getFalse(A->getType()), | ||
Name); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
return Builder.CreateSelect(A, B, ConstantInt::getFalse(A->getType()), | |
Name); | |
return Builder.CreateLogicalAnd(A, B, Name); |
?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ah yes, done ,thanks!
VPValue *createSelect(VPValue *Cond, VPValue *TrueVal, VPValue *FalseVal, | ||
DebugLoc DL = {}, const Twine &Name = "", | ||
std::optional<FastMathFlags> FMFs = std::nullopt) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is this now useless, untested, subject to dce?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There was one other place that created selects, updated here: e122380
Follow-up to remove a redundant comment post-commit #91897
Add a new opcode to mode non-poison propagating logical AND operations used when generating edge masks. This follows the similar decision to model Not as dedicated opcode as well, to improve clarity.
This also helps to simplify the matchers for
#89386.