Skip to content

Commit

Permalink
[SLP]Fix PR66176: SLP incorrectly reorders select operands.
Browse files Browse the repository at this point in the history
On the very first iteration for the reductions, when trying to build
reduction for boolean logic operations, no need to compare LHS/RHS with
the Reduction(VectorizedTree), need to compare with actual parameters of
the reduction operations.
  • Loading branch information
alexey-bataev committed Sep 15, 2023
1 parent 5664b56 commit b9ad72b
Show file tree
Hide file tree
Showing 3 changed files with 29 additions and 25 deletions.
48 changes: 26 additions & 22 deletions llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14023,31 +14023,33 @@ class HorizontalReduction {
// RedOp2 = select i1 ?, i1 RHS, i1 false

// Then, we must freeze LHS in the new op.
auto &&FixBoolLogicalOps =
[&Builder, VectorizedTree](Value *&LHS, Value *&RHS,
Instruction *RedOp1, Instruction *RedOp2) {
if (!isBoolLogicOp(RedOp1))
return;
if (LHS == VectorizedTree || getRdxOperand(RedOp1, 0) == LHS ||
isGuaranteedNotToBePoison(LHS))
return;
if (!isBoolLogicOp(RedOp2))
return;
if (RHS == VectorizedTree || getRdxOperand(RedOp2, 0) == RHS ||
isGuaranteedNotToBePoison(RHS)) {
std::swap(LHS, RHS);
return;
}
LHS = Builder.CreateFreeze(LHS);
};
auto FixBoolLogicalOps = [&, VectorizedTree](Value *&LHS, Value *&RHS,
Instruction *RedOp1,
Instruction *RedOp2,
bool InitStep) {
if (!isBoolLogicOp(RedOp1))
return;
if ((!InitStep && LHS == VectorizedTree) ||
getRdxOperand(RedOp1, 0) == LHS || isGuaranteedNotToBePoison(LHS))
return;
if (!isBoolLogicOp(RedOp2))
return;
if ((!InitStep && RHS == VectorizedTree) ||
getRdxOperand(RedOp2, 0) == RHS || isGuaranteedNotToBePoison(RHS)) {
std::swap(LHS, RHS);
return;
}
if (LHS != VectorizedTree)
LHS = Builder.CreateFreeze(LHS);
};
// Finish the reduction.
// Need to add extra arguments and not vectorized possible reduction
// values.
// Try to avoid dependencies between the scalar remainders after
// reductions.
auto &&FinalGen =
[this, &Builder, &TrackedVals, &FixBoolLogicalOps](
ArrayRef<std::pair<Instruction *, Value *>> InstVals) {
auto FinalGen =
[&](ArrayRef<std::pair<Instruction *, Value *>> InstVals,
bool InitStep) {
unsigned Sz = InstVals.size();
SmallVector<std::pair<Instruction *, Value *>> ExtraReds(Sz / 2 +
Sz % 2);
Expand All @@ -14068,7 +14070,7 @@ class HorizontalReduction {
// sequential, safe, scalar boolean logic operations, the
// reduction operand must be frozen.
FixBoolLogicalOps(StableRdxVal1, StableRdxVal2, InstVals[I].first,
RedOp);
RedOp, InitStep);
Value *ExtraRed = createOp(Builder, RdxKind, StableRdxVal1,
StableRdxVal2, "op.rdx", ReductionOps);
ExtraReds[I / 2] = std::make_pair(InstVals[I].first, ExtraRed);
Expand Down Expand Up @@ -14098,11 +14100,13 @@ class HorizontalReduction {
ExtraReductions.emplace_back(I, Pair.first);
}
// Iterate through all not-vectorized reduction values/extra arguments.
bool InitStep = true;
while (ExtraReductions.size() > 1) {
VectorizedTree = ExtraReductions.front().second;
SmallVector<std::pair<Instruction *, Value *>> NewReds =
FinalGen(ExtraReductions);
FinalGen(ExtraReductions, InitStep);
ExtraReductions.swap(NewReds);
InitStep = false;
}
VectorizedTree = ExtraReductions.front().second;

Expand Down
4 changes: 2 additions & 2 deletions llvm/test/Transforms/SLPVectorizer/X86/reduction-logical.ll
Original file line number Diff line number Diff line change
Expand Up @@ -458,7 +458,7 @@ define i1 @logical_and_icmp_extra_op(<4 x i32> %x, <4 x i32> %y, i1 %c) {
; CHECK-NEXT: [[S3:%.*]] = select i1 [[C:%.*]], i1 [[C]], i1 false
; CHECK-NEXT: [[TMP2:%.*]] = freeze <4 x i1> [[TMP1]]
; CHECK-NEXT: [[TMP3:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP2]])
; CHECK-NEXT: [[OP_RDX:%.*]] = select i1 [[TMP3]], i1 [[S3]], i1 false
; CHECK-NEXT: [[OP_RDX:%.*]] = select i1 [[S3]], i1 [[TMP3]], i1 false
; CHECK-NEXT: ret i1 [[OP_RDX]]
;
%x0 = extractelement <4 x i32> %x, i32 0
Expand Down Expand Up @@ -487,7 +487,7 @@ define i1 @logical_or_icmp_extra_op(<4 x i32> %x, <4 x i32> %y, i1 %c) {
; CHECK-NEXT: [[S3:%.*]] = select i1 [[C:%.*]], i1 true, i1 [[C]]
; CHECK-NEXT: [[TMP2:%.*]] = freeze <4 x i1> [[TMP1]]
; CHECK-NEXT: [[TMP3:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP2]])
; CHECK-NEXT: [[OP_RDX:%.*]] = select i1 [[TMP3]], i1 true, i1 [[S3]]
; CHECK-NEXT: [[OP_RDX:%.*]] = select i1 [[S3]], i1 true, i1 [[TMP3]]
; CHECK-NEXT: ret i1 [[OP_RDX]]
;
%x0 = extractelement <4 x i32> %x, i32 0
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ define i1 @test(i32 %inc) {
; CHECK-LABEL: define i1 @test(
; CHECK-SAME: i32 [[INC:%.*]]) {
; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[INC]], 3
; CHECK-NEXT: [[OP_RDX:%.*]] = select i1 poison, i1 true, i1 [[CMP]]
; CHECK-NEXT: [[OP_RDX:%.*]] = select i1 [[CMP]], i1 true, i1 poison
; CHECK-NEXT: ret i1 [[OP_RDX]]
;
%cmp = icmp ult i32 %inc, 3
Expand Down

0 comments on commit b9ad72b

Please sign in to comment.