Skip to content

Commit

Permalink
[SLP]Fix PR89635: do not try to vectorize single-gather alternate node.
Browse files Browse the repository at this point in the history
There is no need to try to vectorize a graph that consists of a single
gather/buildvector node with an alternate opcode; it is not profitable.
In other cases, the last instruction must be used as the insertion point
for the vectorized code.
  • Loading branch information
alexey-bataev committed Apr 23, 2024
1 parent e0a763c commit b4a0fd4
Show file tree
Hide file tree
Showing 2 changed files with 30 additions and 1 deletion.
6 changes: 5 additions & 1 deletion llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9640,6 +9640,7 @@ bool BoUpSLP::isTreeTinyAndNotFullyVectorizable(bool ForReduction) const {
bool IsAllowedSingleBVNode =
VectorizableTree.size() > 1 ||
(VectorizableTree.size() == 1 && VectorizableTree.front()->getOpcode() &&
!VectorizableTree.front()->isAltShuffle() &&
VectorizableTree.front()->getOpcode() != Instruction::PHI &&
VectorizableTree.front()->getOpcode() != Instruction::GetElementPtr &&
allSameBlock(VectorizableTree.front()->Scalars));
Expand Down Expand Up @@ -11032,7 +11033,10 @@ Instruction &BoUpSLP::getLastInstructionInBundle(const TreeEntry *E) {
isUsedOutsideBlock(V);
}) ||
(E->State == TreeEntry::NeedToGather && E->Idx == 0 &&
all_of(E->Scalars, IsaPred<ExtractElementInst, UndefValue>)))
all_of(E->Scalars, [](Value *V) {
return isa<ExtractElementInst, UndefValue>(V) ||
areAllOperandsNonInsts(V);
})))
Res.second = FindLastInst();
else
Res.second = FindFirstInst();
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s

; Regression test accompanying the SLP vectorizer fix for PR89635.
; The function packs two scalars produced by different opcodes (an `or` and a
; `mul`) into a <2 x i32> with a pair of insertelement instructions.
; The autogenerated FileCheck assertions below mirror the input instruction
; for instruction, i.e. the expected output of the slp-vectorizer pass is the
; unchanged scalar code.
define <2 x i32> @test(i32 %arg) {
; CHECK-LABEL: define <2 x i32> @test(
; CHECK-SAME: i32 [[ARG:%.*]]) {
; CHECK-NEXT: bb:
; CHECK-NEXT: [[OR:%.*]] = or i32 [[ARG]], 0
; CHECK-NEXT: [[MUL:%.*]] = mul i32 0, 1
; CHECK-NEXT: [[MUL1:%.*]] = mul i32 [[OR]], [[MUL]]
; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 0, [[MUL1]]
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> poison, i32 [[OR]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> [[TMP0]], i32 [[MUL]], i32 1
; CHECK-NEXT: ret <2 x i32> [[TMP1]]
;
bb:
  ; The two scalars that end up in the returned vector: one `or`, one `mul`.
  %or = or i32 %arg, 0
  %mul = mul i32 0, 1
  ; Extra scalar users of %or and %mul; %cmp itself has no users in this function.
  %mul1 = mul i32 %or, %mul
  %cmp = icmp ugt i32 0, %mul1
  ; Build the <2 x i32> result: lane 0 = %or, lane 1 = %mul.
  %0 = insertelement <2 x i32> poison, i32 %or, i32 0
  %1 = insertelement <2 x i32> %0, i32 %mul, i32 1
  ret <2 x i32> %1
}

0 comments on commit b4a0fd4

Please sign in to comment.