[SLP] Try to vectorize tiny trees with shuffled gathers.
If the first tree element is vectorized and the second is a gather node, it
can still be profitable to vectorize the tree when the gather node contains
fewer scalars to vectorize than the original tree node, since the gathered
values can then be materialized with a shuffle.

Differential Revision: https://reviews.llvm.org/D101397
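
For illustration, here is a minimal sketch of the kind of input this targets. It is a reduced, hypothetical variant of the @tiny_vector_gather test updated below; the function name and lane count are made up for the example. The four scalar stores form a vectorizable tree node, while their stored values form a gather node that holds only two distinct scalars (the repeats are expressed through a reuse shuffle), so the gathered lanes can, at least in principle, be produced by a shuffle instead of keeping the stores scalar.

define void @two_scalars_four_lanes(i32* %a, i32* %v1, i32* %v2) {
  ; Only two loaded scalars feed four store lanes, so the gather node
  ; (%x, %y, %x, %y) has fewer scalars than the store node it feeds.
  %x = load i32, i32* %v1, align 4
  %y = load i32, i32* %v2, align 4
  %p0 = getelementptr inbounds i32, i32* %a, i64 0
  store i32 %x, i32* %p0, align 16
  %p1 = getelementptr inbounds i32, i32* %a, i64 1
  store i32 %y, i32* %p1, align 4
  %p2 = getelementptr inbounds i32, i32* %a, i64 2
  store i32 %x, i32* %p2, align 8
  %p3 = getelementptr inbounds i32, i32* %a, i64 3
  store i32 %y, i32* %p3, align 4
  ret void
}

The intent is that SLP can now build the two loads into a <2 x i32> vector, widen it with a shufflevector, and emit a single vector store, as the updated CHECK lines in tiny-tree.ll show for the eight-lane case.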
alexey-bataev committed Apr 28, 2021
1 parent aee44fc commit 8af4723
Showing 2 changed files with 12 additions and 10 deletions.
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp (7 additions, 2 deletions)
@@ -4032,10 +4032,15 @@ bool BoUpSLP::isFullyVectorizableTinyTree() const {
   if (VectorizableTree.size() != 2)
     return false;
 
-  // Handle splat and all-constants stores.
+  // Handle splat and all-constants stores. Also try to vectorize tiny trees
+  // with the second gather nodes if they have less scalar operands rather than
+  // the initial tree element (may be profitable to shuffle the second gather).
   if (VectorizableTree[0]->State == TreeEntry::Vectorize &&
       (allConstant(VectorizableTree[1]->Scalars) ||
-       isSplat(VectorizableTree[1]->Scalars)))
+       isSplat(VectorizableTree[1]->Scalars) ||
+       (VectorizableTree[1]->State == TreeEntry::NeedToGather &&
+        VectorizableTree[1]->Scalars.size() <
+            VectorizableTree[0]->Scalars.size())))
     return true;
 
   // Gathering cost would be too much for tiny trees.
llvm/test/Transforms/SLPVectorizer/X86/tiny-tree.ll (5 additions, 8 deletions)
@@ -272,21 +272,18 @@ define void @tiny_vector_gather(i32 *%a, i32 *%v1, i32 *%v2) {
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[V1:%.*]], align 4
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i32, i32* [[V2:%.*]], align 4
 ; CHECK-NEXT:    [[PTR0:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 0
-; CHECK-NEXT:    store i32 [[TMP1]], i32* [[PTR0]], align 16
 ; CHECK-NEXT:    [[PTR1:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 1
-; CHECK-NEXT:    store i32 [[TMP2]], i32* [[PTR1]], align 4
 ; CHECK-NEXT:    [[PTR2:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 2
-; CHECK-NEXT:    store i32 [[TMP1]], i32* [[PTR2]], align 8
 ; CHECK-NEXT:    [[PTR3:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 3
-; CHECK-NEXT:    store i32 [[TMP2]], i32* [[PTR3]], align 4
 ; CHECK-NEXT:    [[PTR4:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 4
-; CHECK-NEXT:    store i32 [[TMP1]], i32* [[PTR4]], align 16
 ; CHECK-NEXT:    [[PTR5:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 5
-; CHECK-NEXT:    store i32 [[TMP2]], i32* [[PTR5]], align 4
 ; CHECK-NEXT:    [[PTR6:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 6
-; CHECK-NEXT:    store i32 [[TMP1]], i32* [[PTR6]], align 8
 ; CHECK-NEXT:    [[PTR7:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 7
-; CHECK-NEXT:    store i32 [[TMP2]], i32* [[PTR7]], align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x i32> poison, i32 [[TMP1]], i32 0
+; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <2 x i32> [[TMP3]], i32 [[TMP2]], i32 1
+; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> poison, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
+; CHECK-NEXT:    [[TMP5:%.*]] = bitcast i32* [[PTR0]] to <8 x i32>*
+; CHECK-NEXT:    store <8 x i32> [[SHUFFLE]], <8 x i32>* [[TMP5]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %1 = load i32, i32* %v1, align 4
