Skip to content

Commit

Permalink
[SLP]Use original vector if need to shuffle truncated root.
Browse files Browse the repository at this point in the history
If the root scalar is mapped to the smallest bit width, the vector is
truncated and the types of the original buildvector and the extracted value
mismatch. For extracts, we emit sext/zext instructions; for shuffles we
can reuse the original vector instead of the truncated one.

Differential Revision: https://reviews.llvm.org/D127974
  • Loading branch information
alexey-bataev committed Jun 16, 2022
1 parent 3433f78 commit 76782a6
Show file tree
Hide file tree
Showing 2 changed files with 50 additions and 0 deletions.
4 changes: 4 additions & 0 deletions llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
Expand Up @@ -8472,6 +8472,10 @@ BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues) {
if (auto *FTy = dyn_cast<FixedVectorType>(User->getType())) {
Optional<unsigned> InsertIdx = getInsertIndex(VU);
if (InsertIdx) {
// Need to use original vector, if the root is truncated.
if (MinBWs.count(Scalar) &&
VectorizableTree[0]->VectorizedValue == Vec)
Vec = VectorRoot;
auto *It =
find_if(ShuffledInserts, [VU](const ShuffledInsertData &Data) {
// Checks if 2 insertelements are from the same buildvector.
Expand Down
46 changes: 46 additions & 0 deletions llvm/test/Transforms/SLPVectorizer/X86/root-trunc-extract-reuse.ll
@@ -0,0 +1,46 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -slp-vectorizer -S -slp-threshold=-100 -mtriple=x86_64 < %s | FileCheck %s

; Regression test for the SLP vectorizer when the root of the vectorized tree
; is narrowed: the expected output below truncates the vectorized phi from
; <2 x i32> to <2 x i8>. The scalar use of lane 0 is rebuilt with
; extractelement + zext, while the shufflevector that stands in for the
; original insertelement (%t12) must take the un-truncated <2 x i32> phi as its
; operand rather than the <2 x i8> value, so that operand types match.
define i1 @test() {
; CHECK-LABEL: @test(
; CHECK-NEXT: entry:
; CHECK-NEXT: br i1 false, label [[THEN:%.*]], label [[ELSE:%.*]]
; CHECK: then:
; CHECK-NEXT: br label [[ELSE]]
; CHECK: else:
; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x i32> [ zeroinitializer, [[THEN]] ], [ zeroinitializer, [[ENTRY:%.*]] ]
; CHECK-NEXT: [[TMP1:%.*]] = trunc <2 x i32> [[TMP0]] to <2 x i8>
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i8> [[TMP1]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = zext i8 [[TMP2]] to i32
; CHECK-NEXT: [[BF_CAST162:%.*]] = and i32 [[TMP3]], 0
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x i32> zeroinitializer, <2 x i32> [[TMP0]], <2 x i32> <i32 3, i32 1>
; CHECK-NEXT: [[T13:%.*]] = and <2 x i32> [[TMP4]], zeroinitializer
; CHECK-NEXT: br label [[ELSE1:%.*]]
; CHECK: else1:
; CHECK-NEXT: [[T20:%.*]] = extractelement <2 x i32> [[T13]], i64 0
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> poison, i32 [[BF_CAST162]], i32 0
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> [[TMP5]], i32 [[T20]], i32 1
; CHECK-NEXT: [[TMP7:%.*]] = icmp ugt <2 x i32> [[TMP6]], zeroinitializer
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i1> [[TMP7]], i32 1
; CHECK-NEXT: ret i1 [[TMP8]]
;
entry:
br i1 false, label %then, label %else

then:
br label %else

else:
; Two scalar i32 phis with identical incoming values; the expected output
; combines them into a single <2 x i32> phi.
%bf.load.off43 = phi i32 [ 0, %then ], [ 0, %entry ]
%bf.load.off44 = phi i32 [ 0, %then ], [ 0, %entry ]
; Scalar user of the first phi; in the expected output its operand is
; extracted from the truncated vector and zero-extended back to i32.
%bf.cast162 = and i32 %bf.load.off43, 0
; Inserts the second phi into lane 0 of a zero vector; the expected output
; replaces this insertelement with a shufflevector over the <2 x i32> phi.
%t12 = insertelement <2 x i32> zeroinitializer, i32 %bf.load.off44, i64 0
%t13 = and <2 x i32> %t12, zeroinitializer
br label %else1

else1:
; %cmp40 has no users in this function; only %cmp50 is returned. The expected
; output evaluates both comparisons as one <2 x i32> icmp and returns lane 1.
%cmp40 = icmp ugt i32 %bf.cast162, 0
%t20 = extractelement <2 x i32> %t13, i64 0
%cmp50 = icmp ugt i32 %t20, 0
ret i1 %cmp50
}

0 comments on commit 76782a6

Please sign in to comment.