diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 0ce5d619d9b14..6e3608ef3befc 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -13096,10 +13096,14 @@ bool BoUpSLP::collectValuesToDemote( if (isa(V)) return true; - // If the value is not a vectorized instruction in the expression with only - // one use, it cannot be demoted. + // If the value is not a vectorized instruction in the expression and not used + // by the insertelement instruction and not used in multiple vector nodes, it + // cannot be demoted. auto *I = dyn_cast(V); - if (!I || !I->hasOneUse() || !getTreeEntry(I) || !Visited.insert(I).second) + if (!I || !getTreeEntry(I) || MultiNodeScalars.contains(I) || + !Visited.insert(I).second || all_of(I->users(), [&](User *U) { + return isa(U) && !getTreeEntry(U); + })) return false; unsigned Start = 0; @@ -13170,11 +13174,6 @@ bool BoUpSLP::collectValuesToDemote( } void BoUpSLP::computeMinimumValueSizes() { - // If there are no external uses, the expression tree must be rooted by a - // store. We can't demote in-memory values, so there is nothing to do here. - if (ExternalUses.empty()) - return; - // We only attempt to truncate integer expressions. auto &TreeRoot = VectorizableTree[0]->Scalars; auto *TreeRootIT = dyn_cast(TreeRoot[0]->getType()); diff --git a/llvm/test/Transforms/SLPVectorizer/X86/int-bitcast-minbitwidth.ll b/llvm/test/Transforms/SLPVectorizer/X86/int-bitcast-minbitwidth.ll index fa0a3610cc22b..a0af8e36b36c7 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/int-bitcast-minbitwidth.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/int-bitcast-minbitwidth.ll @@ -5,19 +5,14 @@ define void @t(i64 %v) { ; CHECK-LABEL: define void @t( ; CHECK-SAME: i64 [[V:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[CONV12_1_I:%.*]] = trunc i64 [[V]] to i32 -; CHECK-NEXT: [[MUL_I_1_I:%.*]] = mul i32 [[CONV12_1_I]], 2 -; CHECK-NEXT: [[CONV12_I:%.*]] = trunc i64 [[V]] to i32 -; CHECK-NEXT: [[MUL_I_I:%.*]] = mul i32 [[CONV12_I]], 3 -; CHECK-NEXT: [[CONV14104_I:%.*]] = or i32 [[MUL_I_1_I]], [[MUL_I_I]] -; CHECK-NEXT: [[CONV12_1_I_1:%.*]] = trunc i64 [[V]] to i32 -; CHECK-NEXT: [[MUL_I_1_I_1:%.*]] = mul i32 [[CONV12_1_I_1]], 6 -; CHECK-NEXT: [[CONV12_I_1:%.*]] = trunc i64 [[V]] to i32 -; CHECK-NEXT: [[MUL_I_I_1:%.*]] = mul i32 [[CONV12_I_1]], 5 -; CHECK-NEXT: [[CONV14104_I_1:%.*]] = or i32 [[MUL_I_1_I_1]], [[MUL_I_I_1]] -; CHECK-NEXT: [[TMP0:%.*]] = or i32 [[CONV14104_I]], [[CONV14104_I_1]] -; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[TMP0]], 65535 -; CHECK-NEXT: store i32 [[TMP1]], ptr null, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i64> poison, i64 [[V]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i64> [[TMP0]], <4 x i64> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = trunc <4 x i64> [[TMP1]] to <4 x i16> +; CHECK-NEXT: [[TMP3:%.*]] = mul <4 x i16> [[TMP2]], +; CHECK-NEXT: [[TMP4:%.*]] = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> [[TMP3]]) +; CHECK-NEXT: [[TMP5:%.*]] = sext i16 [[TMP4]] to i32 +; CHECK-NEXT: [[TMP6:%.*]] = and i32 [[TMP5]], 65535 +; CHECK-NEXT: store i32 [[TMP6]], ptr null, align 4 ; CHECK-NEXT: ret void ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/minbitwidth-transformed-operand.ll b/llvm/test/Transforms/SLPVectorizer/X86/minbitwidth-transformed-operand.ll index 94446b99514ba..2c834616becc0 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/minbitwidth-transformed-operand.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/minbitwidth-transformed-operand.ll @@ -5,24 +5,16 @@ define void @test(i64 %d.promoted.i) { ; CHECK-LABEL: define void @test( ; CHECK-SAME: i64 [[D_PROMOTED_I:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i64> , i64 [[D_PROMOTED_I]], i32 0 -; CHECK-NEXT: [[TMP1:%.*]] = trunc <2 x i64> [[TMP0]] to <2 x i1> -; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i1> zeroinitializer, [[TMP1]] -; CHECK-NEXT: [[TMP3:%.*]] = mul <2 x i1> [[TMP2]], zeroinitializer -; CHECK-NEXT: [[TMP4:%.*]] = or <2 x i1> [[TMP3]], zeroinitializer -; CHECK-NEXT: [[TMP5:%.*]] = or <2 x i1> [[TMP4]], zeroinitializer -; CHECK-NEXT: [[TMP6:%.*]] = or <2 x i1> [[TMP5]], zeroinitializer -; CHECK-NEXT: [[TMP7:%.*]] = or <2 x i1> [[TMP6]], zeroinitializer -; CHECK-NEXT: [[TMP8:%.*]] = or <2 x i1> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP9:%.*]] = or <2 x i1> [[TMP8]], zeroinitializer -; CHECK-NEXT: [[TMP10:%.*]] = or <2 x i1> [[TMP9]], zeroinitializer -; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i1> [[TMP10]], i32 0 -; CHECK-NEXT: [[TMP12:%.*]] = sext i1 [[TMP11]] to i32 -; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i1> [[TMP10]], i32 1 -; CHECK-NEXT: [[TMP14:%.*]] = sext i1 [[TMP13]] to i32 -; CHECK-NEXT: [[TMP15:%.*]] = or i32 [[TMP12]], [[TMP14]] -; CHECK-NEXT: [[TMP16:%.*]] = and i32 [[TMP15]], 0 -; CHECK-NEXT: store i32 [[TMP16]], ptr null, align 4 +; CHECK-NEXT: [[AND_1_I:%.*]] = and i64 0, [[D_PROMOTED_I]] +; CHECK-NEXT: [[AND_1_I_1:%.*]] = and i64 0, 0 +; CHECK-NEXT: [[TMP0:%.*]] = insertelement <16 x i64> , i64 [[AND_1_I_1]], i32 1 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <16 x i64> [[TMP0]], i64 [[AND_1_I]], i32 9 +; CHECK-NEXT: [[TMP2:%.*]] = trunc <16 x i64> [[TMP1]] to <16 x i1> +; CHECK-NEXT: [[TMP3:%.*]] = mul <16 x i1> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> [[TMP3]]) +; CHECK-NEXT: [[TMP5:%.*]] = zext i1 [[TMP4]] to i32 +; CHECK-NEXT: [[TMP6:%.*]] = and i32 [[TMP5]], 0 +; CHECK-NEXT: store i32 [[TMP6]], ptr null, align 4 ; CHECK-NEXT: ret void ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/root-trunc-extract-reuse.ll b/llvm/test/Transforms/SLPVectorizer/X86/root-trunc-extract-reuse.ll index f48528e502b8c..af46b4f576234 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/root-trunc-extract-reuse.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/root-trunc-extract-reuse.ll @@ -8,20 +8,18 @@ define i1 @test() { ; CHECK: then: ; CHECK-NEXT: br label [[ELSE]] ; CHECK: else: -; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x i1> [ zeroinitializer, [[THEN]] ], [ zeroinitializer, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[TMP1:%.*]] = zext <2 x i1> [[TMP0]] to <2 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i1> [[TMP0]], i32 0 -; CHECK-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 -; CHECK-NEXT: [[BF_CAST162:%.*]] = and i32 [[TMP3]], 0 -; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x i32> zeroinitializer, <2 x i32> [[TMP1]], <2 x i32> -; CHECK-NEXT: [[T13:%.*]] = and <2 x i32> [[TMP4]], zeroinitializer +; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x i32> [ zeroinitializer, [[THEN]] ], [ zeroinitializer, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i32> [[TMP0]], i32 0 +; CHECK-NEXT: [[BF_CAST162:%.*]] = and i32 [[TMP1]], 0 +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i32> zeroinitializer, <2 x i32> [[TMP0]], <2 x i32> +; CHECK-NEXT: [[T13:%.*]] = and <2 x i32> [[TMP2]], zeroinitializer ; CHECK-NEXT: br label [[ELSE1:%.*]] ; CHECK: else1: -; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x i32> [[T13]], <2 x i32> poison, <2 x i32> -; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> [[TMP5]], i32 [[BF_CAST162]], i32 0 -; CHECK-NEXT: [[TMP7:%.*]] = icmp ugt <2 x i32> [[TMP6]], zeroinitializer -; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i1> [[TMP7]], i32 1 -; CHECK-NEXT: ret i1 [[TMP8]] +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x i32> [[T13]], <2 x i32> poison, <2 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i32> [[TMP3]], i32 [[BF_CAST162]], i32 0 +; CHECK-NEXT: [[TMP5:%.*]] = icmp ugt <2 x i32> [[TMP4]], zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i1> [[TMP5]], i32 1 +; CHECK-NEXT: ret i1 [[TMP6]] ; entry: br i1 false, label %then, label %else