diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 7dfa74bbb4159..23c1675373806 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -8039,21 +8039,16 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef VectorizedVals, VecCost += TTI->getCastInstrCost(E->getAltOpcode(), VecTy, Src1Ty, TTI::CastContextHint::None, CostKind); } - if (E->ReuseShuffleIndices.empty()) { - VecCost += - TTI->getShuffleCost(TargetTransformInfo::SK_Select, FinalVecTy); - } else { - SmallVector Mask; - buildShuffleEntryMask( - E->Scalars, E->ReorderIndices, E->ReuseShuffleIndices, - [E](Instruction *I) { - assert(E->isOpcodeOrAlt(I) && "Unexpected main/alternate opcode"); - return I->getOpcode() == E->getAltOpcode(); - }, - Mask); - VecCost += TTI->getShuffleCost(TargetTransformInfo::SK_PermuteTwoSrc, - FinalVecTy, Mask); - } + SmallVector Mask; + buildShuffleEntryMask( + E->Scalars, E->ReorderIndices, E->ReuseShuffleIndices, + [E](Instruction *I) { + assert(E->isOpcodeOrAlt(I) && "Unexpected main/alternate opcode"); + return I->getOpcode() == E->getAltOpcode(); + }, + Mask); + VecCost += TTI->getShuffleCost(TargetTransformInfo::SK_PermuteTwoSrc, + FinalVecTy, Mask); return VecCost; }; return GetCostDiff(GetScalarCost, GetVectorCost); diff --git a/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions-logical.ll b/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions-logical.ll index c97d8da58be44..9c3e1fbec663b 100644 --- a/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions-logical.ll +++ b/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions-logical.ll @@ -86,17 +86,16 @@ return: define float @test_merge_anyof_v4sf(<4 x float> %t) { ; CHECK-LABEL: @test_merge_anyof_v4sf( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <4 x float> [[T:%.*]], <4 x float> poison, <8 x i32> -; CHECK-NEXT: [[TMP1:%.*]] = fcmp ogt <8 x float> [[TMP0]], -; CHECK-NEXT: [[TMP2:%.*]] = fcmp olt <8 x float> [[TMP0]], -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x i1> [[TMP1]], <8 x i1> [[TMP2]], <8 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = freeze <8 x i1> [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i1> [[TMP4]] to i8 -; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i8 [[TMP5]], 0 -; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[T]], <4 x float> poison, <4 x i32> -; CHECK-NEXT: [[TMP6:%.*]] = fadd <4 x float> [[SHIFT]], [[T]] -; CHECK-NEXT: [[ADD:%.*]] = extractelement <4 x float> [[TMP6]], i64 0 -; CHECK-NEXT: [[RETVAL_0:%.*]] = select i1 [[DOTNOT]], float [[ADD]], float 0.000000e+00 +; CHECK-NEXT: [[T_FR:%.*]] = freeze <4 x float> [[T:%.*]] +; CHECK-NEXT: [[TMP0:%.*]] = fcmp olt <4 x float> [[T_FR]], zeroinitializer +; CHECK-NEXT: [[TMP1:%.*]] = fcmp ogt <4 x float> [[T_FR]], +; CHECK-NEXT: [[TMP2:%.*]] = or <4 x i1> [[TMP1]], [[TMP0]] +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i1> [[TMP2]] to i4 +; CHECK-NEXT: [[OP_RDX_NOT:%.*]] = icmp eq i4 [[TMP3]], 0 +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[T_FR]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = fadd <4 x float> [[T_FR]], [[SHIFT]] +; CHECK-NEXT: [[ADD:%.*]] = extractelement <4 x float> [[TMP4]], i64 0 +; CHECK-NEXT: [[RETVAL_0:%.*]] = select i1 [[OP_RDX_NOT]], float [[ADD]], float 0.000000e+00 ; CHECK-NEXT: ret float [[RETVAL_0]] ; entry: @@ -401,18 +400,16 @@ return: define float @test_merge_anyof_v4si(<4 x i32> %t) { ; CHECK-LABEL: @test_merge_anyof_v4si( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <4 x i32> [[T:%.*]], <4 x i32> poison, <8 x i32> -; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <8 x i32> [[TMP0]], -; CHECK-NEXT: [[TMP2:%.*]] = icmp slt <8 x i32> [[TMP0]], -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x i1> [[TMP1]], <8 x i1> [[TMP2]], <8 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = freeze <8 x i1> [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i1> [[TMP4]] to i8 -; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i8 [[TMP5]], 0 -; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x i32> [[T]], <4 x i32> poison, <4 x i32> -; CHECK-NEXT: [[TMP6:%.*]] = add nsw <4 x i32> [[SHIFT]], [[T]] -; CHECK-NEXT: [[ADD:%.*]] = extractelement <4 x i32> [[TMP6]], i64 0 +; CHECK-NEXT: [[T_FR:%.*]] = freeze <4 x i32> [[T:%.*]] +; CHECK-NEXT: [[TMP0:%.*]] = add <4 x i32> [[T_FR]], +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult <4 x i32> [[TMP0]], +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i1> [[TMP1]] to i4 +; CHECK-NEXT: [[OP_RDX_NOT:%.*]] = icmp eq i4 [[TMP2]], 0 +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x i32> [[T_FR]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = add nsw <4 x i32> [[T_FR]], [[SHIFT]] +; CHECK-NEXT: [[ADD:%.*]] = extractelement <4 x i32> [[TMP3]], i64 0 ; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[ADD]] to float -; CHECK-NEXT: [[RETVAL_0:%.*]] = select i1 [[DOTNOT]], float [[CONV]], float 0.000000e+00 +; CHECK-NEXT: [[RETVAL_0:%.*]] = select i1 [[OP_RDX_NOT]], float [[CONV]], float 0.000000e+00 ; CHECK-NEXT: ret float [[RETVAL_0]] ; entry: