diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index f68c677ad49ccf..1520a70c81f29a 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -200,39 +200,12 @@ static bool isValidElementType(Type *Ty) { !Ty->isPPC_FP128Ty(); } -/// \returns True if the value is a constant (but not globals/constant -/// expressions). -static bool isConstant(Value *V) { - return isa(V) && !isa(V) && !isa(V); -} - -/// Checks if \p V is one of vector-like instructions, i.e. undef, -/// insertelement/extractelement with constant indices for fixed vector type or -/// extractvalue instruction. -static bool isVectorLikeInstWithConstOps(Value *V) { - if (!isa(V) && - !isa(V)) - return false; - auto *I = dyn_cast(V); - if (!I || isa(I)) - return true; - if (!isa(I->getOperand(0)->getType())) - return false; - if (isa(I)) - return isConstant(I->getOperand(1)); - assert(isa(V) && "Expected only insertelement."); - return isConstant(I->getOperand(2)); -} - /// \returns true if all of the instructions in \p VL are in the same block or /// false otherwise. static bool allSameBlock(ArrayRef VL) { Instruction *I0 = dyn_cast(VL[0]); if (!I0) return false; - if (all_of(VL, isVectorLikeInstWithConstOps)) - return true; - BasicBlock *BB = I0->getParent(); for (int I = 1, E = VL.size(); I < E; I++) { auto *II = dyn_cast(VL[I]); @@ -245,6 +218,12 @@ static bool allSameBlock(ArrayRef VL) { return true; } +/// \returns True if the value is a constant (but not globals/constant +/// expressions). +static bool isConstant(Value *V) { + return isa(V) && !isa(V) && !isa(V); +} + /// \returns True if all of the values in \p VL are constants (but not /// globals/constant expressions). static bool allConstant(ArrayRef VL) { @@ -5953,9 +5932,7 @@ void BoUpSLP::optimizeGatherSequence() { Optional BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef VL, BoUpSLP *SLP, const InstructionsState &S) { - // No need to schedule PHIs, insertelement, extractelement and extractvalue - // instructions. - if (isa(S.OpValue) || isVectorLikeInstWithConstOps(S.OpValue)) + if (isa(S.OpValue) || isa(S.OpValue)) return nullptr; // Initialize the instruction bundle. @@ -6051,7 +6028,7 @@ BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef VL, BoUpSLP *SLP, void BoUpSLP::BlockScheduling::cancelScheduling(ArrayRef VL, Value *OpValue) { - if (isa(OpValue) || isVectorLikeInstWithConstOps(OpValue)) + if (isa(OpValue) || isa(OpValue)) return; ScheduleData *Bundle = getScheduleData(OpValue); @@ -6091,9 +6068,8 @@ bool BoUpSLP::BlockScheduling::extendSchedulingRegion(Value *V, return true; Instruction *I = dyn_cast(V); assert(I && "bundle member must be an instruction"); - assert(!isa(I) && !isVectorLikeInstWithConstOps(I) && - "phi nodes/insertelements/extractelements/extractvalues don't need to " - "be scheduled"); + assert(!isa(I) && !isa(I) && + "phi nodes/insertelements don't need to be scheduled"); auto &&CheckSheduleForI = [this, &S](Instruction *I) -> bool { ScheduleData *ISD = getScheduleData(I); if (!ISD) @@ -6363,7 +6339,7 @@ void BoUpSLP::scheduleBlock(BlockScheduling *BS) { for (auto *I = BS->ScheduleStart; I != BS->ScheduleEnd; I = I->getNextNode()) { BS->doForAllOpcodes(I, [this, &Idx, &NumToSchedule, BS](ScheduleData *SD) { - assert((isVectorLikeInstWithConstOps(SD->Inst) || + assert((isa(SD->Inst) || SD->isPartOfBundle() == (getTreeEntry(SD->Inst) != nullptr)) && "scheduler and vectorizer bundle mismatch"); SD->FirstInBundle->SchedulingPriority = Idx++; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/remark_extract_broadcast.ll b/llvm/test/Transforms/SLPVectorizer/X86/remark_extract_broadcast.ll index 02b88fce69f69e..9e3a886169a3f0 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/remark_extract_broadcast.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/remark_extract_broadcast.ll @@ -6,8 +6,16 @@ define void @fextr(i16* %ptr) { ; CHECK-LABEL: @fextr( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[LD:%.*]] = load <8 x i16>, <8 x i16>* undef, align 16 +; CHECK-NEXT: [[V0:%.*]] = extractelement <8 x i16> [[LD]], i32 0 ; CHECK-NEXT: br label [[T:%.*]] ; CHECK: t: +; CHECK-NEXT: [[V1:%.*]] = extractelement <8 x i16> [[LD]], i32 1 +; CHECK-NEXT: [[V2:%.*]] = extractelement <8 x i16> [[LD]], i32 2 +; CHECK-NEXT: [[V3:%.*]] = extractelement <8 x i16> [[LD]], i32 3 +; CHECK-NEXT: [[V4:%.*]] = extractelement <8 x i16> [[LD]], i32 4 +; CHECK-NEXT: [[V5:%.*]] = extractelement <8 x i16> [[LD]], i32 5 +; CHECK-NEXT: [[V6:%.*]] = extractelement <8 x i16> [[LD]], i32 6 +; CHECK-NEXT: [[V7:%.*]] = extractelement <8 x i16> [[LD]], i32 7 ; CHECK-NEXT: [[P0:%.*]] = getelementptr inbounds i16, i16* [[PTR:%.*]], i64 0 ; CHECK-NEXT: [[P1:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i64 1 ; CHECK-NEXT: [[P2:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i64 2 @@ -16,12 +24,18 @@ define void @fextr(i16* %ptr) { ; CHECK-NEXT: [[P5:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i64 5 ; CHECK-NEXT: [[P6:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i64 6 ; CHECK-NEXT: [[P7:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i64 7 -; CHECK-NEXT: [[TMP0:%.*]] = extractelement <8 x i16> [[LD]], i32 0 -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <8 x i16> poison, i16 [[TMP0]], i32 0 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = add <8 x i16> [[LD]], [[SHUFFLE]] -; CHECK-NEXT: [[TMP3:%.*]] = bitcast i16* [[P0]] to <8 x i16>* -; CHECK-NEXT: store <8 x i16> [[TMP2]], <8 x i16>* [[TMP3]], align 2 +; CHECK-NEXT: [[TMP0:%.*]] = insertelement <8 x i16> poison, i16 [[V0]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <8 x i16> [[TMP0]], i16 [[V1]], i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <8 x i16> [[TMP1]], i16 [[V2]], i32 2 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x i16> [[TMP2]], i16 [[V3]], i32 3 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <8 x i16> [[TMP3]], i16 [[V4]], i32 4 +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <8 x i16> [[TMP4]], i16 [[V5]], i32 5 +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <8 x i16> [[TMP5]], i16 [[V6]], i32 6 +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <8 x i16> [[TMP6]], i16 [[V7]], i32 7 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x i16> [[TMP0]], <8 x i16> poison, <8 x i32> +; CHECK-NEXT: [[TMP8:%.*]] = add <8 x i16> [[TMP7]], [[SHUFFLE]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i16* [[P0]] to <8 x i16>* +; CHECK-NEXT: store <8 x i16> [[TMP8]], <8 x i16>* [[TMP9]], align 2 ; CHECK-NEXT: ret void ; ; YAML: Pass: slp-vectorizer @@ -29,7 +43,7 @@ define void @fextr(i16* %ptr) { ; YAML-NEXT: Function: fextr ; YAML-NEXT: Args: ; YAML-NEXT: - String: 'Stores SLP vectorized with cost ' -; YAML-NEXT: - Cost: '-20' +; YAML-NEXT: - Cost: '-4' ; YAML-NEXT: - String: ' and with tree size ' ; YAML-NEXT: - TreeSize: '4'