diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 20059f9d62d55..a8272f4502535 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -545,11 +545,6 @@ class InnerLoopVectorizer { // Return true if any runtime check is added. bool areSafetyChecksAdded() { return AddedSafetyChecks; } - /// A type for vectorized values in the new loop. Each value from the - /// original loop, when vectorized, is represented by UF vector values in the - /// new unrolled loop, where UF is the unroll factor. - using VectorParts = SmallVector; - /// A helper function to scalarize a single Instruction in the innermost loop. /// Generates a sequence of scalar instances for each lane between \p MinLane /// and \p MaxLane, times each part between \p MinPart and \p MaxPart, @@ -8086,7 +8081,7 @@ void VPRecipeBuilder::createBlockInMask(BasicBlock *BB) { BlockMaskCache[BB] = BlockMask; } -VPWidenMemoryInstructionRecipe * +VPWidenMemoryRecipe * VPRecipeBuilder::tryToWidenMemory(Instruction *I, ArrayRef Operands, VFRange &Range) { assert((isa(I) || isa(I)) && @@ -8131,12 +8126,12 @@ VPRecipeBuilder::tryToWidenMemory(Instruction *I, ArrayRef Operands, Ptr = VectorPtr; } if (LoadInst *Load = dyn_cast(I)) - return new VPWidenMemoryInstructionRecipe(*Load, Ptr, Mask, Consecutive, - Reverse, I->getDebugLoc()); + return new VPWidenLoadRecipe(*Load, Ptr, Mask, Consecutive, Reverse, + I->getDebugLoc()); StoreInst *Store = cast(I); - return new VPWidenMemoryInstructionRecipe( - *Store, Ptr, Operands[0], Mask, Consecutive, Reverse, I->getDebugLoc()); + return new VPWidenStoreRecipe(*Store, Ptr, Operands[0], Mask, Consecutive, + Reverse, I->getDebugLoc()); } /// Creates a VPWidenIntOrFpInductionRecpipe for \p Phi. If needed, it will also @@ -8775,13 +8770,12 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) { // for this VPlan, replace the Recipes widening its memory instructions with a // single VPInterleaveRecipe at its insertion point. for (const auto *IG : InterleaveGroups) { - auto *Recipe = cast( - RecipeBuilder.getRecipe(IG->getInsertPos())); + auto *Recipe = + cast(RecipeBuilder.getRecipe(IG->getInsertPos())); SmallVector StoredValues; for (unsigned i = 0; i < IG->getFactor(); ++i) if (auto *SI = dyn_cast_or_null(IG->getMember(i))) { - auto *StoreR = - cast(RecipeBuilder.getRecipe(SI)); + auto *StoreR = cast(RecipeBuilder.getRecipe(SI)); StoredValues.push_back(StoreR->getStoredValue()); } @@ -9368,92 +9362,27 @@ static Instruction *lowerLoadUsingVectorIntrinsics(IRBuilderBase &Builder, return Call; } -void VPWidenMemoryInstructionRecipe::execute(VPTransformState &State) { - VPValue *StoredValue = isStore() ? getStoredValue() : nullptr; - - // Attempt to issue a wide load. 
- LoadInst *LI = dyn_cast(&Ingredient); - StoreInst *SI = dyn_cast(&Ingredient); - - assert((LI || SI) && "Invalid Load/Store instruction"); - assert((!SI || StoredValue) && "No stored value provided for widened store"); - assert((!LI || !StoredValue) && "Stored value provided for widened load"); +void VPWidenLoadRecipe::execute(VPTransformState &State) { + auto *LI = cast(&Ingredient); Type *ScalarDataTy = getLoadStoreType(&Ingredient); - auto *DataTy = VectorType::get(ScalarDataTy, State.VF); const Align Alignment = getLoadStoreAlignment(&Ingredient); - bool CreateGatherScatter = !isConsecutive(); + bool CreateGather = !isConsecutive(); auto &Builder = State.Builder; - InnerLoopVectorizer::VectorParts BlockInMaskParts(State.UF); - bool isMaskRequired = getMask(); - if (isMaskRequired) { - // Mask reversal is only needed for non-all-one (null) masks, as reverse of - // a null all-one mask is a null mask. - for (unsigned Part = 0; Part < State.UF; ++Part) { - Value *Mask = State.get(getMask(), Part); + State.setDebugLocFrom(getDebugLoc()); + for (unsigned Part = 0; Part < State.UF; ++Part) { + Value *NewLI; + Value *Mask = nullptr; + if (auto *VPMask = getMask()) { + // Mask reversal is only needed for non-all-one (null) masks, as reverse + // of a null all-one mask is a null mask. + Mask = State.get(VPMask, Part); if (isReverse()) Mask = Builder.CreateVectorReverse(Mask, "reverse"); - BlockInMaskParts[Part] = Mask; - } - } - - // Handle Stores: - if (SI) { - State.setDebugLocFrom(getDebugLoc()); - - for (unsigned Part = 0; Part < State.UF; ++Part) { - Instruction *NewSI = nullptr; - Value *StoredVal = State.get(StoredValue, Part); - // TODO: split this into several classes for better design. - if (State.EVL) { - assert(State.UF == 1 && "Expected only UF == 1 when vectorizing with " - "explicit vector length."); - assert(cast(State.EVL)->getOpcode() == - VPInstruction::ExplicitVectorLength && - "EVL must be VPInstruction::ExplicitVectorLength."); - Value *EVL = State.get(State.EVL, VPIteration(0, 0)); - // If EVL is not nullptr, then EVL must be a valid value set during plan - // creation, possibly default value = whole vector register length. EVL - // is created only if TTI prefers predicated vectorization, thus if EVL - // is not nullptr it also implies preference for predicated - // vectorization. - // FIXME: Support reverse store after vp_reverse is added. - Value *MaskPart = isMaskRequired ? BlockInMaskParts[Part] : nullptr; - NewSI = lowerStoreUsingVectorIntrinsics( - Builder, State.get(getAddr(), Part, !CreateGatherScatter), - StoredVal, CreateGatherScatter, MaskPart, EVL, Alignment); - } else if (CreateGatherScatter) { - Value *MaskPart = isMaskRequired ? BlockInMaskParts[Part] : nullptr; - Value *VectorGep = State.get(getAddr(), Part); - NewSI = Builder.CreateMaskedScatter(StoredVal, VectorGep, Alignment, - MaskPart); - } else { - if (isReverse()) { - // If we store to reverse consecutive memory locations, then we need - // to reverse the order of elements in the stored value. - StoredVal = Builder.CreateVectorReverse(StoredVal, "reverse"); - // We don't want to update the value in the map as it might be used in - // another expression. So don't call resetVectorValue(StoredVal). 
- } - auto *VecPtr = State.get(getAddr(), Part, /*IsScalar*/ true); - if (isMaskRequired) - NewSI = Builder.CreateMaskedStore(StoredVal, VecPtr, Alignment, - BlockInMaskParts[Part]); - else - NewSI = Builder.CreateAlignedStore(StoredVal, VecPtr, Alignment); - } - State.addMetadata(NewSI, SI); } - return; - } - // Handle loads. - assert(LI && "Must have a load instruction"); - State.setDebugLocFrom(getDebugLoc()); - for (unsigned Part = 0; Part < State.UF; ++Part) { - Value *NewLI; // TODO: split this into several classes for better design. if (State.EVL) { assert(State.UF == 1 && "Expected only UF == 1 when vectorizing with " @@ -9468,22 +9397,20 @@ void VPWidenMemoryInstructionRecipe::execute(VPTransformState &State) { // is not nullptr it also implies preference for predicated // vectorization. // FIXME: Support reverse loading after vp_reverse is added. - Value *MaskPart = isMaskRequired ? BlockInMaskParts[Part] : nullptr; NewLI = lowerLoadUsingVectorIntrinsics( - Builder, DataTy, State.get(getAddr(), Part, !CreateGatherScatter), - CreateGatherScatter, MaskPart, EVL, Alignment); - } else if (CreateGatherScatter) { - Value *MaskPart = isMaskRequired ? BlockInMaskParts[Part] : nullptr; + Builder, DataTy, State.get(getAddr(), Part, !CreateGather), + CreateGather, Mask, EVL, Alignment); + } else if (CreateGather) { Value *VectorGep = State.get(getAddr(), Part); - NewLI = Builder.CreateMaskedGather(DataTy, VectorGep, Alignment, MaskPart, + NewLI = Builder.CreateMaskedGather(DataTy, VectorGep, Alignment, Mask, nullptr, "wide.masked.gather"); State.addMetadata(NewLI, LI); } else { auto *VecPtr = State.get(getAddr(), Part, /*IsScalar*/ true); - if (isMaskRequired) - NewLI = Builder.CreateMaskedLoad( - DataTy, VecPtr, Alignment, BlockInMaskParts[Part], - PoisonValue::get(DataTy), "wide.masked.load"); + if (Mask) + NewLI = Builder.CreateMaskedLoad(DataTy, VecPtr, Alignment, Mask, + PoisonValue::get(DataTy), + "wide.masked.load"); else NewLI = Builder.CreateAlignedLoad(DataTy, VecPtr, Alignment, "wide.load"); @@ -9494,7 +9421,69 @@ void VPWidenMemoryInstructionRecipe::execute(VPTransformState &State) { NewLI = Builder.CreateVectorReverse(NewLI, "reverse"); } - State.set(getVPSingleValue(), NewLI, Part); + State.set(this, NewLI, Part); + } +} + +void VPWidenStoreRecipe::execute(VPTransformState &State) { + auto *SI = cast(&Ingredient); + + VPValue *StoredVPValue = getStoredValue(); + bool CreateScatter = !isConsecutive(); + const Align Alignment = getLoadStoreAlignment(&Ingredient); + + auto &Builder = State.Builder; + State.setDebugLocFrom(getDebugLoc()); + + for (unsigned Part = 0; Part < State.UF; ++Part) { + Instruction *NewSI = nullptr; + Value *Mask = nullptr; + if (auto *VPMask = getMask()) { + // Mask reversal is only needed for non-all-one (null) masks, as reverse + // of a null all-one mask is a null mask. + Mask = State.get(VPMask, Part); + if (isReverse()) + Mask = Builder.CreateVectorReverse(Mask, "reverse"); + } + + Value *StoredVal = State.get(StoredVPValue, Part); + if (isReverse()) { + assert(!State.EVL && "reversing not yet implemented with EVL"); + // If we store to reverse consecutive memory locations, then we need + // to reverse the order of elements in the stored value. + StoredVal = Builder.CreateVectorReverse(StoredVal, "reverse"); + // We don't want to update the value in the map as it might be used in + // another expression. So don't call resetVectorValue(StoredVal). + } + // TODO: split this into several classes for better design. 
+ if (State.EVL) { + assert(State.UF == 1 && "Expected only UF == 1 when vectorizing with " + "explicit vector length."); + assert(cast(State.EVL)->getOpcode() == + VPInstruction::ExplicitVectorLength && + "EVL must be VPInstruction::ExplicitVectorLength."); + Value *EVL = State.get(State.EVL, VPIteration(0, 0)); + // If EVL is not nullptr, then EVL must be a valid value set during plan + // creation, possibly default value = whole vector register length. EVL + // is created only if TTI prefers predicated vectorization, thus if EVL + // is not nullptr it also implies preference for predicated + // vectorization. + // FIXME: Support reverse store after vp_reverse is added. + NewSI = lowerStoreUsingVectorIntrinsics( + Builder, State.get(getAddr(), Part, !CreateScatter), StoredVal, + CreateScatter, Mask, EVL, Alignment); + } else if (CreateScatter) { + Value *VectorGep = State.get(getAddr(), Part); + NewSI = + Builder.CreateMaskedScatter(StoredVal, VectorGep, Alignment, Mask); + } else { + auto *VecPtr = State.get(getAddr(), Part, /*IsScalar*/ true); + if (Mask) + NewSI = Builder.CreateMaskedStore(StoredVal, VecPtr, Alignment, Mask); + else + NewSI = Builder.CreateAlignedStore(StoredVal, VecPtr, Alignment); + } + State.addMetadata(NewSI, SI); } } diff --git a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h index 605b47fa0a46b..b4c7ab02f928f 100644 --- a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h +++ b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h @@ -69,9 +69,9 @@ class VPRecipeBuilder { /// Check if the load or store instruction \p I should widened for \p /// Range.Start and potentially masked. Such instructions are handled by a /// recipe that takes an additional VPInstruction for the mask. - VPWidenMemoryInstructionRecipe *tryToWidenMemory(Instruction *I, - ArrayRef Operands, - VFRange &Range); + VPWidenMemoryRecipe *tryToWidenMemory(Instruction *I, + ArrayRef Operands, + VFRange &Range); /// Check if an induction recipe should be constructed for \p Phi. If so build /// and return it. If not, return null. diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index d86a81d4fb4c7..148227f1f1a57 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -875,7 +875,8 @@ class VPSingleDefRecipe : public VPRecipeBase, public VPValue { return true; case VPRecipeBase::VPInterleaveSC: case VPRecipeBase::VPBranchOnMaskSC: - case VPRecipeBase::VPWidenMemoryInstructionSC: + case VPRecipeBase::VPWidenLoadSC: + case VPRecipeBase::VPWidenStoreSC: // TODO: Widened stores don't define a value, but widened loads do. Split // the recipes to be able to make widened loads VPSingleDefRecipes. return false; @@ -2280,68 +2281,62 @@ class VPPredInstPHIRecipe : public VPSingleDefRecipe { } }; -/// A Recipe for widening load/store operations. -/// The recipe uses the following VPValues: -/// - For load: Address, optional mask -/// - For store: Address, stored value, optional mask -/// TODO: We currently execute only per-part unless a specific instance is -/// provided. -class VPWidenMemoryInstructionRecipe : public VPRecipeBase { +/// A common base class for widening memory operations. An optional mask can be +/// provided as the last operand. +class VPWidenMemoryRecipe : public VPRecipeBase { +protected: Instruction &Ingredient; - // Whether the loaded-from / stored-to addresses are consecutive. + /// Whether the accessed addresses are consecutive. 
bool Consecutive; - // Whether the consecutive loaded/stored addresses are in reverse order. + /// Whether the consecutive accessed addresses are in reverse order. bool Reverse; + /// Whether the memory access is masked. + bool IsMasked = false; + void setMask(VPValue *Mask) { + assert(!IsMasked && "cannot re-set mask"); if (!Mask) return; addOperand(Mask); + IsMasked = true; } - bool isMasked() const { - return isStore() ? getNumOperands() == 3 : getNumOperands() == 2; + VPWidenMemoryRecipe(const char unsigned SC, Instruction &I, + std::initializer_list Operands, + bool Consecutive, bool Reverse, DebugLoc DL) + : VPRecipeBase(SC, Operands, DL), Ingredient(I), Consecutive(Consecutive), + Reverse(Reverse) { + assert((Consecutive || !Reverse) && "Reverse implies consecutive"); } public: - VPWidenMemoryInstructionRecipe(LoadInst &Load, VPValue *Addr, VPValue *Mask, - bool Consecutive, bool Reverse, DebugLoc DL) - : VPRecipeBase(VPDef::VPWidenMemoryInstructionSC, {Addr}, DL), - Ingredient(Load), Consecutive(Consecutive), Reverse(Reverse) { - assert((Consecutive || !Reverse) && "Reverse implies consecutive"); - new VPValue(this, &Load); - setMask(Mask); - } + VPWidenMemoryRecipe *clone() override = 0; - VPWidenMemoryInstructionRecipe(StoreInst &Store, VPValue *Addr, - VPValue *StoredValue, VPValue *Mask, - bool Consecutive, bool Reverse, DebugLoc DL) - : VPRecipeBase(VPDef::VPWidenMemoryInstructionSC, {Addr, StoredValue}, - DL), - Ingredient(Store), Consecutive(Consecutive), Reverse(Reverse) { - assert((Consecutive || !Reverse) && "Reverse implies consecutive"); - setMask(Mask); + static inline bool classof(const VPRecipeBase *R) { + return R->getVPDefID() == VPDef::VPWidenLoadSC || + R->getVPDefID() == VPDef::VPWidenStoreSC; } - VPWidenMemoryInstructionRecipe *clone() override { - if (isStore()) - return new VPWidenMemoryInstructionRecipe( - cast(Ingredient), getAddr(), getStoredValue(), getMask(), - Consecutive, Reverse, getDebugLoc()); - - return new VPWidenMemoryInstructionRecipe(cast(Ingredient), - getAddr(), getMask(), Consecutive, - Reverse, getDebugLoc()); + static inline bool classof(const VPUser *U) { + auto *R = dyn_cast(U); + return R && classof(R); } - VP_CLASSOF_IMPL(VPDef::VPWidenMemoryInstructionSC) + /// Return whether the loaded-from / stored-to addresses are consecutive. + bool isConsecutive() const { return Consecutive; } + + /// Return whether the consecutive loaded/stored addresses are in reverse + /// order. + bool isReverse() const { return Reverse; } /// Return the address accessed by this recipe. - VPValue *getAddr() const { - return getOperand(0); // Address is the 1st, mandatory operand. - } + VPValue *getAddr() const { return getOperand(0); } + + /// Returns true if the recipe is masked. + bool isMasked() const { return IsMasked; } /// Return the mask used by this recipe. Note that a full mask is represented /// by a nullptr. @@ -2350,23 +2345,34 @@ class VPWidenMemoryInstructionRecipe : public VPRecipeBase { return isMasked() ? getOperand(getNumOperands() - 1) : nullptr; } - /// Returns true if this recipe is a store. - bool isStore() const { return isa(Ingredient); } + /// Generate the wide load/store. + void execute(VPTransformState &State) override { + llvm_unreachable("VPWidenMemoryRecipe should not be instantiated."); + } - /// Return the address accessed by this recipe. - VPValue *getStoredValue() const { - assert(isStore() && "Stored value only available for store instructions"); - return getOperand(1); // Stored value is the 2nd, mandatory operand. 
+ Instruction &getIngredient() const { return Ingredient; } +}; + +/// A recipe for widening load operations, using the address to load from and an +/// optional mask. +struct VPWidenLoadRecipe final : public VPWidenMemoryRecipe, public VPValue { + VPWidenLoadRecipe(LoadInst &Load, VPValue *Addr, VPValue *Mask, + bool Consecutive, bool Reverse, DebugLoc DL) + : VPWidenMemoryRecipe(VPDef::VPWidenLoadSC, Load, {Addr}, Consecutive, + Reverse, DL), + VPValue(this, &Load) { + setMask(Mask); } - // Return whether the loaded-from / stored-to addresses are consecutive. - bool isConsecutive() const { return Consecutive; } + VPWidenLoadRecipe *clone() override { + return new VPWidenLoadRecipe(cast(Ingredient), getAddr(), + getMask(), Consecutive, Reverse, + getDebugLoc()); + } - // Return whether the consecutive loaded/stored addresses are in reverse - // order. - bool isReverse() const { return Reverse; } + VP_CLASSOF_IMPL(VPDef::VPWidenLoadSC); - /// Generate the wide load/store. + /// Generate a wide load or gather. void execute(VPTransformState &State) override; #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) @@ -2380,16 +2386,51 @@ class VPWidenMemoryInstructionRecipe : public VPRecipeBase { assert(is_contained(operands(), Op) && "Op must be an operand of the recipe"); - // Widened, consecutive memory operations only demand the first lane of - // their address, unless the same operand is also stored. That latter can - // happen with opaque pointers. - return Op == getAddr() && isConsecutive() && - (!isStore() || Op != getStoredValue()); + // Widened, consecutive loads operations only demand the first lane of + // their address. + return Op == getAddr() && isConsecutive(); } - - Instruction &getIngredient() const { return Ingredient; } }; +/// A recipe for widening store operations, using the stored value, the address +/// to store to and an optional mask. +struct VPWidenStoreRecipe final : public VPWidenMemoryRecipe { + VPWidenStoreRecipe(StoreInst &Store, VPValue *Addr, VPValue *StoredVal, + VPValue *Mask, bool Consecutive, bool Reverse, DebugLoc DL) + : VPWidenMemoryRecipe(VPDef::VPWidenStoreSC, Store, {Addr, StoredVal}, + Consecutive, Reverse, DL) { + setMask(Mask); + } + + VPWidenStoreRecipe *clone() override { + return new VPWidenStoreRecipe(cast(Ingredient), getAddr(), + getStoredValue(), getMask(), Consecutive, + Reverse, getDebugLoc()); + } + + VP_CLASSOF_IMPL(VPDef::VPWidenStoreSC); + + /// Return the value stored by this recipe. + VPValue *getStoredValue() const { return getOperand(1); } + + /// Generate a wide store or scatter. + void execute(VPTransformState &State) override; + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) + /// Print the recipe. + void print(raw_ostream &O, const Twine &Indent, + VPSlotTracker &SlotTracker) const override; +#endif + + /// Returns true if the recipe only uses the first lane of operand \p Op. + bool onlyFirstLaneUsed(const VPValue *Op) const override { + assert(is_contained(operands(), Op) && + "Op must be an operand of the recipe"); + // Widened, consecutive stores only demand the first lane of their address, + // unless the same operand is also stored. + return Op == getAddr() && isConsecutive() && Op != getStoredValue(); + } +}; /// Recipe to expand a SCEV expression. 
class VPExpandSCEVRecipe : public VPSingleDefRecipe { const SCEV *Expr; diff --git a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp index c8ae2ee5a30fe..130fb04f586e7 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp @@ -108,9 +108,9 @@ Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPWidenCallRecipe *R) { return CI.getType(); } -Type *VPTypeAnalysis::inferScalarTypeForRecipe( - const VPWidenMemoryInstructionRecipe *R) { - assert(!R->isStore() && "Store recipes should not define any values"); +Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPWidenMemoryRecipe *R) { + assert(isa(R) && + "Store recipes should not define any values"); return cast(&R->getIngredient())->getType(); } @@ -231,8 +231,7 @@ Type *VPTypeAnalysis::inferScalarType(const VPValue *V) { return inferScalarType(R->getOperand(0)); }) .Case( + VPWidenCallRecipe, VPWidenMemoryRecipe, VPWidenSelectRecipe>( [this](const auto *R) { return inferScalarTypeForRecipe(R); }) .Case([V](const VPInterleaveRecipe *R) { // TODO: Use info from interleave group. diff --git a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.h b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.h index 4e69de7fd6812..7d310b1b31b6f 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.h +++ b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.h @@ -20,7 +20,7 @@ class VPInstruction; class VPWidenRecipe; class VPWidenCallRecipe; class VPWidenIntOrFpInductionRecipe; -class VPWidenMemoryInstructionRecipe; +class VPWidenMemoryRecipe; struct VPWidenSelectRecipe; class VPReplicateRecipe; class Type; @@ -46,7 +46,7 @@ class VPTypeAnalysis { Type *inferScalarTypeForRecipe(const VPWidenCallRecipe *R); Type *inferScalarTypeForRecipe(const VPWidenRecipe *R); Type *inferScalarTypeForRecipe(const VPWidenIntOrFpInductionRecipe *R); - Type *inferScalarTypeForRecipe(const VPWidenMemoryInstructionRecipe *R); + Type *inferScalarTypeForRecipe(const VPWidenMemoryRecipe *R); Type *inferScalarTypeForRecipe(const VPWidenSelectRecipe *R); Type *inferScalarTypeForRecipe(const VPReplicateRecipe *R); diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index 9f242a1bee8f6..78932643c81fa 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -47,9 +47,8 @@ bool VPRecipeBase::mayWriteToMemory() const { switch (getVPDefID()) { case VPInterleaveSC: return cast(this)->getNumStoreOperands() > 0; - case VPWidenMemoryInstructionSC: { - return cast(this)->isStore(); - } + case VPWidenStoreSC: + return true; case VPReplicateSC: case VPWidenCallSC: return cast(getVPSingleValue()->getUnderlyingValue()) @@ -64,6 +63,7 @@ bool VPRecipeBase::mayWriteToMemory() const { case VPWidenCastSC: case VPWidenGEPSC: case VPWidenIntOrFpInductionSC: + case VPWidenLoadSC: case VPWidenPHISC: case VPWidenSC: case VPWidenSelectSC: { @@ -81,16 +81,16 @@ bool VPRecipeBase::mayWriteToMemory() const { bool VPRecipeBase::mayReadFromMemory() const { switch (getVPDefID()) { - case VPWidenMemoryInstructionSC: { - return !cast(this)->isStore(); - } + case VPWidenLoadSC: + return true; case VPReplicateSC: case VPWidenCallSC: return cast(getVPSingleValue()->getUnderlyingValue()) ->mayReadFromMemory(); case VPBranchOnMaskSC: - case VPScalarIVStepsSC: case VPPredInstPHISC: + case VPScalarIVStepsSC: + case VPWidenStoreSC: return false; case VPBlendSC: case VPReductionSC: @@ -155,12 +155,13 @@ bool 
VPRecipeBase::mayHaveSideEffects() const { } case VPInterleaveSC: return mayWriteToMemory(); - case VPWidenMemoryInstructionSC: - assert(cast(this) - ->getIngredient() - .mayHaveSideEffects() == mayWriteToMemory() && - "mayHaveSideffects result for ingredient differs from this " - "implementation"); + case VPWidenLoadSC: + case VPWidenStoreSC: + assert( + cast(this)->getIngredient().mayHaveSideEffects() == + mayWriteToMemory() && + "mayHaveSideffects result for ingredient differs from this " + "implementation"); return mayWriteToMemory(); case VPReplicateSC: { auto *R = cast(this); @@ -1769,16 +1770,17 @@ void VPPredInstPHIRecipe::print(raw_ostream &O, const Twine &Indent, printOperands(O, SlotTracker); } -void VPWidenMemoryInstructionRecipe::print(raw_ostream &O, const Twine &Indent, - VPSlotTracker &SlotTracker) const { +void VPWidenLoadRecipe::print(raw_ostream &O, const Twine &Indent, + VPSlotTracker &SlotTracker) const { O << Indent << "WIDEN "; + printAsOperand(O, SlotTracker); + O << " = load "; + printOperands(O, SlotTracker); +} - if (!isStore()) { - getVPSingleValue()->printAsOperand(O, SlotTracker); - O << " = "; - } - O << Instruction::getOpcodeName(Ingredient.getOpcode()) << " "; - +void VPWidenStoreRecipe::print(raw_ostream &O, const Twine &Indent, + VPSlotTracker &SlotTracker) const { + O << Indent << "WIDEN store "; printOperands(O, SlotTracker); } #endif diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 1256e4d8fda50..382bf5ac11405 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -60,14 +60,14 @@ void VPlanTransforms::VPInstructionsToVPRecipes( assert(isa(&Ingredient) && "only VPInstructions expected here"); assert(!isa(Inst) && "phis should be handled above"); - // Create VPWidenMemoryInstructionRecipe for loads and stores. + // Create VPWidenMemoryRecipe for loads and stores. if (LoadInst *Load = dyn_cast(Inst)) { - NewRecipe = new VPWidenMemoryInstructionRecipe( + NewRecipe = new VPWidenLoadRecipe( *Load, Ingredient.getOperand(0), nullptr /*Mask*/, false /*Consecutive*/, false /*Reverse*/, Ingredient.getDebugLoc()); } else if (StoreInst *Store = dyn_cast(Inst)) { - NewRecipe = new VPWidenMemoryInstructionRecipe( + NewRecipe = new VPWidenStoreRecipe( *Store, Ingredient.getOperand(1), Ingredient.getOperand(0), nullptr /*Mask*/, false /*Consecutive*/, false /*Reverse*/, Ingredient.getDebugLoc()); @@ -977,10 +977,9 @@ void VPlanTransforms::truncateToMinimalBitwidths( vp_depth_first_deep(Plan.getVectorLoopRegion()))) { for (VPRecipeBase &R : make_early_inc_range(*VPBB)) { if (!isa(&R)) + VPWidenSelectRecipe, VPWidenMemoryRecipe>(&R)) continue; - if (isa(&R) && - cast(&R)->isStore()) + if (isa(&R)) continue; VPValue *ResultVPV = R.getVPSingleValue(); @@ -1048,10 +1047,9 @@ void VPlanTransforms::truncateToMinimalBitwidths( assert(cast(&R)->getOpcode() == Instruction::ICmp && "Only ICmps should not need extending the result."); - if (isa(&R)) { - assert(!cast(&R)->isStore() && "stores cannot be narrowed"); + assert(!isa(&R) && "stores cannot be narrowed"); + if (isa(&R)) continue; - } // Shrink operands by introducing truncates as needed. unsigned StartIdx = isa(&R) ? 
1 : 0; @@ -1315,7 +1313,7 @@ void VPlanTransforms::addExplicitVectorLength(VPlan &Plan) { ConstantInt::getTrue(CanonicalIVPHI->getScalarType()->getContext()); VPValue *VPTrueMask = Plan.getOrAddLiveIn(TrueMask); replaceHeaderPredicateWith(Plan, *VPTrueMask, [](VPUser &U, unsigned) { - return isa(U); + return isa(U); }); // Now create the ExplicitVectorLengthPhi recipe in the main loop. auto *EVLPhi = new VPEVLBasedIVPHIRecipe(StartV, DebugLoc()); @@ -1371,8 +1369,7 @@ void VPlanTransforms::dropPoisonGeneratingRecipes( // instruction. Widen memory instructions involved in address computation // will lead to gather/scatter instructions, which don't need to be // handled. - if (isa(CurRec) || - isa(CurRec) || + if (isa(CurRec) || isa(CurRec) || isa(CurRec) || isa(CurRec)) continue; @@ -1420,7 +1417,7 @@ void VPlanTransforms::dropPoisonGeneratingRecipes( auto Iter = vp_depth_first_deep(Plan.getEntry()); for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly(Iter)) { for (VPRecipeBase &Recipe : *VPBB) { - if (auto *WidenRec = dyn_cast(&Recipe)) { + if (auto *WidenRec = dyn_cast(&Recipe)) { Instruction &UnderlyingInstr = WidenRec->getIngredient(); VPRecipeBase *AddrDef = WidenRec->getAddr()->getDefiningRecipe(); if (AddrDef && WidenRec->isConsecutive() && diff --git a/llvm/lib/Transforms/Vectorize/VPlanValue.h b/llvm/lib/Transforms/Vectorize/VPlanValue.h index 3f8d4f4fe7d64..0bbc7ffb4a2fe 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanValue.h +++ b/llvm/lib/Transforms/Vectorize/VPlanValue.h @@ -36,7 +36,6 @@ class VPDef; class VPSlotTracker; class VPUser; class VPRecipeBase; -class VPWidenMemoryInstructionRecipe; // This is the base class of the VPlan Def/Use graph, used for modeling the data // flow into, within and out of the VPlan. VPValues can stand for live-ins @@ -51,7 +50,6 @@ class VPValue { friend class VPInterleavedAccessInfo; friend class VPSlotTracker; friend class VPRecipeBase; - friend class VPWidenMemoryInstructionRecipe; const unsigned char SubclassID; ///< Subclass identifier (for isa/dyn_cast). 
@@ -358,7 +356,8 @@ class VPDef { VPWidenCanonicalIVSC, VPWidenCastSC, VPWidenGEPSC, - VPWidenMemoryInstructionSC, + VPWidenLoadSC, + VPWidenStoreSC, VPWidenSC, VPWidenSelectSC, VPBlendSC, diff --git a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp index 12d37fa711db9..5587302207acd 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp @@ -128,7 +128,7 @@ static bool verifyVPBasicBlock(const VPBasicBlock *VPBB, } return true; } - if (isa(R)) + if (isa(R)) VPWidenMemRecipe = R; return true; }; diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/vector-reverse-mask4.ll b/llvm/test/Transforms/LoopVectorize/AArch64/vector-reverse-mask4.ll index d5ace655fdcc1..c22613509be4f 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/vector-reverse-mask4.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/vector-reverse-mask4.ll @@ -46,8 +46,8 @@ define void @vector_reverse_mask_v4i1(ptr noalias %a, ptr noalias %cond, i64 %N) ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[TMP7]], i64 -24 ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[TMP7]], i64 -56 ; CHECK-NEXT: [[REVERSE3:%.*]] = shufflevector <4 x i1> [[TMP5]], <4 x i1> poison, <4 x i32> -; CHECK-NEXT: [[REVERSE4:%.*]] = shufflevector <4 x i1> [[TMP6]], <4 x i1> poison, <4 x i32> ; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0(ptr [[TMP8]], i32 8, <4 x i1> [[REVERSE3]], <4 x double> poison) +; CHECK-NEXT: [[REVERSE4:%.*]] = shufflevector <4 x i1> [[TMP6]], <4 x i1> poison, <4 x i32> ; CHECK-NEXT: [[WIDE_MASKED_LOAD6:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0(ptr [[TMP9]], i32 8, <4 x i1> [[REVERSE4]], <4 x double> poison) ; CHECK-NEXT: [[TMP10:%.*]] = fadd <4 x double> [[WIDE_MASKED_LOAD]], ; CHECK-NEXT: [[TMP11:%.*]] = fadd <4 x double> [[WIDE_MASKED_LOAD6]], diff --git a/llvm/test/Transforms/LoopVectorize/X86/masked_load_store.ll b/llvm/test/Transforms/LoopVectorize/X86/masked_load_store.ll index eea2894f82794..aea72b7de5f42 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/masked_load_store.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/masked_load_store.ll @@ -1400,15 +1400,15 @@ define void @foo6(ptr nocapture readonly %in, ptr nocapture %out, i32 %size, ptr ; AVX2-NEXT: [[TMP30:%.*]] = getelementptr double, ptr [[TMP20]], i32 -12 ; AVX2-NEXT: [[TMP31:%.*]] = getelementptr double, ptr [[TMP30]], i32 -3 ; AVX2-NEXT: [[REVERSE12:%.*]] = shufflevector <4 x i1> [[TMP16]], <4 x i1> poison, <4 x i32> -; AVX2-NEXT: [[REVERSE14:%.*]] = shufflevector <4 x i1> [[TMP17]], <4 x i1> poison, <4 x i32> -; AVX2-NEXT: [[REVERSE17:%.*]] = shufflevector <4 x i1> [[TMP18]], <4 x i1> poison, <4 x i32> -; AVX2-NEXT: [[REVERSE20:%.*]] = shufflevector <4 x i1> [[TMP19]], <4 x i1> poison, <4 x i32> ; AVX2-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0(ptr [[TMP25]], i32 8, <4 x i1> [[REVERSE12]], <4 x double> poison), !alias.scope !21 ; AVX2-NEXT: [[REVERSE13:%.*]] = shufflevector <4 x double> [[WIDE_MASKED_LOAD]], <4 x double> poison, <4 x i32> +; AVX2-NEXT: [[REVERSE14:%.*]] = shufflevector <4 x i1> [[TMP17]], <4 x i1> poison, <4 x i32> ; AVX2-NEXT: [[WIDE_MASKED_LOAD15:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0(ptr [[TMP27]], i32 8, <4 x i1> [[REVERSE14]], <4 x double> poison), !alias.scope !21 ; AVX2-NEXT: [[REVERSE16:%.*]] = shufflevector <4 x double> [[WIDE_MASKED_LOAD15]], <4 x double> poison, <4 x i32> +; AVX2-NEXT: [[REVERSE17:%.*]] = 
shufflevector <4 x i1> [[TMP18]], <4 x i1> poison, <4 x i32> ; AVX2-NEXT: [[WIDE_MASKED_LOAD18:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0(ptr [[TMP29]], i32 8, <4 x i1> [[REVERSE17]], <4 x double> poison), !alias.scope !21 ; AVX2-NEXT: [[REVERSE19:%.*]] = shufflevector <4 x double> [[WIDE_MASKED_LOAD18]], <4 x double> poison, <4 x i32> +; AVX2-NEXT: [[REVERSE20:%.*]] = shufflevector <4 x i1> [[TMP19]], <4 x i1> poison, <4 x i32> ; AVX2-NEXT: [[WIDE_MASKED_LOAD21:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0(ptr [[TMP31]], i32 8, <4 x i1> [[REVERSE20]], <4 x double> poison), !alias.scope !21 ; AVX2-NEXT: [[REVERSE22:%.*]] = shufflevector <4 x double> [[WIDE_MASKED_LOAD21]], <4 x double> poison, <4 x i32> ; AVX2-NEXT: [[TMP32:%.*]] = fadd <4 x double> [[REVERSE13]], @@ -1524,15 +1524,15 @@ define void @foo6(ptr nocapture readonly %in, ptr nocapture %out, i32 %size, ptr ; AVX512-NEXT: [[TMP30:%.*]] = getelementptr double, ptr [[TMP20]], i32 -24 ; AVX512-NEXT: [[TMP31:%.*]] = getelementptr double, ptr [[TMP30]], i32 -7 ; AVX512-NEXT: [[REVERSE12:%.*]] = shufflevector <8 x i1> [[TMP16]], <8 x i1> poison, <8 x i32> -; AVX512-NEXT: [[REVERSE14:%.*]] = shufflevector <8 x i1> [[TMP17]], <8 x i1> poison, <8 x i32> -; AVX512-NEXT: [[REVERSE17:%.*]] = shufflevector <8 x i1> [[TMP18]], <8 x i1> poison, <8 x i32> -; AVX512-NEXT: [[REVERSE20:%.*]] = shufflevector <8 x i1> [[TMP19]], <8 x i1> poison, <8 x i32> ; AVX512-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <8 x double> @llvm.masked.load.v8f64.p0(ptr [[TMP25]], i32 8, <8 x i1> [[REVERSE12]], <8 x double> poison), !alias.scope !34 ; AVX512-NEXT: [[REVERSE13:%.*]] = shufflevector <8 x double> [[WIDE_MASKED_LOAD]], <8 x double> poison, <8 x i32> +; AVX512-NEXT: [[REVERSE14:%.*]] = shufflevector <8 x i1> [[TMP17]], <8 x i1> poison, <8 x i32> ; AVX512-NEXT: [[WIDE_MASKED_LOAD15:%.*]] = call <8 x double> @llvm.masked.load.v8f64.p0(ptr [[TMP27]], i32 8, <8 x i1> [[REVERSE14]], <8 x double> poison), !alias.scope !34 ; AVX512-NEXT: [[REVERSE16:%.*]] = shufflevector <8 x double> [[WIDE_MASKED_LOAD15]], <8 x double> poison, <8 x i32> +; AVX512-NEXT: [[REVERSE17:%.*]] = shufflevector <8 x i1> [[TMP18]], <8 x i1> poison, <8 x i32> ; AVX512-NEXT: [[WIDE_MASKED_LOAD18:%.*]] = call <8 x double> @llvm.masked.load.v8f64.p0(ptr [[TMP29]], i32 8, <8 x i1> [[REVERSE17]], <8 x double> poison), !alias.scope !34 ; AVX512-NEXT: [[REVERSE19:%.*]] = shufflevector <8 x double> [[WIDE_MASKED_LOAD18]], <8 x double> poison, <8 x i32> +; AVX512-NEXT: [[REVERSE20:%.*]] = shufflevector <8 x i1> [[TMP19]], <8 x i1> poison, <8 x i32> ; AVX512-NEXT: [[WIDE_MASKED_LOAD21:%.*]] = call <8 x double> @llvm.masked.load.v8f64.p0(ptr [[TMP31]], i32 8, <8 x i1> [[REVERSE20]], <8 x double> poison), !alias.scope !34 ; AVX512-NEXT: [[REVERSE22:%.*]] = shufflevector <8 x double> [[WIDE_MASKED_LOAD21]], <8 x double> poison, <8 x i32> ; AVX512-NEXT: [[TMP32:%.*]] = fadd <8 x double> [[REVERSE13]], diff --git a/llvm/unittests/Transforms/Vectorize/VPlanHCFGTest.cpp b/llvm/unittests/Transforms/Vectorize/VPlanHCFGTest.cpp index 777675b623f32..2b25c62ac2f65 100644 --- a/llvm/unittests/Transforms/Vectorize/VPlanHCFGTest.cpp +++ b/llvm/unittests/Transforms/Vectorize/VPlanHCFGTest.cpp @@ -192,9 +192,9 @@ TEST_F(VPlanHCFGTest, testVPInstructionToVPRecipesInner) { auto Iter = VecBB->begin(); EXPECT_NE(nullptr, dyn_cast(&*Iter++)); EXPECT_NE(nullptr, dyn_cast(&*Iter++)); - EXPECT_NE(nullptr, dyn_cast(&*Iter++)); + EXPECT_NE(nullptr, dyn_cast(&*Iter++)); EXPECT_NE(nullptr, dyn_cast(&*Iter++)); - 
EXPECT_NE(nullptr, dyn_cast(&*Iter++)); + EXPECT_NE(nullptr, dyn_cast(&*Iter++)); EXPECT_NE(nullptr, dyn_cast(&*Iter++)); EXPECT_NE(nullptr, dyn_cast(&*Iter++)); EXPECT_NE(nullptr, dyn_cast(&*Iter++)); diff --git a/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp b/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp index cb8737a9e64d2..64e9c06db3fe8 100644 --- a/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp +++ b/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp @@ -1029,7 +1029,7 @@ TEST(VPRecipeTest, CastVPBranchOnMaskRecipeToVPUser) { EXPECT_EQ(&Recipe, BaseR); } -TEST(VPRecipeTest, CastVPWidenMemoryInstructionRecipeToVPUserAndVPDef) { +TEST(VPRecipeTest, CastVPWidenMemoryRecipeToVPUserAndVPDef) { LLVMContext C; IntegerType *Int32 = IntegerType::get(C, 32); @@ -1038,7 +1038,7 @@ TEST(VPRecipeTest, CastVPWidenMemoryInstructionRecipeToVPUserAndVPDef) { new LoadInst(Int32, UndefValue::get(Int32Ptr), "", false, Align(1)); VPValue Addr; VPValue Mask; - VPWidenMemoryInstructionRecipe Recipe(*Load, &Addr, &Mask, true, false, {}); + VPWidenLoadRecipe Recipe(*Load, &Addr, &Mask, true, false, {}); EXPECT_TRUE(isa(&Recipe)); VPRecipeBase *BaseR = &Recipe; EXPECT_TRUE(isa(BaseR)); @@ -1133,7 +1133,7 @@ TEST(VPRecipeTest, MayHaveSideEffectsAndMayReadWriteMemory) { new LoadInst(Int32, UndefValue::get(Int32Ptr), "", false, Align(1)); VPValue Addr; VPValue Mask; - VPWidenMemoryInstructionRecipe Recipe(*Load, &Addr, &Mask, true, false, {}); + VPWidenLoadRecipe Recipe(*Load, &Addr, &Mask, true, false, {}); EXPECT_FALSE(Recipe.mayHaveSideEffects()); EXPECT_TRUE(Recipe.mayReadFromMemory()); EXPECT_FALSE(Recipe.mayWriteToMemory()); @@ -1147,8 +1147,7 @@ TEST(VPRecipeTest, MayHaveSideEffectsAndMayReadWriteMemory) { VPValue Addr; VPValue Mask; VPValue StoredV; - VPWidenMemoryInstructionRecipe Recipe(*Store, &Addr, &StoredV, &Mask, false, - false, {}); + VPWidenStoreRecipe Recipe(*Store, &Addr, &StoredV, &Mask, false, false, {}); EXPECT_TRUE(Recipe.mayHaveSideEffects()); EXPECT_FALSE(Recipe.mayReadFromMemory()); EXPECT_TRUE(Recipe.mayWriteToMemory());
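
For orientation, the outline below condenses the recipe hierarchy this patch introduces in VPlan.h. It only restates the declarations from the hunks above, with constructors, clone(), print() and the VP_CLASSOF_IMPL boilerplate elided, so treat it as a reading aid rather than compilable code:

// Condensed outline of the new hierarchy (see the VPlan.h hunks above).
class VPWidenMemoryRecipe : public VPRecipeBase {
protected:
  Instruction &Ingredient; // the original scalar load/store
  bool Consecutive;        // accessed addresses are consecutive
  bool Reverse;            // consecutive addresses are in reverse order
  bool IsMasked = false;   // set by setMask(); the mask is the last operand
public:
  VPValue *getAddr() const { return getOperand(0); }
  VPValue *getMask() const; // nullptr represents a full (all-true) mask
  bool isConsecutive() const { return Consecutive; }
  bool isReverse() const { return Reverse; }
  Instruction &getIngredient() const { return Ingredient; }
};

// Loads define a value, so the load recipe is also a VPValue.
struct VPWidenLoadRecipe final : VPWidenMemoryRecipe, VPValue {
  void execute(VPTransformState &State) override; // wide load or gather
};

// Stores define no value; the stored value is operand 1.
struct VPWidenStoreRecipe final : VPWidenMemoryRecipe {
  VPValue *getStoredValue() const { return getOperand(1); }
  void execute(VPTransformState &State) override; // wide store or scatter
};

Because the load recipe now carries its defined value directly (via the VPValue(this, &Load) base) instead of allocating a separate VPValue, VPWidenLoadRecipe::execute writes results with State.set(this, NewLI, Part) rather than State.set(getVPSingleValue(), ...).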
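
The sketch below shows how a call site migrates from the removed isStore() query to the new concrete recipe types; the two helper functions are hypothetical illustrations, not part of the patch, and assume the in-tree private header "VPlan.h" as included by the files changed above:

// Hypothetical helpers illustrating the new dispatch pattern.
#include "VPlan.h" // in-tree private header of LoopVectorize (assumption)
using namespace llvm;

// Before this patch: cast<VPWidenMemoryInstructionRecipe>(&R)->isStore().
static bool widenedRecipeIsStore(const VPRecipeBase &R) {
  return isa<VPWidenStoreRecipe>(&R);
}

// The address (operand 0) and optional mask remain reachable through the
// shared VPWidenMemoryRecipe base class.
static VPValue *widenedRecipeAddr(const VPRecipeBase &R) {
  if (auto *Mem = dyn_cast<VPWidenMemoryRecipe>(&R))
    return Mem->getAddr();
  return nullptr;
}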