diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index facb0fabdf57e..f7968abbe5b6b 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -7543,12 +7543,13 @@ VPRecipeBuilder::tryToWidenMemory(Instruction *I, ArrayRef Operands, } if (LoadInst *Load = dyn_cast(I)) return new VPWidenLoadRecipe(*Load, Ptr, Mask, Consecutive, Reverse, - VPIRMetadata(*Load, LVer), I->getDebugLoc()); + Load->getAlign(), VPIRMetadata(*Load, LVer), + I->getDebugLoc()); StoreInst *Store = cast(I); return new VPWidenStoreRecipe(*Store, Ptr, Operands[0], Mask, Consecutive, - Reverse, VPIRMetadata(*Store, LVer), - I->getDebugLoc()); + Reverse, Store->getAlign(), + VPIRMetadata(*Store, LVer), I->getDebugLoc()); } /// Creates a VPWidenIntOrFpInductionRecpipe for \p Phi. If needed, it will also diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 5b9f005e50e47..1f10058ab4a9a 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -3179,6 +3179,9 @@ class LLVM_ABI_FOR_TEST VPWidenMemoryRecipe : public VPRecipeBase, protected: Instruction &Ingredient; + /// Alignment information for this memory access. + Align Alignment; + /// Whether the accessed addresses are consecutive. bool Consecutive; @@ -3198,10 +3201,10 @@ class LLVM_ABI_FOR_TEST VPWidenMemoryRecipe : public VPRecipeBase, VPWidenMemoryRecipe(const char unsigned SC, Instruction &I, std::initializer_list Operands, - bool Consecutive, bool Reverse, + bool Consecutive, bool Reverse, Align Alignment, const VPIRMetadata &Metadata, DebugLoc DL) : VPRecipeBase(SC, Operands, DL), VPIRMetadata(Metadata), Ingredient(I), - Consecutive(Consecutive), Reverse(Reverse) { + Alignment(Alignment), Consecutive(Consecutive), Reverse(Reverse) { assert((Consecutive || !Reverse) && "Reverse implies consecutive"); } @@ -3242,6 +3245,9 @@ class LLVM_ABI_FOR_TEST VPWidenMemoryRecipe : public VPRecipeBase, return isMasked() ? getOperand(getNumOperands() - 1) : nullptr; } + /// Returns the alignment of the memory access. + Align getAlign() const { return Alignment; } + /// Generate the wide load/store. void execute(VPTransformState &State) override { llvm_unreachable("VPWidenMemoryRecipe should not be instantiated."); @@ -3259,18 +3265,18 @@ class LLVM_ABI_FOR_TEST VPWidenMemoryRecipe : public VPRecipeBase, struct LLVM_ABI_FOR_TEST VPWidenLoadRecipe final : public VPWidenMemoryRecipe, public VPValue { VPWidenLoadRecipe(LoadInst &Load, VPValue *Addr, VPValue *Mask, - bool Consecutive, bool Reverse, + bool Consecutive, bool Reverse, Align Alignment, const VPIRMetadata &Metadata, DebugLoc DL) : VPWidenMemoryRecipe(VPDef::VPWidenLoadSC, Load, {Addr}, Consecutive, - Reverse, Metadata, DL), + Reverse, Alignment, Metadata, DL), VPValue(this, &Load) { setMask(Mask); } VPWidenLoadRecipe *clone() override { return new VPWidenLoadRecipe(cast(Ingredient), getAddr(), - getMask(), Consecutive, Reverse, *this, - getDebugLoc()); + getMask(), Consecutive, Reverse, getAlign(), + *this, getDebugLoc()); } VP_CLASSOF_IMPL(VPDef::VPWidenLoadSC); @@ -3301,8 +3307,8 @@ struct VPWidenLoadEVLRecipe final : public VPWidenMemoryRecipe, public VPValue { VPWidenLoadEVLRecipe(VPWidenLoadRecipe &L, VPValue *Addr, VPValue &EVL, VPValue *Mask) : VPWidenMemoryRecipe(VPDef::VPWidenLoadEVLSC, L.getIngredient(), - {Addr, &EVL}, L.isConsecutive(), L.isReverse(), L, - L.getDebugLoc()), + {Addr, &EVL}, L.isConsecutive(), L.isReverse(), + L.getAlign(), L, L.getDebugLoc()), VPValue(this, &getIngredient()) { setMask(Mask); } @@ -3340,16 +3346,16 @@ struct VPWidenLoadEVLRecipe final : public VPWidenMemoryRecipe, public VPValue { struct LLVM_ABI_FOR_TEST VPWidenStoreRecipe final : public VPWidenMemoryRecipe { VPWidenStoreRecipe(StoreInst &Store, VPValue *Addr, VPValue *StoredVal, VPValue *Mask, bool Consecutive, bool Reverse, - const VPIRMetadata &Metadata, DebugLoc DL) + Align Alignment, const VPIRMetadata &Metadata, DebugLoc DL) : VPWidenMemoryRecipe(VPDef::VPWidenStoreSC, Store, {Addr, StoredVal}, - Consecutive, Reverse, Metadata, DL) { + Consecutive, Reverse, Alignment, Metadata, DL) { setMask(Mask); } VPWidenStoreRecipe *clone() override { return new VPWidenStoreRecipe(cast(Ingredient), getAddr(), getStoredValue(), getMask(), Consecutive, - Reverse, *this, getDebugLoc()); + Reverse, getAlign(), *this, getDebugLoc()); } VP_CLASSOF_IMPL(VPDef::VPWidenStoreSC); @@ -3384,7 +3390,7 @@ struct VPWidenStoreEVLRecipe final : public VPWidenMemoryRecipe { VPValue *Mask) : VPWidenMemoryRecipe(VPDef::VPWidenStoreEVLSC, S.getIngredient(), {Addr, S.getStoredValue(), &EVL}, S.isConsecutive(), - S.isReverse(), S, S.getDebugLoc()) { + S.isReverse(), S.getAlign(), S, S.getDebugLoc()) { setMask(Mask); } diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index 0b8b5ff1517c3..779ddb8e3753f 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -3525,7 +3525,6 @@ void VPPredInstPHIRecipe::print(raw_ostream &O, const Twine &Indent, InstructionCost VPWidenMemoryRecipe::computeCost(ElementCount VF, VPCostContext &Ctx) const { Type *Ty = toVectorTy(getLoadStoreType(&Ingredient), VF); - const Align Alignment = getLoadStoreAlignment(&Ingredient); unsigned AS = cast(Ctx.Types.inferScalarType(getAddr())) ->getAddressSpace(); unsigned Opcode = isa(this) @@ -3575,7 +3574,6 @@ InstructionCost VPWidenMemoryRecipe::computeCost(ElementCount VF, void VPWidenLoadRecipe::execute(VPTransformState &State) { Type *ScalarDataTy = getLoadStoreType(&Ingredient); auto *DataTy = VectorType::get(ScalarDataTy, State.VF); - const Align Alignment = getLoadStoreAlignment(&Ingredient); bool CreateGather = !isConsecutive(); auto &Builder = State.Builder; @@ -3630,7 +3628,6 @@ static Instruction *createReverseEVL(IRBuilderBase &Builder, Value *Operand, void VPWidenLoadEVLRecipe::execute(VPTransformState &State) { Type *ScalarDataTy = getLoadStoreType(&Ingredient); auto *DataTy = VectorType::get(ScalarDataTy, State.VF); - const Align Alignment = getLoadStoreAlignment(&Ingredient); bool CreateGather = !isConsecutive(); auto &Builder = State.Builder; @@ -3674,7 +3671,6 @@ InstructionCost VPWidenLoadEVLRecipe::computeCost(ElementCount VF, // TODO: Using getMemoryOpCost() instead of getMaskedMemoryOpCost when we // don't need to compare to the legacy cost model. Type *Ty = toVectorTy(getLoadStoreType(&Ingredient), VF); - const Align Alignment = getLoadStoreAlignment(&Ingredient); unsigned AS = cast(Ctx.Types.inferScalarType(getAddr())) ->getAddressSpace(); InstructionCost Cost = Ctx.TTI.getMaskedMemoryOpCost( @@ -3700,7 +3696,6 @@ void VPWidenLoadEVLRecipe::print(raw_ostream &O, const Twine &Indent, void VPWidenStoreRecipe::execute(VPTransformState &State) { VPValue *StoredVPValue = getStoredValue(); bool CreateScatter = !isConsecutive(); - const Align Alignment = getLoadStoreAlignment(&Ingredient); auto &Builder = State.Builder; @@ -3743,7 +3738,6 @@ void VPWidenStoreRecipe::print(raw_ostream &O, const Twine &Indent, void VPWidenStoreEVLRecipe::execute(VPTransformState &State) { VPValue *StoredValue = getStoredValue(); bool CreateScatter = !isConsecutive(); - const Align Alignment = getLoadStoreAlignment(&Ingredient); auto &Builder = State.Builder; @@ -3786,7 +3780,6 @@ InstructionCost VPWidenStoreEVLRecipe::computeCost(ElementCount VF, // TODO: Using getMemoryOpCost() instead of getMaskedMemoryOpCost when we // don't need to compare to the legacy cost model. Type *Ty = toVectorTy(getLoadStoreType(&Ingredient), VF); - const Align Alignment = getLoadStoreAlignment(&Ingredient); unsigned AS = cast(Ctx.Types.inferScalarType(getAddr())) ->getAddressSpace(); InstructionCost Cost = Ctx.TTI.getMaskedMemoryOpCost( diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 84817d78a077a..079aa189475df 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -91,13 +91,14 @@ bool VPlanTransforms::tryToConvertVPInstructionsToVPRecipes( if (LoadInst *Load = dyn_cast(Inst)) { NewRecipe = new VPWidenLoadRecipe( *Load, Ingredient.getOperand(0), nullptr /*Mask*/, - false /*Consecutive*/, false /*Reverse*/, VPIRMetadata(*Load), - Ingredient.getDebugLoc()); + false /*Consecutive*/, false /*Reverse*/, Load->getAlign(), + VPIRMetadata(*Load), Ingredient.getDebugLoc()); } else if (StoreInst *Store = dyn_cast(Inst)) { NewRecipe = new VPWidenStoreRecipe( *Store, Ingredient.getOperand(1), Ingredient.getOperand(0), nullptr /*Mask*/, false /*Consecutive*/, false /*Reverse*/, - VPIRMetadata(*Store), Ingredient.getDebugLoc()); + Store->getAlign(), VPIRMetadata(*Store), + Ingredient.getDebugLoc()); } else if (GetElementPtrInst *GEP = dyn_cast(Inst)) { NewRecipe = new VPWidenGEPRecipe(GEP, Ingredient.operands()); } else if (CallInst *CI = dyn_cast(Inst)) { @@ -4234,10 +4235,11 @@ void VPlanTransforms::narrowInterleaveGroups(VPlan &Plan, ElementCount VF, if (auto *LoadGroup = dyn_cast(R)) { // Narrow interleave group to wide load, as transformed VPlan will only // process one original iteration. + auto *LI = + cast(LoadGroup->getInterleaveGroup()->getInsertPos()); auto *L = new VPWidenLoadRecipe( - *cast(LoadGroup->getInterleaveGroup()->getInsertPos()), - LoadGroup->getAddr(), LoadGroup->getMask(), /*Consecutive=*/true, - /*Reverse=*/false, {}, LoadGroup->getDebugLoc()); + *LI, LoadGroup->getAddr(), LoadGroup->getMask(), /*Consecutive=*/true, + /*Reverse=*/false, LI->getAlign(), {}, LoadGroup->getDebugLoc()); L->insertBefore(LoadGroup); NarrowedOps.insert(L); return L; @@ -4280,10 +4282,11 @@ void VPlanTransforms::narrowInterleaveGroups(VPlan &Plan, ElementCount VF, Res = NarrowOp(Member0); } + auto *SI = + cast(StoreGroup->getInterleaveGroup()->getInsertPos()); auto *S = new VPWidenStoreRecipe( - *cast(StoreGroup->getInterleaveGroup()->getInsertPos()), - StoreGroup->getAddr(), Res, nullptr, /*Consecutive=*/true, - /*Reverse=*/false, {}, StoreGroup->getDebugLoc()); + *SI, StoreGroup->getAddr(), Res, nullptr, /*Consecutive=*/true, + /*Reverse=*/false, SI->getAlign(), {}, StoreGroup->getDebugLoc()); S->insertBefore(StoreGroup); StoreGroup->eraseFromParent(); } diff --git a/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp b/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp index c1791dfa5b761..59a9ea1a720b3 100644 --- a/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp +++ b/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp @@ -1132,7 +1132,8 @@ TEST_F(VPRecipeTest, CastVPWidenMemoryRecipeToVPUserAndVPDef) { new LoadInst(Int32, PoisonValue::get(Int32Ptr), "", false, Align(1)); VPValue *Addr = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 1)); VPValue *Mask = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 2)); - VPWidenLoadRecipe Recipe(*Load, Addr, Mask, true, false, {}, {}); + VPWidenLoadRecipe Recipe(*Load, Addr, Mask, true, false, Load->getAlign(), {}, + {}); EXPECT_TRUE(isa(&Recipe)); VPRecipeBase *BaseR = &Recipe; EXPECT_TRUE(isa(BaseR)); @@ -1249,7 +1250,8 @@ TEST_F(VPRecipeTest, MayHaveSideEffectsAndMayReadWriteMemory) { new LoadInst(Int32, PoisonValue::get(Int32Ptr), "", false, Align(1)); VPValue *Mask = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 1)); VPValue *Addr = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 2)); - VPWidenLoadRecipe Recipe(*Load, Addr, Mask, true, false, {}, {}); + VPWidenLoadRecipe Recipe(*Load, Addr, Mask, true, false, Load->getAlign(), + {}, {}); EXPECT_FALSE(Recipe.mayHaveSideEffects()); EXPECT_TRUE(Recipe.mayReadFromMemory()); EXPECT_FALSE(Recipe.mayWriteToMemory()); @@ -1263,8 +1265,8 @@ TEST_F(VPRecipeTest, MayHaveSideEffectsAndMayReadWriteMemory) { VPValue *Mask = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 1)); VPValue *Addr = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 2)); VPValue *StoredV = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 3)); - VPWidenStoreRecipe Recipe(*Store, Addr, StoredV, Mask, false, false, {}, - {}); + VPWidenStoreRecipe Recipe(*Store, Addr, StoredV, Mask, false, false, + Store->getAlign(), {}, {}); EXPECT_TRUE(Recipe.mayHaveSideEffects()); EXPECT_FALSE(Recipe.mayReadFromMemory()); EXPECT_TRUE(Recipe.mayWriteToMemory());