[LV] Replace stored value with a VPValue (NFCI)
InnerLoopVectorizer's code called during VPlan execution still relies on the
original IR's def-use relations to decide which vector code to generate,
limiting the ability of VPlan transformations to modify def-use relations and
still have ILV generate the vector code.

This commit introduces a VPValue for VPWidenMemoryInstructionRecipe to use as
its stored value. The recipe is generated with a VPValue wrapping the stored
value of the scalar store. This reduces ILV's reliance on ingredient def-use
relations, as a step towards fully VPlan-based def-use relations.

Differential Revision: https://reviews.llvm.org/D76373
aniragil committed Mar 25, 2020
1 parent d72c586 commit 078c863
Showing 3 changed files with 54 additions and 14 deletions.
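To make the new operand scheme concrete before reading the diff, here is a minimal standalone sketch (plain C++, not LLVM code; MemRecipe and Value are hypothetical stand-ins) of the layout the recipe uses: loads hold {Addr[, Mask]}, stores hold {Addr, StoredValue[, Mask]}, with the optional mask always appended last so getMask() can stay uniform across loads and stores.

#include <cassert>
#include <vector>

struct Value {}; // stand-in for VPValue

struct MemRecipe {
  bool IsStore;
  std::vector<Value *> Ops; // single operand list, like the embedded VPUser

  MemRecipe(Value *Addr, Value *Mask) : IsStore(false), Ops{Addr} {
    if (Mask) // mask is optional and, when present, always the last operand
      Ops.push_back(Mask);
  }
  MemRecipe(Value *Addr, Value *Stored, Value *Mask)
      : IsStore(true), Ops{Addr, Stored} {
    if (Mask)
      Ops.push_back(Mask);
  }

  bool isMasked() const {
    return (!IsStore && Ops.size() == 2) || (IsStore && Ops.size() == 3);
  }
  Value *getAddr() const { return Ops[0]; }
  Value *getStoredValue() const {
    assert(IsStore && "stored value only exists for stores");
    return Ops[1]; // 2nd, mandatory operand of a store
  }
  Value *getMask() const { // nullptr means "no mask / all-true mask"
    return isMasked() ? Ops.back() : nullptr;
  }
};

int main() {
  Value Addr, Stored, Mask;
  MemRecipe Load(&Addr, /*Mask=*/nullptr); // unmasked load:  {Addr}
  MemRecipe Store(&Addr, &Stored, &Mask);  // masked store:   {Addr, Stored, Mask}
  assert(Load.getAddr() == &Addr && Load.getMask() == nullptr);
  assert(Store.getStoredValue() == &Stored && Store.getMask() == &Mask);
  return 0;
}

Because the stored value is a mandatory operand placed right after the address, and the mask is always last, the accessors can be written purely in terms of operand counts; the actual recipe in the diff below follows the same reasoning.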
20 changes: 15 additions & 5 deletions llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -486,8 +486,8 @@ class InnerLoopVectorizer {
   /// non-null. Use \p State to translate given VPValues to IR values in the
   /// vectorized loop.
   void vectorizeMemoryInstruction(Instruction *Instr, VPTransformState &State,
-                                  VPValue *Addr,
-                                  VPValue *BlockInMask = nullptr);
+                                  VPValue *Addr, VPValue *StoredValue,
+                                  VPValue *BlockInMask);
 
   /// Set the debug location in the builder using the debug location in
   /// the instruction.
@@ -2348,12 +2348,15 @@ void InnerLoopVectorizer::vectorizeInterleaveGroup(Instruction *Instr,
 void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr,
                                                      VPTransformState &State,
                                                      VPValue *Addr,
+                                                     VPValue *StoredValue,
                                                      VPValue *BlockInMask) {
   // Attempt to issue a wide load.
   LoadInst *LI = dyn_cast<LoadInst>(Instr);
   StoreInst *SI = dyn_cast<StoreInst>(Instr);
 
   assert((LI || SI) && "Invalid Load/Store instruction");
+  assert((!SI || StoredValue) && "No stored value provided for widened store");
+  assert((!LI || !StoredValue) && "Stored value provided for widened load");
 
   LoopVectorizationCostModel::InstWidening Decision =
       Cost->getWideningDecision(Instr, VF);
@@ -2425,7 +2428,7 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr,
 
     for (unsigned Part = 0; Part < UF; ++Part) {
       Instruction *NewSI = nullptr;
-      Value *StoredVal = getOrCreateVectorValue(SI->getValueOperand(), Part);
+      Value *StoredVal = State.get(StoredValue, Part);
       if (CreateGatherScatter) {
        Value *MaskPart = isMaskRequired ? BlockInMaskParts[Part] : nullptr;
        Value *VectorGep = State.get(Addr, Part);
@@ -6807,7 +6810,12 @@ VPRecipeBuilder::tryToWidenMemory(Instruction *I, VFRange &Range,
     Mask = createBlockInMask(I->getParent(), Plan);
 
   VPValue *Addr = Plan->getOrAddVPValue(getLoadStorePointerOperand(I));
-  return new VPWidenMemoryInstructionRecipe(*I, Addr, Mask);
+  if (LoadInst *Load = dyn_cast<LoadInst>(I))
+    return new VPWidenMemoryInstructionRecipe(*Load, Addr, Mask);
+
+  StoreInst *Store = cast<StoreInst>(I);
+  VPValue *StoredValue = Plan->getOrAddVPValue(Store->getValueOperand());
+  return new VPWidenMemoryInstructionRecipe(*Store, Addr, StoredValue, Mask);
 }
 
 VPWidenIntOrFpInductionRecipe *
@@ -7515,7 +7523,9 @@ void VPPredInstPHIRecipe::execute(VPTransformState &State) {
 }
 
 void VPWidenMemoryInstructionRecipe::execute(VPTransformState &State) {
-  State.ILV->vectorizeMemoryInstruction(&Instr, State, getAddr(), getMask());
+  VPValue *StoredValue = isa<StoreInst>(Instr) ? getStoredValue() : nullptr;
+  State.ILV->vectorizeMemoryInstruction(&Instr, State, getAddr(), StoredValue,
+                                        getMask());
 }
 
 // Determine how to lower the scalar epilogue, which depends on 1) optimising
40 changes: 33 additions & 7 deletions llvm/lib/Transforms/Vectorize/VPlan.h
@@ -1061,19 +1061,38 @@ class VPPredInstPHIRecipe : public VPRecipeBase {
 };
 
 /// A Recipe for widening load/store operations.
+/// The recipe uses the following VPValues:
+/// - For load: Address, optional mask
+/// - For store: Address, stored value, optional mask
 /// TODO: We currently execute only per-part unless a specific instance is
 /// provided.
 class VPWidenMemoryInstructionRecipe : public VPRecipeBase {
 private:
   Instruction &Instr;
   VPUser User;
 
+  void setMask(VPValue *Mask) {
+    if (!Mask)
+      return;
+    User.addOperand(Mask);
+  }
+
+  bool isMasked() const {
+    return (isa<LoadInst>(Instr) && User.getNumOperands() == 2) ||
+           (isa<StoreInst>(Instr) && User.getNumOperands() == 3);
+  }
+
 public:
-  VPWidenMemoryInstructionRecipe(Instruction &Instr, VPValue *Addr,
-                                 VPValue *Mask)
-      : VPRecipeBase(VPWidenMemoryInstructionSC), Instr(Instr), User({Addr}) {
-    if (Mask)
-      User.addOperand(Mask);
+  VPWidenMemoryInstructionRecipe(LoadInst &Load, VPValue *Addr, VPValue *Mask)
+      : VPRecipeBase(VPWidenMemoryInstructionSC), Instr(Load), User({Addr}) {
+    setMask(Mask);
   }
 
+  VPWidenMemoryInstructionRecipe(StoreInst &Store, VPValue *Addr,
+                                 VPValue *StoredValue, VPValue *Mask)
+      : VPRecipeBase(VPWidenMemoryInstructionSC), Instr(Store),
+        User({Addr, StoredValue}) {
+    setMask(Mask);
+  }
+
   /// Method to support type inquiry through isa, cast, and dyn_cast.
@@ -1089,8 +1108,15 @@ class VPWidenMemoryInstructionRecipe : public VPRecipeBase {
   /// Return the mask used by this recipe. Note that a full mask is represented
   /// by a nullptr.
   VPValue *getMask() const {
-    // Mask is optional and therefore the last, currently 2nd operand.
-    return User.getNumOperands() == 2 ? User.getOperand(1) : nullptr;
+    // Mask is optional and therefore the last operand.
+    return isMasked() ? User.getOperand(User.getNumOperands() - 1) : nullptr;
+  }
+
+  /// Return the value stored by this recipe.
+  VPValue *getStoredValue() const {
+    assert(isa<StoreInst>(Instr) &&
+           "Stored value only available for store instructions");
+    return User.getOperand(1); // Stored value is the 2nd, mandatory operand.
   }
 
   /// Generate the wide load/store.
8 changes: 6 additions & 2 deletions llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -55,10 +55,14 @@ void VPlanTransforms::VPInstructionsToVPRecipes(
 
       VPRecipeBase *NewRecipe = nullptr;
       // Create VPWidenMemoryInstructionRecipe for loads and stores.
-      if (isa<LoadInst>(Inst) || isa<StoreInst>(Inst))
+      if (LoadInst *Load = dyn_cast<LoadInst>(Inst))
         NewRecipe = new VPWidenMemoryInstructionRecipe(
-            *Inst, Plan->getOrAddVPValue(getLoadStorePointerOperand(Inst)),
+            *Load, Plan->getOrAddVPValue(getLoadStorePointerOperand(Inst)),
             nullptr /*Mask*/);
+      else if (StoreInst *Store = dyn_cast<StoreInst>(Inst))
+        NewRecipe = new VPWidenMemoryInstructionRecipe(
+            *Store, Plan->getOrAddVPValue(getLoadStorePointerOperand(Inst)),
+            Plan->getOrAddVPValue(Store->getValueOperand()), nullptr /*Mask*/);
       else if (PHINode *Phi = dyn_cast<PHINode>(Inst)) {
         InductionDescriptor II = Inductions.lookup(Phi);
         if (II.getKind() == InductionDescriptor::IK_IntInduction ||
