Skip to content

Commit

Permalink
[VPlan, SLP] Add simple SLP analysis on top of VPlan.
Browse files Browse the repository at this point in the history
This patch adds an initial implementation of the look-ahead SLP tree
construction described in 'Look-Ahead SLP: Auto-vectorization in the Presence
of Commutative Operations, CGO 2018 by Vasileios Porpodas, Rodrigo C. O. Rocha,
Luís F. W. Góes'.

It returns an SLP tree represented as VPInstructions, with combined
instructions represented as a single, wider VPInstruction.

This initial version does not support instructions with multiple
different users (either inside or outside the SLP tree) or
non-instruction operands; it won't generate any shuffles or
insertelement instructions.

It also just adds the analysis that builds an SLP tree rooted in a set
of stores. It does not include any cost modeling or memory legality
checks. The plan is to integrate it with VPlan based cost modeling, once
available and to only apply it to operations that can be widened.

A follow-up patch will add support for replacing instructions in a
VPlan with their SLP counterparts.

Reviewers: Ayal, mssimpso, rengolin, mkuper, hfinkel, hsaito, dcaballe, vporpo, RKSimon, ABataev

Reviewed By: rengolin

Differential Revision: https://reviews.llvm.org/D4949

llvm-svn: 346857
  • Loading branch information
fhahn committed Nov 14, 2018
1 parent 3f82b15 commit 09e516c
Show file tree
Hide file tree
Showing 7 changed files with 1,523 additions and 1 deletion.
1 change: 1 addition & 0 deletions llvm/lib/Transforms/Vectorize/CMakeLists.txt
Expand Up @@ -7,6 +7,7 @@ add_llvm_library(LLVMVectorize
VPlan.cpp
VPlanHCFGBuilder.cpp
VPlanHCFGTransforms.cpp
VPlanSLP.cpp
VPlanVerifier.cpp

ADDITIONAL_HEADER_DIRS
Expand Down
13 changes: 13 additions & 0 deletions llvm/lib/Transforms/Vectorize/VPlan.cpp
Expand Up @@ -338,6 +338,12 @@ void VPInstruction::print(raw_ostream &O) const {
case VPInstruction::ICmpULE:
O << "icmp ule";
break;
case VPInstruction::SLPLoad:
O << "combined load";
break;
case VPInstruction::SLPStore:
O << "combined store";
break;
default:
O << Instruction::getOpcodeName(getOpcode());
}
Expand Down Expand Up @@ -681,6 +687,13 @@ void VPWidenMemoryInstructionRecipe::print(raw_ostream &O,

template void DomTreeBuilder::Calculate<VPDominatorTree>(VPDominatorTree &DT);

/// Redirect the uses of this value to \p New: for each user of this value,
/// every operand slot that currently refers to this value is overwritten
/// with \p New.
void VPValue::replaceAllUsesWith(VPValue *New) {
  for (VPUser *U : users()) {
    // The operand count is read once per user, before any slot is rewritten.
    const unsigned NumOps = U->getNumOperands();
    for (unsigned Idx = 0; Idx != NumOps; ++Idx) {
      if (U->getOperand(Idx) != this)
        continue;
      U->setOperand(Idx, New);
    }
  }
}

void VPInterleavedAccessInfo::visitRegion(VPRegionBlock *Region,
Old2NewTy &Old2New,
InterleavedAccessInfo &IAI) {
Expand Down
125 changes: 124 additions & 1 deletion llvm/lib/Transforms/Vectorize/VPlan.h
Expand Up @@ -60,6 +60,7 @@ class Value;
class VPBasicBlock;
class VPRegionBlock;
class VPlan;
class VPlanSlp;

/// A range of powers-of-2 vectorization factors with fixed start and
/// adjustable end. The range includes start and excludes end, e.g.,:
Expand Down Expand Up @@ -609,10 +610,16 @@ class VPRecipeBase : public ilist_node_with_parent<VPRecipeBase, VPBasicBlock> {
/// the VPInstruction is also a single def-use vertex.
class VPInstruction : public VPUser, public VPRecipeBase {
friend class VPlanHCFGTransforms;
friend class VPlanSlp;

public:
/// VPlan opcodes, extending LLVM IR with idiomatic instructions.
enum { Not = Instruction::OtherOpsEnd + 1, ICmpULE };
enum {
// VPlan-specific opcodes are numbered starting right after
// Instruction::OtherOpsEnd, so they extend the LLVM IR opcode space.
Not = Instruction::OtherOpsEnd + 1,
// Printed as "icmp ule" by VPInstruction::print.
ICmpULE,
// Printed as "combined load" by VPInstruction::print.
SLPLoad,
// Printed as "combined store" by VPInstruction::print; reported as
// writing memory by mayWriteToMemory().
SLPStore,
};

private:
typedef unsigned char OpcodeTy;
Expand All @@ -622,6 +629,13 @@ class VPInstruction : public VPUser, public VPRecipeBase {
/// modeled instruction.
void generateInstruction(VPTransformState &State, unsigned Part);

protected:
/// Return the underlying value viewed as an Instruction. A null underlying
/// value is passed through as nullptr (cast_or_null); a non-null value is
/// expected to be an Instruction.
Instruction *getUnderlyingInstr() {
  auto *Underlying = getUnderlyingValue();
  return cast_or_null<Instruction>(Underlying);
}

/// Record \p I as the underlying value of this VPInstruction by forwarding
/// it to setUnderlyingValue.
void setUnderlyingInstr(Instruction *I) {
  setUnderlyingValue(I);
}

public:
VPInstruction(unsigned Opcode, ArrayRef<VPValue *> Operands)
: VPUser(VPValue::VPInstructionSC, Operands),
Expand All @@ -635,6 +649,11 @@ class VPInstruction : public VPUser, public VPRecipeBase {
return V->getVPValueID() == VPValue::VPInstructionSC;
}

/// Create a new heap-allocated VPInstruction with the same opcode and the
/// same operand values as this one. The caller receives the raw pointer
/// returned by `new`.
VPInstruction *clone() const {
  // Snapshot the operand list so it can be handed to the constructor as an
  // ArrayRef.
  SmallVector<VPValue *, 2> OperandCopy(operands());
  auto *Copy = new VPInstruction(Opcode, OperandCopy);
  return Copy;
}

/// Method to support type inquiry through isa, cast, and dyn_cast.
static inline bool classof(const VPRecipeBase *R) {
return R->getVPRecipeID() == VPRecipeBase::VPInstructionSC;
Expand All @@ -652,6 +671,14 @@ class VPInstruction : public VPUser, public VPRecipeBase {

/// Print the VPInstruction.
void print(raw_ostream &O) const;

/// Return true if this instruction may modify memory.
///
/// The decision is made from the opcode alone, so it is conservative: every
/// opcode that can write memory is reported as writing, regardless of the
/// attributes of the concrete underlying instruction.
bool mayWriteToMemory() const {
  switch (Opcode) {
  // TODO: we can use attributes of the called function to rule out memory
  // modifications.
  case Instruction::Call:
  case Instruction::Invoke:
  case Instruction::Store:
  case SLPStore:
  // Atomic read-modify-write operations, fences and va_arg are also treated
  // as writes, matching the opcodes LLVM IR's Instruction::mayWriteToMemory
  // reports as potentially modifying memory.
  case Instruction::AtomicRMW:
  case Instruction::AtomicCmpXchg:
  case Instruction::Fence:
  case Instruction::VAArg:
    return true;
  default:
    return false;
  }
}
};

/// VPWidenRecipe is a recipe for producing a copy of vector type for each
Expand Down Expand Up @@ -1508,6 +1535,102 @@ class VPInterleavedAccessInfo {
}
};

/// Class that maps (parts of) an existing VPlan to trees of combined
/// VPInstructions.
///
/// Every combined VPInstruction stored in BundleToCombined is deleted by the
/// destructor, so the class is non-copyable: a copy would share the same raw
/// pointers and delete them a second time.
class VPlanSlp {
private:
  /// Mode passed to and returned from getBest. Per getBest's contract the
  /// candidates are either values whose opcode matches the previous lane
  /// (presumably Opcode) or loads consecutive to it (presumably Load);
  /// Failed presumably signals that no candidate was suitable.
  enum class OpMode { Failed, Load, Opcode };

  /// A DenseMapInfo implementation for using SmallVector<VPValue *, 4> as
  /// DenseMap keys.
  struct BundleDenseMapInfo {
    /// Sentinel key marking an empty bucket: a one-element bundle holding the
    /// pointer value -1.
    static SmallVector<VPValue *, 4> getEmptyKey() {
      return {reinterpret_cast<VPValue *>(-1)};
    }

    /// Sentinel key marking an erased bucket: a one-element bundle holding
    /// the pointer value -2.
    static SmallVector<VPValue *, 4> getTombstoneKey() {
      return {reinterpret_cast<VPValue *>(-2)};
    }

    /// Hash a bundle by combining the pointer values of its elements in
    /// order.
    static unsigned getHashValue(const SmallVector<VPValue *, 4> &V) {
      return static_cast<unsigned>(hash_combine_range(V.begin(), V.end()));
    }

    /// Two bundles are equal if they hold the same values in the same order.
    static bool isEqual(const SmallVector<VPValue *, 4> &LHS,
                        const SmallVector<VPValue *, 4> &RHS) {
      return LHS == RHS;
    }
  };

  /// Mapping of values in the original VPlan to a combined VPInstruction.
  /// The mapped VPInstructions are deleted in the destructor.
  DenseMap<SmallVector<VPValue *, 4>, VPInstruction *, BundleDenseMapInfo>
      BundleToCombined;

  VPInterleavedAccessInfo &IAI;

  /// Basic block to operate on. For now, only instructions in a single BB are
  /// considered.
  const VPBasicBlock &BB;

  /// Indicates whether we managed to combine all visited instructions or not.
  bool CompletelySLP = true;

  /// Width of the widest combined bundle in bits.
  unsigned WidestBundleBits = 0;

  using MultiNodeOpTy = std::pair<VPInstruction *, SmallVector<VPValue *, 4>>;

  // Input operand bundles for the current multi node. Each multi node operand
  // bundle contains values not matching the multi node's opcode. They will
  // be reordered in reorderMultiNodeOps, once we completed building a
  // multi node.
  SmallVector<MultiNodeOpTy, 4> MultiNodeOps;

  /// Indicates whether we are building a multi node currently.
  bool MultiNodeActive = false;

  /// Check if we can vectorize Operands together.
  bool areVectorizable(ArrayRef<VPValue *> Operands) const;

  /// Add combined instruction \p New for the bundle \p Operands.
  void addCombined(ArrayRef<VPValue *> Operands, VPInstruction *New);

  /// Indicate we hit a bundle we failed to combine. Returns nullptr for now.
  VPInstruction *markFailed();

  /// Reorder operands in the multi node to maximize sequential memory access
  /// and commutative operations.
  SmallVector<MultiNodeOpTy, 4> reorderMultiNodeOps();

  /// Choose the best candidate to use for the lane after \p Last. The set of
  /// candidates to choose from are values with an opcode matching \p Last's
  /// or loads consecutive to \p Last.
  std::pair<OpMode, VPValue *> getBest(OpMode Mode, VPValue *Last,
                                       SmallVectorImpl<VPValue *> &Candidates,
                                       VPInterleavedAccessInfo &IAI);

  /// Print bundle \p Values to dbgs().
  void dumpBundle(ArrayRef<VPValue *> Values);

public:
  /// Create an SLP analysis operating on \p BB, using \p IAI for interleaved
  /// access information. Both are held by reference.
  VPlanSlp(VPInterleavedAccessInfo &IAI, VPBasicBlock &BB) : IAI(IAI), BB(BB) {}

  // The destructor deletes the instructions stored in BundleToCombined, so
  // copying would lead to a double delete. Forbid it explicitly.
  VPlanSlp(const VPlanSlp &) = delete;
  VPlanSlp &operator=(const VPlanSlp &) = delete;

  /// Delete every combined VPInstruction recorded in BundleToCombined.
  ~VPlanSlp() {
    for (auto &KV : BundleToCombined)
      delete KV.second;
  }

  /// Tries to build an SLP tree rooted at \p Operands and returns a
  /// VPInstruction combining \p Operands, if they can be combined.
  VPInstruction *buildGraph(ArrayRef<VPValue *> Operands);

  /// Return the width of the widest combined bundle in bits.
  unsigned getWidestBundleBits() const { return WidestBundleBits; }

  /// Return true if all visited instructions can be combined.
  bool isCompletelySLP() const { return CompletelySLP; }
};
} // end namespace llvm

#endif // LLVM_TRANSFORMS_VECTORIZE_VPLAN_H

0 comments on commit 09e516c

Please sign in to comment.