Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
66 changes: 66 additions & 0 deletions llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23476,6 +23476,72 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
// inselt undef, InVal, EltNo --> build_vector < InVal, InVal, ... >
if (InVec.isUndef() && TLI.shouldSplatInsEltVarIndex(VT))
return DAG.getSplat(VT, DL, InVal);

// Check if this operation is illegal and will be handled the default way.
if (TLI.getTypeAction(*DAG.getContext(), VT) ==
TargetLowering::TypeSplitVector ||
TLI.isOperationExpand(ISD::INSERT_VECTOR_ELT, VT)) {
// For each dynamic insertelt, the default way will save the vector to
// the stack, store at an offset, and load the modified vector. This can
// dramatically increase code size if we have a chain of insertelts on a
// large vector, requiring O(V*C) stores/loads, where V is the length of
// the vector and C is the length of the chain. If each insertelt is only
// fed into the next, the vector is write-only across this chain, and we
// can just save once before the chain and load after it in O(V + C)
// operations.
SmallVector<SDNode *> Seq{N};
unsigned NumDynamic = 1;
while (true) {
SDValue InVec = Seq.back()->getOperand(0);
if (InVec.getOpcode() != ISD::INSERT_VECTOR_ELT)
break;
Seq.push_back(InVec.getNode());
NumDynamic += !isa<ConstantSDNode>(InVec.getOperand(2));
}

// We will lower every insertelt in the sequence to a store. In the
// default handling, only dynamic insertelts in the sequence will be
// lowered to a store (+ vector save/load for each). Check that our
// approach reduces the total number of loads and stores over the default.
if (2 * VT.getVectorMinNumElements() + Seq.size() <
NumDynamic * (2 * VT.getVectorMinNumElements() + 1)) {
// In cases where the vector is illegal it will be broken down into
// parts and stored in parts - we should use the alignment for the
// smallest part.
Align SmallestAlign = DAG.getReducedAlign(VT, /*UseABI=*/false);
SDValue StackPtr =
DAG.CreateStackTemporary(VT.getStoreSize(), SmallestAlign);
auto &MF = DAG.getMachineFunction();
int FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex);

// Save the vector to the stack
SDValue InVec = Seq.back()->getOperand(0);
SDValue Store = DAG.getStore(DAG.getEntryNode(), DL, InVec, StackPtr,
PtrInfo, SmallestAlign);

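// Lower every insertelt in the sequence (constant-index ones included)
// to a store, oldest first, chaining each store onto the previous one.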
for (SDNode *N : reverse(Seq)) {
SDValue Elmnt = N->getOperand(1);
SDValue Index = N->getOperand(2);

// Store the new element. This may be larger than the vector element
// type, so use a truncating store.
SDValue EltPtr =
TLI.getVectorElementPointer(DAG, StackPtr, VT, Index);
EVT EltVT = Elmnt.getValueType();
Store = DAG.getTruncStore(
Store, DL, Elmnt, EltPtr, MachinePointerInfo::getUnknownStack(MF),
EltVT,
commonAlignment(SmallestAlign, EltVT.getFixedSizeInBits() / 8));
}

// Load the saved vector from the stack
SDValue Load =
DAG.getLoad(VT, DL, Store, StackPtr, PtrInfo, SmallestAlign);
return Load.getValue(0);
}
}

return SDValue();
}

Expand Down
Loading