Skip to content

Commit

Permalink
[DAG]Introduce llvm::processShuffleMasks and use it for shuffles in D…
Browse files Browse the repository at this point in the history
…AG Type Legalizer.

We can process long shuffles (those spanning several actual vector
registers) most effectively if we take the actual register
representation into account. This lets us build a more accurate representation
of register shuffles and improve the number of recognised buildvector sequences.
The same function can also be used to improve the cost model for
shuffles in future patches.

Part of D100486

Differential Revision: https://reviews.llvm.org/D115653
  • Loading branch information
alexey-bataev committed Apr 20, 2022
1 parent d7565de commit 2f49163
Show file tree
Hide file tree
Showing 41 changed files with 11,998 additions and 10,423 deletions.
18 changes: 18 additions & 0 deletions llvm/include/llvm/Analysis/VectorUtils.h
Original file line number Diff line number Diff line change
Expand Up @@ -398,6 +398,24 @@ void narrowShuffleMaskElts(int Scale, ArrayRef<int> Mask,
bool widenShuffleMaskElts(int Scale, ArrayRef<int> Mask,
SmallVectorImpl<int> &ScaledMask);

/// Splits and processes a shuffle mask depending on the number of input and
/// output registers. The function does 2 main things: 1) splits the
/// source/destination vectors into real registers; 2) does the mask analysis
/// to identify which real registers are permuted. Then the function processes
/// the resulting per-register masks using the provided action callbacks, once
/// for each of the first \p NumOfUsedRegs destination registers:
///  - If no input register contributes to the destination register,
///    \p NoInputAction is called.
///  - If exactly 1 input register contributes, \p SingleInputAction is called.
///  - Otherwise (2 or more input registers contribute), \p ManyInputsAction is
///    called repeatedly, merging the contributing registers two at a time.
/// Mask elements that are undefined or that are >= Mask.size() are ignored.
/// \param Mask Original shuffle mask.
/// \param NumOfSrcRegs Number of source registers.
/// \param NumOfDestRegs Number of destination registers.
/// \param NumOfUsedRegs Number of actually used destination registers; only
/// the first \p NumOfUsedRegs destination registers are processed.
/// \param NoInputAction Called with no arguments for a destination register
/// that takes no elements from any source register.
/// \param SingleInputAction Called with the per-register mask (indices are
/// relative to the start of the source register) and the index of the only
/// contributing source register.
/// \param ManyInputsAction Called with a combined two-register mask and the
/// indices of the two registers being merged. In the combined mask, elements
/// taken from the second register are offset by the mask length. After each
/// call the first register is treated as holding the merged result for the
/// following steps.
void processShuffleMasks(
ArrayRef<int> Mask, unsigned NumOfSrcRegs, unsigned NumOfDestRegs,
unsigned NumOfUsedRegs, function_ref<void()> NoInputAction,
function_ref<void(ArrayRef<int>, unsigned)> SingleInputAction,
function_ref<void(ArrayRef<int>, unsigned, unsigned)> ManyInputsAction);

/// Compute a map of integer instructions to their minimum legal type
/// size.
///
Expand Down
110 changes: 110 additions & 0 deletions llvm/lib/Analysis/VectorUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -496,6 +496,116 @@ bool llvm::widenShuffleMaskElts(int Scale, ArrayRef<int> Mask,
return true;
}

/// Splits \p Mask into per-destination-register sub-masks, grouped by the
/// source register each element is taken from, and then invokes one of the
/// supplied actions for each of the first \p NumOfUsedRegs destination
/// registers:
///  - \p NoInputAction when no source register contributes,
///  - \p SingleInputAction(SubMask, SrcReg) when exactly one contributes,
///  - \p ManyInputsAction(CombinedMask, FirstReg, SecondReg), possibly several
///    times, when two or more contribute. Registers are merged pairwise; in
///    the combined mask, elements of the second register are offset by the
///    sub-mask length.
/// Mask elements that are undefined or >= Mask.size() are ignored.
void llvm::processShuffleMasks(
    ArrayRef<int> Mask, unsigned NumOfSrcRegs, unsigned NumOfDestRegs,
    unsigned NumOfUsedRegs, function_ref<void()> NoInputAction,
    function_ref<void(ArrayRef<int>, unsigned)> SingleInputAction,
    function_ref<void(ArrayRef<int>, unsigned, unsigned)> ManyInputsAction) {
  // Both counts are used as divisors below, and Res is indexed by the used
  // destination registers, so reject inconsistent arguments early.
  assert(NumOfSrcRegs > 0 && NumOfDestRegs > 0 &&
         "Expected at least one source and one destination register.");
  assert(NumOfUsedRegs <= NumOfDestRegs &&
         "Cannot use more destination registers than are available.");
  // Res[DestReg][SrcReg] is the sub-mask of elements that DestReg takes from
  // SrcReg (empty if DestReg takes nothing from SrcReg).
  SmallVector<SmallVector<SmallVector<int>>> Res(NumOfDestRegs);
  // Try to perform better estimation of the permutation.
  // 1. Split the source/destination vectors into real registers.
  // 2. Do the mask analysis to identify which real registers are
  // permuted.
  int Sz = Mask.size();
  unsigned SzDest = Sz / NumOfDestRegs;
  unsigned SzSrc = Sz / NumOfSrcRegs;
  for (unsigned I = 0; I < NumOfDestRegs; ++I) {
    auto &RegMasks = Res[I];
    RegMasks.assign(NumOfSrcRegs, {});
    // Distribute the elements of this destination register over the source
    // registers they come from.
    for (unsigned K = 0; K < SzDest; ++K) {
      int Idx = I * SzDest + K;
      if (Idx == Sz)
        break;
      // Skip undefined elements and elements outside of the first Sz indices.
      if (Mask[Idx] >= Sz || Mask[Idx] == UndefMaskElem)
        continue;
      int SrcRegIdx = Mask[Idx] / SzSrc;
      // Lazily create the sub-mask the first time a source register is seen
      // for this destination register; untouched lanes stay undefined.
      if (RegMasks[SrcRegIdx].empty())
        RegMasks[SrcRegIdx].assign(SzDest, UndefMaskElem);
      // Store the index relative to the start of the source register.
      RegMasks[SrcRegIdx][K] = Mask[Idx] % SzSrc;
    }
  }
  // Process split mask.
  for (unsigned I = 0; I < NumOfUsedRegs; ++I) {
    auto &Dest = Res[I];
    int NumSrcRegs =
        count_if(Dest, [](ArrayRef<int> Mask) { return !Mask.empty(); });
    switch (NumSrcRegs) {
    case 0:
      // No input vectors were used!
      NoInputAction();
      break;
    case 1: {
      // Exactly one source register contributes: find its (the only
      // non-empty) sub-mask.
      auto *It =
          find_if(Dest, [](ArrayRef<int> Mask) { return !Mask.empty(); });
      unsigned SrcReg = std::distance(Dest.begin(), It);
      SingleInputAction(*It, SrcReg);
      break;
    }
    default: {
      // The first mask is a permutation of a single register. Since we have
      // two or more input registers to shuffle, we merge the masks for 2 first
      // registers and generate a shuffle of 2 registers rather than the
      // reordering of the first register and then shuffle with the second
      // register. Next, generate the shuffles of the resulting register + the
      // remaining registers from the list.
      auto &&CombineMasks = [](MutableArrayRef<int> FirstMask,
                               ArrayRef<int> SecondMask) {
        for (int Idx = 0, VF = FirstMask.size(); Idx < VF; ++Idx) {
          if (SecondMask[Idx] != UndefMaskElem) {
            assert(FirstMask[Idx] == UndefMaskElem &&
                   "Expected undefined mask element.");
            // Elements of the second register are addressed past the first.
            FirstMask[Idx] = SecondMask[Idx] + VF;
          }
        }
      };
      // After a merge the register holds the elements already in their final
      // lanes, so every defined lane becomes an identity element.
      auto &&NormalizeMask = [](MutableArrayRef<int> Mask) {
        for (int Idx = 0, VF = Mask.size(); Idx < VF; ++Idx) {
          if (Mask[Idx] != UndefMaskElem)
            Mask[Idx] = Idx;
        }
      };
      int SecondIdx;
      do {
        int FirstIdx = -1;
        SecondIdx = -1;
        MutableArrayRef<int> FirstMask, SecondMask;
        // Dest holds one sub-mask per *source* register, so iterate over its
        // own size rather than over the number of destination registers.
        for (unsigned SrcReg = 0, NumRegs = Dest.size(); SrcReg < NumRegs;
             ++SrcReg) {
          SmallVectorImpl<int> &RegMask = Dest[SrcReg];
          if (RegMask.empty())
            continue;

          // FirstIdx == SecondIdx means no pair is currently open: start a
          // new one with this register.
          if (FirstIdx == SecondIdx) {
            FirstIdx = SrcReg;
            FirstMask = RegMask;
            continue;
          }
          // Close the pair: merge this register into the first one.
          SecondIdx = SrcReg;
          SecondMask = RegMask;
          CombineMasks(FirstMask, SecondMask);
          ManyInputsAction(FirstMask, FirstIdx, SecondIdx);
          NormalizeMask(FirstMask);
          RegMask.clear();
          // Remember the merged register so that a trailing unpaired register
          // can be merged into it after the loop.
          SecondMask = FirstMask;
          SecondIdx = FirstIdx;
        }
        // A register was left unpaired: merge it into the last merged one.
        if (FirstIdx != SecondIdx && SecondIdx >= 0) {
          CombineMasks(SecondMask, FirstMask);
          ManyInputsAction(SecondMask, SecondIdx, FirstIdx);
          Dest[FirstIdx].clear();
          NormalizeMask(SecondMask);
        }
        // Repeat until a pass performs no merge, i.e. one register is left.
      } while (SecondIdx >= 0);
      break;
    }
    }
  }
}

MapVector<Instruction *, uint64_t>
llvm::computeMinimumValueSizes(ArrayRef<BasicBlock *> Blocks, DemandedBits &DB,
const TargetTransformInfo *TTI) {
Expand Down
39 changes: 30 additions & 9 deletions llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20413,18 +20413,39 @@ SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
int Left = 2 * In;
int Right = 2 * In + 1;
SmallVector<int, 8> Mask(NumElems, -1);
for (unsigned i = 0; i != NumElems; ++i) {
if (VectorMask[i] == Left) {
Mask[i] = i;
VectorMask[i] = In;
} else if (VectorMask[i] == Right) {
Mask[i] = i + NumElems;
VectorMask[i] = In;
SDValue L = Shuffles[Left];
ArrayRef<int> LMask;
bool IsLeftShuffle = L.getOpcode() == ISD::VECTOR_SHUFFLE &&
L.use_empty() && L.getOperand(1).isUndef() &&
L.getOperand(0).getValueType() == L.getValueType();
if (IsLeftShuffle) {
LMask = cast<ShuffleVectorSDNode>(L.getNode())->getMask();
L = L.getOperand(0);
}
SDValue R = Shuffles[Right];
ArrayRef<int> RMask;
bool IsRightShuffle = R.getOpcode() == ISD::VECTOR_SHUFFLE &&
R.use_empty() && R.getOperand(1).isUndef() &&
R.getOperand(0).getValueType() == R.getValueType();
if (IsRightShuffle) {
RMask = cast<ShuffleVectorSDNode>(R.getNode())->getMask();
R = R.getOperand(0);
}
for (unsigned I = 0; I != NumElems; ++I) {
if (VectorMask[I] == Left) {
Mask[I] = I;
if (IsLeftShuffle)
Mask[I] = LMask[I];
VectorMask[I] = In;
} else if (VectorMask[I] == Right) {
Mask[I] = I + NumElems;
if (IsRightShuffle)
Mask[I] = RMask[I] + NumElems;
VectorMask[I] = In;
}
}

Shuffles[In] =
DAG.getVectorShuffle(VT, DL, Shuffles[Left], Shuffles[Right], Mask);
Shuffles[In] = DAG.getVectorShuffle(VT, DL, L, R, Mask);
}
}
return Shuffles[0];
Expand Down
Loading

0 comments on commit 2f49163

Please sign in to comment.