Skip to content

Commit

Permalink
[AArch64] Add ZIP and UZP shuffle costs. (#88150)
Browse files Browse the repository at this point in the history
This adds some costs for the shuffle instructions that should be lowered
to zip1/zip2/uzp1/uzp2 instructions.
  • Loading branch information
davemgreen committed Apr 11, 2024
1 parent 3f7f446 commit a536743
Show file tree
Hide file tree
Showing 6 changed files with 84 additions and 70 deletions.
29 changes: 0 additions & 29 deletions llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11851,35 +11851,6 @@ static bool isREVMask(ArrayRef<int> M, EVT VT, unsigned BlockSize) {
return true;
}

/// Check whether shuffle mask \p M interleaves the matching halves of two
/// input vectors of type \p VT, i.e. has the form
///   <Idx, Idx + NumElts, Idx + 1, Idx + 1 + NumElts, ...>
/// where Idx starts at 0 (\p WhichResult == 0) or at NumElts / 2
/// (\p WhichResult == 1). Negative (undef) mask elements match anything.
///
/// \param M            Shuffle mask; must hold at least NumElts entries.
/// \param VT           Vector type supplying the element count.
/// \param WhichResult  Set to 0 or 1 as described above. It is written
///                     whenever the element count is even, including when the
///                     function returns false, so only use it on success.
/// \returns true if the mask matches; false otherwise, or if the element count
///          is odd.
static bool isZIPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
  unsigned NumElts = VT.getVectorNumElements();
  if (NumElts % 2 != 0)
    return false;

  // Choose the half from the first defined lane rather than from M[0] alone:
  // an undef leading lane must not force the high-half form. A mask with no
  // defined lane keeps the historical answer of WhichResult == 1.
  WhichResult = 1;
  for (unsigned i = 0; i != NumElts; ++i) {
    if (M[i] < 0)
      continue; // ignore UNDEF indices
    // Index the low-half interleave would place in lane i.
    unsigned LowHalfIdx = i / 2 + (i % 2) * NumElts;
    WhichResult = ((unsigned)M[i] == LowHalfIdx ? 0 : 1);
    break;
  }

  unsigned Idx = WhichResult * NumElts / 2;
  for (unsigned i = 0; i != NumElts; i += 2) {
    if ((M[i] >= 0 && (unsigned)M[i] != Idx) ||
        (M[i + 1] >= 0 && (unsigned)M[i + 1] != Idx + NumElts))
      return false;
    Idx += 1;
  }

  return true;
}

/// Check whether shuffle mask \p M selects every other element of the
/// concatenated inputs, i.e. lane i holds index 2 * i + WhichResult, where
/// \p WhichResult is 0 (even elements) or 1 (odd elements). Negative (undef)
/// mask elements match anything.
///
/// \param M            Shuffle mask; must hold at least NumElts entries.
/// \param VT           Vector type supplying the element count.
/// \param WhichResult  Set to 0 or 1 as described above. It is always written,
///                     including when the function returns false, so only use
///                     it on success.
/// \returns true if every defined lane matches the pattern.
static bool isUZPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
  unsigned NumElts = VT.getVectorNumElements();

  // Choose even/odd from the first defined lane rather than from M[0] alone:
  // an undef leading lane must not force the odd form. A mask with no defined
  // lane keeps the historical answer of WhichResult == 1.
  WhichResult = 1;
  for (unsigned i = 0; i != NumElts; ++i) {
    if (M[i] < 0)
      continue; // ignore UNDEF indices
    WhichResult = ((unsigned)M[i] == 2 * i ? 0 : 1);
    break;
  }

  for (unsigned i = 0; i != NumElts; ++i) {
    if (M[i] < 0)
      continue; // ignore UNDEF indices
    if ((unsigned)M[i] != 2 * i + WhichResult)
      return false;
  }

  return true;
}

static bool isTRNMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
unsigned NumElts = VT.getVectorNumElements();
if (NumElts % 2 != 0)
Expand Down
33 changes: 33 additions & 0 deletions llvm/lib/Target/AArch64/AArch64PerfectShuffle.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@

#include "llvm/ADT/ArrayRef.h"

namespace llvm {

// 31 entries have cost 0
// 756 entries have cost 1
// 3690 entries have cost 2
Expand Down Expand Up @@ -6618,4 +6620,35 @@ static unsigned getPerfectShuffleCost(llvm::ArrayRef<int> M) {
return (PFEntry >> 30) + 1;
}

/// Check whether shuffle mask \p M interleaves the matching halves of two
/// input vectors of type \p VT, i.e. has the form
///   <Idx, Idx + NumElts, Idx + 1, Idx + 1 + NumElts, ...>
/// where Idx starts at 0 (\p WhichResult == 0) or at NumElts / 2
/// (\p WhichResult == 1). Negative (undef) mask elements match anything.
///
/// \param M            Shuffle mask; must hold at least NumElts entries.
/// \param VT           Vector type supplying the element count.
/// \param WhichResult  Set to 0 or 1 as described above. It is written
///                     whenever the element count is even, including when the
///                     function returns false, so only use it on success.
/// \returns true if the mask matches; false otherwise, or if the element count
///          is odd.
inline bool isZIPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
  unsigned NumElts = VT.getVectorNumElements();
  if (NumElts % 2 != 0)
    return false;

  // Choose the half from the first defined lane rather than from M[0] alone:
  // an undef leading lane must not force the high-half form. A mask with no
  // defined lane keeps the historical answer of WhichResult == 1.
  WhichResult = 1;
  for (unsigned i = 0; i != NumElts; ++i) {
    if (M[i] < 0)
      continue; // ignore UNDEF indices
    // Index the low-half interleave would place in lane i.
    unsigned LowHalfIdx = i / 2 + (i % 2) * NumElts;
    WhichResult = ((unsigned)M[i] == LowHalfIdx ? 0 : 1);
    break;
  }

  unsigned Idx = WhichResult * NumElts / 2;
  for (unsigned i = 0; i != NumElts; i += 2) {
    if ((M[i] >= 0 && (unsigned)M[i] != Idx) ||
        (M[i + 1] >= 0 && (unsigned)M[i + 1] != Idx + NumElts))
      return false;
    Idx += 1;
  }

  return true;
}

/// Check whether shuffle mask \p M selects every other element of the
/// concatenated inputs, i.e. lane i holds index 2 * i + WhichResult, where
/// \p WhichResult is 0 (even elements) or 1 (odd elements). Negative (undef)
/// mask elements match anything.
///
/// \param M            Shuffle mask; must hold at least NumElts entries.
/// \param VT           Vector type supplying the element count.
/// \param WhichResult  Set to 0 or 1 as described above. It is always written,
///                     including when the function returns false, so only use
///                     it on success.
/// \returns true if every defined lane matches the pattern.
inline bool isUZPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
  unsigned NumElts = VT.getVectorNumElements();

  // Choose even/odd from the first defined lane rather than from M[0] alone:
  // an undef leading lane must not force the odd form. A mask with no defined
  // lane keeps the historical answer of WhichResult == 1.
  WhichResult = 1;
  for (unsigned i = 0; i != NumElts; ++i) {
    if (M[i] < 0)
      continue; // ignore UNDEF indices
    WhichResult = ((unsigned)M[i] == 2 * i ? 0 : 1);
    break;
  }

  for (unsigned i = 0; i != NumElts; ++i) {
    if (M[i] < 0)
      continue; // ignore UNDEF indices
    if ((unsigned)M[i] != 2 * i + WhichResult)
      return false;
  }

  return true;
}

} // namespace llvm

#endif
10 changes: 10 additions & 0 deletions llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3932,6 +3932,16 @@ InstructionCost AArch64TTIImpl::getShuffleCost(
}))
return 0;

// Check for other shuffles that are not SK_ kinds but we have native
// instructions for, for example ZIP and UZP.
unsigned Unused;
if (LT.second.isFixedLengthVector() &&
LT.second.getVectorNumElements() == Mask.size() &&
(Kind == TTI::SK_PermuteTwoSrc || Kind == TTI::SK_PermuteSingleSrc) &&
(isZIPMask(Mask, LT.second, Unused) ||
isUZPMask(Mask, LT.second, Unused)))
return 1;

if (Kind == TTI::SK_Broadcast || Kind == TTI::SK_Transpose ||
Kind == TTI::SK_Select || Kind == TTI::SK_PermuteSingleSrc ||
Kind == TTI::SK_Reverse || Kind == TTI::SK_Splice) {
Expand Down

0 comments on commit a536743

Please sign in to comment.