Skip to content

Commit 86d0a42

Browse files
committed
[LoongArch] Refine 256-bit vector_shuffle legalization for LASX
1 parent cecdff9 commit 86d0a42

File tree

2 files changed

+41
-31
lines changed

2 files changed

+41
-31
lines changed

llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp

Lines changed: 40 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -2060,7 +2060,10 @@ lowerVECTOR_SHUFFLE_XVREPLVEI(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
20602060

20612061
const auto &Begin = Mask.begin();
20622062
const auto &End = Mask.end();
2063-
unsigned HalfSize = Mask.size() / 2;
2063+
int HalfSize = Mask.size() / 2;
2064+
2065+
if (SplatIndex >= HalfSize)
2066+
return SDValue();
20642067

20652068
assert(SplatIndex < (int)Mask.size() && "Out of bounds mask index");
20662069
if (fitsRegularPattern<int>(Begin, 1, End - HalfSize, SplatIndex, 0) &&
@@ -2354,7 +2357,7 @@ static SDValue lowerVECTOR_SHUFFLE_XVSHUF(const SDLoc &DL, ArrayRef<int> Mask,
23542357
/// cases need to be converted to it for processing.
23552358
///
23562359
/// This function may modify V1, V2 and Mask
2357-
static void canonicalizeShuffleVectorByLane(
2360+
static bool canonicalizeShuffleVectorByLane(
23582361
const SDLoc &DL, MutableArrayRef<int> Mask, MVT VT, SDValue &V1,
23592362
SDValue &V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget) {
23602363

@@ -2378,15 +2381,15 @@ static void canonicalizeShuffleVectorByLane(
23782381
preMask = LowLaneTy;
23792382

23802383
if (std::all_of(Mask.begin() + HalfSize, Mask.end(), [&](int M) {
2381-
return M < 0 || (M >= 0 && M < HalfSize) ||
2382-
(M >= MaskSize && M < MaskSize + HalfSize);
2384+
return M < 0 || (M >= HalfSize && M < MaskSize) ||
2385+
(M >= MaskSize + HalfSize && M < MaskSize * 2);
23832386
}))
2384-
postMask = HighLaneTy;
2387+
postMask = LowLaneTy;
23852388
else if (std::all_of(Mask.begin() + HalfSize, Mask.end(), [&](int M) {
2386-
return M < 0 || (M >= HalfSize && M < MaskSize) ||
2387-
(M >= MaskSize + HalfSize && M < MaskSize * 2);
2389+
return M < 0 || (M >= 0 && M < HalfSize) ||
2390+
(M >= MaskSize && M < MaskSize + HalfSize);
23882391
}))
2389-
postMask = LowLaneTy;
2392+
postMask = HighLaneTy;
23902393

23912394
// The pre-half of mask is high lane type, and the post-half of mask
23922395
// is low lane type, which is closest to the LoongArch instructions.
@@ -2395,7 +2398,7 @@ static void canonicalizeShuffleVectorByLane(
23952398
// to the lower 128-bit of vector register, and the low lane of mask
23962399
// corresponds to the higher 128-bit of vector register.
23972400
if (preMask == HighLaneTy && postMask == LowLaneTy) {
2398-
return;
2401+
return false;
23992402
}
24002403
if (preMask == LowLaneTy && postMask == HighLaneTy) {
24012404
V1 = DAG.getBitcast(MVT::v4i64, V1);
@@ -2449,8 +2452,10 @@ static void canonicalizeShuffleVectorByLane(
24492452
*it = *it < 0 ? *it : *it + HalfSize;
24502453
}
24512454
} else { // cross-lane
2452-
return;
2455+
return false;
24532456
}
2457+
2458+
return true;
24542459
}
24552460

24562461
/// Lower VECTOR_SHUFFLE as lane permute and then shuffle (if possible).
@@ -2516,27 +2521,20 @@ static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
25162521
assert(Mask.size() % 2 == 0 && "Expected even mask size.");
25172522
assert(Mask.size() >= 4 && "Mask size is less than 4.");
25182523

2519-
// canonicalize non cross-lane shuffle vector
2520-
SmallVector<int> NewMask(Mask);
2521-
canonicalizeShuffleVectorByLane(DL, NewMask, VT, V1, V2, DAG, Subtarget);
2522-
25232524
APInt KnownUndef, KnownZero;
2524-
computeZeroableShuffleElements(NewMask, V1, V2, KnownUndef, KnownZero);
2525+
computeZeroableShuffleElements(Mask, V1, V2, KnownUndef, KnownZero);
25252526
APInt Zeroable = KnownUndef | KnownZero;
25262527

25272528
SDValue Result;
25282529
// TODO: Add more comparison patterns.
25292530
if (V2.isUndef()) {
2530-
if ((Result = lowerVECTOR_SHUFFLE_XVREPLVEI(DL, NewMask, VT, V1, V2, DAG,
2531+
if ((Result = lowerVECTOR_SHUFFLE_XVREPLVEI(DL, Mask, VT, V1, V2, DAG,
25312532
Subtarget)))
25322533
return Result;
2533-
if ((Result = lowerVECTOR_SHUFFLE_XVSHUF4I(DL, NewMask, VT, V1, V2, DAG,
2534+
if ((Result = lowerVECTOR_SHUFFLE_XVSHUF4I(DL, Mask, VT, V1, V2, DAG,
25342535
Subtarget)))
25352536
return Result;
2536-
if ((Result = lowerVECTOR_SHUFFLE_XVPERM(DL, NewMask, VT, V1, V2, DAG)))
2537-
return Result;
2538-
if ((Result = lowerVECTOR_SHUFFLEAsLanePermuteAndShuffle(DL, NewMask, VT,
2539-
V1, V2, DAG)))
2537+
if ((Result = lowerVECTOR_SHUFFLE_XVPERM(DL, Mask, VT, V1, V2, DAG)))
25402538
return Result;
25412539

25422540
// TODO: This comment may be enabled in the future to better match the
@@ -2546,24 +2544,36 @@ static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
25462544

25472545
// It is recommended not to change the pattern comparison order for better
25482546
// performance.
2549-
if ((Result = lowerVECTOR_SHUFFLE_XVPACKEV(DL, NewMask, VT, V1, V2, DAG)))
2547+
if ((Result = lowerVECTOR_SHUFFLE_XVPACKEV(DL, Mask, VT, V1, V2, DAG)))
25502548
return Result;
2551-
if ((Result = lowerVECTOR_SHUFFLE_XVPACKOD(DL, NewMask, VT, V1, V2, DAG)))
2549+
if ((Result = lowerVECTOR_SHUFFLE_XVPACKOD(DL, Mask, VT, V1, V2, DAG)))
25522550
return Result;
2553-
if ((Result = lowerVECTOR_SHUFFLE_XVILVH(DL, NewMask, VT, V1, V2, DAG)))
2551+
if ((Result = lowerVECTOR_SHUFFLE_XVILVH(DL, Mask, VT, V1, V2, DAG)))
25542552
return Result;
2555-
if ((Result = lowerVECTOR_SHUFFLE_XVILVL(DL, NewMask, VT, V1, V2, DAG)))
2553+
if ((Result = lowerVECTOR_SHUFFLE_XVILVL(DL, Mask, VT, V1, V2, DAG)))
25562554
return Result;
2557-
if ((Result = lowerVECTOR_SHUFFLE_XVPICKEV(DL, NewMask, VT, V1, V2, DAG)))
2555+
if ((Result = lowerVECTOR_SHUFFLE_XVPICKEV(DL, Mask, VT, V1, V2, DAG)))
25582556
return Result;
2559-
if ((Result = lowerVECTOR_SHUFFLE_XVPICKOD(DL, NewMask, VT, V1, V2, DAG)))
2557+
if ((Result = lowerVECTOR_SHUFFLE_XVPICKOD(DL, Mask, VT, V1, V2, DAG)))
25602558
return Result;
2561-
if ((Result = lowerVECTOR_SHUFFLEAsShift(DL, NewMask, VT, V1, V2, DAG,
2562-
Subtarget, Zeroable)))
2559+
if ((Result = lowerVECTOR_SHUFFLEAsShift(DL, Mask, VT, V1, V2, DAG, Subtarget,
2560+
Zeroable)))
25632561
return Result;
2564-
if ((Result = lowerVECTOR_SHUFFLEAsByteRotate(DL, NewMask, VT, V1, V2, DAG,
2562+
if ((Result = lowerVECTOR_SHUFFLEAsByteRotate(DL, Mask, VT, V1, V2, DAG,
25652563
Subtarget)))
25662564
return Result;
2565+
2566+
// Canonicalize the non-cross-lane shuffle vector.
2567+
SmallVector<int> NewMask(Mask);
2568+
if (canonicalizeShuffleVectorByLane(DL, NewMask, VT, V1, V2, DAG, Subtarget))
2569+
return lower256BitShuffle(DL, NewMask, VT, V1, V2, DAG, Subtarget);
2570+
2571+
if (V2.isUndef()) {
2572+
if ((Result = lowerVECTOR_SHUFFLEAsLanePermuteAndShuffle(DL, NewMask, VT,
2573+
V1, V2, DAG)))
2574+
return Result;
2575+
}
2576+
25672577
if (SDValue NewShuffle = widenShuffleMask(DL, NewMask, VT, V1, V2, DAG))
25682578
return NewShuffle;
25692579
if ((Result = lowerVECTOR_SHUFFLE_XVSHUF(DL, NewMask, VT, V1, V2, DAG)))

llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fix-xvshuf.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ define <4 x double> @shufflevector_v4f64(<4 x double> %a, <4 x double> %b) {
88
; CHECK-LABEL: shufflevector_v4f64:
99
; CHECK: # %bb.0: # %entry
1010
; CHECK-NEXT: xvpickve.d $xr2, $xr1, 3
11-
; CHECK-NEXT: xvpermi.d $xr3, $xr0, 78
11+
; CHECK-NEXT: xvpermi.d $xr3, $xr0, 238
1212
; CHECK-NEXT: xvrepl128vei.d $xr3, $xr3, 1
1313
; CHECK-NEXT: vextrins.d $vr3, $vr2, 16
1414
; CHECK-NEXT: xvpickve.d $xr1, $xr1, 2

0 commit comments

Comments
 (0)