@@ -2060,7 +2060,10 @@ lowerVECTOR_SHUFFLE_XVREPLVEI(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
2060
2060
2061
2061
const auto &Begin = Mask.begin ();
2062
2062
const auto &End = Mask.end ();
2063
- unsigned HalfSize = Mask.size () / 2 ;
2063
+ int HalfSize = Mask.size () / 2 ;
2064
+
2065
+ if (SplatIndex >= HalfSize)
2066
+ return SDValue ();
2064
2067
2065
2068
assert (SplatIndex < (int )Mask.size () && " Out of bounds mask index" );
2066
2069
if (fitsRegularPattern<int >(Begin, 1 , End - HalfSize, SplatIndex, 0 ) &&
@@ -2354,7 +2357,7 @@ static SDValue lowerVECTOR_SHUFFLE_XVSHUF(const SDLoc &DL, ArrayRef<int> Mask,
2354
2357
/// cases need to be converted to it for processing.
2355
2358
///
2356
2359
/// This function may modify V1, V2 and Mask.
2357
- static void canonicalizeShuffleVectorByLane (
2360
+ static bool canonicalizeShuffleVectorByLane (
2358
2361
const SDLoc &DL, MutableArrayRef<int > Mask, MVT VT, SDValue &V1,
2359
2362
SDValue &V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget) {
2360
2363
@@ -2378,15 +2381,15 @@ static void canonicalizeShuffleVectorByLane(
2378
2381
preMask = LowLaneTy;
2379
2382
2380
2383
if (std::all_of (Mask.begin () + HalfSize, Mask.end (), [&](int M) {
2381
- return M < 0 || (M >= 0 && M < HalfSize ) ||
2382
- (M >= MaskSize && M < MaskSize + HalfSize );
2384
+ return M < 0 || (M >= HalfSize && M < MaskSize ) ||
2385
+ (M >= MaskSize + HalfSize && M < MaskSize * 2 );
2383
2386
}))
2384
- postMask = HighLaneTy ;
2387
+ postMask = LowLaneTy ;
2385
2388
else if (std::all_of (Mask.begin () + HalfSize, Mask.end (), [&](int M) {
2386
- return M < 0 || (M >= HalfSize && M < MaskSize ) ||
2387
- (M >= MaskSize + HalfSize && M < MaskSize * 2 );
2389
+ return M < 0 || (M >= 0 && M < HalfSize ) ||
2390
+ (M >= MaskSize && M < MaskSize + HalfSize );
2388
2391
}))
2389
- postMask = LowLaneTy ;
2392
+ postMask = HighLaneTy ;
2390
2393
2391
2394
// The pre-half of mask is high lane type, and the post-half of mask
2392
2395
// is low lane type, which is closest to the LoongArch instructions.
@@ -2395,7 +2398,7 @@ static void canonicalizeShuffleVectorByLane(
2395
2398
// to the lower 128-bit of vector register, and the low lane of mask
2396
2399
// corresponds to the higher 128-bit of vector register.
2397
2400
if (preMask == HighLaneTy && postMask == LowLaneTy) {
2398
- return ;
2401
+ return false ;
2399
2402
}
2400
2403
if (preMask == LowLaneTy && postMask == HighLaneTy) {
2401
2404
V1 = DAG.getBitcast (MVT::v4i64, V1);
@@ -2449,8 +2452,10 @@ static void canonicalizeShuffleVectorByLane(
2449
2452
*it = *it < 0 ? *it : *it + HalfSize;
2450
2453
}
2451
2454
} else { // cross-lane
2452
- return ;
2455
+ return false ;
2453
2456
}
2457
+
2458
+ return true ;
2454
2459
}
2455
2460
2456
2461
/// Lower VECTOR_SHUFFLE as lane permute and then shuffle (if possible).
@@ -2516,27 +2521,20 @@ static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
2516
2521
assert (Mask.size () % 2 == 0 && " Expected even mask size." );
2517
2522
assert (Mask.size () >= 4 && " Mask size is less than 4." );
2518
2523
2519
- // canonicalize non cross-lane shuffle vector
2520
- SmallVector<int > NewMask (Mask);
2521
- canonicalizeShuffleVectorByLane (DL, NewMask, VT, V1, V2, DAG, Subtarget);
2522
-
2523
2524
APInt KnownUndef, KnownZero;
2524
- computeZeroableShuffleElements (NewMask , V1, V2, KnownUndef, KnownZero);
2525
+ computeZeroableShuffleElements (Mask , V1, V2, KnownUndef, KnownZero);
2525
2526
APInt Zeroable = KnownUndef | KnownZero;
2526
2527
2527
2528
SDValue Result;
2528
2529
// TODO: Add more comparison patterns.
2529
2530
if (V2.isUndef ()) {
2530
- if ((Result = lowerVECTOR_SHUFFLE_XVREPLVEI (DL, NewMask , VT, V1, V2, DAG,
2531
+ if ((Result = lowerVECTOR_SHUFFLE_XVREPLVEI (DL, Mask , VT, V1, V2, DAG,
2531
2532
Subtarget)))
2532
2533
return Result;
2533
- if ((Result = lowerVECTOR_SHUFFLE_XVSHUF4I (DL, NewMask , VT, V1, V2, DAG,
2534
+ if ((Result = lowerVECTOR_SHUFFLE_XVSHUF4I (DL, Mask , VT, V1, V2, DAG,
2534
2535
Subtarget)))
2535
2536
return Result;
2536
- if ((Result = lowerVECTOR_SHUFFLE_XVPERM (DL, NewMask, VT, V1, V2, DAG)))
2537
- return Result;
2538
- if ((Result = lowerVECTOR_SHUFFLEAsLanePermuteAndShuffle (DL, NewMask, VT,
2539
- V1, V2, DAG)))
2537
+ if ((Result = lowerVECTOR_SHUFFLE_XVPERM (DL, Mask, VT, V1, V2, DAG)))
2540
2538
return Result;
2541
2539
2542
2540
// TODO: This comment may be enabled in the future to better match the
@@ -2546,24 +2544,36 @@ static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
2546
2544
2547
2545
// It is recommended not to change the pattern comparison order for better
2548
2546
// performance.
2549
- if ((Result = lowerVECTOR_SHUFFLE_XVPACKEV (DL, NewMask , VT, V1, V2, DAG)))
2547
+ if ((Result = lowerVECTOR_SHUFFLE_XVPACKEV (DL, Mask , VT, V1, V2, DAG)))
2550
2548
return Result;
2551
- if ((Result = lowerVECTOR_SHUFFLE_XVPACKOD (DL, NewMask , VT, V1, V2, DAG)))
2549
+ if ((Result = lowerVECTOR_SHUFFLE_XVPACKOD (DL, Mask , VT, V1, V2, DAG)))
2552
2550
return Result;
2553
- if ((Result = lowerVECTOR_SHUFFLE_XVILVH (DL, NewMask , VT, V1, V2, DAG)))
2551
+ if ((Result = lowerVECTOR_SHUFFLE_XVILVH (DL, Mask , VT, V1, V2, DAG)))
2554
2552
return Result;
2555
- if ((Result = lowerVECTOR_SHUFFLE_XVILVL (DL, NewMask , VT, V1, V2, DAG)))
2553
+ if ((Result = lowerVECTOR_SHUFFLE_XVILVL (DL, Mask , VT, V1, V2, DAG)))
2556
2554
return Result;
2557
- if ((Result = lowerVECTOR_SHUFFLE_XVPICKEV (DL, NewMask , VT, V1, V2, DAG)))
2555
+ if ((Result = lowerVECTOR_SHUFFLE_XVPICKEV (DL, Mask , VT, V1, V2, DAG)))
2558
2556
return Result;
2559
- if ((Result = lowerVECTOR_SHUFFLE_XVPICKOD (DL, NewMask , VT, V1, V2, DAG)))
2557
+ if ((Result = lowerVECTOR_SHUFFLE_XVPICKOD (DL, Mask , VT, V1, V2, DAG)))
2560
2558
return Result;
2561
- if ((Result = lowerVECTOR_SHUFFLEAsShift (DL, NewMask , VT, V1, V2, DAG,
2562
- Subtarget, Zeroable)))
2559
+ if ((Result = lowerVECTOR_SHUFFLEAsShift (DL, Mask , VT, V1, V2, DAG, Subtarget ,
2560
+ Zeroable)))
2563
2561
return Result;
2564
- if ((Result = lowerVECTOR_SHUFFLEAsByteRotate (DL, NewMask , VT, V1, V2, DAG,
2562
+ if ((Result = lowerVECTOR_SHUFFLEAsByteRotate (DL, Mask , VT, V1, V2, DAG,
2565
2563
Subtarget)))
2566
2564
return Result;
2565
+
2566
+ // Canonicalize non-cross-lane shuffle vectors.
2567
+ SmallVector<int > NewMask (Mask);
2568
+ if (canonicalizeShuffleVectorByLane (DL, NewMask, VT, V1, V2, DAG, Subtarget))
2569
+ return lower256BitShuffle (DL, NewMask, VT, V1, V2, DAG, Subtarget);
2570
+
2571
+ if (V2.isUndef ()) {
2572
+ if ((Result = lowerVECTOR_SHUFFLEAsLanePermuteAndShuffle (DL, NewMask, VT,
2573
+ V1, V2, DAG)))
2574
+ return Result;
2575
+ }
2576
+
2567
2577
if (SDValue NewShuffle = widenShuffleMask (DL, NewMask, VT, V1, V2, DAG))
2568
2578
return NewShuffle;
2569
2579
if ((Result = lowerVECTOR_SHUFFLE_XVSHUF (DL, NewMask, VT, V1, V2, DAG)))
0 commit comments