@@ -2068,7 +2068,10 @@ lowerVECTOR_SHUFFLE_XVREPLVEI(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
 
   const auto &Begin = Mask.begin();
   const auto &End = Mask.end();
-  unsigned HalfSize = Mask.size() / 2;
+  int HalfSize = Mask.size() / 2;
+
+  if (SplatIndex >= HalfSize)
+    return SDValue();
 
   assert(SplatIndex < (int)Mask.size() && "Out of bounds mask index");
   if (fitsRegularPattern<int>(Begin, 1, End - HalfSize, SplatIndex, 0) &&
@@ -2363,8 +2366,10 @@ static SDValue lowerVECTOR_SHUFFLE_XVSHUF(const SDLoc &DL, ArrayRef<int> Mask,
 /// The first case is the closest to LoongArch instructions and the other
 /// cases need to be converted to it for processing.
 ///
-/// This function may modify V1, V2 and Mask
-static void canonicalizeShuffleVectorByLane(
+/// This function will return true for the last three cases above and will
+/// modify V1, V2 and Mask. Otherwise, return false for the first case and
+/// cross-lane shuffle cases.
+static bool canonicalizeShuffleVectorByLane(
     const SDLoc &DL, MutableArrayRef<int> Mask, MVT VT, SDValue &V1,
     SDValue &V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget) {
 
@@ -2388,15 +2393,15 @@ static void canonicalizeShuffleVectorByLane(
     preMask = LowLaneTy;
 
   if (std::all_of(Mask.begin() + HalfSize, Mask.end(), [&](int M) {
-        return M < 0 || (M >= 0 && M < HalfSize) ||
-               (M >= MaskSize && M < MaskSize + HalfSize);
+        return M < 0 || (M >= HalfSize && M < MaskSize) ||
+               (M >= MaskSize + HalfSize && M < MaskSize * 2);
       }))
-    postMask = HighLaneTy;
+    postMask = LowLaneTy;
   else if (std::all_of(Mask.begin() + HalfSize, Mask.end(), [&](int M) {
-             return M < 0 || (M >= HalfSize && M < MaskSize) ||
-                    (M >= MaskSize + HalfSize && M < MaskSize * 2);
+             return M < 0 || (M >= 0 && M < HalfSize) ||
+                    (M >= MaskSize && M < MaskSize + HalfSize);
           }))
-    postMask = LowLaneTy;
+    postMask = HighLaneTy;
 
   // The pre-half of mask is high lane type, and the post-half of mask
   // is low lane type, which is closest to the LoongArch instructions.
@@ -2405,7 +2410,7 @@ static void canonicalizeShuffleVectorByLane(
   // to the lower 128-bit of vector register, and the low lane of mask
   // corresponds the higher 128-bit of vector register.
   if (preMask == HighLaneTy && postMask == LowLaneTy) {
-    return;
+    return false;
   }
   if (preMask == LowLaneTy && postMask == HighLaneTy) {
     V1 = DAG.getBitcast(MVT::v4i64, V1);
@@ -2459,8 +2464,10 @@ static void canonicalizeShuffleVectorByLane(
       *it = *it < 0 ? *it : *it + HalfSize;
     }
   } else { // cross-lane
-    return;
+    return false;
   }
+
+  return true;
 }
 
 /// Lower VECTOR_SHUFFLE as lane permute and then shuffle (if possible).
@@ -2526,28 +2533,21 @@ static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
   assert(Mask.size() % 2 == 0 && "Expected even mask size.");
   assert(Mask.size() >= 4 && "Mask size is less than 4.");
 
-  // canonicalize non cross-lane shuffle vector
-  SmallVector<int> NewMask(Mask);
-  canonicalizeShuffleVectorByLane(DL, NewMask, VT, V1, V2, DAG, Subtarget);
-
   APInt KnownUndef, KnownZero;
-  computeZeroableShuffleElements(NewMask, V1, V2, KnownUndef, KnownZero);
+  computeZeroableShuffleElements(Mask, V1, V2, KnownUndef, KnownZero);
   APInt Zeroable = KnownUndef | KnownZero;
 
   SDValue Result;
   // TODO: Add more comparison patterns.
   if (V2.isUndef()) {
-    if ((Result = lowerVECTOR_SHUFFLE_XVREPLVEI(DL, NewMask, VT, V1, V2, DAG,
+    if ((Result = lowerVECTOR_SHUFFLE_XVREPLVEI(DL, Mask, VT, V1, V2, DAG,
                                                 Subtarget)))
       return Result;
-    if ((Result = lowerVECTOR_SHUFFLE_XVSHUF4I(DL, NewMask, VT, V1, V2, DAG,
+    if ((Result = lowerVECTOR_SHUFFLE_XVSHUF4I(DL, Mask, VT, V1, V2, DAG,
                                                Subtarget)))
       return Result;
-    if ((Result = lowerVECTOR_SHUFFLE_XVPERM(DL, NewMask, VT, V1, V2, DAG,
-                                             Subtarget)))
-      return Result;
-    if ((Result = lowerVECTOR_SHUFFLEAsLanePermuteAndShuffle(DL, NewMask, VT,
-                                                             V1, V2, DAG)))
+    if ((Result =
+             lowerVECTOR_SHUFFLE_XVPERM(DL, Mask, VT, V1, V2, DAG, Subtarget)))
       return Result;
 
     // TODO: This comment may be enabled in the future to better match the
@@ -2557,24 +2557,39 @@ static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
 
   // It is recommended not to change the pattern comparison order for better
   // performance.
-  if ((Result = lowerVECTOR_SHUFFLE_XVPACKEV(DL, NewMask, VT, V1, V2, DAG)))
+  if ((Result = lowerVECTOR_SHUFFLE_XVPACKEV(DL, Mask, VT, V1, V2, DAG)))
     return Result;
-  if ((Result = lowerVECTOR_SHUFFLE_XVPACKOD(DL, NewMask, VT, V1, V2, DAG)))
+  if ((Result = lowerVECTOR_SHUFFLE_XVPACKOD(DL, Mask, VT, V1, V2, DAG)))
     return Result;
-  if ((Result = lowerVECTOR_SHUFFLE_XVILVH(DL, NewMask, VT, V1, V2, DAG)))
+  if ((Result = lowerVECTOR_SHUFFLE_XVILVH(DL, Mask, VT, V1, V2, DAG)))
     return Result;
-  if ((Result = lowerVECTOR_SHUFFLE_XVILVL(DL, NewMask, VT, V1, V2, DAG)))
+  if ((Result = lowerVECTOR_SHUFFLE_XVILVL(DL, Mask, VT, V1, V2, DAG)))
     return Result;
-  if ((Result = lowerVECTOR_SHUFFLE_XVPICKEV(DL, NewMask, VT, V1, V2, DAG)))
+  if ((Result = lowerVECTOR_SHUFFLE_XVPICKEV(DL, Mask, VT, V1, V2, DAG)))
     return Result;
-  if ((Result = lowerVECTOR_SHUFFLE_XVPICKOD(DL, NewMask, VT, V1, V2, DAG)))
+  if ((Result = lowerVECTOR_SHUFFLE_XVPICKOD(DL, Mask, VT, V1, V2, DAG)))
     return Result;
-  if ((Result = lowerVECTOR_SHUFFLEAsShift(DL, NewMask, VT, V1, V2, DAG,
-                                           Subtarget, Zeroable)))
+  if ((Result = lowerVECTOR_SHUFFLEAsShift(DL, Mask, VT, V1, V2, DAG, Subtarget,
+                                           Zeroable)))
     return Result;
-  if ((Result = lowerVECTOR_SHUFFLEAsByteRotate(DL, NewMask, VT, V1, V2, DAG,
+  if ((Result = lowerVECTOR_SHUFFLEAsByteRotate(DL, Mask, VT, V1, V2, DAG,
                                                 Subtarget)))
     return Result;
+
+  // canonicalize non cross-lane shuffle vector
+  SmallVector<int> NewMask(Mask);
+  if (canonicalizeShuffleVectorByLane(DL, NewMask, VT, V1, V2, DAG, Subtarget))
+    return lower256BitShuffle(DL, NewMask, VT, V1, V2, DAG, Subtarget);
+
+  // FIXME: Handling the remaining cases earlier can degrade performance
+  // in some situations. Further analysis is required to enable more
+  // effective optimizations.
+  if (V2.isUndef()) {
+    if ((Result = lowerVECTOR_SHUFFLEAsLanePermuteAndShuffle(DL, NewMask, VT,
+                                                             V1, V2, DAG)))
+      return Result;
+  }
+
   if (SDValue NewShuffle = widenShuffleMask(DL, NewMask, VT, V1, V2, DAG))
     return NewShuffle;
   if ((Result = lowerVECTOR_SHUFFLE_XVSHUF(DL, NewMask, VT, V1, V2, DAG)))
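For reference, a minimal standalone sketch of the post-half lane classification whose check order is swapped above. The names (classifyPostHalf, fromLowLane, fromHighLane) and the std::vector signature are hypothetical, not LLVM API; it assumes indices in [0, MaskSize) select elements of V1, indices in [MaskSize, 2*MaskSize) select V2, and negative entries are undef.

#include <algorithm>
#include <cstdio>
#include <vector>

// Note the inverted naming: per the in-tree comment, the "high lane" of the
// mask corresponds to the *lower* 128-bit of the vector register, and the
// "low lane" to the higher 128-bit.
enum HalfMaskType { HighLaneTy, LowLaneTy, None };

// Hypothetical helper mirroring the patched predicates and their order.
static HalfMaskType classifyPostHalf(const std::vector<int> &Half,
                                     int MaskSize) {
  int HalfSize = MaskSize / 2;
  // Element is undef or reads the high 128-bit lane of V1 or V2.
  auto fromHighLane = [&](int M) {
    return M < 0 || (M >= HalfSize && M < MaskSize) ||
           (M >= MaskSize + HalfSize && M < MaskSize * 2);
  };
  // Element is undef or reads the low 128-bit lane of V1 or V2.
  auto fromLowLane = [&](int M) {
    return M < 0 || (M >= 0 && M < HalfSize) ||
           (M >= MaskSize && M < MaskSize + HalfSize);
  };
  if (std::all_of(Half.begin(), Half.end(), fromHighLane))
    return LowLaneTy;
  if (std::all_of(Half.begin(), Half.end(), fromLowLane))
    return HighLaneTy;
  return None; // mixed sources: a cross-lane shuffle, left uncanonicalized
}

int main() {
  // v8i32 example, MaskSize = 8: the post-half <12, 13, 4, 5> reads only
  // high-lane elements of V1/V2, so it classifies as LowLaneTy (prints 1).
  printf("%d\n", classifyPostHalf({12, 13, 4, 5}, 8));
  // An all-undef half satisfies both predicates, so only the check order
  // decides: with the patched order it is LowLaneTy (prints 1); with the
  // pre-patch order it was HighLaneTy.
  printf("%d\n", classifyPostHalf({-1, -1, -1, -1}, 8));
}

Since both predicates accept an undef-only half, swapping the if/else-if order only changes the result for such halves, which now classify as LowLaneTy and so hit the preMask == HighLaneTy && postMask == LowLaneTy early-out.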