diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index 7a3fe31585b96..7bf6493046882 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -2068,7 +2068,10 @@ lowerVECTOR_SHUFFLE_XVREPLVEI(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
 
   const auto &Begin = Mask.begin();
   const auto &End = Mask.end();
-  unsigned HalfSize = Mask.size() / 2;
+  int HalfSize = Mask.size() / 2;
+
+  if (SplatIndex >= HalfSize)
+    return SDValue();
 
   assert(SplatIndex < (int)Mask.size() && "Out of bounds mask index");
   if (fitsRegularPattern<int>(Begin, 1, End - HalfSize, SplatIndex, 0) &&
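Note on the first hunk: `xvrepl128vei` broadcasts one element within each 128-bit lane, and its immediate can only address the lane-local slots `0..HalfSize-1`, so a splat index pointing into the high lane is unencodable and must now be rejected before the positional checks run. Below is a minimal standalone model of the matcher (my own simplification, not the LLVM helpers; it assumes, per the `fitsRegularPattern` calls above, that the second mask half must splat `SplatIndex + HalfSize` and that undef elements act as wildcards):

```cpp
#include <cstdio>
#include <vector>

// Returns true iff Mask is the per-lane splat [S, ..., S, S+H, ..., S+H]
// (undef elements, encoded as -1, act as wildcards), with S addressable
// by the xvrepl128vei.{b,h,w,d} immediate.
static bool matchesXvrepl128vei(const std::vector<int> &Mask, int SplatIndex) {
  int HalfSize = static_cast<int>(Mask.size()) / 2;
  // The new early bail-out: the immediate only addresses elements inside
  // one 128-bit lane, so a high-lane splat index is unencodable.
  if (SplatIndex < 0 || SplatIndex >= HalfSize)
    return false;
  for (int i = 0; i < HalfSize; ++i)
    if (Mask[i] != -1 && Mask[i] != SplatIndex)
      return false;
  for (int i = HalfSize; i < 2 * HalfSize; ++i)
    if (Mask[i] != -1 && Mask[i] != SplatIndex + HalfSize)
      return false;
  return true;
}

int main() {
  // v8i32 splat of lane element 1: encodable as xvrepl128vei.w xd, xj, 1.
  printf("%d\n", matchesXvrepl128vei({1, 1, 1, 1, 5, 5, 5, 5}, 1)); // 1
  // Splat of element 5 with an undef upper half: both positional loops
  // pass, but xvrepl128vei.w has a 2-bit immediate (0..3), so without
  // the new guard this shuffle would be mislowered.
  printf("%d\n", matchesXvrepl128vei({5, 5, 5, 5, -1, -1, -1, -1}, 5)); // 0
}
```

Previously the lane canonicalization ran before any matcher and hid such masks; now that the matchers see the raw mask first (see the `lower256BitShuffle` hunks below), this guard is what keeps an unencodable splat from being claimed.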
@@ -2363,8 +2366,10 @@ static SDValue lowerVECTOR_SHUFFLE_XVSHUF(const SDLoc &DL, ArrayRef<int> Mask,
 /// The first case is the closest to LoongArch instructions and the other
 /// cases need to be converted to it for processing.
 ///
-/// This function may modify V1, V2 and Mask
-static void canonicalizeShuffleVectorByLane(
+/// This function will return true for the last three cases above and will
+/// modify V1, V2 and Mask. Otherwise, return false for the first case and
+/// cross-lane shuffle cases.
+static bool canonicalizeShuffleVectorByLane(
     const SDLoc &DL, MutableArrayRef<int> Mask, MVT VT, SDValue &V1,
     SDValue &V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget) {
 
@@ -2388,15 +2393,15 @@ static void canonicalizeShuffleVectorByLane(
     preMask = LowLaneTy;
 
   if (std::all_of(Mask.begin() + HalfSize, Mask.end(), [&](int M) {
-        return M < 0 || (M >= 0 && M < HalfSize) ||
-               (M >= MaskSize && M < MaskSize + HalfSize);
+        return M < 0 || (M >= HalfSize && M < MaskSize) ||
+               (M >= MaskSize + HalfSize && M < MaskSize * 2);
       }))
-    postMask = HighLaneTy;
+    postMask = LowLaneTy;
   else if (std::all_of(Mask.begin() + HalfSize, Mask.end(), [&](int M) {
-             return M < 0 || (M >= HalfSize && M < MaskSize) ||
-                    (M >= MaskSize + HalfSize && M < MaskSize * 2);
+             return M < 0 || (M >= 0 && M < HalfSize) ||
+                    (M >= MaskSize && M < MaskSize + HalfSize);
           }))
-    postMask = LowLaneTy;
+    postMask = HighLaneTy;
 
   // The pre-half of mask is high lane type, and the post-half of mask
   // is low lane type, which is closest to the LoongArch instructions.
@@ -2405,7 +2410,7 @@ static void canonicalizeShuffleVectorByLane(
   // to the lower 128-bit of vector register, and the low lane of mask
   // corresponds the higher 128-bit of vector register.
   if (preMask == HighLaneTy && postMask == LowLaneTy) {
-    return;
+    return false;
   }
   if (preMask == LowLaneTy && postMask == HighLaneTy) {
     V1 = DAG.getBitcast(MVT::v4i64, V1);
@@ -2459,8 +2464,10 @@ static void canonicalizeShuffleVectorByLane(
       *it = *it < 0 ? *it : *it + HalfSize;
     }
   } else { // cross-lane
-    return;
+    return false;
   }
+
+  return true;
 }
 
 /// Lower VECTOR_SHUFFLE as lane permute and then shuffle (if possible).
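The swapped `postMask` branches keep the same source-lane mapping but reverse the test order, mirroring the `preMask` checks just above them. Since both predicates accept undef elements (`M < 0`), the order decides how an all-undef half is tagged: it now becomes `LowLaneTy`, which pairs with an all-undef pre-half's `HighLaneTy` to hit the canonical no-permute case. A self-contained sketch of the patched post-half classification (the enum names and the `InvalidLaneTy` fallback are mine; the real code tags halves of `Mask` in place):

```cpp
#include <cstdio>
#include <vector>

// Tag for which 128-bit source lane a mask half reads. Note the inverted
// naming used by the code above: a half reading the *low* lane of V1/V2
// is HighLaneTy, and vice versa.
enum LaneTy { InvalidLaneTy, HighLaneTy, LowLaneTy };

// Post-half classification as patched: high-lane sources are tested
// first, so an all-undef half (every M < 0) now yields LowLaneTy.
static LaneTy classifyPostHalf(const std::vector<int> &Half, int MaskSize) {
  int HalfSize = MaskSize / 2;
  auto allOf = [&](auto Pred) {
    for (int M : Half)
      if (!(M < 0 || Pred(M)))
        return false;
    return true;
  };
  if (allOf([&](int M) {
        return (M >= HalfSize && M < MaskSize) ||
               (M >= MaskSize + HalfSize && M < MaskSize * 2);
      }))
    return LowLaneTy;
  if (allOf([&](int M) {
        return (M >= 0 && M < HalfSize) ||
               (M >= MaskSize && M < MaskSize + HalfSize);
      }))
    return HighLaneTy;
  return InvalidLaneTy; // mixes lanes: a genuine cross-lane shuffle
}

int main() {
  // v8i32, MaskSize = 8: elements 4..7 of V1 live in its high lane.
  printf("%d\n", classifyPostHalf({4, 5, 12, 13}, 8));  // 2 (LowLaneTy)
  printf("%d\n", classifyPostHalf({-1, -1, -1, -1}, 8)); // 2, not 1
}
```

This is what lets the `_undef` test cases further below drop their stray `xvpermi.d ..., 68` instruction.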
@@ -2526,28 +2533,21 @@ static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
   assert(Mask.size() % 2 == 0 && "Expected even mask size.");
   assert(Mask.size() >= 4 && "Mask size is less than 4.");
 
-  // canonicalize non cross-lane shuffle vector
-  SmallVector<int> NewMask(Mask);
-  canonicalizeShuffleVectorByLane(DL, NewMask, VT, V1, V2, DAG, Subtarget);
-
   APInt KnownUndef, KnownZero;
-  computeZeroableShuffleElements(NewMask, V1, V2, KnownUndef, KnownZero);
+  computeZeroableShuffleElements(Mask, V1, V2, KnownUndef, KnownZero);
   APInt Zeroable = KnownUndef | KnownZero;
 
   SDValue Result;
   // TODO: Add more comparison patterns.
   if (V2.isUndef()) {
-    if ((Result = lowerVECTOR_SHUFFLE_XVREPLVEI(DL, NewMask, VT, V1, V2, DAG,
+    if ((Result = lowerVECTOR_SHUFFLE_XVREPLVEI(DL, Mask, VT, V1, V2, DAG,
                                                 Subtarget)))
       return Result;
-    if ((Result = lowerVECTOR_SHUFFLE_XVSHUF4I(DL, NewMask, VT, V1, V2, DAG,
+    if ((Result = lowerVECTOR_SHUFFLE_XVSHUF4I(DL, Mask, VT, V1, V2, DAG,
                                                Subtarget)))
       return Result;
-    if ((Result = lowerVECTOR_SHUFFLE_XVPERM(DL, NewMask, VT, V1, V2, DAG,
-                                             Subtarget)))
-      return Result;
-    if ((Result = lowerVECTOR_SHUFFLEAsLanePermuteAndShuffle(DL, NewMask, VT,
-                                                             V1, V2, DAG)))
+    if ((Result =
+             lowerVECTOR_SHUFFLE_XVPERM(DL, Mask, VT, V1, V2, DAG, Subtarget)))
       return Result;
 
     // TODO: This comment may be enabled in the future to better match the
@@ -2557,24 +2557,39 @@ static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
 
   // It is recommended not to change the pattern comparison order for better
   // performance.
-  if ((Result = lowerVECTOR_SHUFFLE_XVPACKEV(DL, NewMask, VT, V1, V2, DAG)))
+  if ((Result = lowerVECTOR_SHUFFLE_XVPACKEV(DL, Mask, VT, V1, V2, DAG)))
     return Result;
-  if ((Result = lowerVECTOR_SHUFFLE_XVPACKOD(DL, NewMask, VT, V1, V2, DAG)))
+  if ((Result = lowerVECTOR_SHUFFLE_XVPACKOD(DL, Mask, VT, V1, V2, DAG)))
     return Result;
-  if ((Result = lowerVECTOR_SHUFFLE_XVILVH(DL, NewMask, VT, V1, V2, DAG)))
+  if ((Result = lowerVECTOR_SHUFFLE_XVILVH(DL, Mask, VT, V1, V2, DAG)))
     return Result;
-  if ((Result = lowerVECTOR_SHUFFLE_XVILVL(DL, NewMask, VT, V1, V2, DAG)))
+  if ((Result = lowerVECTOR_SHUFFLE_XVILVL(DL, Mask, VT, V1, V2, DAG)))
     return Result;
-  if ((Result = lowerVECTOR_SHUFFLE_XVPICKEV(DL, NewMask, VT, V1, V2, DAG)))
+  if ((Result = lowerVECTOR_SHUFFLE_XVPICKEV(DL, Mask, VT, V1, V2, DAG)))
     return Result;
-  if ((Result = lowerVECTOR_SHUFFLE_XVPICKOD(DL, NewMask, VT, V1, V2, DAG)))
+  if ((Result = lowerVECTOR_SHUFFLE_XVPICKOD(DL, Mask, VT, V1, V2, DAG)))
     return Result;
-  if ((Result = lowerVECTOR_SHUFFLEAsShift(DL, NewMask, VT, V1, V2, DAG,
-                                           Subtarget, Zeroable)))
+  if ((Result = lowerVECTOR_SHUFFLEAsShift(DL, Mask, VT, V1, V2, DAG, Subtarget,
+                                           Zeroable)))
     return Result;
-  if ((Result = lowerVECTOR_SHUFFLEAsByteRotate(DL, NewMask, VT, V1, V2, DAG,
+  if ((Result = lowerVECTOR_SHUFFLEAsByteRotate(DL, Mask, VT, V1, V2, DAG,
                                                 Subtarget)))
     return Result;
+
+  // canonicalize non cross-lane shuffle vector
+  SmallVector<int> NewMask(Mask);
+  if (canonicalizeShuffleVectorByLane(DL, NewMask, VT, V1, V2, DAG, Subtarget))
+    return lower256BitShuffle(DL, NewMask, VT, V1, V2, DAG, Subtarget);
+
+  // FIXME: Handling the remaining cases earlier can degrade performance
+  // in some situations. Further analysis is required to enable more
+  // effective optimizations.
+  if (V2.isUndef()) {
+    if ((Result = lowerVECTOR_SHUFFLEAsLanePermuteAndShuffle(DL, NewMask, VT,
+                                                             V1, V2, DAG)))
+      return Result;
+  }
+
   if (SDValue NewShuffle = widenShuffleMask(DL, NewMask, VT, V1, V2, DAG))
     return NewShuffle;
   if ((Result = lowerVECTOR_SHUFFLE_XVSHUF(DL, NewMask, VT, V1, V2, DAG)))
diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fix-xvshuf.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fix-xvshuf.ll
index 765473ce166df..30539427a1a0a 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fix-xvshuf.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fix-xvshuf.ll
@@ -8,7 +8,7 @@ define <4 x double> @shufflevector_v4f64(<4 x double> %a, <4 x double> %b) {
 ; CHECK-LABEL: shufflevector_v4f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xvpickve.d $xr2, $xr1, 3
-; CHECK-NEXT:    xvpermi.d $xr3, $xr0, 78
+; CHECK-NEXT:    xvpermi.d $xr3, $xr0, 238
 ; CHECK-NEXT:    xvrepl128vei.d $xr3, $xr3, 1
 ; CHECK-NEXT:    vextrins.d $vr3, $vr2, 16
 ; CHECK-NEXT:    xvpickve.d $xr1, $xr1, 2
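On the CHECK-line change from 78 to 238: `xvpermi.d` builds 64-bit result element i from source element `(imm >> 2*i) & 3`, so the immediate is just four packed 2-bit selectors. A small decoder sketch (semantics per the LASX `xvpermi.d` definition; the helper name is mine):

```cpp
#include <cstdio>

// Print the four 64-bit source indices an xvpermi.d immediate selects.
static void decodeXvpermiD(unsigned Imm) {
  printf("imm %3u selects [", Imm);
  for (int i = 0; i < 4; ++i)
    printf("%u%s", (Imm >> (2 * i)) & 3, i == 3 ? "]\n" : ", ");
}

int main() {
  decodeXvpermiD(68);  // [0, 1, 0, 1]: duplicate the low 128-bit lane
  decodeXvpermiD(78);  // [2, 3, 0, 1]: swap the two 128-bit lanes
  decodeXvpermiD(238); // [2, 3, 2, 3]: duplicate the high 128-bit lane
}
```

The reordered lowering now reaches this shuffle with a mask whose needed elements live in the high lane, hence the duplicate-high form 238 instead of the lane swap 78.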
diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvrepl128vei.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvrepl128vei.ll
index e498358cf4d19..9afe16d029fb2 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvrepl128vei.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvrepl128vei.ll
@@ -17,7 +17,6 @@ define <32 x i8> @shufflevector_v32i8(<32 x i8> %a, <32 x i8> %b) {
 define <32 x i8> @shufflevector_v32i8_undef(<32 x i8> %a) {
 ; CHECK-LABEL: shufflevector_v32i8_undef:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xvpermi.d $xr0, $xr0, 68
 ; CHECK-NEXT:    xvrepl128vei.b $xr0, $xr0, 1
 ; CHECK-NEXT:    ret
   %c = shufflevector <32 x i8> %a, <32 x i8> poison, <32 x i32>
@@ ... @@ define <16 x i16> @shufflevector_v16i16(<16 x i16> %a, <16 x i16> %b) {
 define <16 x i16> @shufflevector_v16i16_undef(<16 x i16> %a) {
 ; CHECK-LABEL: shufflevector_v16i16_undef:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xvpermi.d $xr0, $xr0, 68
 ; CHECK-NEXT:    xvrepl128vei.h $xr0, $xr0, 3
 ; CHECK-NEXT:    ret
   %c = shufflevector <16 x i16> %a, <16 x i16> poison, <16 x i32>
@@ ... @@ define <8 x i32> @shufflevector_v8i32(<8 x i32> %a, <8 x i32> %b) {
 define <8 x i32> @shufflevector_v8i32_undef(<8 x i32> %a) {
 ; CHECK-LABEL: shufflevector_v8i32_undef:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xvpermi.d $xr0, $xr0, 68
 ; CHECK-NEXT:    xvrepl128vei.w $xr0, $xr0, 2
 ; CHECK-NEXT:    ret
   %c = shufflevector <8 x i32> %a, <8 x i32> poison, <8 x i32>
@@ -84,7 +81,6 @@ define <4 x i64> @shufflevector_v4i64(<4 x i64> %a, <4 x i64> %b) {
 define <4 x i64> @shufflevector_v4i64_undef(<4 x i64> %a) {
 ; CHECK-LABEL: shufflevector_v4i64_undef:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xvpermi.d $xr0, $xr0, 68
 ; CHECK-NEXT:    xvrepl128vei.d $xr0, $xr0, 1
 ; CHECK-NEXT:    ret
   %c = shufflevector <4 x i64> %a, <4 x i64> poison, <4 x i32>
@@ -105,7 +101,7 @@ define <8 x float> @shufflevector_v8f32(<8 x float> %a, <8 x float> %b) {
 define <8 x float> @shufflevector_v8f32_undef(<8 x float> %a) {
 ; CHECK-LABEL: shufflevector_v8f32_undef:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xvpermi.d $xr0, $xr0, 78
+; CHECK-NEXT:    xvpermi.d $xr0, $xr0, 238
 ; CHECK-NEXT:    xvrepl128vei.w $xr0, $xr0, 1
 ; CHECK-NEXT:    ret
   %c = shufflevector <8 x float> %a, <8 x float> poison, <8 x i32>
@@ -126,7 +122,6 @@ define <4 x double> @shufflevector_v4f64(<4 x double> %a, <4 x double> %b) {
 define <4 x double> @shufflevector_v4f64_undef(<4 x double> %a) {
 ; CHECK-LABEL: shufflevector_v4f64_undef:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xvpermi.d $xr0, $xr0, 68
 ; CHECK-NEXT:    xvrepl128vei.d $xr0, $xr0, 0
 ; CHECK-NEXT:    ret
   %c = shufflevector <4 x double> %a, <4 x double> poison, <4 x i32>
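Tying the pieces together: the rewritten `shufflevector_v8f32_undef` sequence above first duplicates the high 128-bit lane (`xvpermi.d ..., 238`) and then splats element 1 of each lane (`xvrepl128vei.w ..., 1`). The IR shuffle masks are truncated in this excerpt, but the shuffle the pair computes is recoverable from the instructions; a runnable sketch (integer stand-ins for the 32-bit elements, semantics as in the decoder above):

```cpp
#include <array>
#include <cstdio>

using V8 = std::array<int, 8>; // one slot per 32-bit element

// xvpermi.d: pick 64-bit element ((Imm >> 2*i) & 3) into 64-bit slot i.
static V8 xvpermiD(const V8 &X, unsigned Imm) {
  V8 R{};
  for (int i = 0; i < 4; ++i) {
    unsigned Src = (Imm >> (2 * i)) & 3;
    R[2 * i] = X[2 * Src];
    R[2 * i + 1] = X[2 * Src + 1];
  }
  return R;
}

// xvrepl128vei.w: splat element Idx of each 128-bit lane across the lane.
static V8 xvrepl128veiW(const V8 &X, unsigned Idx) {
  V8 R{};
  for (int i = 0; i < 8; ++i)
    R[i] = X[(i < 4 ? 0 : 4) + Idx];
  return R;
}

int main() {
  V8 A{0, 1, 2, 3, 4, 5, 6, 7};
  V8 R = xvrepl128veiW(xvpermiD(A, 238), 1); // the emitted sequence
  for (int E : R)
    printf("%d ", E); // 5 5 5 5 5 5 5 5: a full splat of element 5
  printf("\n");
}
```

The result is a cross-lane splat of element 5, i.e. exactly the shape the new `SplatIndex >= HalfSize` guard stops `lowerVECTOR_SHUFFLE_XVREPLVEI` from claiming on its own; it is now produced by the canonicalize-and-retry path instead.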