From 80abdb7318bc7e1ff7bc4ea1f6ff0b1cc4a9813e Mon Sep 17 00:00:00 2001
From: Qi Zhao
Date: Tue, 23 Sep 2025 16:02:12 +0800
Subject: [PATCH 1/4] [LoongArch] Refine 256-bit vector_shuffle legalization
 for LASX
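Try the cheap single-instruction patterns (xvrepl128vei, xvshuf4i,
xvperm) against the original mask first, and only canonicalize the
shuffle by lane -- now signalled through a bool return value -- and
re-run lower256BitShuffle on the rewritten mask when all of them fail.
lowerVECTOR_SHUFFLE_XVREPLVEI also gains an early bail-out instead of
relying on the assert alone: xvrepl128vei replicates one element inside
each 128-bit lane, so a splat index from the high half cannot be
encoded in its immediate.

The eligibility rule, as a standalone C++ sketch (illustrative names
only, not the in-tree helper; the real code expresses the same checks
with fitsRegularPattern<int> over an ArrayRef<int> mask):

  #include <cstdio>
  #include <vector>

  // Returns the xvrepl128vei element index, or -1 if the mask does not
  // splat one low-half element position across both 128-bit lanes.
  static int replVecIndex(const std::vector<int> &Mask) {
    int SplatIndex = -1; // -1 encodes an undef mask element
    for (int M : Mask)
      if (M != -1) { SplatIndex = M; break; }
    int HalfSize = (int)Mask.size() / 2;
    // The new guard: an index in the high half does not fit the
    // instruction's per-lane immediate.
    if (SplatIndex < 0 || SplatIndex >= HalfSize)
      return -1;
    // Low half must splat SplatIndex; high half must splat the element
    // at the same position of the high lane (SplatIndex + HalfSize).
    for (int i = 0; i < HalfSize; ++i)
      if (Mask[i] != -1 && Mask[i] != SplatIndex)
        return -1;
    for (int i = HalfSize; i < (int)Mask.size(); ++i)
      if (Mask[i] != -1 && Mask[i] != SplatIndex + HalfSize)
        return -1;
    return SplatIndex;
  }

  int main() {
    printf("%d\n", replVecIndex({1, 1, 3, 3})); // 1: same splat in each lane
    printf("%d\n", replVecIndex({2, 2, 2, 2})); // -1: rejected by the guard
  }

Matching the direct patterns before lane canonicalization is what lets
the shuffle-as-xvrepl128vei tests below drop their leading
"xvpermi.d $xr0, $xr0, 68" lane broadcast.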
---
 .../LoongArch/LoongArchISelLowering.cpp       | 73 +++++++++++--------
 .../lasx/ir-instruction/fix-xvshuf.ll         |  2 +-
 .../ir-instruction/shuffle-as-xvrepl128vei.ll |  7 +-
 3 files changed, 45 insertions(+), 37 deletions(-)

diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index 32baa2d111270..9662fec5ef4b9 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -2060,7 +2060,10 @@ lowerVECTOR_SHUFFLE_XVREPLVEI(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
   const auto &Begin = Mask.begin();
   const auto &End = Mask.end();
-  unsigned HalfSize = Mask.size() / 2;
+  int HalfSize = Mask.size() / 2;
+
+  if (SplatIndex >= HalfSize)
+    return SDValue();
 
   assert(SplatIndex < (int)Mask.size() && "Out of bounds mask index");
   if (fitsRegularPattern<int>(Begin, 1, End - HalfSize, SplatIndex, 0) &&
@@ -2354,7 +2357,7 @@ static SDValue lowerVECTOR_SHUFFLE_XVSHUF(const SDLoc &DL, ArrayRef<int> Mask,
 /// cases need to be converted to it for processing.
 ///
 /// This function may modify V1, V2 and Mask
-static void canonicalizeShuffleVectorByLane(
+static bool canonicalizeShuffleVectorByLane(
     const SDLoc &DL, MutableArrayRef<int> Mask, MVT VT, SDValue &V1,
     SDValue &V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget) {
@@ -2378,15 +2381,15 @@ static void canonicalizeShuffleVectorByLane(
     preMask = LowLaneTy;
 
   if (std::all_of(Mask.begin() + HalfSize, Mask.end(), [&](int M) {
-        return M < 0 || (M >= 0 && M < HalfSize) ||
-               (M >= MaskSize && M < MaskSize + HalfSize);
+        return M < 0 || (M >= HalfSize && M < MaskSize) ||
+               (M >= MaskSize + HalfSize && M < MaskSize * 2);
       }))
-    postMask = HighLaneTy;
+    postMask = LowLaneTy;
   else if (std::all_of(Mask.begin() + HalfSize, Mask.end(), [&](int M) {
-             return M < 0 || (M >= HalfSize && M < MaskSize) ||
-                    (M >= MaskSize + HalfSize && M < MaskSize * 2);
+             return M < 0 || (M >= 0 && M < HalfSize) ||
+                    (M >= MaskSize && M < MaskSize + HalfSize);
           }))
-    postMask = LowLaneTy;
+    postMask = HighLaneTy;
 
   // The pre-half of mask is high lane type, and the post-half of mask
   // is low lane type, which is closest to the LoongArch instructions.
@@ -2395,7 +2398,7 @@ static void canonicalizeShuffleVectorByLane(
   // to the lower 128-bit of vector register, and the low lane of mask
   // corresponds the higher 128-bit of vector register.
   if (preMask == HighLaneTy && postMask == LowLaneTy) {
-    return;
+    return false;
   }
   if (preMask == LowLaneTy && postMask == HighLaneTy) {
     V1 = DAG.getBitcast(MVT::v4i64, V1);
@@ -2449,8 +2452,10 @@ static void canonicalizeShuffleVectorByLane(
       *it = *it < 0 ? *it : *it + HalfSize;
     }
   } else { // cross-lane
-    return;
+    return false;
   }
+
+  return true;
 }
 
 /// Lower VECTOR_SHUFFLE as lane permute and then shuffle (if possible).
@@ -2516,27 +2521,20 @@ static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
   assert(Mask.size() % 2 == 0 && "Expected even mask size.");
   assert(Mask.size() >= 4 && "Mask size is less than 4.");
 
-  // canonicalize non cross-lane shuffle vector
-  SmallVector<int> NewMask(Mask);
-  canonicalizeShuffleVectorByLane(DL, NewMask, VT, V1, V2, DAG, Subtarget);
-
   APInt KnownUndef, KnownZero;
-  computeZeroableShuffleElements(NewMask, V1, V2, KnownUndef, KnownZero);
+  computeZeroableShuffleElements(Mask, V1, V2, KnownUndef, KnownZero);
   APInt Zeroable = KnownUndef | KnownZero;
 
   SDValue Result;
   // TODO: Add more comparison patterns.
   if (V2.isUndef()) {
-    if ((Result = lowerVECTOR_SHUFFLE_XVREPLVEI(DL, NewMask, VT, V1, V2, DAG,
+    if ((Result = lowerVECTOR_SHUFFLE_XVREPLVEI(DL, Mask, VT, V1, V2, DAG,
                                                 Subtarget)))
       return Result;
-    if ((Result = lowerVECTOR_SHUFFLE_XVSHUF4I(DL, NewMask, VT, V1, V2, DAG,
+    if ((Result = lowerVECTOR_SHUFFLE_XVSHUF4I(DL, Mask, VT, V1, V2, DAG,
                                                Subtarget)))
       return Result;
-    if ((Result = lowerVECTOR_SHUFFLE_XVPERM(DL, NewMask, VT, V1, V2, DAG)))
-      return Result;
-    if ((Result = lowerVECTOR_SHUFFLEAsLanePermuteAndShuffle(DL, NewMask, VT,
-                                                             V1, V2, DAG)))
+    if ((Result = lowerVECTOR_SHUFFLE_XVPERM(DL, Mask, VT, V1, V2, DAG)))
       return Result;
 
     // TODO: This comment may be enabled in the future to better match the
@@ -2546,24 +2544,39 @@ static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
 
   // It is recommended not to change the pattern comparison order for better
   // performance.
-  if ((Result = lowerVECTOR_SHUFFLE_XVPACKEV(DL, NewMask, VT, V1, V2, DAG)))
+  if ((Result = lowerVECTOR_SHUFFLE_XVPACKEV(DL, Mask, VT, V1, V2, DAG)))
     return Result;
-  if ((Result = lowerVECTOR_SHUFFLE_XVPACKOD(DL, NewMask, VT, V1, V2, DAG)))
+  if ((Result = lowerVECTOR_SHUFFLE_XVPACKOD(DL, Mask, VT, V1, V2, DAG)))
    return Result;
-  if ((Result = lowerVECTOR_SHUFFLE_XVILVH(DL, NewMask, VT, V1, V2, DAG)))
+  if ((Result = lowerVECTOR_SHUFFLE_XVILVH(DL, Mask, VT, V1, V2, DAG)))
     return Result;
-  if ((Result = lowerVECTOR_SHUFFLE_XVILVL(DL, NewMask, VT, V1, V2, DAG)))
+  if ((Result = lowerVECTOR_SHUFFLE_XVILVL(DL, Mask, VT, V1, V2, DAG)))
     return Result;
-  if ((Result = lowerVECTOR_SHUFFLE_XVPICKEV(DL, NewMask, VT, V1, V2, DAG)))
+  if ((Result = lowerVECTOR_SHUFFLE_XVPICKEV(DL, Mask, VT, V1, V2, DAG)))
     return Result;
-  if ((Result = lowerVECTOR_SHUFFLE_XVPICKOD(DL, NewMask, VT, V1, V2, DAG)))
+  if ((Result = lowerVECTOR_SHUFFLE_XVPICKOD(DL, Mask, VT, V1, V2, DAG)))
     return Result;
-  if ((Result = lowerVECTOR_SHUFFLEAsShift(DL, NewMask, VT, V1, V2, DAG,
-                                           Subtarget, Zeroable)))
+  if ((Result = lowerVECTOR_SHUFFLEAsShift(DL, Mask, VT, V1, V2, DAG, Subtarget,
+                                           Zeroable)))
    return Result;
-  if ((Result = lowerVECTOR_SHUFFLEAsByteRotate(DL, NewMask, VT, V1, V2, DAG,
+  if ((Result = lowerVECTOR_SHUFFLEAsByteRotate(DL, Mask, VT, V1, V2, DAG,
                                                 Subtarget)))
     return Result;
+
+  // canonicalize non cross-lane shuffle vector
+  SmallVector<int> NewMask(Mask);
+  if (canonicalizeShuffleVectorByLane(DL, NewMask, VT, V1, V2, DAG, Subtarget))
+    return lower256BitShuffle(DL, NewMask, VT, V1, V2, DAG, Subtarget);
+
+  // FIXME: Handling the remaining cases earlier can degrade performance
+  // in some situations. Further analysis is required to enable more
+  // effective optimizations.
+  if (V2.isUndef()) {
+    if ((Result = lowerVECTOR_SHUFFLEAsLanePermuteAndShuffle(DL, NewMask, VT,
+                                                             V1, V2, DAG)))
+      return Result;
+  }
+
   if (SDValue NewShuffle = widenShuffleMask(DL, NewMask, VT, V1, V2, DAG))
     return NewShuffle;
   if ((Result = lowerVECTOR_SHUFFLE_XVSHUF(DL, NewMask, VT, V1, V2, DAG)))
diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fix-xvshuf.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fix-xvshuf.ll
index 765473ce166df..30539427a1a0a 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fix-xvshuf.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fix-xvshuf.ll
@@ -8,7 +8,7 @@ define <4 x double> @shufflevector_v4f64(<4 x double> %a, <4 x double> %b) {
 ; CHECK-LABEL: shufflevector_v4f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xvpickve.d $xr2, $xr1, 3
-; CHECK-NEXT:    xvpermi.d $xr3, $xr0, 78
+; CHECK-NEXT:    xvpermi.d $xr3, $xr0, 238
 ; CHECK-NEXT:    xvrepl128vei.d $xr3, $xr3, 1
 ; CHECK-NEXT:    vextrins.d $vr3, $vr2, 16
 ; CHECK-NEXT:    xvpickve.d $xr1, $xr1, 2
diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvrepl128vei.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvrepl128vei.ll
index 935a30a3e54ed..69bb5899e685f 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvrepl128vei.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvrepl128vei.ll
@@ -16,7 +16,6 @@ define <32 x i8> @shufflevector_v32i8(<32 x i8> %a, <32 x i8> %b) {
 define <32 x i8> @shufflevector_v32i8_undef(<32 x i8> %a) {
 ; CHECK-LABEL: shufflevector_v32i8_undef:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xvpermi.d $xr0, $xr0, 68
 ; CHECK-NEXT:    xvrepl128vei.b $xr0, $xr0, 1
 ; CHECK-NEXT:    ret
   %c = shufflevector <32 x i8> %a, <32 x i8> poison, <32 x i32>
@shufflevector_v16i16(<16 x i16> %a, <16 x i16> %b) {
 define <16 x i16> @shufflevector_v16i16_undef(<16 x i16> %a) {
 ; CHECK-LABEL: shufflevector_v16i16_undef:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xvpermi.d $xr0, $xr0, 68
 ; CHECK-NEXT:    xvrepl128vei.h $xr0, $xr0, 3
 ; CHECK-NEXT:    ret
   %c = shufflevector <16 x i16> %a, <16 x i16> poison, <16 x i32>
@shufflevector_v8i32(<8 x i32> %a, <8 x i32> %b) {
 define <8 x i32> @shufflevector_v8i32_undef(<8 x i32> %a) {
 ; CHECK-LABEL: shufflevector_v8i32_undef:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xvpermi.d $xr0, $xr0, 68
 ; CHECK-NEXT:    xvrepl128vei.w $xr0, $xr0, 2
 ; CHECK-NEXT:    ret
   %c = shufflevector <8 x i32> %a, <8 x i32> poison, <8 x i32>
@@ -83,7 +80,6 @@ define <4 x i64> @shufflevector_v4i64(<4 x i64> %a, <4 x i64> %b) {
 define <4 x i64> @shufflevector_v4i64_undef(<4 x i64> %a) {
 ; CHECK-LABEL: shufflevector_v4i64_undef:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xvpermi.d $xr0, $xr0, 68
 ; CHECK-NEXT:    xvrepl128vei.d $xr0, $xr0, 1
 ; CHECK-NEXT:    ret
   %c = shufflevector <4 x i64> %a, <4 x i64> poison, <4 x i32>
@@ -104,7 +100,7 @@ define <8 x float> @shufflevector_v8f32(<8 x float> %a, <8 x float> %b) {
 define <8 x float> @shufflevector_v8f32_undef(<8 x float> %a) {
 ; CHECK-LABEL: shufflevector_v8f32_undef:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xvpermi.d $xr0, $xr0, 78
+; CHECK-NEXT:    xvpermi.d $xr0, $xr0, 238
 ; CHECK-NEXT:    xvrepl128vei.w $xr0, $xr0, 1
 ; CHECK-NEXT:    ret
   %c = shufflevector <8 x float> %a, <8 x float> poison, <8 x i32>
@@ -125,7 +121,6 @@ define <4 x double> @shufflevector_v4f64(<4 x double> %a, <4 x double> %b) {
 define <4 x double> @shufflevector_v4f64_undef(<4 x double> %a) {
 ; CHECK-LABEL: shufflevector_v4f64_undef:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xvpermi.d $xr0, $xr0, 68
 ; CHECK-NEXT:    xvrepl128vei.d $xr0, $xr0, 0
 ; CHECK-NEXT:    ret
   %c = shufflevector <4 x double> %a, <4 x double> poison, <4 x i32>

From 47274d059b280f7721486e4ea6e656adbfe4357f Mon Sep 17 00:00:00 2001
From: Qi Zhao
Date: Wed, 24 Sep 2025 09:09:25 +0800
Subject: [PATCH 2/4] [LoongArch] Custom legalize vector_shuffle to xvpermi.d
 when possible
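xvpermi.d selects each of the four 64-bit elements of its result with a
2-bit field of an 8-bit immediate, so any single-input v4i64 or v4f64
shuffle mask maps onto one instruction. A sketch of the immediate
computation performed by the new lowerVECTOR_SHUFFLE_XVPERMI hook
(standalone and simplified; the in-tree hook emits a
LoongArchISD::XVPERMI node rather than returning the value):

  #include <array>
  #include <cstdio>

  static unsigned xvpermiDImm(const std::array<int, 4> &Mask) {
    unsigned Imm = 0;
    for (unsigned i = 0; i < 4; ++i) {
      if (Mask[i] == -1) // undef element: any selector works, keep 0
        continue;
      Imm |= (unsigned)Mask[i] << (i * 2);
    }
    return Imm;
  }

  int main() {
    // Broadcast the high 128-bit lane: <2,3,2,3> -> 0b11101110 = 238.
    printf("%u\n", xvpermiDImm({2, 3, 2, 3}));
    // Swap the two 128-bit lanes: <2,3,0,1> -> 78.
    printf("%u\n", xvpermiDImm({2, 3, 0, 1}));
    // Swap the elements within each lane: <1,0,3,2> -> 177.
    printf("%u\n", xvpermiDImm({1, 0, 3, 2}));
  }

The last case is why byte_rotate_v4i64_3 below collapses from
xvbsrl.v/xvbsll.v/xvor.v to a single xvpermi.d with immediate 177, and
why the constant-pool xvshuf.d sequences in
shuffle-as-permute-and-shuffle reduce to xvpermi.d with immediates such
as 225, 39 and 75.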
---
 .../LoongArch/LoongArchISelLowering.cpp       | 23 +++++++++
 .../lasx/ir-instruction/fix-xvshuf.ll         |  9 ++--
 .../lasx/shuffle-as-permute-and-shuffle.ll    | 48 ++++---------------
 .../LoongArch/lasx/vec-shuffle-byte-rotate.ll |  4 +-
 4 files changed, 37 insertions(+), 47 deletions(-)

diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index 9662fec5ef4b9..7e313f0b90d34 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -2088,6 +2088,26 @@ lowerVECTOR_SHUFFLE_XVSHUF4I(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
   return lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG, Subtarget);
 }
 
+/// Lower VECTOR_SHUFFLE into XVPERMI (if possible).
+static SDValue
+lowerVECTOR_SHUFFLE_XVPERMI(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
+                            SDValue V1, SDValue V2, SelectionDAG &DAG,
+                            const LoongArchSubtarget &Subtarget) {
+  // Only consider XVPERMI_D.
+  if (Mask.size() != 4 || (VT != MVT::v4i64 && VT != MVT::v4f64))
+    return SDValue();
+
+  unsigned MaskImm = 0;
+  for (unsigned i = 0; i < Mask.size(); ++i) {
+    if (Mask[i] == -1)
+      continue;
+    MaskImm |= Mask[i] << (i * 2);
+  }
+
+  return DAG.getNode(LoongArchISD::XVPERMI, DL, VT, V1,
+                     DAG.getConstant(MaskImm, DL, Subtarget.getGRLenVT()));
+}
+
 /// Lower VECTOR_SHUFFLE into XVPERM (if possible).
 static SDValue lowerVECTOR_SHUFFLE_XVPERM(const SDLoc &DL, ArrayRef<int> Mask,
                                           MVT VT, SDValue V1, SDValue V2,
@@ -2534,6 +2554,9 @@ static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
     if ((Result = lowerVECTOR_SHUFFLE_XVSHUF4I(DL, Mask, VT, V1, V2, DAG,
                                                Subtarget)))
       return Result;
+    if ((Result =
+             lowerVECTOR_SHUFFLE_XVPERMI(DL, Mask, VT, V1, V2, DAG, Subtarget)))
+      return Result;
     if ((Result = lowerVECTOR_SHUFFLE_XVPERM(DL, Mask, VT, V1, V2, DAG)))
       return Result;
 
diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fix-xvshuf.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fix-xvshuf.ll
index 30539427a1a0a..0b8015ddbdd4a 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fix-xvshuf.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fix-xvshuf.ll
@@ -7,13 +7,12 @@
 define <4 x double> @shufflevector_v4f64(<4 x double> %a, <4 x double> %b) {
 ; CHECK-LABEL: shufflevector_v4f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xvpickve.d $xr2, $xr1, 3
-; CHECK-NEXT:    xvpermi.d $xr3, $xr0, 238
-; CHECK-NEXT:    xvrepl128vei.d $xr3, $xr3, 1
-; CHECK-NEXT:    vextrins.d $vr3, $vr2, 16
+; CHECK-NEXT:    xvpermi.d $xr2, $xr0, 3
+; CHECK-NEXT:    xvpickve.d $xr3, $xr1, 3
+; CHECK-NEXT:    vextrins.d $vr2, $vr3, 16
 ; CHECK-NEXT:    xvpickve.d $xr1, $xr1, 2
 ; CHECK-NEXT:    vextrins.d $vr0, $vr1, 16
-; CHECK-NEXT:    xvpermi.q $xr0, $xr3, 2
+; CHECK-NEXT:    xvpermi.q $xr0, $xr2, 2
 ; CHECK-NEXT:    ret
 entry:
   %c = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32>
diff --git a/llvm/test/CodeGen/LoongArch/lasx/shuffle-as-permute-and-shuffle.ll b/llvm/test/CodeGen/LoongArch/lasx/shuffle-as-permute-and-shuffle.ll
index 5f76d9951df9c..ee1e9f4ce4e5c 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/shuffle-as-permute-and-shuffle.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/shuffle-as-permute-and-shuffle.ll
@@ -5,11 +5,8 @@ define <32 x i8> @shuffle_v32i8(<32 x i8> %a) {
 ; CHECK-LABEL: shuffle_v32i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI0_0)
-; CHECK-NEXT:    xvld $xr2, $a0, %pc_lo12(.LCPI0_0)
-; CHECK-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI0_1)
-; CHECK-NEXT:    xvld $xr1, $a0, %pc_lo12(.LCPI0_1)
-; CHECK-NEXT:    xvpermi.d $xr3, $xr0, 78
-; CHECK-NEXT:    xvshuf.d $xr2, $xr0, $xr3
+; CHECK-NEXT:    xvld $xr1, $a0, %pc_lo12(.LCPI0_0)
+; CHECK-NEXT:    xvpermi.d $xr2, $xr0, 78
 ; CHECK-NEXT:    xvshuf.h $xr1, $xr2, $xr0
 ; CHECK-NEXT:    xvori.b $xr0, $xr1, 0
 ; CHECK-NEXT:    ret
@@ -33,11 +30,8 @@ define <16 x i16> @shuffle_v16i16(<16 x i16> %a) {
 ; CHECK-LABEL: shuffle_v16i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI2_0)
-; CHECK-NEXT:    xvld $xr2, $a0, %pc_lo12(.LCPI2_0)
-; CHECK-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI2_1)
-; CHECK-NEXT:    xvld $xr1, $a0, %pc_lo12(.LCPI2_1)
-; CHECK-NEXT:    xvpermi.d $xr3, $xr0, 78
-; CHECK-NEXT:    xvshuf.d $xr2, $xr0, $xr3
+; CHECK-NEXT:    xvld $xr1, $a0, %pc_lo12(.LCPI2_0)
+; CHECK-NEXT:    xvpermi.d $xr2, $xr0, 78
 ; CHECK-NEXT:    xvshuf.w $xr1, $xr2, $xr0
 ; CHECK-NEXT:    xvori.b $xr0, $xr1, 0
 ; CHECK-NEXT:    ret
@@ -71,10 +65,7 @@ define <8 x i32> @shuffle_v8i32(<8 x i32> %a) {
 define <8 x i32> @shuffle_v8i32_same_lane(<8 x i32> %a) {
 ; CHECK-LABEL: shuffle_v8i32_same_lane:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI5_0)
-; CHECK-NEXT:    xvld $xr1, $a0, %pc_lo12(.LCPI5_0)
-; CHECK-NEXT:    xvshuf.d $xr1, $xr0, $xr0
-; CHECK-NEXT:    xvori.b $xr0, $xr1, 0
+; CHECK-NEXT:    xvpermi.d $xr0, $xr0, 225
 ; CHECK-NEXT:    ret
   %shuffle = shufflevector <8 x i32> %a, <8 x i32> poison, <8 x i32>
   ret <8 x i32> %shuffle
@@ -83,14 +74,7 @@
 define <4 x i64> @shuffle_v4i64(<4 x i64> %a) {
 ; CHECK-LABEL: shuffle_v4i64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI6_0)
-; CHECK-NEXT:    xvld $xr2, $a0, %pc_lo12(.LCPI6_0)
-; CHECK-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI6_1)
-; CHECK-NEXT:    xvld $xr1, $a0, %pc_lo12(.LCPI6_1)
-; CHECK-NEXT:    xvpermi.d $xr3, $xr0, 78
-; CHECK-NEXT:    xvshuf.d $xr2, $xr0, $xr3
-; CHECK-NEXT:    xvshuf.d $xr1, $xr2, $xr0
-; CHECK-NEXT:    xvori.b $xr0, $xr1, 0
+; CHECK-NEXT:    xvpermi.d $xr0, $xr0, 39
 ; CHECK-NEXT:    ret
   %shuffle = shufflevector <4 x i64> %a, <4 x i64> poison, <4 x i32>
   ret <4 x i64> %shuffle
@@ -99,10 +83,7 @@ define <4 x i64> @shuffle_v4i64(<4 x i64> %a) {
 define <4 x i64> @shuffle_v4i64_same_lane(<4 x i64> %a) {
 ; CHECK-LABEL: shuffle_v4i64_same_lane:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI7_0)
-; CHECK-NEXT:    xvld $xr1, $a0, %pc_lo12(.LCPI7_0)
-; CHECK-NEXT:    xvshuf.d $xr1, $xr0, $xr0
-; CHECK-NEXT:    xvori.b $xr0, $xr1, 0
+; CHECK-NEXT:    xvpermi.d $xr0, $xr0, 225
 ; CHECK-NEXT:    ret
   %shuffle = shufflevector <4 x i64> %a, <4 x i64> poison, <4 x i32>
   ret <4 x i64> %shuffle
@@ -135,14 +116,7 @@ define <8 x float> @shuffle_v8f32_same_lane(<8 x float> %a) {
 define <4 x double> @shuffle_v4f64(<4 x double> %a) {
 ; CHECK-LABEL: shuffle_v4f64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI10_0)
-; CHECK-NEXT:    xvld $xr2, $a0, %pc_lo12(.LCPI10_0)
-; CHECK-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI10_1)
-; CHECK-NEXT:    xvld $xr1, $a0, %pc_lo12(.LCPI10_1)
-; CHECK-NEXT:    xvpermi.d $xr3, $xr0, 78
-; CHECK-NEXT:    xvshuf.d $xr2, $xr0, $xr3
-; CHECK-NEXT:    xvshuf.d $xr1, $xr2, $xr0
-; CHECK-NEXT:    xvori.b $xr0, $xr1, 0
+; CHECK-NEXT:    xvpermi.d $xr0, $xr0, 39
 ; CHECK-NEXT:    ret
   %shuffle = shufflevector <4 x double> %a, <4 x double> poison, <4 x i32>
   ret <4 x double> %shuffle
@@ -151,11 +125,7 @@ define <4 x double> @shuffle_v4f64(<4 x double> %a) {
 define <4 x double> @shuffle_v4f64_same_lane(<4 x double> %a) {
 ; CHECK-LABEL: shuffle_v4f64_same_lane:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI11_0)
-; CHECK-NEXT:    xvld $xr1, $a0, %pc_lo12(.LCPI11_0)
-; CHECK-NEXT:    xvpermi.d $xr0, $xr0, 78
-; CHECK-NEXT:    xvshuf.d $xr1, $xr0, $xr0
-; CHECK-NEXT:    xvori.b $xr0, $xr1, 0
+; CHECK-NEXT:    xvpermi.d $xr0, $xr0, 75
 ; CHECK-NEXT:    ret
   %shuffle = shufflevector <4 x double> %a, <4 x double> poison, <4 x i32>
   ret <4 x double> %shuffle
diff --git a/llvm/test/CodeGen/LoongArch/lasx/vec-shuffle-byte-rotate.ll b/llvm/test/CodeGen/LoongArch/lasx/vec-shuffle-byte-rotate.ll
index b697a2fd07435..eaf33d46a8803 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/vec-shuffle-byte-rotate.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/vec-shuffle-byte-rotate.ll
@@ -126,9 +126,7 @@ define <4 x i64> @byte_rotate_v4i64_2(<4 x i64> %a, <4 x i64> %b) nounwind {
 define <4 x i64> @byte_rotate_v4i64_3(<4 x i64> %a) nounwind {
 ; CHECK-LABEL: byte_rotate_v4i64_3:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xvbsrl.v $xr1, $xr0, 8
-; CHECK-NEXT:    xvbsll.v $xr0, $xr0, 8
-; CHECK-NEXT:    xvor.v $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvpermi.d $xr0, $xr0, 177
 ; CHECK-NEXT:    ret
   %shuffle = shufflevector <4 x i64> %a, <4 x i64> poison, <4 x i32>
   ret <4 x i64> %shuffle

From bd5608f2ab4fa75e669cee0135c05e9a3b703fc5 Mon Sep 17 00:00:00 2001
From: Qi Zhao
Date: Thu, 25 Sep 2025 09:35:34 +0800
Subject: [PATCH 3/4] address heiher's comment

---
 .../LoongArch/LoongArchISelLowering.cpp       | 20 +++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index 7e313f0b90d34..40c8db5c499b9 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -1596,7 +1596,7 @@ static SDValue lowerVECTOR_SHUFFLEAsZeroOrAnyExtend(const SDLoc &DL,
 /// value is necessary in order to fit the above form.
 static SDValue
 lowerVECTOR_SHUFFLE_VREPLVEI(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
-                             SDValue V1, SDValue V2, SelectionDAG &DAG,
+                             SDValue V1, SelectionDAG &DAG,
                              const LoongArchSubtarget &Subtarget) {
   int SplatIndex = -1;
   for (const auto &M : Mask) {
@@ -1989,8 +1989,8 @@ static SDValue lower128BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
   SDValue Result;
   // TODO: Add more comparison patterns.
   if (V2.isUndef()) {
-    if ((Result = lowerVECTOR_SHUFFLE_VREPLVEI(DL, Mask, VT, V1, V2, DAG,
-                                               Subtarget)))
+    if ((Result =
+             lowerVECTOR_SHUFFLE_VREPLVEI(DL, Mask, VT, V1, DAG, Subtarget)))
       return Result;
     if ((Result = lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG,
                                               Subtarget)))
@@ -2045,7 +2045,7 @@ static SDValue lower128BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
 /// value is necessary in order to fit the above form.
 static SDValue
 lowerVECTOR_SHUFFLE_XVREPLVEI(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
-                              SDValue V1, SDValue V2, SelectionDAG &DAG,
+                              SDValue V1, SelectionDAG &DAG,
                               const LoongArchSubtarget &Subtarget) {
   int SplatIndex = -1;
   for (const auto &M : Mask) {
@@ -2091,7 +2091,7 @@ lowerVECTOR_SHUFFLE_XVSHUF4I(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
 /// Lower VECTOR_SHUFFLE into XVPERMI (if possible).
 static SDValue
 lowerVECTOR_SHUFFLE_XVPERMI(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
-                            SDValue V1, SDValue V2, SelectionDAG &DAG,
+                            SDValue V1, SelectionDAG &DAG,
                             const LoongArchSubtarget &Subtarget) {
   // Only consider XVPERMI_D.
   if (Mask.size() != 4 || (VT != MVT::v4i64 && VT != MVT::v4f64))
@@ -2110,7 +2110,7 @@ lowerVECTOR_SHUFFLE_XVPERMI(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
 /// Lower VECTOR_SHUFFLE into XVPERM (if possible).
 static SDValue lowerVECTOR_SHUFFLE_XVPERM(const SDLoc &DL, ArrayRef<int> Mask,
-                                          MVT VT, SDValue V1, SDValue V2,
+                                          MVT VT, SDValue V1,
                                           SelectionDAG &DAG) {
   // LoongArch LASX only have XVPERM_W.
   if (Mask.size() != 8 || (VT != MVT::v8i32 && VT != MVT::v8f32))
@@ -2548,16 +2548,16 @@ static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
   SDValue Result;
   // TODO: Add more comparison patterns.
   if (V2.isUndef()) {
-    if ((Result = lowerVECTOR_SHUFFLE_XVREPLVEI(DL, Mask, VT, V1, V2, DAG,
-                                                Subtarget)))
+    if ((Result =
+             lowerVECTOR_SHUFFLE_XVREPLVEI(DL, Mask, VT, V1, DAG, Subtarget)))
      return Result;
     if ((Result = lowerVECTOR_SHUFFLE_XVSHUF4I(DL, Mask, VT, V1, V2, DAG,
                                                Subtarget)))
       return Result;
     if ((Result =
-             lowerVECTOR_SHUFFLE_XVPERMI(DL, Mask, VT, V1, V2, DAG, Subtarget)))
+             lowerVECTOR_SHUFFLE_XVPERMI(DL, Mask, VT, V1, DAG, Subtarget)))
       return Result;
-    if ((Result = lowerVECTOR_SHUFFLE_XVPERM(DL, Mask, VT, V1, V2, DAG)))
+    if ((Result = lowerVECTOR_SHUFFLE_XVPERM(DL, Mask, VT, V1, DAG)))
       return Result;
 
     // TODO: This comment may be enabled in the future to better match the

From 9776ec206e7392ffdecaa5550d40ca27696211a8 Mon Sep 17 00:00:00 2001
From: Qi Zhao
Date: Fri, 26 Sep 2025 14:51:25 +0800
Subject: [PATCH 4/4] clang format

---
 llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index cc289973120fb..5d4a8fd080202 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -2118,8 +2118,7 @@ lowerVECTOR_SHUFFLE_XVPERMI(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
 /// Lower VECTOR_SHUFFLE into XVPERM (if possible).
 static SDValue lowerVECTOR_SHUFFLE_XVPERM(const SDLoc &DL, ArrayRef<int> Mask,
-                                          MVT VT, SDValue V1,
-                                          SelectionDAG &DAG,
+                                          MVT VT, SDValue V1, SelectionDAG &DAG,
                                           const LoongArchSubtarget &Subtarget) {
   // LoongArch LASX only have XVPERM_W.
   if (Mask.size() != 8 || (VT != MVT::v8i32 && VT != MVT::v8f32))
@@ -2569,8 +2568,7 @@ static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
     if ((Result =
              lowerVECTOR_SHUFFLE_XVPERMI(DL, Mask, VT, V1, DAG, Subtarget)))
       return Result;
-    if ((Result =
-             lowerVECTOR_SHUFFLE_XVPERM(DL, Mask, VT, V1, DAG, Subtarget)))
+    if ((Result = lowerVECTOR_SHUFFLE_XVPERM(DL, Mask, VT, V1, DAG, Subtarget)))
       return Result;
 
     // TODO: This comment may be enabled in the future to better match the