From f38bb557b1408dcd502b73f586cefe802b11738a Mon Sep 17 00:00:00 2001
From: Qi Zhao
Date: Fri, 24 Oct 2025 17:00:29 +0800
Subject: [PATCH 1/2] [LoongArch] Custom legalize vector_shuffle to `[x]vpermi.w`

---
 .../LoongArch/LoongArchISelLowering.cpp       | 155 ++++++++++++++++--
 .../LoongArch/LoongArchLASXInstrInfo.td       |   6 +-
 .../Target/LoongArch/LoongArchLSXInstrInfo.td |   7 +
 .../lasx/ir-instruction/shuffle-as-xvpermi.ll |  10 +-
 .../lasx/ir-instruction/shuffle-as-xvshuf.ll  |   9 +-
 .../lsx/ir-instruction/shuffle-as-vpermi.ll   |  10 +-
 .../lsx/ir-instruction/shuffle-as-vshuf.ll    |  10 +-
 .../LoongArch/lsx/widen-shuffle-mask.ll       |   6 +-
 8 files changed, 167 insertions(+), 46 deletions(-)

diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index ac95ef5f30888..fa9e71ae7e52b 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -2004,6 +2004,108 @@ static SDValue lowerVECTOR_SHUFFLE_VPICKOD(const SDLoc &DL, ArrayRef<int> Mask,
   return DAG.getNode(LoongArchISD::VPICKOD, DL, VT, V2, V1);
 }
 
+/// Check \p Mask and collect the operands needed to build a
+/// LoongArchISD::VPERMI node (VPERMI_W or XVPERMI_W).
+///
+/// Mask elements <0, 1> and <2, 3> are processed as two pairs. Both indices
+/// of a pair must lie in [0, 4), in which case V1 is appended to \p SrcVec,
+/// or in [MaskSize, MaskSize + 4), in which case V2 is appended; undef (-1)
+/// fits either range and contributes 0. The low two bits of each index
+/// relative to its range are OR-ed into \p MaskImm: pair 0 fills bits [3:0]
+/// and pair 1 fills bits [7:4]. For 8-element masks, elements 4-7 must
+/// additionally request the same indices as elements 0-3, shifted up by 4.
+///
+/// \returns true on success. On failure \p SrcVec and \p MaskImm may already
+/// have been partially updated and must not be used.
+static bool buildVPERMIInfo(ArrayRef<int> Mask, SDValue V1, SDValue V2,
+                            SmallVectorImpl<SDValue> &SrcVec,
+                            unsigned &MaskImm) {
+  unsigned MaskSize = Mask.size();
+
+  auto isValid = [&](int M, int Off) {
+    return (M == -1) || (M >= Off && M < Off + 4);
+  };
+
+  auto buildImm = [&](int MLo, int MHi, unsigned Off, unsigned I) {
+    auto immPart = [&](int M, unsigned Off) {
+      return (M == -1 ? 0 : (M - Off)) & 0x3;
+    };
+    MaskImm |= immPart(MLo, Off) << (I * 2);
+    MaskImm |= immPart(MHi, Off) << ((I + 1) * 2);
+  };
+
+  // Fold the index requested by the upper half (Hi) into the one requested
+  // by the lower half (Lo). Undef has to be tested explicitly: comparing
+  // "Hi == Lo + 4" directly would equate an undef Lo (-1 + 4) with the
+  // defined index 3 and accept a mask that breaks the rule above.
+  auto unifyHalves = [](int &Lo, int Hi) {
+    if (Hi == -1)
+      return true;
+    if (Lo != -1)
+      return Hi == Lo + 4;
+    if (Hi < 4)
+      return false;
+    Lo = Hi - 4;
+    return true;
+  };
+
+  for (unsigned i = 0; i < 4; i += 2) {
+    int MLo = Mask[i];
+    int MHi = Mask[i + 1];
+
+    if (MaskSize == 8) { // Only v8i32/v8f32 need this check.
+      if (!unifyHalves(MLo, Mask[i + 4]) || !unifyHalves(MHi, Mask[i + 5]))
+        return false;
+    }
+
+    if (isValid(MLo, 0) && isValid(MHi, 0)) {
+      SrcVec.push_back(V1);
+      buildImm(MLo, MHi, 0, i);
+    } else if (isValid(MLo, MaskSize) && isValid(MHi, MaskSize)) {
+      SrcVec.push_back(V2);
+      buildImm(MLo, MHi, MaskSize, i);
+    } else {
+      return false;
+    }
+  }
+
+  return true;
+}
+
+/// Lower VECTOR_SHUFFLE into VPERMI (if possible).
+///
+/// VPERMI selects two elements from each of the two vectors based on the
+/// mask and places them in the corresponding positions of the result vector
+/// in order. Only v4i32 and v4f32 types are allowed.
+///
+/// It is possible to lower into VPERMI when each half of the mask, i.e.
+/// elements <0, 1> and elements <2, 3>, has one of the following forms:
+///   <i, j>
+///   <u, v>
+/// where i,j are in [0,4) and u,v are in [4, 8).
+/// For example:
+///   <2, 3, 4, 5>
+///   <5, 7, 0, 2>
+///
+/// When undef's appear in the mask they are treated as if they were whatever
+/// value is necessary in order to fit the above forms.
+static SDValue lowerVECTOR_SHUFFLE_VPERMI(const SDLoc &DL, ArrayRef<int> Mask,
+                                          MVT VT, SDValue V1, SDValue V2,
+                                          SelectionDAG &DAG,
+                                          const LoongArchSubtarget &Subtarget) {
+  if ((VT != MVT::v4i32 && VT != MVT::v4f32) ||
+      Mask.size() != VT.getVectorNumElements())
+    return SDValue();
+
+  SmallVector<SDValue, 2> SrcVec;
+  unsigned MaskImm = 0;
+  if (!buildVPERMIInfo(Mask, V1, V2, SrcVec, MaskImm))
+    return SDValue();
+
+  return DAG.getNode(LoongArchISD::VPERMI, DL, VT, SrcVec[0], SrcVec[1],
+                     DAG.getConstant(MaskImm, DL, Subtarget.getGRLenVT()));
+}
+
 /// Lower VECTOR_SHUFFLE into VSHUF.
/// /// This mostly consists of converting the shuffle mask into a BUILD_VECTOR and @@ -2087,12 +2166,15 @@ static SDValue lower128BitShuffle(const SDLoc &DL, ArrayRef Mask, MVT VT, (Result = lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG, Subtarget))) return Result; - if ((Result = lowerVECTOR_SHUFFLEAsZeroOrAnyExtend(DL, Mask, VT, V1, V2, DAG, - Zeroable))) - return Result; if ((Result = lowerVECTOR_SHUFFLEAsShift(DL, Mask, VT, V1, V2, DAG, Subtarget, Zeroable))) return Result; + if ((Result = + lowerVECTOR_SHUFFLE_VPERMI(DL, Mask, VT, V1, V2, DAG, Subtarget))) + return Result; + if ((Result = lowerVECTOR_SHUFFLEAsZeroOrAnyExtend(DL, Mask, VT, V1, V2, DAG, + Zeroable))) + return Result; if ((Result = lowerVECTOR_SHUFFLEAsByteRotate(DL, Mask, VT, V1, V2, DAG, Subtarget))) return Result; @@ -2160,21 +2242,40 @@ lowerVECTOR_SHUFFLE_XVSHUF4I(const SDLoc &DL, ArrayRef Mask, MVT VT, /// Lower VECTOR_SHUFFLE into XVPERMI (if possible). static SDValue lowerVECTOR_SHUFFLE_XVPERMI(const SDLoc &DL, ArrayRef Mask, MVT VT, - SDValue V1, SelectionDAG &DAG, + SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget) { - // Only consider XVPERMI_D. - if (Mask.size() != 4 || (VT != MVT::v4i64 && VT != MVT::v4f64)) + MVT GRLenVT = Subtarget.getGRLenVT(); + unsigned MaskSize = Mask.size(); + if (MaskSize != VT.getVectorNumElements()) return SDValue(); - unsigned MaskImm = 0; - for (unsigned i = 0; i < Mask.size(); ++i) { - if (Mask[i] == -1) - continue; - MaskImm |= Mask[i] << (i * 2); + // Consider XVPERMI_W. + if (VT == MVT::v8i32 || VT == MVT::v8f32) { + SmallVector SrcVec; + unsigned MaskImm = 0; + if (!buildVPERMIInfo(Mask, V1, V2, SrcVec, MaskImm)) + return SDValue(); + + return DAG.getNode(LoongArchISD::VPERMI, DL, VT, SrcVec[0], SrcVec[1], + DAG.getConstant(MaskImm, DL, GRLenVT)); } - return DAG.getNode(LoongArchISD::XVPERMI, DL, VT, V1, - DAG.getConstant(MaskImm, DL, Subtarget.getGRLenVT())); + // Consider XVPERMI_D. 
+ if (VT == MVT::v4i64 || VT == MVT::v4f64) { + unsigned MaskImm = 0; + for (unsigned i = 0; i < MaskSize; ++i) { + if (Mask[i] == -1) + continue; + if (Mask[i] >= (int)MaskSize) + return SDValue(); + MaskImm |= Mask[i] << (i * 2); + } + + return DAG.getNode(LoongArchISD::XVPERMI, DL, VT, V1, + DAG.getConstant(MaskImm, DL, GRLenVT)); + } + + return SDValue(); } /// Lower VECTOR_SHUFFLE into XVPERM (if possible). @@ -2677,7 +2778,7 @@ static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef Mask, MVT VT, if (SDValue NewShuffle = widenShuffleMask(DL, Mask, VT, V1, V2, DAG)) return NewShuffle; if ((Result = - lowerVECTOR_SHUFFLE_XVPERMI(DL, Mask, VT, V1, DAG, Subtarget))) + lowerVECTOR_SHUFFLE_XVPERMI(DL, Mask, VT, V1, V2, DAG, Subtarget))) return Result; if ((Result = lowerVECTOR_SHUFFLE_XVPERM(DL, Mask, VT, V1, DAG, Subtarget))) return Result; @@ -2707,6 +2808,9 @@ static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef Mask, MVT VT, if ((Result = lowerVECTOR_SHUFFLEAsShift(DL, Mask, VT, V1, V2, DAG, Subtarget, Zeroable))) return Result; + if ((Result = + lowerVECTOR_SHUFFLE_XVPERMI(DL, Mask, VT, V1, V2, DAG, Subtarget))) + return Result; if ((Result = lowerVECTOR_SHUFFLE_XVINSVE0(DL, Mask, VT, V1, V2, DAG, Subtarget))) return Result; diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td index 599cb4ac9aeef..67070d003d62b 100644 --- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td +++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td @@ -1898,7 +1898,11 @@ def : Pat<(loongarch_vreplvei v8f32:$xj, immZExt2:$ui2), def : Pat<(loongarch_vreplvei v4f64:$xj, immZExt1:$ui1), (XVREPL128VEI_D v4f64:$xj, immZExt1:$ui1)>; -// XVPERMI_D +// XVPERMI_{W/D} +def : Pat<(loongarch_vpermi v8i32:$xj, v8i32:$xk, immZExt8:$ui8), + (XVPERMI_W v8i32:$xj, v8i32:$xk, immZExt8:$ui8)>; +def : Pat<(loongarch_vpermi v8f32:$xj, v8f32:$xk, immZExt8:$ui8), + (XVPERMI_W v8f32:$xj, v8f32:$xk, immZExt8:$ui8)>; def : 
Pat<(loongarch_xvpermi v4i64:$xj, immZExt8: $ui8), (XVPERMI_D v4i64:$xj, immZExt8: $ui8)>; def : Pat<(loongarch_xvpermi v4f64:$xj, immZExt8: $ui8), diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td index 58d5acfb40f41..3061a774ae4e3 100644 --- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td +++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td @@ -67,6 +67,7 @@ def loongarch_vshuf4i: SDNode<"LoongArchISD::VSHUF4I", SDT_LoongArchV1RUimm>; def loongarch_vshuf4i_d : SDNode<"LoongArchISD::VSHUF4I_D", SDT_LoongArchV2RUimm>; def loongarch_vreplvei: SDNode<"LoongArchISD::VREPLVEI", SDT_LoongArchV1RUimm>; def loongarch_vreplgr2vr: SDNode<"LoongArchISD::VREPLGR2VR", SDT_LoongArchVreplgr2vr>; +def loongarch_vpermi : SDNode<"LoongArchISD::VPERMI", SDT_LoongArchV2RUimm>; def loongarch_vfrecipe: SDNode<"LoongArchISD::FRECIPE", SDT_LoongArchVFRECIPE>; def loongarch_vfrsqrte: SDNode<"LoongArchISD::FRSQRTE", SDT_LoongArchVFRSQRTE>; @@ -2071,6 +2072,12 @@ def : Pat<(loongarch_vshuf4i_d v2i64:$vj, v2i64:$vk, immZExt8:$ui8), def : Pat<(loongarch_vshuf4i_d v2f64:$vj, v2f64:$vk, immZExt8:$ui8), (VSHUF4I_D v2f64:$vj, v2f64:$vk, immZExt8:$ui8)>; +// VPERMI_W +def : Pat<(loongarch_vpermi v4i32:$vj, v4i32:$vk, immZExt8:$ui8), + (VPERMI_W v4i32:$vj, v4i32:$vk, immZExt8:$ui8)>; +def : Pat<(loongarch_vpermi v4f32:$vj, v4f32:$vk, immZExt8:$ui8), + (VPERMI_W v4f32:$vj, v4f32:$vk, immZExt8:$ui8)>; + // VREPLVEI_{B/H/W/D} def : Pat<(loongarch_vreplvei v16i8:$vj, immZExt4:$ui4), (VREPLVEI_B v16i8:$vj, immZExt4:$ui4)>; diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvpermi.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvpermi.ll index f73f7a659abed..372e053ae64f0 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvpermi.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvpermi.ll @@ -8,9 +8,7 @@ define void @shufflevector_xvpermi_v8i32(ptr 
%res, ptr %a, ptr %b) nounwind { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xvld $xr0, $a1, 0 ; CHECK-NEXT: xvld $xr1, $a2, 0 -; CHECK-NEXT: xvbsrl.v $xr0, $xr0, 8 -; CHECK-NEXT: xvbsll.v $xr1, $xr1, 8 -; CHECK-NEXT: xvor.v $xr0, $xr1, $xr0 +; CHECK-NEXT: xvpermi.w $xr0, $xr1, 78 ; CHECK-NEXT: xvst $xr0, $a0, 0 ; CHECK-NEXT: ret entry: @@ -27,10 +25,8 @@ define void @shufflevector_xvpermi_v8f32(ptr %res, ptr %a, ptr %b) nounwind { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xvld $xr0, $a1, 0 ; CHECK-NEXT: xvld $xr1, $a2, 0 -; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI1_0) -; CHECK-NEXT: xvld $xr2, $a1, %pc_lo12(.LCPI1_0) -; CHECK-NEXT: xvshuf.w $xr2, $xr1, $xr0 -; CHECK-NEXT: xvst $xr2, $a0, 0 +; CHECK-NEXT: xvpermi.w $xr1, $xr0, 141 +; CHECK-NEXT: xvst $xr1, $a0, 0 ; CHECK-NEXT: ret entry: %va = load <8 x float>, ptr %a diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvshuf.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvshuf.ll index 4900146b69a25..a9bdbf9525df8 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvshuf.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvshuf.ll @@ -35,12 +35,9 @@ define <16 x i16> @shufflevector_v16i16(<16 x i16> %a, <16 x i16> %b) { define <8 x i32> @shufflevector_v8i32(<8 x i32> %a, <8 x i32> %b) { ; CHECK-LABEL: shufflevector_v8i32: ; CHECK: # %bb.0: -; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI2_0) -; CHECK-NEXT: xvld $xr2, $a0, %pc_lo12(.LCPI2_0) -; CHECK-NEXT: xvpermi.d $xr0, $xr0, 68 -; CHECK-NEXT: xvpermi.d $xr1, $xr1, 68 -; CHECK-NEXT: xvshuf.w $xr2, $xr1, $xr0 -; CHECK-NEXT: xvori.b $xr0, $xr2, 0 +; CHECK-NEXT: xvpermi.d $xr2, $xr0, 68 +; CHECK-NEXT: xvpermi.d $xr0, $xr1, 68 +; CHECK-NEXT: xvpermi.w $xr0, $xr2, 180 ; CHECK-NEXT: ret %c = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> ret <8 x i32> %c diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vpermi.ll 
b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vpermi.ll index ca65dd87f6c61..39ca00def281b 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vpermi.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vpermi.ll @@ -8,9 +8,7 @@ define void @shufflevector_vpermi_v4i32(ptr %res, ptr %a, ptr %b) nounwind { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vld $vr0, $a1, 0 ; CHECK-NEXT: vld $vr1, $a2, 0 -; CHECK-NEXT: vbsrl.v $vr0, $vr0, 8 -; CHECK-NEXT: vbsll.v $vr1, $vr1, 8 -; CHECK-NEXT: vor.v $vr0, $vr1, $vr0 +; CHECK-NEXT: vpermi.w $vr0, $vr1, 78 ; CHECK-NEXT: vst $vr0, $a0, 0 ; CHECK-NEXT: ret entry: @@ -27,10 +25,8 @@ define void @shufflevector_vpermi_v4f32(ptr %res, ptr %a, ptr %b) nounwind { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vld $vr0, $a1, 0 ; CHECK-NEXT: vld $vr1, $a2, 0 -; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI1_0) -; CHECK-NEXT: vld $vr2, $a1, %pc_lo12(.LCPI1_0) -; CHECK-NEXT: vshuf.w $vr2, $vr1, $vr0 -; CHECK-NEXT: vst $vr2, $a0, 0 +; CHECK-NEXT: vpermi.w $vr1, $vr0, 141 +; CHECK-NEXT: vst $vr1, $a0, 0 ; CHECK-NEXT: ret entry: %va = load <4 x float>, ptr %a diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vshuf.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vshuf.ll index b13433ee5d159..d9c8563e8a9b6 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vshuf.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vshuf.ll @@ -30,10 +30,7 @@ define <8 x i16> @shufflevector_v8i16(<8 x i16> %a, <8 x i16> %b) { define <4 x i32> @shufflevector_v4i32(<4 x i32> %a, <4 x i32> %b) { ; CHECK-LABEL: shufflevector_v4i32: ; CHECK: # %bb.0: -; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI2_0) -; CHECK-NEXT: vld $vr2, $a0, %pc_lo12(.LCPI2_0) -; CHECK-NEXT: vshuf.w $vr2, $vr1, $vr0 -; CHECK-NEXT: vori.b $vr0, $vr2, 0 +; CHECK-NEXT: vpermi.w $vr0, $vr1, 220 ; CHECK-NEXT: ret %c = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> ret <4 x i32> %c @@ -53,10 +50,7 
@@ define <2 x i64> @shufflevector_v2i64(<2 x i64> %a, <2 x i64> %b) { define <4 x float> @shufflevector_v4f32(<4 x float> %a, <4 x float> %b) { ; CHECK-LABEL: shufflevector_v4f32: ; CHECK: # %bb.0: -; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI4_0) -; CHECK-NEXT: vld $vr2, $a0, %pc_lo12(.LCPI4_0) -; CHECK-NEXT: vshuf.w $vr2, $vr1, $vr0 -; CHECK-NEXT: vori.b $vr0, $vr2, 0 +; CHECK-NEXT: vpermi.w $vr0, $vr1, 220 ; CHECK-NEXT: ret %c = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> ret <4 x float> %c diff --git a/llvm/test/CodeGen/LoongArch/lsx/widen-shuffle-mask.ll b/llvm/test/CodeGen/LoongArch/lsx/widen-shuffle-mask.ll index 42ef9133bf04d..83359e3340bde 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/widen-shuffle-mask.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/widen-shuffle-mask.ll @@ -29,7 +29,7 @@ define <16 x i8> @widen_shuffle_mask_v16i8_to_v4i32(<16 x i8> %a, <16 x i8> %b) define <16 x i8> @widen_shuffle_mask_v16i8_to_v2i64(<16 x i8> %a, <16 x i8> %b) { ; CHECK-LABEL: widen_shuffle_mask_v16i8_to_v2i64: ; CHECK: # %bb.0: -; CHECK-NEXT: vshuf4i.d $vr0, $vr1, 12 +; CHECK-NEXT: vpermi.w $vr0, $vr1, 228 ; CHECK-NEXT: ret %r = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> ret <16 x i8> %r @@ -50,7 +50,7 @@ define <8 x i16> @widen_shuffle_mask_v8i16_to_v4i32(<8 x i16> %a, <8 x i16> %b) define <8 x i16> @widen_shuffle_mask_v8i16_to_v2i64(<8 x i16> %a, <8 x i16> %b) { ; CHECK-LABEL: widen_shuffle_mask_v8i16_to_v2i64: ; CHECK: # %bb.0: -; CHECK-NEXT: vshuf4i.d $vr0, $vr1, 12 +; CHECK-NEXT: vpermi.w $vr0, $vr1, 228 ; CHECK-NEXT: ret %r = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %r @@ -59,7 +59,7 @@ define <8 x i16> @widen_shuffle_mask_v8i16_to_v2i64(<8 x i16> %a, <8 x i16> %b) define <4 x i32> @widen_shuffle_mask_v4i32_to_v2i64(<4 x i32> %a, <4 x i32> %b) { ; CHECK-LABEL: widen_shuffle_mask_v4i32_to_v2i64: ; CHECK: # %bb.0: -; CHECK-NEXT: vshuf4i.d $vr0, $vr1, 12 +; CHECK-NEXT: vpermi.w $vr0, $vr1, 228 ; CHECK-NEXT: ret %r = 
shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> ret <4 x i32> %r From a967cb69fd5504c89b49cd6f567c5bdc724dbff4 Mon Sep 17 00:00:00 2001 From: Qi Zhao Date: Fri, 24 Oct 2025 19:22:59 +0800 Subject: [PATCH 2/2] fix --- llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp | 4 ++-- .../LoongArch/lasx/ir-instruction/shuffle-as-xvpermi.ll | 8 ++++---- .../LoongArch/lasx/ir-instruction/shuffle-as-xvshuf.ll | 6 +++--- .../LoongArch/lsx/ir-instruction/shuffle-as-vpermi.ll | 8 ++++---- .../LoongArch/lsx/ir-instruction/shuffle-as-vshuf.ll | 6 ++++-- llvm/test/CodeGen/LoongArch/lsx/widen-shuffle-mask.ll | 9 ++++++--- 6 files changed, 23 insertions(+), 18 deletions(-) diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp index fa9e71ae7e52b..012fe4aed1242 100644 --- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp @@ -2079,7 +2079,7 @@ static SDValue lowerVECTOR_SHUFFLE_VPERMI(const SDLoc &DL, ArrayRef Mask, if (!buildVPERMIInfo(Mask, V1, V2, SrcVec, MaskImm)) return SDValue(); - return DAG.getNode(LoongArchISD::VPERMI, DL, VT, SrcVec[0], SrcVec[1], + return DAG.getNode(LoongArchISD::VPERMI, DL, VT, SrcVec[1], SrcVec[0], DAG.getConstant(MaskImm, DL, Subtarget.getGRLenVT())); } @@ -2256,7 +2256,7 @@ lowerVECTOR_SHUFFLE_XVPERMI(const SDLoc &DL, ArrayRef Mask, MVT VT, if (!buildVPERMIInfo(Mask, V1, V2, SrcVec, MaskImm)) return SDValue(); - return DAG.getNode(LoongArchISD::VPERMI, DL, VT, SrcVec[0], SrcVec[1], + return DAG.getNode(LoongArchISD::VPERMI, DL, VT, SrcVec[1], SrcVec[0], DAG.getConstant(MaskImm, DL, GRLenVT)); } diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvpermi.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvpermi.ll index 372e053ae64f0..a2e3a280f8db4 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvpermi.ll +++ 
b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvpermi.ll @@ -8,8 +8,8 @@ define void @shufflevector_xvpermi_v8i32(ptr %res, ptr %a, ptr %b) nounwind { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xvld $xr0, $a1, 0 ; CHECK-NEXT: xvld $xr1, $a2, 0 -; CHECK-NEXT: xvpermi.w $xr0, $xr1, 78 -; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: xvpermi.w $xr1, $xr0, 78 +; CHECK-NEXT: xvst $xr1, $a0, 0 ; CHECK-NEXT: ret entry: %va = load <8 x i32>, ptr %a @@ -25,8 +25,8 @@ define void @shufflevector_xvpermi_v8f32(ptr %res, ptr %a, ptr %b) nounwind { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xvld $xr0, $a1, 0 ; CHECK-NEXT: xvld $xr1, $a2, 0 -; CHECK-NEXT: xvpermi.w $xr1, $xr0, 141 -; CHECK-NEXT: xvst $xr1, $a0, 0 +; CHECK-NEXT: xvpermi.w $xr0, $xr1, 141 +; CHECK-NEXT: xvst $xr0, $a0, 0 ; CHECK-NEXT: ret entry: %va = load <8 x float>, ptr %a diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvshuf.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvshuf.ll index a9bdbf9525df8..008e31227efd0 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvshuf.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvshuf.ll @@ -35,9 +35,9 @@ define <16 x i16> @shufflevector_v16i16(<16 x i16> %a, <16 x i16> %b) { define <8 x i32> @shufflevector_v8i32(<8 x i32> %a, <8 x i32> %b) { ; CHECK-LABEL: shufflevector_v8i32: ; CHECK: # %bb.0: -; CHECK-NEXT: xvpermi.d $xr2, $xr0, 68 -; CHECK-NEXT: xvpermi.d $xr0, $xr1, 68 -; CHECK-NEXT: xvpermi.w $xr0, $xr2, 180 +; CHECK-NEXT: xvpermi.d $xr1, $xr1, 68 +; CHECK-NEXT: xvpermi.d $xr0, $xr0, 68 +; CHECK-NEXT: xvpermi.w $xr0, $xr1, 180 ; CHECK-NEXT: ret %c = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> ret <8 x i32> %c diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vpermi.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vpermi.ll index 39ca00def281b..994515152d738 100644 --- 
a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vpermi.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vpermi.ll @@ -8,8 +8,8 @@ define void @shufflevector_vpermi_v4i32(ptr %res, ptr %a, ptr %b) nounwind { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vld $vr0, $a1, 0 ; CHECK-NEXT: vld $vr1, $a2, 0 -; CHECK-NEXT: vpermi.w $vr0, $vr1, 78 -; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: vpermi.w $vr1, $vr0, 78 +; CHECK-NEXT: vst $vr1, $a0, 0 ; CHECK-NEXT: ret entry: %va = load <4 x i32>, ptr %a @@ -25,8 +25,8 @@ define void @shufflevector_vpermi_v4f32(ptr %res, ptr %a, ptr %b) nounwind { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vld $vr0, $a1, 0 ; CHECK-NEXT: vld $vr1, $a2, 0 -; CHECK-NEXT: vpermi.w $vr1, $vr0, 141 -; CHECK-NEXT: vst $vr1, $a0, 0 +; CHECK-NEXT: vpermi.w $vr0, $vr1, 141 +; CHECK-NEXT: vst $vr0, $a0, 0 ; CHECK-NEXT: ret entry: %va = load <4 x float>, ptr %a diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vshuf.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vshuf.ll index d9c8563e8a9b6..7cdc085c6e5c4 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vshuf.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vshuf.ll @@ -30,7 +30,8 @@ define <8 x i16> @shufflevector_v8i16(<8 x i16> %a, <8 x i16> %b) { define <4 x i32> @shufflevector_v4i32(<4 x i32> %a, <4 x i32> %b) { ; CHECK-LABEL: shufflevector_v4i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vpermi.w $vr0, $vr1, 220 +; CHECK-NEXT: vpermi.w $vr1, $vr0, 220 +; CHECK-NEXT: vori.b $vr0, $vr1, 0 ; CHECK-NEXT: ret %c = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> ret <4 x i32> %c @@ -50,7 +51,8 @@ define <2 x i64> @shufflevector_v2i64(<2 x i64> %a, <2 x i64> %b) { define <4 x float> @shufflevector_v4f32(<4 x float> %a, <4 x float> %b) { ; CHECK-LABEL: shufflevector_v4f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vpermi.w $vr0, $vr1, 220 +; CHECK-NEXT: vpermi.w $vr1, $vr0, 220 +; CHECK-NEXT: vori.b $vr0, $vr1, 0 ; 
CHECK-NEXT: ret %c = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> ret <4 x float> %c diff --git a/llvm/test/CodeGen/LoongArch/lsx/widen-shuffle-mask.ll b/llvm/test/CodeGen/LoongArch/lsx/widen-shuffle-mask.ll index 83359e3340bde..4e04bf3dc4f7f 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/widen-shuffle-mask.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/widen-shuffle-mask.ll @@ -29,7 +29,8 @@ define <16 x i8> @widen_shuffle_mask_v16i8_to_v4i32(<16 x i8> %a, <16 x i8> %b) define <16 x i8> @widen_shuffle_mask_v16i8_to_v2i64(<16 x i8> %a, <16 x i8> %b) { ; CHECK-LABEL: widen_shuffle_mask_v16i8_to_v2i64: ; CHECK: # %bb.0: -; CHECK-NEXT: vpermi.w $vr0, $vr1, 228 +; CHECK-NEXT: vpermi.w $vr1, $vr0, 228 +; CHECK-NEXT: vori.b $vr0, $vr1, 0 ; CHECK-NEXT: ret %r = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> ret <16 x i8> %r @@ -50,7 +51,8 @@ define <8 x i16> @widen_shuffle_mask_v8i16_to_v4i32(<8 x i16> %a, <8 x i16> %b) define <8 x i16> @widen_shuffle_mask_v8i16_to_v2i64(<8 x i16> %a, <8 x i16> %b) { ; CHECK-LABEL: widen_shuffle_mask_v8i16_to_v2i64: ; CHECK: # %bb.0: -; CHECK-NEXT: vpermi.w $vr0, $vr1, 228 +; CHECK-NEXT: vpermi.w $vr1, $vr0, 228 +; CHECK-NEXT: vori.b $vr0, $vr1, 0 ; CHECK-NEXT: ret %r = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %r @@ -59,7 +61,8 @@ define <8 x i16> @widen_shuffle_mask_v8i16_to_v2i64(<8 x i16> %a, <8 x i16> %b) define <4 x i32> @widen_shuffle_mask_v4i32_to_v2i64(<4 x i32> %a, <4 x i32> %b) { ; CHECK-LABEL: widen_shuffle_mask_v4i32_to_v2i64: ; CHECK: # %bb.0: -; CHECK-NEXT: vpermi.w $vr0, $vr1, 228 +; CHECK-NEXT: vpermi.w $vr1, $vr0, 228 +; CHECK-NEXT: vori.b $vr0, $vr1, 0 ; CHECK-NEXT: ret %r = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> ret <4 x i32> %r