// Patch summary (reviewer note; placed ahead of the first "diff --git" header).
//
// Files touched, per the headers below:
//   - llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
//   - llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvshuf4i.ll
//   - llvm/test/CodeGen/LoongArch/lasx/vec-shuffle-byte-rotate.ll
//
// lowerVECTOR_SHUFFLE_XVSHUF4I:
//   The removed lines returned an empty SDValue whenever Mask.size() <= 4.
//   The added lines instead handle Mask.size() == 4 by building a
//   LoongArchISD::VSHUF4I_D node from V1, V2 and a computed immediate; every
//   other size goes to lowerVECTOR_SHUFFLE_VSHUF4I.
//   For i = 1 and then i = 0 the code reads MLo = Mask[i], MHi = Mask[i + 2]:
//     - MLo must be -1, 0..1 or 4..5 and MHi must be -1, 2..3 or 6..7,
//       otherwise an empty SDValue is returned.
//     - When neither is -1, MHi must equal MLo + 2, otherwise an empty
//       SDValue is returned.
//     - Two immediate bits are appended per i (i = 1 lands in bits 3:2,
//       i = 0 in bits 1:0). They come from MLo (0,1 unchanged; 4,5 -> 2,3),
//       or from MHi when MLo is -1 (2,3 -> 0,1; 6,7 -> 2,3), and stay 0 when
//       both are -1.
//   NOTE(review): -1 is presumably the "undefined element" marker; confirm.
//
// lower256BitShuffle:
//   The new call is attempted only for MVT::v4i64 / MVT::v4f64, after
//   lowerVECTOR_SHUFFLE_XVPICKOD and before lowerVECTOR_SHUFFLEAsShift.
//
// Tests:
//   CHECK lines for five functions now expect a single xvshuf4i.d
//   (immediates 9, 6, 9, 3, 1). It replaces three
//   xvbsrl.v/xvbsll.v/xvor.v sequences, one
//   pcalau12i/xvld/xvshuf.d/xvori.b sequence, and one xvpermi.d.
//
// NOTE(review): this copy looks damaged in transit; TODO confirm against the
// original patch before applying:
//   - line breaks and indentation inside the hunks are collapsed;
//   - "ArrayRef Mask" presumably lost a template argument;
//   - each shufflevector ends at "<4 x i32>" with no mask operand following.
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp index ac95ef5f30888..fc9aec6d21714 100644 --- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp @@ -2150,10 +2150,30 @@ static SDValue lowerVECTOR_SHUFFLE_XVSHUF4I(const SDLoc &DL, ArrayRef Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget) { - // When the size is less than or equal to 4, lower cost instructions may be - // used. - if (Mask.size() <= 4) - return SDValue(); + // XVSHUF4I_D must be handled separately because it is different from other + // types of [X]VSHUF4I instructions. + if (Mask.size() == 4) { + unsigned MaskImm = 0; + for (int i = 1; i >= 0; --i) { + int MLo = Mask[i]; + int MHi = Mask[i + 2]; + if (!(MLo == -1 || (MLo >= 0 && MLo <= 1) || (MLo >= 4 && MLo <= 5)) || + !(MHi == -1 || (MHi >= 2 && MHi <= 3) || (MHi >= 6 && MHi <= 7))) + return SDValue(); + if (MHi != -1 && MLo != -1 && MHi != MLo + 2) + return SDValue(); + + MaskImm <<= 2; + if (MLo != -1) + MaskImm |= ((MLo <= 1) ? MLo : (MLo - 2)) & 0x3; + else if (MHi != -1) + MaskImm |= ((MHi <= 3) ? 
(MHi - 2) : (MHi - 4)) & 0x3; + } + + return DAG.getNode(LoongArchISD::VSHUF4I_D, DL, VT, V1, V2, + DAG.getConstant(MaskImm, DL, Subtarget.getGRLenVT())); + } + return lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG, Subtarget); } @@ -2704,6 +2724,10 @@ static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef Mask, MVT VT, return Result; if ((Result = lowerVECTOR_SHUFFLE_XVPICKOD(DL, Mask, VT, V1, V2, DAG))) return Result; + if ((VT.SimpleTy == MVT::v4i64 || VT.SimpleTy == MVT::v4f64) && + (Result = + lowerVECTOR_SHUFFLE_XVSHUF4I(DL, Mask, VT, V1, V2, DAG, Subtarget))) + return Result; if ((Result = lowerVECTOR_SHUFFLEAsShift(DL, Mask, VT, V1, V2, DAG, Subtarget, Zeroable))) return Result; diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvshuf4i.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvshuf4i.ll index 69437a24282b2..a5f876999b1e9 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvshuf4i.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvshuf4i.ll @@ -37,9 +37,7 @@ define <8 x i32> @shufflevector_xvshuf4i_v8i32(<8 x i32> %a, <8 x i32> %b) { define <4 x i64> @shufflevector_xvshuf4i_v4i64(<4 x i64> %a, <4 x i64> %b) { ; CHECK-LABEL: shufflevector_xvshuf4i_v4i64: ; CHECK: # %bb.0: -; CHECK-NEXT: xvbsrl.v $xr0, $xr0, 8 -; CHECK-NEXT: xvbsll.v $xr1, $xr1, 8 -; CHECK-NEXT: xvor.v $xr0, $xr1, $xr0 +; CHECK-NEXT: xvshuf4i.d $xr0, $xr1, 9 ; CHECK-NEXT: ret %c = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> ret <4 x i64> %c @@ -59,10 +57,7 @@ define <8 x float> @shufflevector_xvshuf4i_v8f32(<8 x float> %a, <8 x float> %b) define <4 x double> @shufflevector_xvshuf4i_v4f64(<4 x double> %a, <4 x double> %b) { ; CHECK-LABEL: shufflevector_xvshuf4i_v4f64: ; CHECK: # %bb.0: -; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_0) -; CHECK-NEXT: xvld $xr2, $a0, %pc_lo12(.LCPI5_0) -; CHECK-NEXT: xvshuf.d $xr2, $xr1, $xr0 -; CHECK-NEXT: xvori.b $xr0, $xr2, 0 +; CHECK-NEXT: xvshuf4i.d 
$xr0, $xr1, 6 ; CHECK-NEXT: ret %c = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> ret <4 x double> %c diff --git a/llvm/test/CodeGen/LoongArch/lasx/vec-shuffle-byte-rotate.ll b/llvm/test/CodeGen/LoongArch/lasx/vec-shuffle-byte-rotate.ll index 2007f851129e8..9dfa5030ecfa1 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/vec-shuffle-byte-rotate.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/vec-shuffle-byte-rotate.ll @@ -105,9 +105,7 @@ define <8 x i32> @byte_rotate_v8i32_3(<8 x i32> %a) nounwind { define <4 x i64> @byte_rotate_v4i64_1(<4 x i64> %a, <4 x i64> %b) nounwind { ; CHECK-LABEL: byte_rotate_v4i64_1: ; CHECK: # %bb.0: -; CHECK-NEXT: xvbsrl.v $xr0, $xr0, 8 -; CHECK-NEXT: xvbsll.v $xr1, $xr1, 8 -; CHECK-NEXT: xvor.v $xr0, $xr1, $xr0 +; CHECK-NEXT: xvshuf4i.d $xr0, $xr1, 9 ; CHECK-NEXT: ret %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> ret <4 x i64> %shuffle @@ -116,9 +114,7 @@ define <4 x i64> @byte_rotate_v4i64_1(<4 x i64> %a, <4 x i64> %b) nounwind { define <4 x i64> @byte_rotate_v4i64_2(<4 x i64> %a, <4 x i64> %b) nounwind { ; CHECK-LABEL: byte_rotate_v4i64_2: ; CHECK: # %bb.0: -; CHECK-NEXT: xvbsrl.v $xr1, $xr1, 8 -; CHECK-NEXT: xvbsll.v $xr0, $xr0, 8 -; CHECK-NEXT: xvor.v $xr0, $xr0, $xr1 +; CHECK-NEXT: xvshuf4i.d $xr0, $xr1, 3 ; CHECK-NEXT: ret %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> ret <4 x i64> %shuffle @@ -127,7 +123,7 @@ define <4 x i64> @byte_rotate_v4i64_2(<4 x i64> %a, <4 x i64> %b) nounwind { define <4 x i64> @byte_rotate_v4i64_3(<4 x i64> %a) nounwind { ; CHECK-LABEL: byte_rotate_v4i64_3: ; CHECK: # %bb.0: -; CHECK-NEXT: xvpermi.d $xr0, $xr0, 177 +; CHECK-NEXT: xvshuf4i.d $xr0, $xr0, 1 ; CHECK-NEXT: ret %shuffle = shufflevector <4 x i64> %a, <4 x i64> poison, <4 x i32> ret <4 x i64> %shuffle