diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp index 7bf6493046882..5d4a8fd080202 100644 --- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp @@ -1603,7 +1603,7 @@ static SDValue lowerVECTOR_SHUFFLEAsZeroOrAnyExtend(const SDLoc &DL, /// value is necessary in order to fit the above form. static SDValue lowerVECTOR_SHUFFLE_VREPLVEI(const SDLoc &DL, ArrayRef Mask, MVT VT, - SDValue V1, SDValue V2, SelectionDAG &DAG, + SDValue V1, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget) { int SplatIndex = -1; for (const auto &M : Mask) { @@ -1996,8 +1996,8 @@ static SDValue lower128BitShuffle(const SDLoc &DL, ArrayRef Mask, MVT VT, SDValue Result; // TODO: Add more comparison patterns. if (V2.isUndef()) { - if ((Result = lowerVECTOR_SHUFFLE_VREPLVEI(DL, Mask, VT, V1, V2, DAG, - Subtarget))) + if ((Result = + lowerVECTOR_SHUFFLE_VREPLVEI(DL, Mask, VT, V1, DAG, Subtarget))) return Result; if ((Result = lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG, Subtarget))) @@ -2053,7 +2053,7 @@ static SDValue lower128BitShuffle(const SDLoc &DL, ArrayRef Mask, MVT VT, /// value is necessary in order to fit the above form. static SDValue lowerVECTOR_SHUFFLE_XVREPLVEI(const SDLoc &DL, ArrayRef Mask, MVT VT, - SDValue V1, SDValue V2, SelectionDAG &DAG, + SDValue V1, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget) { int SplatIndex = -1; for (const auto &M : Mask) { @@ -2096,10 +2096,29 @@ lowerVECTOR_SHUFFLE_XVSHUF4I(const SDLoc &DL, ArrayRef Mask, MVT VT, return lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG, Subtarget); } +/// Lower VECTOR_SHUFFLE into XVPERMI (if possible). +static SDValue +lowerVECTOR_SHUFFLE_XVPERMI(const SDLoc &DL, ArrayRef Mask, MVT VT, + SDValue V1, SelectionDAG &DAG, + const LoongArchSubtarget &Subtarget) { + // Only consider XVPERMI_D. + if (Mask.size() != 4 || (VT != MVT::v4i64 && VT != MVT::v4f64)) + return SDValue(); + + unsigned MaskImm = 0; + for (unsigned i = 0; i < Mask.size(); ++i) { + if (Mask[i] == -1) + continue; + MaskImm |= Mask[i] << (i * 2); + } + + return DAG.getNode(LoongArchISD::XVPERMI, DL, VT, V1, + DAG.getConstant(MaskImm, DL, Subtarget.getGRLenVT())); +} + /// Lower VECTOR_SHUFFLE into XVPERM (if possible). static SDValue lowerVECTOR_SHUFFLE_XVPERM(const SDLoc &DL, ArrayRef Mask, - MVT VT, SDValue V1, SDValue V2, - SelectionDAG &DAG, + MVT VT, SDValue V1, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget) { // LoongArch LASX only have XVPERM_W. if (Mask.size() != 8 || (VT != MVT::v8i32 && VT != MVT::v8f32)) @@ -2540,14 +2559,16 @@ static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef Mask, MVT VT, SDValue Result; // TODO: Add more comparison patterns. if (V2.isUndef()) { - if ((Result = lowerVECTOR_SHUFFLE_XVREPLVEI(DL, Mask, VT, V1, V2, DAG, - Subtarget))) + if ((Result = + lowerVECTOR_SHUFFLE_XVREPLVEI(DL, Mask, VT, V1, DAG, Subtarget))) return Result; if ((Result = lowerVECTOR_SHUFFLE_XVSHUF4I(DL, Mask, VT, V1, V2, DAG, Subtarget))) return Result; if ((Result = - lowerVECTOR_SHUFFLE_XVPERM(DL, Mask, VT, V1, V2, DAG, Subtarget))) + lowerVECTOR_SHUFFLE_XVPERMI(DL, Mask, VT, V1, DAG, Subtarget))) + return Result; + if ((Result = lowerVECTOR_SHUFFLE_XVPERM(DL, Mask, VT, V1, DAG, Subtarget))) return Result; // TODO: This comment may be enabled in the future to better match the diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fix-xvshuf.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fix-xvshuf.ll index 30539427a1a0a..0b8015ddbdd4a 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fix-xvshuf.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fix-xvshuf.ll @@ -7,13 +7,12 @@ define <4 x double> @shufflevector_v4f64(<4 x double> %a, <4 x double> %b) { ; CHECK-LABEL: shufflevector_v4f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xvpickve.d $xr2, $xr1, 3 -; CHECK-NEXT: xvpermi.d $xr3, $xr0, 238 -; CHECK-NEXT: xvrepl128vei.d $xr3, $xr3, 1 -; CHECK-NEXT: vextrins.d $vr3, $vr2, 16 +; CHECK-NEXT: xvpermi.d $xr2, $xr0, 3 +; CHECK-NEXT: xvpickve.d $xr3, $xr1, 3 +; CHECK-NEXT: vextrins.d $vr2, $vr3, 16 ; CHECK-NEXT: xvpickve.d $xr1, $xr1, 2 ; CHECK-NEXT: vextrins.d $vr0, $vr1, 16 -; CHECK-NEXT: xvpermi.q $xr0, $xr3, 2 +; CHECK-NEXT: xvpermi.q $xr0, $xr2, 2 ; CHECK-NEXT: ret entry: %c = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> diff --git a/llvm/test/CodeGen/LoongArch/lasx/shuffle-as-permute-and-shuffle.ll b/llvm/test/CodeGen/LoongArch/lasx/shuffle-as-permute-and-shuffle.ll index 24f1b31702b71..245f76472b844 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/shuffle-as-permute-and-shuffle.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/shuffle-as-permute-and-shuffle.ll @@ -6,11 +6,8 @@ define <32 x i8> @shuffle_v32i8(<32 x i8> %a) { ; CHECK-LABEL: shuffle_v32i8: ; CHECK: # %bb.0: ; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI0_0) -; CHECK-NEXT: xvld $xr2, $a0, %pc_lo12(.LCPI0_0) -; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI0_1) -; CHECK-NEXT: xvld $xr1, $a0, %pc_lo12(.LCPI0_1) -; CHECK-NEXT: xvpermi.d $xr3, $xr0, 78 -; CHECK-NEXT: xvshuf.d $xr2, $xr0, $xr3 +; CHECK-NEXT: xvld $xr1, $a0, %pc_lo12(.LCPI0_0) +; CHECK-NEXT: xvpermi.d $xr2, $xr0, 78 ; CHECK-NEXT: xvshuf.h $xr1, $xr2, $xr0 ; CHECK-NEXT: xvori.b $xr0, $xr1, 0 ; CHECK-NEXT: ret @@ -34,11 +31,8 @@ define <16 x i16> @shuffle_v16i16(<16 x i16> %a) { ; CHECK-LABEL: shuffle_v16i16: ; CHECK: # %bb.0: ; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI2_0) -; CHECK-NEXT: xvld $xr2, $a0, %pc_lo12(.LCPI2_0) -; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI2_1) -; CHECK-NEXT: xvld $xr1, $a0, %pc_lo12(.LCPI2_1) -; CHECK-NEXT: xvpermi.d $xr3, $xr0, 78 -; CHECK-NEXT: xvshuf.d $xr2, $xr0, $xr3 +; CHECK-NEXT: xvld $xr1, $a0, %pc_lo12(.LCPI2_0) +; CHECK-NEXT: xvpermi.d $xr2, $xr0, 78 ; CHECK-NEXT: xvshuf.w $xr1, $xr2, $xr0 ; CHECK-NEXT: xvori.b $xr0, $xr1, 0 ; CHECK-NEXT: ret @@ -72,10 +66,7 @@ define <8 x i32> @shuffle_v8i32(<8 x i32> %a) { define <8 x i32> @shuffle_v8i32_same_lane(<8 x i32> %a) { ; CHECK-LABEL: shuffle_v8i32_same_lane: ; CHECK: # %bb.0: -; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_0) -; CHECK-NEXT: xvld $xr1, $a0, %pc_lo12(.LCPI5_0) -; CHECK-NEXT: xvshuf.d $xr1, $xr0, $xr0 -; CHECK-NEXT: xvori.b $xr0, $xr1, 0 +; CHECK-NEXT: xvpermi.d $xr0, $xr0, 225 ; CHECK-NEXT: ret %shuffle = shufflevector <8 x i32> %a, <8 x i32> poison, <8 x i32> ret <8 x i32> %shuffle @@ -84,14 +75,7 @@ define <8 x i32> @shuffle_v8i32_same_lane(<8 x i32> %a) { define <4 x i64> @shuffle_v4i64(<4 x i64> %a) { ; CHECK-LABEL: shuffle_v4i64: ; CHECK: # %bb.0: -; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI6_0) -; CHECK-NEXT: xvld $xr2, $a0, %pc_lo12(.LCPI6_0) -; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI6_1) -; CHECK-NEXT: xvld $xr1, $a0, %pc_lo12(.LCPI6_1) -; CHECK-NEXT: xvpermi.d $xr3, $xr0, 78 -; CHECK-NEXT: xvshuf.d $xr2, $xr0, $xr3 -; CHECK-NEXT: xvshuf.d $xr1, $xr2, $xr0 -; CHECK-NEXT: xvori.b $xr0, $xr1, 0 +; CHECK-NEXT: xvpermi.d $xr0, $xr0, 39 ; CHECK-NEXT: ret %shuffle = shufflevector <4 x i64> %a, <4 x i64> poison, <4 x i32> ret <4 x i64> %shuffle @@ -100,10 +84,7 @@ define <4 x i64> @shuffle_v4i64(<4 x i64> %a) { define <4 x i64> @shuffle_v4i64_same_lane(<4 x i64> %a) { ; CHECK-LABEL: shuffle_v4i64_same_lane: ; CHECK: # %bb.0: -; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI7_0) -; CHECK-NEXT: xvld $xr1, $a0, %pc_lo12(.LCPI7_0) -; CHECK-NEXT: xvshuf.d $xr1, $xr0, $xr0 -; CHECK-NEXT: xvori.b $xr0, $xr1, 0 +; CHECK-NEXT: xvpermi.d $xr0, $xr0, 225 ; CHECK-NEXT: ret %shuffle = shufflevector <4 x i64> %a, <4 x i64> poison, <4 x i32> ret <4 x i64> %shuffle @@ -136,14 +117,7 @@ define <8 x float> @shuffle_v8f32_same_lane(<8 x float> %a) { define <4 x double> @shuffle_v4f64(<4 x double> %a) { ; CHECK-LABEL: shuffle_v4f64: ; CHECK: # %bb.0: -; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI10_0) -; CHECK-NEXT: xvld $xr2, $a0, %pc_lo12(.LCPI10_0) -; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI10_1) -; CHECK-NEXT: xvld $xr1, $a0, %pc_lo12(.LCPI10_1) -; CHECK-NEXT: xvpermi.d $xr3, $xr0, 78 -; CHECK-NEXT: xvshuf.d $xr2, $xr0, $xr3 -; CHECK-NEXT: xvshuf.d $xr1, $xr2, $xr0 -; CHECK-NEXT: xvori.b $xr0, $xr1, 0 +; CHECK-NEXT: xvpermi.d $xr0, $xr0, 39 ; CHECK-NEXT: ret %shuffle = shufflevector <4 x double> %a, <4 x double> poison, <4 x i32> ret <4 x double> %shuffle @@ -152,11 +126,7 @@ define <4 x double> @shuffle_v4f64(<4 x double> %a) { define <4 x double> @shuffle_v4f64_same_lane(<4 x double> %a) { ; CHECK-LABEL: shuffle_v4f64_same_lane: ; CHECK: # %bb.0: -; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI11_0) -; CHECK-NEXT: xvld $xr1, $a0, %pc_lo12(.LCPI11_0) -; CHECK-NEXT: xvpermi.d $xr0, $xr0, 78 -; CHECK-NEXT: xvshuf.d $xr1, $xr0, $xr0 -; CHECK-NEXT: xvori.b $xr0, $xr1, 0 +; CHECK-NEXT: xvpermi.d $xr0, $xr0, 75 ; CHECK-NEXT: ret %shuffle = shufflevector <4 x double> %a, <4 x double> poison, <4 x i32> ret <4 x double> %shuffle diff --git a/llvm/test/CodeGen/LoongArch/lasx/vec-shuffle-byte-rotate.ll b/llvm/test/CodeGen/LoongArch/lasx/vec-shuffle-byte-rotate.ll index c0fa734034114..2007f851129e8 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/vec-shuffle-byte-rotate.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/vec-shuffle-byte-rotate.ll @@ -127,9 +127,7 @@ define <4 x i64> @byte_rotate_v4i64_2(<4 x i64> %a, <4 x i64> %b) nounwind { define <4 x i64> @byte_rotate_v4i64_3(<4 x i64> %a) nounwind { ; CHECK-LABEL: byte_rotate_v4i64_3: ; CHECK: # %bb.0: -; CHECK-NEXT: xvbsrl.v $xr1, $xr0, 8 -; CHECK-NEXT: xvbsll.v $xr0, $xr0, 8 -; CHECK-NEXT: xvor.v $xr0, $xr0, $xr1 +; CHECK-NEXT: xvpermi.d $xr0, $xr0, 177 ; CHECK-NEXT: ret %shuffle = shufflevector <4 x i64> %a, <4 x i64> poison, <4 x i32> ret <4 x i64> %shuffle