diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index ca4a655f06587..80c96c6dc8eb6 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -1701,6 +1701,43 @@ lowerVECTOR_SHUFFLE_VSHUF4I(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
                      DAG.getConstant(Imm, DL, GRLenVT));
 }
 
+/// Lower VECTOR_SHUFFLE whose result is the reversed source vector.
+///
+/// It is possible to optimize a VECTOR_SHUFFLE that performs a vector
+/// reverse, i.e. one whose mask is of the form:
+///   <7, 6, 5, 4, 3, 2, 1, 0>
+///
+/// When undefs appear in the mask they are treated as if they were whatever
+/// value is necessary in order to fit the above form.
+static SDValue
+lowerVECTOR_SHUFFLE_IsReverse(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
+                              SDValue V1, SelectionDAG &DAG,
+                              const LoongArchSubtarget &Subtarget) {
+  // Only vectors with i8/i16 elements that cannot match other patterns
+  // directly need this lowering.
+  if (VT != MVT::v16i8 && VT != MVT::v8i16 && VT != MVT::v32i8 &&
+      VT != MVT::v16i16)
+    return SDValue();
+
+  if (!ShuffleVectorInst::isReverseMask(Mask, Mask.size()))
+    return SDValue();
+
+  int WidenNumElts = VT.getVectorNumElements() / 4;
+  SmallVector<int> WidenMask(WidenNumElts, -1);
+  for (int i = 0; i < WidenNumElts; ++i)
+    WidenMask[i] = WidenNumElts - 1 - i;
+
+  MVT WidenVT = MVT::getVectorVT(
+      VT.getVectorElementType() == MVT::i8 ? MVT::i32 : MVT::i64, WidenNumElts);
+  SDValue NewV1 = DAG.getBitcast(WidenVT, V1);
+  SDValue WidenRev = DAG.getVectorShuffle(WidenVT, DL, NewV1,
+                                          DAG.getUNDEF(WidenVT), WidenMask);
+
+  return DAG.getNode(LoongArchISD::VSHUF4I, DL, VT,
+                     DAG.getBitcast(VT, WidenRev),
+                     DAG.getConstant(27, DL, Subtarget.getGRLenVT()));
+}
+
 /// Lower VECTOR_SHUFFLE into VPACKEV (if possible).
 ///
 /// VPACKEV interleaves the even elements from each vector.
@@ -2004,6 +2041,9 @@ static SDValue lower128BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
   if ((Result = lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG,
                                             Subtarget)))
     return Result;
+  if ((Result =
+           lowerVECTOR_SHUFFLE_IsReverse(DL, Mask, VT, V1, DAG, Subtarget)))
+    return Result;
 
   // TODO: This comment may be enabled in the future to better match the
   // pattern for instruction selection.
@@ -2622,6 +2662,9 @@ static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
     return Result;
   if ((Result = lowerVECTOR_SHUFFLE_XVPERM(DL, Mask, VT, V1, DAG, Subtarget)))
     return Result;
+  if ((Result =
+           lowerVECTOR_SHUFFLE_IsReverse(DL, Mask, VT, V1, DAG, Subtarget)))
+    return Result;
 
   // TODO: This comment may be enabled in the future to better match the
   // pattern for instruction selection.
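A note on why the two-step lowering above is a full reverse: the inner shuffle reverses the order of the widened (i32/i64) lanes, i.e. of the 4-element groups, and VSHUF4I with immediate 27 (0b00011011, selecting <3, 2, 1, 0>) then reverses the elements within each group. Below is a minimal standalone C++ sketch of that equivalence for the v16i8 case. It is not LLVM code: simulateVshuf4i is an illustrative name modelling only the observable element permutation, and little-endian lane layout (as on LoongArch) is assumed for the memcpy-based bitcasts.

```cpp
#include <array>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <cstring>

// Models vshuf4i.{b,h,w}: bits [2*K+1:2*K] of Imm pick which element of each
// 4-element group lands in slot K. Imm = 27 (0b00011011) selects <3, 2, 1, 0>,
// i.e. it reverses every 4-element group in place.
template <typename T, std::size_t N>
std::array<T, N> simulateVshuf4i(const std::array<T, N> &Src, std::uint8_t Imm) {
  std::array<T, N> Dst{};
  for (std::size_t I = 0; I < N; ++I) {
    std::size_t Group = I / 4, Slot = I % 4;
    Dst[I] = Src[Group * 4 + ((Imm >> (2 * Slot)) & 3)];
  }
  return Dst;
}

int main() {
  std::array<std::uint8_t, 16> V;
  for (std::size_t I = 0; I < 16; ++I)
    V[I] = I;

  // Step 1: bitcast v16i8 -> v4i32 and reverse the four i32 lanes
  // (the inner shuffle built with WidenMask = <3, 2, 1, 0>).
  std::array<std::uint32_t, 4> W;
  std::memcpy(W.data(), V.data(), 16);
  std::array<std::uint32_t, 4> WRev = {W[3], W[2], W[1], W[0]};

  // Step 2: bitcast back to v16i8 and reverse each 4-byte group with the
  // VSHUF4I immediate 27.
  std::array<std::uint8_t, 16> B;
  std::memcpy(B.data(), WRev.data(), 16);
  std::array<std::uint8_t, 16> Res = simulateVshuf4i(B, 27);

  // Net effect: the fully reversed vector <15, 14, ..., 1, 0>.
  for (std::size_t I = 0; I < 16; ++I)
    assert(Res[I] == 15 - I);
  return 0;
}
```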
diff --git a/llvm/test/CodeGen/LoongArch/lasx/shufflevector-reverse.ll b/llvm/test/CodeGen/LoongArch/lasx/shufflevector-reverse.ll
index b57d90c162c3b..19b9b53c3a3cc 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/shufflevector-reverse.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/shufflevector-reverse.ll
@@ -6,10 +6,9 @@ define void @shufflevector_reverse_v32i8(ptr %res, ptr %a) nounwind {
 ; CHECK-LABEL: shufflevector_reverse_v32i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xvld $xr0, $a1, 0
-; CHECK-NEXT:    pcalau12i $a1, %pc_hi20(.LCPI0_0)
-; CHECK-NEXT:    xvld $xr1, $a1, %pc_lo12(.LCPI0_0)
 ; CHECK-NEXT:    xvpermi.d $xr0, $xr0, 78
-; CHECK-NEXT:    xvshuf.b $xr0, $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvshuf4i.w $xr0, $xr0, 27
+; CHECK-NEXT:    xvshuf4i.b $xr0, $xr0, 27
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -23,11 +22,9 @@ define void @shufflevector_reverse_v16i16(ptr %res, ptr %a) nounwind {
 ; CHECK-LABEL: shufflevector_reverse_v16i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xvld $xr0, $a1, 0
-; CHECK-NEXT:    pcalau12i $a1, %pc_hi20(.LCPI1_0)
-; CHECK-NEXT:    xvld $xr1, $a1, %pc_lo12(.LCPI1_0)
-; CHECK-NEXT:    xvpermi.d $xr0, $xr0, 78
-; CHECK-NEXT:    xvshuf.h $xr1, $xr0, $xr0
-; CHECK-NEXT:    xvst $xr1, $a0, 0
+; CHECK-NEXT:    xvpermi.d $xr0, $xr0, 27
+; CHECK-NEXT:    xvshuf4i.h $xr0, $xr0, 27
+; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
   %va = load <16 x i16>, ptr %a
diff --git a/llvm/test/CodeGen/LoongArch/lsx/shufflevector-reverse.ll b/llvm/test/CodeGen/LoongArch/lsx/shufflevector-reverse.ll
index 29f038adb9765..a7b59e5cbcdaa 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/shufflevector-reverse.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/shufflevector-reverse.ll
@@ -6,9 +6,8 @@ define void @shufflevector_reverse_v16i8(ptr %res, ptr %a) nounwind {
 ; CHECK-LABEL: shufflevector_reverse_v16i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vld $vr0, $a1, 0
-; CHECK-NEXT:    pcalau12i $a1, %pc_hi20(.LCPI0_0)
-; CHECK-NEXT:    vld $vr1, $a1, %pc_lo12(.LCPI0_0)
-; CHECK-NEXT:    vshuf.b $vr0, $vr0, $vr0, $vr1
+; CHECK-NEXT:    vshuf4i.w $vr0, $vr0, 27
+; CHECK-NEXT:    vshuf4i.b $vr0, $vr0, 27
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -22,10 +21,9 @@ define void @shufflevector_reverse_v8i16(ptr %res, ptr %a) nounwind {
 ; CHECK-LABEL: shufflevector_reverse_v8i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vld $vr0, $a1, 0
-; CHECK-NEXT:    pcalau12i $a1, %pc_hi20(.LCPI1_0)
-; CHECK-NEXT:    vld $vr1, $a1, %pc_lo12(.LCPI1_0)
-; CHECK-NEXT:    vshuf.h $vr1, $vr0, $vr0
-; CHECK-NEXT:    vst $vr1, $a0, 0
+; CHECK-NEXT:    vshuf4i.d $vr0, $vr0, 1
+; CHECK-NEXT:    vshuf4i.h $vr0, $vr0, 27
+; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
   %va = load <8 x i16>, ptr %a
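For the v8i16 LSX case, WidenNumElts is 8 / 4 = 2, so the widened shuffle is the v2i64 mask <1, 0>; the checks above show it selected as vshuf4i.d with immediate 1 (swapping the two 64-bit lanes), followed by vshuf4i.h with immediate 27. A matching standalone sketch, under the same assumptions as the previous one (illustrative, non-LLVM code; little-endian layout):

```cpp
#include <array>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <utility>

int main() {
  std::array<std::uint16_t, 8> H;
  for (std::size_t I = 0; I < 8; ++I)
    H[I] = I;

  // vshuf4i.d $vr0, $vr0, 1: swap the two i64 lanes (the widened <1, 0>
  // shuffle that lowerVECTOR_SHUFFLE_IsReverse builds for v8i16).
  std::array<std::uint64_t, 2> D;
  std::memcpy(D.data(), H.data(), 16);
  std::swap(D[0], D[1]);

  // vshuf4i.h $vr0, $vr0, 27: reverse every group of four i16 elements.
  std::array<std::uint16_t, 8> T;
  std::memcpy(T.data(), D.data(), 16);
  std::array<std::uint16_t, 8> Res;
  for (std::size_t I = 0; I < 8; ++I)
    Res[I] = T[(I / 4) * 4 + (3 - I % 4)];

  // Net effect: the fully reversed vector <7, 6, 5, 4, 3, 2, 1, 0>.
  for (std::size_t I = 0; I < 8; ++I)
    assert(Res[I] == 7 - I);
  return 0;
}
```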