-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[LoongArch] Custom legalize vector_shuffle to xvpermi.d when possible #160429
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-backend-loongarch Author: ZhaoQi (zhaoqi5). Changes — full diff: https://github.com/llvm/llvm-project/pull/160429.diff (4 files affected):
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index c264a8a0f6a54..37b3992cd0e0a 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -2088,6 +2088,26 @@ lowerVECTOR_SHUFFLE_XVSHUF4I(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
return lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG, Subtarget);
}
+/// Lower VECTOR_SHUFFLE into XVPERMI (if possible).
+///
+/// Only the 64-bit-element form (XVPERMI_D) is handled, i.e. a 4-element mask
+/// on v4i64 / v4f64. Result element i is selected from \p V1 by a 2-bit field
+/// stored at bits [2*i+1 : 2*i] of the immediate operand.
+///
+/// \p V2 is not read; the parameter is kept so the signature matches the
+/// sibling lowerVECTOR_SHUFFLE_* helpers.
+///
+/// \returns the XVPERMI node, or an empty SDValue when the type is not
+/// handled or the mask cannot be encoded (an index outside [0, 3]).
+static SDValue
+lowerVECTOR_SHUFFLE_XVPERMI(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
+                            SDValue V1, SDValue V2, SelectionDAG &DAG,
+                            const LoongArchSubtarget &Subtarget) {
+  // Only consider XVPERMI_D.
+  if (Mask.size() != 4 || (VT != MVT::v4i64 && VT != MVT::v4f64))
+    return SDValue();
+
+  unsigned MaskImm = 0;
+  for (unsigned i = 0; i < Mask.size(); ++i) {
+    const int M = Mask[i];
+    // Undefined element: leave its 2-bit field as zero.
+    if (M == -1)
+      continue;
+    // Each field is only 2 bits wide and indexes V1. An index that refers to
+    // V2 (>= 4) or any other negative value would spill into the neighbouring
+    // fields and encode a wrong permutation, so bail out instead.
+    if (M < 0 || M >= 4)
+      return SDValue();
+    MaskImm |= static_cast<unsigned>(M) << (i * 2);
+  }
+
+  return DAG.getNode(LoongArchISD::XVPERMI, DL, VT, V1,
+                     DAG.getConstant(MaskImm, DL, Subtarget.getGRLenVT()));
+}
+
/// Lower VECTOR_SHUFFLE into XVPERM (if possible).
static SDValue lowerVECTOR_SHUFFLE_XVPERM(const SDLoc &DL, ArrayRef<int> Mask,
MVT VT, SDValue V1, SDValue V2,
@@ -2534,6 +2554,9 @@ static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
if ((Result = lowerVECTOR_SHUFFLE_XVSHUF4I(DL, Mask, VT, V1, V2, DAG,
Subtarget)))
return Result;
+ if ((Result =
+ lowerVECTOR_SHUFFLE_XVPERMI(DL, Mask, VT, V1, V2, DAG, Subtarget)))
+ return Result;
if ((Result = lowerVECTOR_SHUFFLE_XVPERM(DL, Mask, VT, V1, V2, DAG)))
return Result;
diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fix-xvshuf.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fix-xvshuf.ll
index 30539427a1a0a..0b8015ddbdd4a 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fix-xvshuf.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fix-xvshuf.ll
@@ -7,13 +7,12 @@
define <4 x double> @shufflevector_v4f64(<4 x double> %a, <4 x double> %b) {
; CHECK-LABEL: shufflevector_v4f64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xvpickve.d $xr2, $xr1, 3
-; CHECK-NEXT: xvpermi.d $xr3, $xr0, 238
-; CHECK-NEXT: xvrepl128vei.d $xr3, $xr3, 1
-; CHECK-NEXT: vextrins.d $vr3, $vr2, 16
+; CHECK-NEXT: xvpermi.d $xr2, $xr0, 3
+; CHECK-NEXT: xvpickve.d $xr3, $xr1, 3
+; CHECK-NEXT: vextrins.d $vr2, $vr3, 16
; CHECK-NEXT: xvpickve.d $xr1, $xr1, 2
; CHECK-NEXT: vextrins.d $vr0, $vr1, 16
-; CHECK-NEXT: xvpermi.q $xr0, $xr3, 2
+; CHECK-NEXT: xvpermi.q $xr0, $xr2, 2
; CHECK-NEXT: ret
entry:
%c = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 6, i32 3, i32 7>
diff --git a/llvm/test/CodeGen/LoongArch/lasx/shuffle-as-permute-and-shuffle.ll b/llvm/test/CodeGen/LoongArch/lasx/shuffle-as-permute-and-shuffle.ll
index 5f76d9951df9c..ee1e9f4ce4e5c 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/shuffle-as-permute-and-shuffle.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/shuffle-as-permute-and-shuffle.ll
@@ -5,11 +5,8 @@ define <32 x i8> @shuffle_v32i8(<32 x i8> %a) {
; CHECK-LABEL: shuffle_v32i8:
; CHECK: # %bb.0:
; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI0_0)
-; CHECK-NEXT: xvld $xr2, $a0, %pc_lo12(.LCPI0_0)
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI0_1)
-; CHECK-NEXT: xvld $xr1, $a0, %pc_lo12(.LCPI0_1)
-; CHECK-NEXT: xvpermi.d $xr3, $xr0, 78
-; CHECK-NEXT: xvshuf.d $xr2, $xr0, $xr3
+; CHECK-NEXT: xvld $xr1, $a0, %pc_lo12(.LCPI0_0)
+; CHECK-NEXT: xvpermi.d $xr2, $xr0, 78
; CHECK-NEXT: xvshuf.h $xr1, $xr2, $xr0
; CHECK-NEXT: xvori.b $xr0, $xr1, 0
; CHECK-NEXT: ret
@@ -33,11 +30,8 @@ define <16 x i16> @shuffle_v16i16(<16 x i16> %a) {
; CHECK-LABEL: shuffle_v16i16:
; CHECK: # %bb.0:
; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI2_0)
-; CHECK-NEXT: xvld $xr2, $a0, %pc_lo12(.LCPI2_0)
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI2_1)
-; CHECK-NEXT: xvld $xr1, $a0, %pc_lo12(.LCPI2_1)
-; CHECK-NEXT: xvpermi.d $xr3, $xr0, 78
-; CHECK-NEXT: xvshuf.d $xr2, $xr0, $xr3
+; CHECK-NEXT: xvld $xr1, $a0, %pc_lo12(.LCPI2_0)
+; CHECK-NEXT: xvpermi.d $xr2, $xr0, 78
; CHECK-NEXT: xvshuf.w $xr1, $xr2, $xr0
; CHECK-NEXT: xvori.b $xr0, $xr1, 0
; CHECK-NEXT: ret
@@ -71,10 +65,7 @@ define <8 x i32> @shuffle_v8i32(<8 x i32> %a) {
define <8 x i32> @shuffle_v8i32_same_lane(<8 x i32> %a) {
; CHECK-LABEL: shuffle_v8i32_same_lane:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_0)
-; CHECK-NEXT: xvld $xr1, $a0, %pc_lo12(.LCPI5_0)
-; CHECK-NEXT: xvshuf.d $xr1, $xr0, $xr0
-; CHECK-NEXT: xvori.b $xr0, $xr1, 0
+; CHECK-NEXT: xvpermi.d $xr0, $xr0, 225
; CHECK-NEXT: ret
%shuffle = shufflevector <8 x i32> %a, <8 x i32> poison, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 4, i32 5, i32 6, i32 7>
ret <8 x i32> %shuffle
@@ -83,14 +74,7 @@ define <8 x i32> @shuffle_v8i32_same_lane(<8 x i32> %a) {
define <4 x i64> @shuffle_v4i64(<4 x i64> %a) {
; CHECK-LABEL: shuffle_v4i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI6_0)
-; CHECK-NEXT: xvld $xr2, $a0, %pc_lo12(.LCPI6_0)
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI6_1)
-; CHECK-NEXT: xvld $xr1, $a0, %pc_lo12(.LCPI6_1)
-; CHECK-NEXT: xvpermi.d $xr3, $xr0, 78
-; CHECK-NEXT: xvshuf.d $xr2, $xr0, $xr3
-; CHECK-NEXT: xvshuf.d $xr1, $xr2, $xr0
-; CHECK-NEXT: xvori.b $xr0, $xr1, 0
+; CHECK-NEXT: xvpermi.d $xr0, $xr0, 39
; CHECK-NEXT: ret
%shuffle = shufflevector <4 x i64> %a, <4 x i64> poison, <4 x i32> <i32 3, i32 1, i32 2, i32 0>
ret <4 x i64> %shuffle
@@ -99,10 +83,7 @@ define <4 x i64> @shuffle_v4i64(<4 x i64> %a) {
define <4 x i64> @shuffle_v4i64_same_lane(<4 x i64> %a) {
; CHECK-LABEL: shuffle_v4i64_same_lane:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI7_0)
-; CHECK-NEXT: xvld $xr1, $a0, %pc_lo12(.LCPI7_0)
-; CHECK-NEXT: xvshuf.d $xr1, $xr0, $xr0
-; CHECK-NEXT: xvori.b $xr0, $xr1, 0
+; CHECK-NEXT: xvpermi.d $xr0, $xr0, 225
; CHECK-NEXT: ret
%shuffle = shufflevector <4 x i64> %a, <4 x i64> poison, <4 x i32> <i32 1, i32 0, i32 2, i32 3>
ret <4 x i64> %shuffle
@@ -135,14 +116,7 @@ define <8 x float> @shuffle_v8f32_same_lane(<8 x float> %a) {
define <4 x double> @shuffle_v4f64(<4 x double> %a) {
; CHECK-LABEL: shuffle_v4f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI10_0)
-; CHECK-NEXT: xvld $xr2, $a0, %pc_lo12(.LCPI10_0)
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI10_1)
-; CHECK-NEXT: xvld $xr1, $a0, %pc_lo12(.LCPI10_1)
-; CHECK-NEXT: xvpermi.d $xr3, $xr0, 78
-; CHECK-NEXT: xvshuf.d $xr2, $xr0, $xr3
-; CHECK-NEXT: xvshuf.d $xr1, $xr2, $xr0
-; CHECK-NEXT: xvori.b $xr0, $xr1, 0
+; CHECK-NEXT: xvpermi.d $xr0, $xr0, 39
; CHECK-NEXT: ret
%shuffle = shufflevector <4 x double> %a, <4 x double> poison, <4 x i32> <i32 3, i32 1, i32 2, i32 0>
ret <4 x double> %shuffle
@@ -151,11 +125,7 @@ define <4 x double> @shuffle_v4f64(<4 x double> %a) {
define <4 x double> @shuffle_v4f64_same_lane(<4 x double> %a) {
; CHECK-LABEL: shuffle_v4f64_same_lane:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI11_0)
-; CHECK-NEXT: xvld $xr1, $a0, %pc_lo12(.LCPI11_0)
-; CHECK-NEXT: xvpermi.d $xr0, $xr0, 78
-; CHECK-NEXT: xvshuf.d $xr1, $xr0, $xr0
-; CHECK-NEXT: xvori.b $xr0, $xr1, 0
+; CHECK-NEXT: xvpermi.d $xr0, $xr0, 75
; CHECK-NEXT: ret
%shuffle = shufflevector <4 x double> %a, <4 x double> poison, <4 x i32> <i32 3, i32 2, i32 0, i32 1>
ret <4 x double> %shuffle
diff --git a/llvm/test/CodeGen/LoongArch/lasx/vec-shuffle-byte-rotate.ll b/llvm/test/CodeGen/LoongArch/lasx/vec-shuffle-byte-rotate.ll
index b697a2fd07435..eaf33d46a8803 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/vec-shuffle-byte-rotate.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/vec-shuffle-byte-rotate.ll
@@ -126,9 +126,7 @@ define <4 x i64> @byte_rotate_v4i64_2(<4 x i64> %a, <4 x i64> %b) nounwind {
define <4 x i64> @byte_rotate_v4i64_3(<4 x i64> %a) nounwind {
; CHECK-LABEL: byte_rotate_v4i64_3:
; CHECK: # %bb.0:
-; CHECK-NEXT: xvbsrl.v $xr1, $xr0, 8
-; CHECK-NEXT: xvbsll.v $xr0, $xr0, 8
-; CHECK-NEXT: xvor.v $xr0, $xr0, $xr1
+; CHECK-NEXT: xvpermi.d $xr0, $xr0, 177
; CHECK-NEXT: ret
%shuffle = shufflevector <4 x i64> %a, <4 x i64> poison, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
ret <4 x i64> %shuffle
|
86d0a42
to
80abdb7
Compare
6408325
to
47274d0
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM.
✅ With the latest revision this PR passed the C/C++ code formatter. |
LLVM Buildbot has detected a new failure on a builder. Full details are available at: https://lab.llvm.org/buildbot/#/builders/88/builds/16493 — the relevant piece of the build log is included below for reference.
|
No description provided.