Skip to content

Conversation

zhaoqi5
Copy link
Contributor

@zhaoqi5 zhaoqi5 commented Sep 24, 2025

No description provided.

@llvmbot
Copy link
Member

llvmbot commented Sep 24, 2025

@llvm/pr-subscribers-backend-loongarch

Author: ZhaoQi (zhaoqi5)

Changes

Full diff: https://github.com/llvm/llvm-project/pull/160429.diff

4 Files Affected:

  • (modified) llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp (+23)
  • (modified) llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fix-xvshuf.ll (+4-5)
  • (modified) llvm/test/CodeGen/LoongArch/lasx/shuffle-as-permute-and-shuffle.ll (+9-39)
  • (modified) llvm/test/CodeGen/LoongArch/lasx/vec-shuffle-byte-rotate.ll (+1-3)
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index c264a8a0f6a54..37b3992cd0e0a 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -2088,6 +2088,26 @@ lowerVECTOR_SHUFFLE_XVSHUF4I(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
   return lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG, Subtarget);
 }
 
+/// Lower VECTOR_SHUFFLE into XVPERMI (if possible).
+static SDValue
+lowerVECTOR_SHUFFLE_XVPERMI(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
+                            SDValue V1, SDValue V2, SelectionDAG &DAG,
+                            const LoongArchSubtarget &Subtarget) {
+  // Only consider XVPERMI_D.
+  if (Mask.size() != 4 || (VT != MVT::v4i64 && VT != MVT::v4f64))
+    return SDValue();
+
+  unsigned MaskImm = 0;
+  for (unsigned i = 0; i < Mask.size(); ++i) {
+    if (Mask[i] == -1)
+      continue;
+    MaskImm |= Mask[i] << (i * 2);
+  }
+
+  return DAG.getNode(LoongArchISD::XVPERMI, DL, VT, V1,
+                     DAG.getConstant(MaskImm, DL, Subtarget.getGRLenVT()));
+}
+
 /// Lower VECTOR_SHUFFLE into XVPERM (if possible).
 static SDValue lowerVECTOR_SHUFFLE_XVPERM(const SDLoc &DL, ArrayRef<int> Mask,
                                           MVT VT, SDValue V1, SDValue V2,
@@ -2534,6 +2554,9 @@ static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
     if ((Result = lowerVECTOR_SHUFFLE_XVSHUF4I(DL, Mask, VT, V1, V2, DAG,
                                                Subtarget)))
       return Result;
+    if ((Result =
+             lowerVECTOR_SHUFFLE_XVPERMI(DL, Mask, VT, V1, V2, DAG, Subtarget)))
+      return Result;
     if ((Result = lowerVECTOR_SHUFFLE_XVPERM(DL, Mask, VT, V1, V2, DAG)))
       return Result;
 
diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fix-xvshuf.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fix-xvshuf.ll
index 30539427a1a0a..0b8015ddbdd4a 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fix-xvshuf.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fix-xvshuf.ll
@@ -7,13 +7,12 @@
 define <4 x double> @shufflevector_v4f64(<4 x double> %a, <4 x double> %b) {
 ; CHECK-LABEL: shufflevector_v4f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xvpickve.d $xr2, $xr1, 3
-; CHECK-NEXT:    xvpermi.d $xr3, $xr0, 238
-; CHECK-NEXT:    xvrepl128vei.d $xr3, $xr3, 1
-; CHECK-NEXT:    vextrins.d $vr3, $vr2, 16
+; CHECK-NEXT:    xvpermi.d $xr2, $xr0, 3
+; CHECK-NEXT:    xvpickve.d $xr3, $xr1, 3
+; CHECK-NEXT:    vextrins.d $vr2, $vr3, 16
 ; CHECK-NEXT:    xvpickve.d $xr1, $xr1, 2
 ; CHECK-NEXT:    vextrins.d $vr0, $vr1, 16
-; CHECK-NEXT:    xvpermi.q $xr0, $xr3, 2
+; CHECK-NEXT:    xvpermi.q $xr0, $xr2, 2
 ; CHECK-NEXT:    ret
 entry:
   %c = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 6, i32 3, i32 7>
diff --git a/llvm/test/CodeGen/LoongArch/lasx/shuffle-as-permute-and-shuffle.ll b/llvm/test/CodeGen/LoongArch/lasx/shuffle-as-permute-and-shuffle.ll
index 5f76d9951df9c..ee1e9f4ce4e5c 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/shuffle-as-permute-and-shuffle.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/shuffle-as-permute-and-shuffle.ll
@@ -5,11 +5,8 @@ define <32 x i8> @shuffle_v32i8(<32 x i8> %a) {
 ; CHECK-LABEL: shuffle_v32i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI0_0)
-; CHECK-NEXT:    xvld $xr2, $a0, %pc_lo12(.LCPI0_0)
-; CHECK-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI0_1)
-; CHECK-NEXT:    xvld $xr1, $a0, %pc_lo12(.LCPI0_1)
-; CHECK-NEXT:    xvpermi.d $xr3, $xr0, 78
-; CHECK-NEXT:    xvshuf.d $xr2, $xr0, $xr3
+; CHECK-NEXT:    xvld $xr1, $a0, %pc_lo12(.LCPI0_0)
+; CHECK-NEXT:    xvpermi.d $xr2, $xr0, 78
 ; CHECK-NEXT:    xvshuf.h $xr1, $xr2, $xr0
 ; CHECK-NEXT:    xvori.b $xr0, $xr1, 0
 ; CHECK-NEXT:    ret
@@ -33,11 +30,8 @@ define <16 x i16> @shuffle_v16i16(<16 x i16> %a) {
 ; CHECK-LABEL: shuffle_v16i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI2_0)
-; CHECK-NEXT:    xvld $xr2, $a0, %pc_lo12(.LCPI2_0)
-; CHECK-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI2_1)
-; CHECK-NEXT:    xvld $xr1, $a0, %pc_lo12(.LCPI2_1)
-; CHECK-NEXT:    xvpermi.d $xr3, $xr0, 78
-; CHECK-NEXT:    xvshuf.d $xr2, $xr0, $xr3
+; CHECK-NEXT:    xvld $xr1, $a0, %pc_lo12(.LCPI2_0)
+; CHECK-NEXT:    xvpermi.d $xr2, $xr0, 78
 ; CHECK-NEXT:    xvshuf.w $xr1, $xr2, $xr0
 ; CHECK-NEXT:    xvori.b $xr0, $xr1, 0
 ; CHECK-NEXT:    ret
@@ -71,10 +65,7 @@ define <8 x i32> @shuffle_v8i32(<8 x i32> %a) {
 define <8 x i32> @shuffle_v8i32_same_lane(<8 x i32> %a) {
 ; CHECK-LABEL: shuffle_v8i32_same_lane:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI5_0)
-; CHECK-NEXT:    xvld $xr1, $a0, %pc_lo12(.LCPI5_0)
-; CHECK-NEXT:    xvshuf.d $xr1, $xr0, $xr0
-; CHECK-NEXT:    xvori.b $xr0, $xr1, 0
+; CHECK-NEXT:    xvpermi.d $xr0, $xr0, 225
 ; CHECK-NEXT:    ret
   %shuffle = shufflevector <8 x i32> %a, <8 x i32> poison, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 4, i32 5, i32 6, i32 7>
   ret <8 x i32> %shuffle
@@ -83,14 +74,7 @@ define <8 x i32> @shuffle_v8i32_same_lane(<8 x i32> %a) {
 define <4 x i64> @shuffle_v4i64(<4 x i64> %a) {
 ; CHECK-LABEL: shuffle_v4i64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI6_0)
-; CHECK-NEXT:    xvld $xr2, $a0, %pc_lo12(.LCPI6_0)
-; CHECK-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI6_1)
-; CHECK-NEXT:    xvld $xr1, $a0, %pc_lo12(.LCPI6_1)
-; CHECK-NEXT:    xvpermi.d $xr3, $xr0, 78
-; CHECK-NEXT:    xvshuf.d $xr2, $xr0, $xr3
-; CHECK-NEXT:    xvshuf.d $xr1, $xr2, $xr0
-; CHECK-NEXT:    xvori.b $xr0, $xr1, 0
+; CHECK-NEXT:    xvpermi.d $xr0, $xr0, 39
 ; CHECK-NEXT:    ret
   %shuffle = shufflevector <4 x i64> %a, <4 x i64> poison, <4 x i32> <i32 3, i32 1, i32 2, i32 0>
   ret <4 x i64> %shuffle
@@ -99,10 +83,7 @@ define <4 x i64> @shuffle_v4i64(<4 x i64> %a) {
 define <4 x i64> @shuffle_v4i64_same_lane(<4 x i64> %a) {
 ; CHECK-LABEL: shuffle_v4i64_same_lane:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI7_0)
-; CHECK-NEXT:    xvld $xr1, $a0, %pc_lo12(.LCPI7_0)
-; CHECK-NEXT:    xvshuf.d $xr1, $xr0, $xr0
-; CHECK-NEXT:    xvori.b $xr0, $xr1, 0
+; CHECK-NEXT:    xvpermi.d $xr0, $xr0, 225
 ; CHECK-NEXT:    ret
   %shuffle = shufflevector <4 x i64> %a, <4 x i64> poison, <4 x i32> <i32 1, i32 0, i32 2, i32 3>
   ret <4 x i64> %shuffle
@@ -135,14 +116,7 @@ define <8 x float> @shuffle_v8f32_same_lane(<8 x float> %a) {
 define <4 x double> @shuffle_v4f64(<4 x double> %a) {
 ; CHECK-LABEL: shuffle_v4f64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI10_0)
-; CHECK-NEXT:    xvld $xr2, $a0, %pc_lo12(.LCPI10_0)
-; CHECK-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI10_1)
-; CHECK-NEXT:    xvld $xr1, $a0, %pc_lo12(.LCPI10_1)
-; CHECK-NEXT:    xvpermi.d $xr3, $xr0, 78
-; CHECK-NEXT:    xvshuf.d $xr2, $xr0, $xr3
-; CHECK-NEXT:    xvshuf.d $xr1, $xr2, $xr0
-; CHECK-NEXT:    xvori.b $xr0, $xr1, 0
+; CHECK-NEXT:    xvpermi.d $xr0, $xr0, 39
 ; CHECK-NEXT:    ret
   %shuffle = shufflevector <4 x double> %a, <4 x double> poison, <4 x i32> <i32 3, i32 1, i32 2, i32 0>
   ret <4 x double> %shuffle
@@ -151,11 +125,7 @@ define <4 x double> @shuffle_v4f64(<4 x double> %a) {
 define <4 x double> @shuffle_v4f64_same_lane(<4 x double> %a) {
 ; CHECK-LABEL: shuffle_v4f64_same_lane:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI11_0)
-; CHECK-NEXT:    xvld $xr1, $a0, %pc_lo12(.LCPI11_0)
-; CHECK-NEXT:    xvpermi.d $xr0, $xr0, 78
-; CHECK-NEXT:    xvshuf.d $xr1, $xr0, $xr0
-; CHECK-NEXT:    xvori.b $xr0, $xr1, 0
+; CHECK-NEXT:    xvpermi.d $xr0, $xr0, 75
 ; CHECK-NEXT:    ret
   %shuffle = shufflevector <4 x double> %a, <4 x double> poison, <4 x i32> <i32 3, i32 2, i32 0, i32 1>
   ret <4 x double> %shuffle
diff --git a/llvm/test/CodeGen/LoongArch/lasx/vec-shuffle-byte-rotate.ll b/llvm/test/CodeGen/LoongArch/lasx/vec-shuffle-byte-rotate.ll
index b697a2fd07435..eaf33d46a8803 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/vec-shuffle-byte-rotate.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/vec-shuffle-byte-rotate.ll
@@ -126,9 +126,7 @@ define <4 x i64> @byte_rotate_v4i64_2(<4 x i64> %a, <4 x i64> %b) nounwind {
 define <4 x i64> @byte_rotate_v4i64_3(<4 x i64> %a) nounwind {
 ; CHECK-LABEL: byte_rotate_v4i64_3:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xvbsrl.v $xr1, $xr0, 8
-; CHECK-NEXT:    xvbsll.v $xr0, $xr0, 8
-; CHECK-NEXT:    xvor.v $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvpermi.d $xr0, $xr0, 177
 ; CHECK-NEXT:    ret
     %shuffle = shufflevector <4 x i64> %a, <4 x i64> poison, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
     ret <4 x i64> %shuffle

@zhaoqi5 zhaoqi5 force-pushed the users/zhaoqi5/refine-vectorshuffle-legalizing branch from 86d0a42 to 80abdb7 Compare September 24, 2025 05:06
@zhaoqi5 zhaoqi5 force-pushed the users/zhaoqi5/custom-lowering-xvpermi_d branch from 6408325 to 47274d0 Compare September 24, 2025 05:24
Copy link
Member

@heiher heiher left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM.

Base automatically changed from users/zhaoqi5/refine-vectorshuffle-legalizing to main September 26, 2025 06:38
Copy link

github-actions bot commented Sep 26, 2025

✅ With the latest revision this PR passed the C/C++ code formatter.

@zhaoqi5 zhaoqi5 merged commit beed796 into main Sep 26, 2025
7 of 9 checks passed
@zhaoqi5 zhaoqi5 deleted the users/zhaoqi5/custom-lowering-xvpermi_d branch September 26, 2025 07:34
@llvm-ci
Copy link
Collaborator

llvm-ci commented Sep 26, 2025

LLVM Buildbot has detected a new failure on builder openmp-s390x-linux running on systemz-1 while building llvm at step 6 "test-openmp".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/88/builds/16493

Here is the relevant piece of the build log for the reference
Step 6 (test-openmp) failure: test (failure)
******************** TEST 'libomp :: tasking/issue-94260-2.c' FAILED ********************
Exit Code: -11

Command Output (stdout):
--
# RUN: at line 1
/home/uweigand/sandbox/buildbot/openmp-s390x-linux/llvm.build/./bin/clang -fopenmp   -I /home/uweigand/sandbox/buildbot/openmp-s390x-linux/llvm.build/runtimes/runtimes-bins/openmp/runtime/src -I /home/uweigand/sandbox/buildbot/openmp-s390x-linux/llvm.src/openmp/runtime/test -L /home/uweigand/sandbox/buildbot/openmp-s390x-linux/llvm.build/runtimes/runtimes-bins/openmp/runtime/src  -fno-omit-frame-pointer -mbackchain -I /home/uweigand/sandbox/buildbot/openmp-s390x-linux/llvm.src/openmp/runtime/test/ompt /home/uweigand/sandbox/buildbot/openmp-s390x-linux/llvm.src/openmp/runtime/test/tasking/issue-94260-2.c -o /home/uweigand/sandbox/buildbot/openmp-s390x-linux/llvm.build/runtimes/runtimes-bins/openmp/runtime/test/tasking/Output/issue-94260-2.c.tmp -lm -latomic && /home/uweigand/sandbox/buildbot/openmp-s390x-linux/llvm.build/runtimes/runtimes-bins/openmp/runtime/test/tasking/Output/issue-94260-2.c.tmp
# executed command: /home/uweigand/sandbox/buildbot/openmp-s390x-linux/llvm.build/./bin/clang -fopenmp -I /home/uweigand/sandbox/buildbot/openmp-s390x-linux/llvm.build/runtimes/runtimes-bins/openmp/runtime/src -I /home/uweigand/sandbox/buildbot/openmp-s390x-linux/llvm.src/openmp/runtime/test -L /home/uweigand/sandbox/buildbot/openmp-s390x-linux/llvm.build/runtimes/runtimes-bins/openmp/runtime/src -fno-omit-frame-pointer -mbackchain -I /home/uweigand/sandbox/buildbot/openmp-s390x-linux/llvm.src/openmp/runtime/test/ompt /home/uweigand/sandbox/buildbot/openmp-s390x-linux/llvm.src/openmp/runtime/test/tasking/issue-94260-2.c -o /home/uweigand/sandbox/buildbot/openmp-s390x-linux/llvm.build/runtimes/runtimes-bins/openmp/runtime/test/tasking/Output/issue-94260-2.c.tmp -lm -latomic
# executed command: /home/uweigand/sandbox/buildbot/openmp-s390x-linux/llvm.build/runtimes/runtimes-bins/openmp/runtime/test/tasking/Output/issue-94260-2.c.tmp
# note: command had no output on stdout or stderr
# error: command failed with exit status: -11

--

********************


mahesh-attarde pushed a commit to mahesh-attarde/llvm-project that referenced this pull request Oct 3, 2025
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

4 participants