
Conversation

Member

@4vtomat 4vtomat commented Nov 13, 2025

This patch supports PADD_W, PSUB_W, PSADD_W, PSADDU_W, PSSUB_W, PSSUBU_W,
PAADD_W and PAADDU_W.
Member

llvmbot commented Nov 13, 2025

@llvm/pr-subscribers-backend-risc-v

Author: Brandon Wu (4vtomat)

Changes

This patch supports PADD_W, PSUB_W, PSADD_W, PSADDU_W, PSSUB_W, PSSUBU_W,
PAADD_W and PAADDU_W.
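
As a quick illustration (a minimal sketch mirroring the tests added below; the function name is hypothetical), IR using the v2i32 saturating-add intrinsic now selects to a single psadd.w on RV64 with the P extension enabled:

  ; Minimal sketch; assumes the experimental P extension is enabled,
  ; matching the RUN configuration of the test file touched below.
  define <2 x i32> @sat_add_example(<2 x i32> %a, <2 x i32> %b) {
    ; @llvm.sadd.sat.v2i32 lowers through the saddsat SD node and now
    ; matches the new PSADD_W pattern.
    %res = call <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32> %a, <2 x i32> %b)
    ret <2 x i32> %res
  }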


Full diff: https://github.com/llvm/llvm-project/pull/167882.diff

2 Files Affected:

  • (modified) llvm/lib/Target/RISCV/RISCVInstrInfoP.td (+14)
  • (modified) llvm/test/CodeGen/RISCV/rvp-ext-rv64.ll (+180)
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoP.td b/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
index 7637047aabf2d..126a39996c741 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
@@ -1539,6 +1539,20 @@ let Predicates = [HasStdExtP, IsRV64] in {
   // 32-bit PLI SD node pattern
   def: Pat<(v2i32 (riscv_pli simm10:$imm10)), (PLI_W simm10:$imm10)>;
 
+  // Basic 32-bit arithmetic patterns
+  def: Pat<(v2i32 (add GPR:$rs1, GPR:$rs2)), (PADD_W GPR:$rs1, GPR:$rs2)>;
+  def: Pat<(v2i32 (sub GPR:$rs1, GPR:$rs2)), (PSUB_W GPR:$rs1, GPR:$rs2)>;
+
+  // 32-bit saturating add/sub patterns
+  def: Pat<(v2i32 (saddsat GPR:$rs1, GPR:$rs2)), (PSADD_W GPR:$rs1, GPR:$rs2)>;
+  def: Pat<(v2i32 (uaddsat GPR:$rs1, GPR:$rs2)), (PSADDU_W GPR:$rs1, GPR:$rs2)>;
+  def: Pat<(v2i32 (ssubsat GPR:$rs1, GPR:$rs2)), (PSSUB_W GPR:$rs1, GPR:$rs2)>;
+  def: Pat<(v2i32 (usubsat GPR:$rs1, GPR:$rs2)), (PSSUBU_W GPR:$rs1, GPR:$rs2)>;
+
+  // 32-bit averaging patterns
+  def: Pat<(v2i32 (avgfloors GPR:$rs1, GPR:$rs2)), (PAADD_W GPR:$rs1, GPR:$rs2)>;
+  def: Pat<(v2i32 (avgflooru GPR:$rs1, GPR:$rs2)), (PAADDU_W GPR:$rs1, GPR:$rs2)>;
+
   // 32-bit averaging-sub patterns
   def: Pat<(v2i32 (riscv_pasub GPR:$rs1, GPR:$rs2)), (PASUB_W GPR:$rs1, GPR:$rs2)>;
   def: Pat<(v2i32 (riscv_pasubu GPR:$rs1, GPR:$rs2)), (PASUBU_W GPR:$rs1, GPR:$rs2)>;
diff --git a/llvm/test/CodeGen/RISCV/rvp-ext-rv64.ll b/llvm/test/CodeGen/RISCV/rvp-ext-rv64.ll
index 000a95fb6e0f8..369bf2a8eef7c 100644
--- a/llvm/test/CodeGen/RISCV/rvp-ext-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvp-ext-rv64.ll
@@ -495,6 +495,182 @@ define void @test_extract_vector_32(ptr %ret_ptr, ptr %a_ptr) {
   ret void
 }
 
+; Test basic add/sub operations for v2i32 (RV64 only)
+define void @test_padd_w(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_padd_w:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ld a1, 0(a1)
+; CHECK-NEXT:    ld a2, 0(a2)
+; CHECK-NEXT:    padd.w a1, a1, a2
+; CHECK-NEXT:    sd a1, 0(a0)
+; CHECK-NEXT:    ret
+  %a = load <2 x i32>, ptr %a_ptr
+  %b = load <2 x i32>, ptr %b_ptr
+  %res = add <2 x i32> %a, %b
+  store <2 x i32> %res, ptr %ret_ptr
+  ret void
+}
+
+define void @test_psub_w(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_psub_w:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ld a1, 0(a1)
+; CHECK-NEXT:    ld a2, 0(a2)
+; CHECK-NEXT:    psub.w a1, a1, a2
+; CHECK-NEXT:    sd a1, 0(a0)
+; CHECK-NEXT:    ret
+  %a = load <2 x i32>, ptr %a_ptr
+  %b = load <2 x i32>, ptr %b_ptr
+  %res = sub <2 x i32> %a, %b
+  store <2 x i32> %res, ptr %ret_ptr
+  ret void
+}
+
+; Test saturating add operations for v2i32 (RV64 only)
+define void @test_psadd_w(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_psadd_w:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ld a1, 0(a1)
+; CHECK-NEXT:    ld a2, 0(a2)
+; CHECK-NEXT:    psadd.w a1, a1, a2
+; CHECK-NEXT:    sd a1, 0(a0)
+; CHECK-NEXT:    ret
+  %a = load <2 x i32>, ptr %a_ptr
+  %b = load <2 x i32>, ptr %b_ptr
+  %res = call <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32> %a, <2 x i32> %b)
+  store <2 x i32> %res, ptr %ret_ptr
+  ret void
+}
+
+define void @test_psaddu_w(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_psaddu_w:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ld a1, 0(a1)
+; CHECK-NEXT:    ld a2, 0(a2)
+; CHECK-NEXT:    psaddu.w a1, a1, a2
+; CHECK-NEXT:    sd a1, 0(a0)
+; CHECK-NEXT:    ret
+  %a = load <2 x i32>, ptr %a_ptr
+  %b = load <2 x i32>, ptr %b_ptr
+  %res = call <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32> %a, <2 x i32> %b)
+  store <2 x i32> %res, ptr %ret_ptr
+  ret void
+}
+
+; Test saturating sub operations for v2i32 (RV64 only)
+define void @test_pssub_w(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_pssub_w:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ld a1, 0(a1)
+; CHECK-NEXT:    ld a2, 0(a2)
+; CHECK-NEXT:    pssub.w a1, a1, a2
+; CHECK-NEXT:    sd a1, 0(a0)
+; CHECK-NEXT:    ret
+  %a = load <2 x i32>, ptr %a_ptr
+  %b = load <2 x i32>, ptr %b_ptr
+  %res = call <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32> %a, <2 x i32> %b)
+  store <2 x i32> %res, ptr %ret_ptr
+  ret void
+}
+
+define void @test_pssubu_w(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_pssubu_w:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ld a1, 0(a1)
+; CHECK-NEXT:    ld a2, 0(a2)
+; CHECK-NEXT:    pssubu.w a1, a1, a2
+; CHECK-NEXT:    sd a1, 0(a0)
+; CHECK-NEXT:    ret
+  %a = load <2 x i32>, ptr %a_ptr
+  %b = load <2 x i32>, ptr %b_ptr
+  %res = call <2 x i32> @llvm.usub.sat.v2i32(<2 x i32> %a, <2 x i32> %b)
+  store <2 x i32> %res, ptr %ret_ptr
+  ret void
+}
+
+; Test averaging floor signed operations for v2i32 (RV64 only)
+; avgfloors pattern: (a + b) arithmetic shift right 1
+define void @test_paadd_w(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_paadd_w:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ld a1, 0(a1)
+; CHECK-NEXT:    ld a2, 0(a2)
+; CHECK-NEXT:    paadd.w a1, a1, a2
+; CHECK-NEXT:    sd a1, 0(a0)
+; CHECK-NEXT:    ret
+  %a = load <2 x i32>, ptr %a_ptr
+  %b = load <2 x i32>, ptr %b_ptr
+  %ext.a = sext <2 x i32> %a to <2 x i64>
+  %ext.b = sext <2 x i32> %b to <2 x i64>
+  %add = add nsw <2 x i64> %ext.a, %ext.b
+  %shift = ashr <2 x i64> %add, <i64 1, i64 1>
+  %res = trunc <2 x i64> %shift to <2 x i32>
+  store <2 x i32> %res, ptr %ret_ptr
+  ret void
+}
+
+; Test averaging floor unsigned operations for v2i32 (RV64 only)
+; avgflooru pattern: (a & b) + ((a ^ b) >> 1)
+define void @test_paaddu_w(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_paaddu_w:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ld a1, 0(a1)
+; CHECK-NEXT:    ld a2, 0(a2)
+; CHECK-NEXT:    paaddu.w a1, a1, a2
+; CHECK-NEXT:    sd a1, 0(a0)
+; CHECK-NEXT:    ret
+  %a = load <2 x i32>, ptr %a_ptr
+  %b = load <2 x i32>, ptr %b_ptr
+  %and = and <2 x i32> %a, %b
+  %xor = xor <2 x i32> %a, %b
+  %shift = lshr <2 x i32> %xor, <i32 1, i32 1>
+  %res = add <2 x i32> %and, %shift
+  store <2 x i32> %res, ptr %ret_ptr
+  ret void
+}
+
+; Test averaging floor subtraction signed for v2i32 (RV64 only)
+; pasub pattern: (a - b) arithmetic shift right 1
+define void @test_pasub_w(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_pasub_w:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ld a1, 0(a1)
+; CHECK-NEXT:    ld a2, 0(a2)
+; CHECK-NEXT:    pasub.w a1, a1, a2
+; CHECK-NEXT:    sd a1, 0(a0)
+; CHECK-NEXT:    ret
+  %a = load <2 x i32>, ptr %a_ptr
+  %b = load <2 x i32>, ptr %b_ptr
+  %a_ext = sext <2 x i32> %a to <2 x i64>
+  %b_ext = sext <2 x i32> %b to <2 x i64>
+  %sub = sub <2 x i64> %a_ext, %b_ext
+  %res = ashr <2 x i64> %sub, <i64 1, i64 1>
+  %res_trunc = trunc <2 x i64> %res to <2 x i32>
+  store <2 x i32> %res_trunc, ptr %ret_ptr
+  ret void
+}
+
+; Test averaging floor subtraction unsigned for v2i32 (RV64 only)
+; pasubu pattern: (a - b) logical shift right 1
+define void @test_pasubu_w(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_pasubu_w:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ld a1, 0(a1)
+; CHECK-NEXT:    ld a2, 0(a2)
+; CHECK-NEXT:    pasubu.w a1, a1, a2
+; CHECK-NEXT:    sd a1, 0(a0)
+; CHECK-NEXT:    ret
+  %a = load <2 x i32>, ptr %a_ptr
+  %b = load <2 x i32>, ptr %b_ptr
+  %a_ext = zext <2 x i32> %a to <2 x i64>
+  %b_ext = zext <2 x i32> %b to <2 x i64>
+  %sub = sub <2 x i64> %a_ext, %b_ext
+  %res = lshr <2 x i64> %sub, <i64 1, i64 1>
+  %res_trunc = trunc <2 x i64> %res to <2 x i32>
+  store <2 x i32> %res_trunc, ptr %ret_ptr
+  ret void
+}
+
 ; Intrinsic declarations
 declare <4 x i16> @llvm.sadd.sat.v4i16(<4 x i16>, <4 x i16>)
 declare <4 x i16> @llvm.uadd.sat.v4i16(<4 x i16>, <4 x i16>)
@@ -512,3 +688,7 @@ declare <8 x i8> @llvm.smin.v8i8(<8 x i8>, <8 x i8>)
 declare <8 x i8> @llvm.smax.v8i8(<8 x i8>, <8 x i8>)
 declare <8 x i8> @llvm.umin.v8i8(<8 x i8>, <8 x i8>)
 declare <8 x i8> @llvm.umax.v8i8(<8 x i8>, <8 x i8>)
+declare <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32>, <2 x i32>)
+declare <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32>, <2 x i32>)
+declare <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32>, <2 x i32>)
+declare <2 x i32> @llvm.usub.sat.v2i32(<2 x i32>, <2 x i32>)
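
For reference, the identity behind the unsigned averaging form in test_paaddu_w above (a standard two's-complement fact, not part of the original patch text):

  a + b = ((a & b) << 1) + (a ^ b)
  => (a + b) >> 1 = (a & b) + ((a ^ b) >> 1)

so the and/xor/lshr/add sequence is an overflow-free floor average; it is recognized as the avgflooru node that the new PAADDU_W pattern selects, hence the single paaddu.w in the CHECK lines.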

Collaborator

@topperc topperc left a comment


LGTM

declare <8 x i8> @llvm.smax.v8i8(<8 x i8>, <8 x i8>)
declare <8 x i8> @llvm.umin.v8i8(<8 x i8>, <8 x i8>)
declare <8 x i8> @llvm.umax.v8i8(<8 x i8>, <8 x i8>)
declare <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32>, <2 x i32>)
Collaborator


Please stop adding intrinsic declarations. They aren't required.

Member Author


got it!

@4vtomat 4vtomat enabled auto-merge (squash) November 14, 2025 07:22
@4vtomat 4vtomat merged commit 6b16b31 into llvm:main Nov 14, 2025
9 of 10 checks passed