From e0c1ef3b753aa8b629afc32c30c06aec3f938ea7 Mon Sep 17 00:00:00 2001
From: Brandon Wu
Date: Fri, 31 Oct 2025 16:19:24 +0800
Subject: [PATCH 1/2] [llvm][RISCV] Support P extension CodeGen

This patch supports PADD_W, PSUB_W, PSADD_W, PSADDU_W, PSSUB_W, PSSUBU_W,
PAADD_W and PAADDU_W.

On RV64 with the P extension, v2i32 add, sub, saddsat, uaddsat, ssubsat,
usubsat, avgfloors and avgflooru are now selected to the corresponding
packed 32-bit instructions.
---
 llvm/lib/Target/RISCV/RISCVInstrInfoP.td |  14 ++
 llvm/test/CodeGen/RISCV/rvp-ext-rv64.ll  | 180 +++++++++++++++++++++++
 2 files changed, 194 insertions(+)

diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoP.td b/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
index 7637047aabf2d..126a39996c741 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
@@ -1539,6 +1539,20 @@ let Predicates = [HasStdExtP, IsRV64] in {
   // 32-bit PLI SD node pattern
   def: Pat<(v2i32 (riscv_pli simm10:$imm10)), (PLI_W simm10:$imm10)>;
 
+  // Basic 32-bit arithmetic patterns
+  def: Pat<(v2i32 (add GPR:$rs1, GPR:$rs2)), (PADD_W GPR:$rs1, GPR:$rs2)>;
+  def: Pat<(v2i32 (sub GPR:$rs1, GPR:$rs2)), (PSUB_W GPR:$rs1, GPR:$rs2)>;
+
+  // 32-bit saturating add/sub patterns
+  def: Pat<(v2i32 (saddsat GPR:$rs1, GPR:$rs2)), (PSADD_W GPR:$rs1, GPR:$rs2)>;
+  def: Pat<(v2i32 (uaddsat GPR:$rs1, GPR:$rs2)), (PSADDU_W GPR:$rs1, GPR:$rs2)>;
+  def: Pat<(v2i32 (ssubsat GPR:$rs1, GPR:$rs2)), (PSSUB_W GPR:$rs1, GPR:$rs2)>;
+  def: Pat<(v2i32 (usubsat GPR:$rs1, GPR:$rs2)), (PSSUBU_W GPR:$rs1, GPR:$rs2)>;
+
+  // 32-bit averaging patterns
+  def: Pat<(v2i32 (avgfloors GPR:$rs1, GPR:$rs2)), (PAADD_W GPR:$rs1, GPR:$rs2)>;
+  def: Pat<(v2i32 (avgflooru GPR:$rs1, GPR:$rs2)), (PAADDU_W GPR:$rs1, GPR:$rs2)>;
+
   // 32-bit averaging-sub patterns
   def: Pat<(v2i32 (riscv_pasub GPR:$rs1, GPR:$rs2)), (PASUB_W GPR:$rs1, GPR:$rs2)>;
   def: Pat<(v2i32 (riscv_pasubu GPR:$rs1, GPR:$rs2)), (PASUBU_W GPR:$rs1, GPR:$rs2)>;
diff --git a/llvm/test/CodeGen/RISCV/rvp-ext-rv64.ll b/llvm/test/CodeGen/RISCV/rvp-ext-rv64.ll
index 000a95fb6e0f8..369bf2a8eef7c 100644
--- a/llvm/test/CodeGen/RISCV/rvp-ext-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvp-ext-rv64.ll
@@ -495,6 +495,182 @@ define void @test_extract_vector_32(ptr %ret_ptr, ptr %a_ptr) {
   ret void
 }
 
+; Test basic add/sub operations for v2i32 (RV64 only)
+define void @test_padd_w(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_padd_w:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ld a1, 0(a1)
+; CHECK-NEXT:    ld a2, 0(a2)
+; CHECK-NEXT:    padd.w a1, a1, a2
+; CHECK-NEXT:    sd a1, 0(a0)
+; CHECK-NEXT:    ret
+  %a = load <2 x i32>, ptr %a_ptr
+  %b = load <2 x i32>, ptr %b_ptr
+  %res = add <2 x i32> %a, %b
+  store <2 x i32> %res, ptr %ret_ptr
+  ret void
+}
+
+define void @test_psub_w(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_psub_w:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ld a1, 0(a1)
+; CHECK-NEXT:    ld a2, 0(a2)
+; CHECK-NEXT:    psub.w a1, a1, a2
+; CHECK-NEXT:    sd a1, 0(a0)
+; CHECK-NEXT:    ret
+  %a = load <2 x i32>, ptr %a_ptr
+  %b = load <2 x i32>, ptr %b_ptr
+  %res = sub <2 x i32> %a, %b
+  store <2 x i32> %res, ptr %ret_ptr
+  ret void
+}
+
+; Test saturating add operations for v2i32 (RV64 only)
+define void @test_psadd_w(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_psadd_w:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ld a1, 0(a1)
+; CHECK-NEXT:    ld a2, 0(a2)
+; CHECK-NEXT:    psadd.w a1, a1, a2
+; CHECK-NEXT:    sd a1, 0(a0)
+; CHECK-NEXT:    ret
+  %a = load <2 x i32>, ptr %a_ptr
+  %b = load <2 x i32>, ptr %b_ptr
+  %res = call <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32> %a, <2 x i32> %b)
+  store <2 x i32> %res, ptr %ret_ptr
+  ret void
+}
+
+define void @test_psaddu_w(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_psaddu_w:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ld a1, 0(a1)
+; CHECK-NEXT:    ld a2, 0(a2)
+; CHECK-NEXT:    psaddu.w a1, a1, a2
+; CHECK-NEXT:    sd a1, 0(a0)
+; CHECK-NEXT:    ret
+  %a = load <2 x i32>, ptr %a_ptr
+  %b = load <2 x i32>, ptr %b_ptr
+  %res = call <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32> %a, <2 x i32> %b)
+  store <2 x i32> %res, ptr %ret_ptr
+  ret void
+}
+
+; Test saturating sub operations for v2i32 (RV64 only)
+define void @test_pssub_w(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_pssub_w:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ld a1, 0(a1)
+; CHECK-NEXT:    ld a2, 0(a2)
+; CHECK-NEXT:    pssub.w a1, a1, a2
+; CHECK-NEXT:    sd a1, 0(a0)
+; CHECK-NEXT:    ret
+  %a = load <2 x i32>, ptr %a_ptr
+  %b = load <2 x i32>, ptr %b_ptr
+  %res = call <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32> %a, <2 x i32> %b)
+  store <2 x i32> %res, ptr %ret_ptr
+  ret void
+}
+
+define void @test_pssubu_w(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_pssubu_w:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ld a1, 0(a1)
+; CHECK-NEXT:    ld a2, 0(a2)
+; CHECK-NEXT:    pssubu.w a1, a1, a2
+; CHECK-NEXT:    sd a1, 0(a0)
+; CHECK-NEXT:    ret
+  %a = load <2 x i32>, ptr %a_ptr
+  %b = load <2 x i32>, ptr %b_ptr
+  %res = call <2 x i32> @llvm.usub.sat.v2i32(<2 x i32> %a, <2 x i32> %b)
+  store <2 x i32> %res, ptr %ret_ptr
+  ret void
+}
+
+; Test averaging floor signed operations for v2i32 (RV64 only)
+; avgfloors pattern: (a + b) arithmetic shift right 1
+define void @test_paadd_w(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_paadd_w:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ld a1, 0(a1)
+; CHECK-NEXT:    ld a2, 0(a2)
+; CHECK-NEXT:    paadd.w a1, a1, a2
+; CHECK-NEXT:    sd a1, 0(a0)
+; CHECK-NEXT:    ret
+  %a = load <2 x i32>, ptr %a_ptr
+  %b = load <2 x i32>, ptr %b_ptr
+  %ext.a = sext <2 x i32> %a to <2 x i64>
+  %ext.b = sext <2 x i32> %b to <2 x i64>
+  %add = add nsw <2 x i64> %ext.a, %ext.b
+  %shift = ashr <2 x i64> %add, <i64 1, i64 1>
+  %res = trunc <2 x i64> %shift to <2 x i32>
+  store <2 x i32> %res, ptr %ret_ptr
+  ret void
+}
+
+; Test averaging floor unsigned operations for v2i32 (RV64 only)
+; avgflooru pattern: (a & b) + ((a ^ b) >> 1)
+define void @test_paaddu_w(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_paaddu_w:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ld a1, 0(a1)
+; CHECK-NEXT:    ld a2, 0(a2)
+; CHECK-NEXT:    paaddu.w a1, a1, a2
+; CHECK-NEXT:    sd a1, 0(a0)
+; CHECK-NEXT:    ret
+  %a = load <2 x i32>, ptr %a_ptr
+  %b = load <2 x i32>, ptr %b_ptr
+  %and = and <2 x i32> %a, %b
+  %xor = xor <2 x i32> %a, %b
+  %shift = lshr <2 x i32> %xor, <i32 1, i32 1>
+  %res = add <2 x i32> %and, %shift
+  store <2 x i32> %res, ptr %ret_ptr
+  ret void
+}
+
+; Test averaging floor subtraction signed for v2i32 (RV64 only)
+; pasub pattern: (a - b) arithmetic shift right 1
+define void @test_pasub_w(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_pasub_w:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ld a1, 0(a1)
+; CHECK-NEXT:    ld a2, 0(a2)
+; CHECK-NEXT:    pasub.w a1, a1, a2
+; CHECK-NEXT:    sd a1, 0(a0)
+; CHECK-NEXT:    ret
+  %a = load <2 x i32>, ptr %a_ptr
+  %b = load <2 x i32>, ptr %b_ptr
+  %a_ext = sext <2 x i32> %a to <2 x i64>
+  %b_ext = sext <2 x i32> %b to <2 x i64>
+  %sub = sub <2 x i64> %a_ext, %b_ext
+  %res = ashr <2 x i64> %sub, <i64 1, i64 1>
+  %res_trunc = trunc <2 x i64> %res to <2 x i32>
+  store <2 x i32> %res_trunc, ptr %ret_ptr
+  ret void
+}
+
+; Test averaging floor subtraction unsigned for v2i32 (RV64 only)
+; pasubu pattern: (a - b) logical shift right 1
+define void @test_pasubu_w(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_pasubu_w:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ld a1, 0(a1)
+; CHECK-NEXT:    ld a2, 0(a2)
+; CHECK-NEXT:    pasubu.w a1, a1, a2
+; CHECK-NEXT:    sd a1, 0(a0)
+; CHECK-NEXT:    ret
+  %a = load <2 x i32>, ptr %a_ptr
+  %b = load <2 x i32>, ptr %b_ptr
+  %a_ext = zext <2 x i32> %a to <2 x i64>
+  %b_ext = zext <2 x i32> %b to <2 x i64>
+  %sub = sub <2 x i64> %a_ext, %b_ext
+  %res = lshr <2 x i64> %sub, <i64 1, i64 1>
+  %res_trunc = trunc <2 x i64> %res to <2 x i32>
+  store <2 x i32> %res_trunc, ptr %ret_ptr
+  ret void
+}
+
 ; Intrinsic declarations
 declare <4 x i16> @llvm.sadd.sat.v4i16(<4 x i16>, <4 x i16>)
 declare <4 x i16> @llvm.uadd.sat.v4i16(<4 x i16>, <4 x i16>)
@@ -512,3 +688,7 @@ declare <8 x i8> @llvm.smin.v8i8(<8 x i8>, <8 x i8>)
 declare <8 x i8> @llvm.smax.v8i8(<8 x i8>, <8 x i8>)
 declare <8 x i8> @llvm.umin.v8i8(<8 x i8>, <8 x i8>)
 declare <8 x i8> @llvm.umax.v8i8(<8 x i8>, <8 x i8>)
+declare <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32>, <2 x i32>)
+declare <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32>, <2 x i32>)
+declare <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32>, <2 x i32>)
+declare <2 x i32> @llvm.usub.sat.v2i32(<2 x i32>, <2 x i32>)

From f63f9b53b76581ae71ff7a605f09a82c4d9e3758 Mon Sep 17 00:00:00 2001
From: Brandon Wu
Date: Thu, 13 Nov 2025 23:21:26 -0800
Subject: [PATCH 2/2] fixup! remove declare

---
 llvm/test/CodeGen/RISCV/rvp-ext-rv64.ll | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/llvm/test/CodeGen/RISCV/rvp-ext-rv64.ll b/llvm/test/CodeGen/RISCV/rvp-ext-rv64.ll
index 369bf2a8eef7c..353039e9482e9 100644
--- a/llvm/test/CodeGen/RISCV/rvp-ext-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvp-ext-rv64.ll
@@ -688,7 +688,3 @@ declare <8 x i8> @llvm.smin.v8i8(<8 x i8>, <8 x i8>)
 declare <8 x i8> @llvm.smax.v8i8(<8 x i8>, <8 x i8>)
 declare <8 x i8> @llvm.umin.v8i8(<8 x i8>, <8 x i8>)
 declare <8 x i8> @llvm.umax.v8i8(<8 x i8>, <8 x i8>)
-declare <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32>, <2 x i32>)
-declare <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32>, <2 x i32>)
-declare <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32>, <2 x i32>)
-declare <2 x i32> @llvm.usub.sat.v2i32(<2 x i32>, <2 x i32>)