From b0b933bc038e16024c960ad99b46bd2b64d24849 Mon Sep 17 00:00:00 2001 From: Liao Chunyu Date: Tue, 11 Nov 2025 04:28:20 -0500 Subject: [PATCH 1/3] Pre-commit test case --- llvm/test/CodeGen/RISCV/rvv/vadd-sdnode.ll | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/llvm/test/CodeGen/RISCV/rvv/vadd-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vadd-sdnode.ll index a9a13147f5c9b..cf9fe8910e524 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vadd-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vadd-sdnode.ll @@ -814,6 +814,21 @@ define @vadd_vv_mask_nxv8i32( %va, %vc } +define @vadd_vv_mask_nxv8i32_novmerge( %va, %vb, +; CHECK-LABEL: vadd_vv_mask_nxv8i32_novmerge: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma +; CHECK-NEXT: vmseq.vi v0, v8, 1 +; CHECK-NEXT: vadd.vv v8, v16, v12 +; CHECK-NEXT: vmerge.vvm v8, v8, v16, v0 +; CHECK-NEXT: ret + %vc) { + %mask = icmp eq %va, splat (i32 1) + %vs = select %mask, zeroinitializer , %vb + %vr = add nsw %vc, %vs + ret %vr +} + define @vadd_vx_mask_nxv8i32( %va, i32 signext %b, %mask) { ; CHECK-LABEL: vadd_vx_mask_nxv8i32: ; CHECK: # %bb.0: From 859a0e0f6607a5d5b24c7c88ae335f889b2c1c9c Mon Sep 17 00:00:00 2001 From: Liao Chunyu Date: Tue, 11 Nov 2025 05:59:21 -0500 Subject: [PATCH 2/3] [DAGCombine] Invert vselect so that the true value is the binop Before: binop N0, (vselect Cond, IDC, FVal) --> vselect Cond, N0, (binop N0, FVal) After, for the special case where Cond is a SETCC: binop N0, (vselect Cond, IDC, FVal) --> vselect InvertCond, (binop N0, FVal), N0 This makes it easier to eliminate the vmerge instruction on RISC-V. I haven't seen any regressions on other targets. 
--- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 9 ++++++ .../RISCV/intrinsic-cttz-elts-vscale.ll | 31 +++++++++---------- llvm/test/CodeGen/RISCV/rvv/vadd-sdnode.ll | 8 ++--- 3 files changed, 27 insertions(+), 21 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index df353c4d91b1a..12e5e2ba1f811 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -2508,6 +2508,15 @@ static SDValue foldSelectWithIdentityConstant(SDNode *N, SelectionDAG &DAG, FVal)) { SDValue F0 = DAG.getFreeze(N0); SDValue NewBO = DAG.getNode(Opcode, SDLoc(N), VT, F0, FVal, N->getFlags()); + // For RISCV prefer to N0 == FVal + if (Cond.getOpcode() == ISD::SETCC) { + EVT CVT = Cond->getValueType(0); + ISD::CondCode NotCC = ISD::getSetCCInverse( + cast(Cond.getOperand(2))->get(), CVT); + SDValue NCond = DAG.getSetCC(SDLoc(N), CVT, Cond.getOperand(0), + Cond.getOperand(1), NotCC); + return DAG.getSelect(SDLoc(N), VT, NCond, NewBO, F0); + } return DAG.getSelect(SDLoc(N), VT, Cond, F0, NewBO); } // binop N0, (vselect Cond, TVal, IDC) --> vselect Cond, (binop N0, TVal), N0 diff --git a/llvm/test/CodeGen/RISCV/intrinsic-cttz-elts-vscale.ll b/llvm/test/CodeGen/RISCV/intrinsic-cttz-elts-vscale.ll index a06c7505d543d..b258ad0f0bca0 100644 --- a/llvm/test/CodeGen/RISCV/intrinsic-cttz-elts-vscale.ll +++ b/llvm/test/CodeGen/RISCV/intrinsic-cttz-elts-vscale.ll @@ -11,14 +11,13 @@ define i32 @ctz_nxv4i32( %a) #0 { ; RV32-NEXT: vsetvli a1, zero, e16, m1, ta, ma ; RV32-NEXT: vid.v v10 ; RV32-NEXT: li a1, -1 -; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; RV32-NEXT: vmsne.vi v0, v8, 0 ; RV32-NEXT: srli a0, a0, 1 +; RV32-NEXT: vmv.v.x v11, a0 +; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV32-NEXT: vmseq.vi v0, v8, 0 ; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; RV32-NEXT: vmv.v.x v8, a0 -; RV32-NEXT: vmadd.vx v10, a1, v8 -; RV32-NEXT: vmv.v.i v8, 0 -; RV32-NEXT: 
vmerge.vvm v8, v8, v10, v0 +; RV32-NEXT: vmadd.vx v10, a1, v11 +; RV32-NEXT: vmerge.vim v8, v10, 0, v0 ; RV32-NEXT: vredmaxu.vs v8, v8, v8 ; RV32-NEXT: vmv.x.s a1, v8 ; RV32-NEXT: sub a0, a0, a1 @@ -32,14 +31,13 @@ define i32 @ctz_nxv4i32( %a) #0 { ; RV64-NEXT: vsetvli a1, zero, e16, m1, ta, ma ; RV64-NEXT: vid.v v10 ; RV64-NEXT: li a1, -1 -; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; RV64-NEXT: vmsne.vi v0, v8, 0 ; RV64-NEXT: srli a0, a0, 1 +; RV64-NEXT: vmv.v.x v11, a0 +; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV64-NEXT: vmseq.vi v0, v8, 0 ; RV64-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; RV64-NEXT: vmv.v.x v8, a0 -; RV64-NEXT: vmadd.vx v10, a1, v8 -; RV64-NEXT: vmv.v.i v8, 0 -; RV64-NEXT: vmerge.vvm v8, v8, v10, v0 +; RV64-NEXT: vmadd.vx v10, a1, v11 +; RV64-NEXT: vmerge.vim v8, v10, 0, v0 ; RV64-NEXT: vredmaxu.vs v8, v8, v8 ; RV64-NEXT: vmv.x.s a1, v8 ; RV64-NEXT: sub a0, a0, a1 @@ -113,13 +111,12 @@ define i64 @ctz_nxv8i1_no_range( %a) { ; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; RV64-NEXT: vid.v v16 ; RV64-NEXT: li a1, -1 +; RV64-NEXT: vmv.v.x v24, a0 ; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; RV64-NEXT: vmsne.vi v0, v8, 0 +; RV64-NEXT: vmseq.vi v0, v8, 0 ; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; RV64-NEXT: vmv.v.x v8, a0 -; RV64-NEXT: vmadd.vx v16, a1, v8 -; RV64-NEXT: vmv.v.i v8, 0 -; RV64-NEXT: vmerge.vvm v8, v8, v16, v0 +; RV64-NEXT: vmadd.vx v16, a1, v24 +; RV64-NEXT: vmerge.vim v8, v16, 0, v0 ; RV64-NEXT: vredmaxu.vs v8, v8, v8 ; RV64-NEXT: vmv.x.s a1, v8 ; RV64-NEXT: sub a0, a0, a1 diff --git a/llvm/test/CodeGen/RISCV/rvv/vadd-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vadd-sdnode.ll index cf9fe8910e524..3292c064bb4e9 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vadd-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vadd-sdnode.ll @@ -817,10 +817,10 @@ define @vadd_vv_mask_nxv8i32( %va, @vadd_vv_mask_nxv8i32_novmerge( %va, %vb, ; CHECK-LABEL: vadd_vv_mask_nxv8i32_novmerge: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, 
zero, e32, m4, ta, ma -; CHECK-NEXT: vmseq.vi v0, v8, 1 -; CHECK-NEXT: vadd.vv v8, v16, v12 -; CHECK-NEXT: vmerge.vvm v8, v8, v16, v0 +; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, mu +; CHECK-NEXT: vmsne.vi v0, v8, 1 +; CHECK-NEXT: vadd.vv v16, v16, v12, v0.t +; CHECK-NEXT: vmv.v.v v8, v16 ; CHECK-NEXT: ret %vc) { %mask = icmp eq %va, splat (i32 1) From 7830c3d15209c8859a1f2b371d65a84cc0d80dd2 Mon Sep 17 00:00:00 2001 From: Liao Chunyu Date: Tue, 11 Nov 2025 09:22:07 -0500 Subject: [PATCH 3/3] address comments --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 13 +++++++------ llvm/test/CodeGen/RISCV/rvv/vadd-sdnode.ll | 3 +-- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 12e5e2ba1f811..56e6a0213d5e2 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -2498,6 +2498,7 @@ static SDValue foldSelectWithIdentityConstant(SDNode *N, SelectionDAG &DAG, unsigned SelOpcode = N1.getOpcode(); unsigned Opcode = N->getOpcode(); EVT VT = N->getValueType(0); + SDLoc DL(N); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); // This transform increases uses of N0, so freeze it to be safe. 
@@ -2513,19 +2514,19 @@ static SDValue foldSelectWithIdentityConstant(SDNode *N, SelectionDAG &DAG, EVT CVT = Cond->getValueType(0); ISD::CondCode NotCC = ISD::getSetCCInverse( cast(Cond.getOperand(2))->get(), CVT); - SDValue NCond = DAG.getSetCC(SDLoc(N), CVT, Cond.getOperand(0), - Cond.getOperand(1), NotCC); - return DAG.getSelect(SDLoc(N), VT, NCond, NewBO, F0); + SDValue NCond = + DAG.getSetCC(DL, CVT, Cond.getOperand(0), Cond.getOperand(1), NotCC); + return DAG.getSelect(DL, VT, NCond, NewBO, F0); } - return DAG.getSelect(SDLoc(N), VT, Cond, F0, NewBO); + return DAG.getSelect(DL, VT, Cond, F0, NewBO); } // binop N0, (vselect Cond, TVal, IDC) --> vselect Cond, (binop N0, TVal), N0 if (isNeutralConstant(Opcode, N->getFlags(), FVal, OpNo) && TLI.shouldFoldSelectWithIdentityConstant(Opcode, VT, SelOpcode, N0, TVal)) { SDValue F0 = DAG.getFreeze(N0); - SDValue NewBO = DAG.getNode(Opcode, SDLoc(N), VT, F0, TVal, N->getFlags()); - return DAG.getSelect(SDLoc(N), VT, Cond, NewBO, F0); + SDValue NewBO = DAG.getNode(Opcode, DL, VT, F0, TVal, N->getFlags()); + return DAG.getSelect(DL, VT, Cond, NewBO, F0); } return SDValue(); diff --git a/llvm/test/CodeGen/RISCV/rvv/vadd-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vadd-sdnode.ll index 3292c064bb4e9..4fe9db73f524f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vadd-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vadd-sdnode.ll @@ -814,7 +814,7 @@ define @vadd_vv_mask_nxv8i32( %va, %vc } -define @vadd_vv_mask_nxv8i32_novmerge( %va, %vb, +define @vadd_vv_mask_nxv8i32_novmerge( %va, %vb, %vc) { ; CHECK-LABEL: vadd_vv_mask_nxv8i32_novmerge: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, mu @@ -822,7 +822,6 @@ define @vadd_vv_mask_nxv8i32_novmerge( %va, ; CHECK-NEXT: vadd.vv v16, v16, v12, v0.t ; CHECK-NEXT: vmv.v.v v8, v16 ; CHECK-NEXT: ret - %vc) { %mask = icmp eq %va, splat (i32 1) %vs = select %mask, zeroinitializer , %vb %vr = add nsw %vc, %vs