Skip to content

Conversation

preames
Copy link
Collaborator

@preames preames commented Sep 4, 2025

This is purely stylistic, but I think makes the code easier to follow.

It isn't quite NFC because it undoes the arithmetic lowering for the select c, simm12, 0 cases for a processor with both conditional move forwarding and zicond.

This is purely stylistic, but I think makes the code easier to follow.

It isn't quite NFC because it undoes the airthmetic lowering for the
select c, simm12, 0 cases for a processor with both conditional
move forwarding and zicond.
@preames preames requested a review from topperc September 4, 2025 22:08
@preames
Copy link
Collaborator Author

preames commented Sep 4, 2025

It isn't quite NFC because it undoes the arithmetic lowering for the select c, simm12, 0 cases for a processor with both conditional move forwarding and zicond.

@topperc It's not really clear what I should do for the configuration mentioned above. Do you actually have such a processor? If so, how do you want the codegen to look for this case?

@llvmbot
Copy link
Member

llvmbot commented Sep 4, 2025

@llvm/pr-subscribers-backend-risc-v

Author: Philip Reames (preames)

Changes

This is purely stylistic, but I think makes the code easier to follow.

It isn't quite NFC because it undoes the arithmetic lowering for the select c, simm12, 0 cases for a processor with both conditional move forwarding and zicond.


Full diff: https://github.com/llvm/llvm-project/pull/156974.diff

2 Files Affected:

  • (modified) llvm/lib/Target/RISCV/RISCVISelLowering.cpp (+18-30)
  • (modified) llvm/test/CodeGen/RISCV/cmov-branch-opt.ll (+4-6)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index f8ec1be1fd8d6..d26f5ab8f3e88 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -9106,8 +9106,12 @@ static std::optional<bool> matchSetCC(SDValue LHS, SDValue RHS,
   return std::nullopt;
 }
 
-static SDValue combineSelectToBinOp(SDNode *N, SelectionDAG &DAG,
-                                    const RISCVSubtarget &Subtarget) {
+static bool isSimm12Constant(SDValue V) {
+  return isa<ConstantSDNode>(V) && V->getAsAPIntVal().isSignedIntN(12);
+}
+
+static SDValue lowerSelectToBinOp(SDNode *N, SelectionDAG &DAG,
+                                  const RISCVSubtarget &Subtarget) {
   SDValue CondV = N->getOperand(0);
   SDValue TrueV = N->getOperand(1);
   SDValue FalseV = N->getOperand(2);
@@ -9127,14 +9131,18 @@ static SDValue combineSelectToBinOp(SDNode *N, SelectionDAG &DAG,
       return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(TrueV));
     }
 
+    const bool HasCZero =
+        VT.isScalarInteger() &&
+        (Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps());
+
     // (select c, 0, y) -> (c-1) & y
-    if (isNullConstant(TrueV)) {
-      SDValue Neg = DAG.getNode(ISD::ADD, DL, VT, CondV,
-                                DAG.getAllOnesConstant(DL, VT));
+    if (isNullConstant(TrueV) && (!HasCZero || isSimm12Constant(FalseV))) {
+      SDValue Neg =
+          DAG.getNode(ISD::ADD, DL, VT, CondV, DAG.getAllOnesConstant(DL, VT));
       return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(FalseV));
     }
     // (select c, y, 0) -> -c & y
-    if (isNullConstant(FalseV)) {
+    if (isNullConstant(FalseV) && (!HasCZero || isSimm12Constant(TrueV))) {
       SDValue Neg = DAG.getNegative(CondV, DL, VT);
       return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(TrueV));
     }
@@ -9240,10 +9248,6 @@ foldBinOpIntoSelectIfProfitable(SDNode *BO, SelectionDAG &DAG,
   return DAG.getSelect(DL, VT, Sel.getOperand(0), NewT, NewF);
 }
 
-static bool isSimm12Constant(SDValue V) {
-  return isa<ConstantSDNode>(V) && V->getAsAPIntVal().isSignedIntN(12);
-}
-
 SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
   SDValue CondV = Op.getOperand(0);
   SDValue TrueV = Op.getOperand(1);
@@ -9259,6 +9263,10 @@ SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
     return DAG.getNode(ISD::VSELECT, DL, VT, CondSplat, TrueV, FalseV);
   }
 
+  // Try some other optimizations before falling back to generic lowering.
+  if (SDValue V = lowerSelectToBinOp(Op.getNode(), DAG, Subtarget))
+    return V;
+
   // When Zicond or XVentanaCondOps is present, emit CZERO_EQZ and CZERO_NEZ
   // nodes to implement the SELECT. Performing the lowering here allows for
   // greater control over when CZERO_{EQZ/NEZ} are used vs another branchless
@@ -9266,19 +9274,6 @@ SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
   if ((Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps()) &&
       VT.isScalarInteger()) {
 
-    // select c, simm12, 0 -> andi (sub x0, c), simm12
-    if (isSimm12Constant(TrueV) && isNullConstant(FalseV)) {
-      SDValue Mask = DAG.getNegative(CondV, DL, VT);
-      return DAG.getNode(ISD::AND, DL, VT, TrueV, Mask);
-    }
-
-    // select c, 0, simm12 -> andi (addi c, -1), simm12
-    if (isNullConstant(TrueV) && isSimm12Constant(FalseV)) {
-      SDValue Mask = DAG.getNode(ISD::ADD, DL, VT, CondV,
-                                 DAG.getSignedConstant(-1, DL, XLenVT));
-      return DAG.getNode(ISD::AND, DL, VT, FalseV, Mask);
-    }
-
     // (select c, t, 0) -> (czero_eqz t, c)
     if (isNullConstant(FalseV))
       return DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV);
@@ -9332,10 +9327,6 @@ SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
           DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV));
     }
 
-    // Try some other optimizations before falling back to generic lowering.
-    if (SDValue V = combineSelectToBinOp(Op.getNode(), DAG, Subtarget))
-      return V;
-
     // (select c, c1, c2) -> (add (czero_nez c2 - c1, c), c1)
     // (select c, c1, c2) -> (add (czero_eqz c1 - c2, c), c2)
     if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV)) {
@@ -9438,9 +9429,6 @@ SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
           SDNodeFlags::Disjoint);
   }
 
-  if (SDValue V = combineSelectToBinOp(Op.getNode(), DAG, Subtarget))
-    return V;
-
   if (Op.hasOneUse()) {
     unsigned UseOpc = Op->user_begin()->getOpcode();
     if (isBinOp(UseOpc) && DAG.isSafeToSpeculativelyExecute(UseOpc)) {
diff --git a/llvm/test/CodeGen/RISCV/cmov-branch-opt.ll b/llvm/test/CodeGen/RISCV/cmov-branch-opt.ll
index 351b02494ae85..6608874286e34 100644
--- a/llvm/test/CodeGen/RISCV/cmov-branch-opt.ll
+++ b/llvm/test/CodeGen/RISCV/cmov-branch-opt.ll
@@ -149,9 +149,8 @@ define signext i32 @test4(i32 signext %x, i32 signext %y, i32 signext %z) {
 ;
 ; CMOV-ZICOND-LABEL: test4:
 ; CMOV-ZICOND:       # %bb.0:
-; CMOV-ZICOND-NEXT:    snez a0, a2
-; CMOV-ZICOND-NEXT:    addi a0, a0, -1
-; CMOV-ZICOND-NEXT:    andi a0, a0, 3
+; CMOV-ZICOND-NEXT:    li a0, 3
+; CMOV-ZICOND-NEXT:    czero.nez a0, a0, a2
 ; CMOV-ZICOND-NEXT:    ret
 ;
 ; SFB-NOZICOND-LABEL: test4:
@@ -165,9 +164,8 @@ define signext i32 @test4(i32 signext %x, i32 signext %y, i32 signext %z) {
 ;
 ; SFB-ZICOND-LABEL: test4:
 ; SFB-ZICOND:       # %bb.0:
-; SFB-ZICOND-NEXT:    snez a0, a2
-; SFB-ZICOND-NEXT:    addi a0, a0, -1
-; SFB-ZICOND-NEXT:    andi a0, a0, 3
+; SFB-ZICOND-NEXT:    li a0, 3
+; SFB-ZICOND-NEXT:    czero.nez a0, a0, a2
 ; SFB-ZICOND-NEXT:    ret
   %c = icmp eq i32 %z, 0
   %a = select i1 %c, i32 3, i32 0

@topperc
Copy link
Collaborator

topperc commented Sep 4, 2025

It isn't quite NFC because it undoes the arithmetic lowering for the select c, simm12, 0 cases for a processor with both conditional move forwarding and zicond.

@topperc It's not really clear what I should do for the configuration mentioned above. Do you actually have such a processor? If so, how do you want the codegen to look for this case?

To make sure I understand, this is the arithmetic lowering that was added in today in #156957? So we go back to what we had before today?

@preames
Copy link
Collaborator Author

preames commented Sep 5, 2025

It isn't quite NFC because it undoes the arithmetic lowering for the select c, simm12, 0 cases for a processor with both conditional move forwarding and zicond.

@topperc It's not really clear what I should do for the configuration mentioned above. Do you actually have such a processor? If so, how do you want the codegen to look for this case?

To make sure I understand, this is the arithmetic lowering that was added in today in #156957? So we go back to what we had before today?

Yep, exactly.

@topperc
Copy link
Collaborator

topperc commented Sep 6, 2025

It isn't quite NFC because it undoes the arithmetic lowering for the select c, simm12, 0 cases for a processor with both conditional move forwarding and zicond.

@topperc It's not really clear what I should do for the configuration mentioned above. Do you actually have such a processor? If so, how do you want the codegen to look for this case?

To make sure I understand, this is the arithmetic lowering that was added in today in #156957? So we go back to what we had before today?

Yep, exactly.

I'll take a note to investigate this on our side.

Copy link
Collaborator

@topperc topperc left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM

@preames preames merged commit 1455b3c into llvm:main Sep 6, 2025
11 checks passed
@preames preames deleted the pr-riscv-select-lowering-pull-binop-early branch September 6, 2025 01:13
@llvm-ci
Copy link
Collaborator

llvm-ci commented Sep 6, 2025

LLVM Buildbot has detected a new failure on builder cross-project-tests-sie-ubuntu running on doug-worker-1a while building llvm at step 6 "test-build-unified-tree-check-cross-project".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/181/builds/27384

Here is the relevant piece of the build log for the reference
Step 6 (test-build-unified-tree-check-cross-project) failure: test (failure)
******************** TEST 'cross-project-tests :: debuginfo-tests/dexter-tests/memvars/ctrl-flow.c' FAILED ********************
Exit Code: 2

Command Output (stdout):
--
ctrl-flow.c: nan/nan (nan) [Command '['/usr/bin/python3.8', '/home/buildbot/buildbot-root/cross-project-tests-sie-ubuntu/llvm-project/cross-project-tests/debuginfo-tests/dexter/dex/../dexter.py', 'run-debugger-internal-', '/tmp/lit-tmp-xkaaj2l4/dexter/tmp_kgxsmgg/tmp9l1bv01k', '--working-directory=/tmp/lit-tmp-xkaaj2l4/dexter/tmp_kgxsmgg', '--unittest=off', '--indent-timer-level=3']' returned non-zero exit status 1.]


--
Command Output (stderr):
--
/home/buildbot/buildbot-root/cross-project-tests-sie-ubuntu/build/bin/clang -std=gnu11 -O2 -glldb /home/buildbot/buildbot-root/cross-project-tests-sie-ubuntu/llvm-project/cross-project-tests/debuginfo-tests/dexter-tests/memvars/ctrl-flow.c -o /home/buildbot/buildbot-root/cross-project-tests-sie-ubuntu/build/projects/cross-project-tests/debuginfo-tests/dexter-tests/memvars/Output/ctrl-flow.c.tmp # RUN: at line 3
+ /home/buildbot/buildbot-root/cross-project-tests-sie-ubuntu/build/bin/clang -std=gnu11 -O2 -glldb /home/buildbot/buildbot-root/cross-project-tests-sie-ubuntu/llvm-project/cross-project-tests/debuginfo-tests/dexter-tests/memvars/ctrl-flow.c -o /home/buildbot/buildbot-root/cross-project-tests-sie-ubuntu/build/projects/cross-project-tests/debuginfo-tests/dexter-tests/memvars/Output/ctrl-flow.c.tmp
"/usr/bin/python3.8" "/home/buildbot/buildbot-root/cross-project-tests-sie-ubuntu/llvm-project/cross-project-tests/debuginfo-tests/dexter/dexter.py" test --fail-lt 1.0 -w --lldb-executable "/home/buildbot/buildbot-root/cross-project-tests-sie-ubuntu/build/bin/lldb-dap" --debugger lldb-dap --binary /home/buildbot/buildbot-root/cross-project-tests-sie-ubuntu/build/projects/cross-project-tests/debuginfo-tests/dexter-tests/memvars/Output/ctrl-flow.c.tmp -- /home/buildbot/buildbot-root/cross-project-tests-sie-ubuntu/llvm-project/cross-project-tests/debuginfo-tests/dexter-tests/memvars/ctrl-flow.c # RUN: at line 4
+ /usr/bin/python3.8 /home/buildbot/buildbot-root/cross-project-tests-sie-ubuntu/llvm-project/cross-project-tests/debuginfo-tests/dexter/dexter.py test --fail-lt 1.0 -w --lldb-executable /home/buildbot/buildbot-root/cross-project-tests-sie-ubuntu/build/bin/lldb-dap --debugger lldb-dap --binary /home/buildbot/buildbot-root/cross-project-tests-sie-ubuntu/build/projects/cross-project-tests/debuginfo-tests/dexter-tests/memvars/Output/ctrl-flow.c.tmp -- /home/buildbot/buildbot-root/cross-project-tests-sie-ubuntu/llvm-project/cross-project-tests/debuginfo-tests/dexter-tests/memvars/ctrl-flow.c


****************************************


@llvm-ci
Copy link
Collaborator

llvm-ci commented Sep 6, 2025

LLVM Buildbot has detected a new failure on builder mlir-nvidia-gcc7 running on mlir-nvidia while building llvm at step 7 "test-build-check-mlir-build-only-check-mlir".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/116/builds/17977

Here is the relevant piece of the build log for the reference
Step 7 (test-build-check-mlir-build-only-check-mlir) failure: test (failure)
******************** TEST 'MLIR :: Integration/GPU/CUDA/async.mlir' FAILED ********************
Exit Code: 1

Command Output (stdout):
--
# RUN: at line 1
/vol/worker/mlir-nvidia/mlir-nvidia-gcc7/llvm.obj/bin/mlir-opt /vol/worker/mlir-nvidia/mlir-nvidia-gcc7/llvm.src/mlir/test/Integration/GPU/CUDA/async.mlir  | /vol/worker/mlir-nvidia/mlir-nvidia-gcc7/llvm.obj/bin/mlir-opt -gpu-kernel-outlining  | /vol/worker/mlir-nvidia/mlir-nvidia-gcc7/llvm.obj/bin/mlir-opt -pass-pipeline='builtin.module(gpu.module(strip-debuginfo,convert-gpu-to-nvvm),nvvm-attach-target)'  | /vol/worker/mlir-nvidia/mlir-nvidia-gcc7/llvm.obj/bin/mlir-opt -gpu-async-region -gpu-to-llvm -reconcile-unrealized-casts -gpu-module-to-binary="format=fatbin"  | /vol/worker/mlir-nvidia/mlir-nvidia-gcc7/llvm.obj/bin/mlir-opt -async-to-async-runtime -async-runtime-ref-counting  | /vol/worker/mlir-nvidia/mlir-nvidia-gcc7/llvm.obj/bin/mlir-opt -convert-async-to-llvm -convert-func-to-llvm -convert-arith-to-llvm -convert-cf-to-llvm -reconcile-unrealized-casts  | /vol/worker/mlir-nvidia/mlir-nvidia-gcc7/llvm.obj/bin/mlir-runner    --shared-libs=/vol/worker/mlir-nvidia/mlir-nvidia-gcc7/llvm.obj/lib/libmlir_cuda_runtime.so    --shared-libs=/vol/worker/mlir-nvidia/mlir-nvidia-gcc7/llvm.obj/lib/libmlir_async_runtime.so    --shared-libs=/vol/worker/mlir-nvidia/mlir-nvidia-gcc7/llvm.obj/lib/libmlir_runner_utils.so    --entry-point-result=void -O0  | /vol/worker/mlir-nvidia/mlir-nvidia-gcc7/llvm.obj/bin/FileCheck /vol/worker/mlir-nvidia/mlir-nvidia-gcc7/llvm.src/mlir/test/Integration/GPU/CUDA/async.mlir
# executed command: /vol/worker/mlir-nvidia/mlir-nvidia-gcc7/llvm.obj/bin/mlir-opt /vol/worker/mlir-nvidia/mlir-nvidia-gcc7/llvm.src/mlir/test/Integration/GPU/CUDA/async.mlir
# executed command: /vol/worker/mlir-nvidia/mlir-nvidia-gcc7/llvm.obj/bin/mlir-opt -gpu-kernel-outlining
# executed command: /vol/worker/mlir-nvidia/mlir-nvidia-gcc7/llvm.obj/bin/mlir-opt '-pass-pipeline=builtin.module(gpu.module(strip-debuginfo,convert-gpu-to-nvvm),nvvm-attach-target)'
# executed command: /vol/worker/mlir-nvidia/mlir-nvidia-gcc7/llvm.obj/bin/mlir-opt -gpu-async-region -gpu-to-llvm -reconcile-unrealized-casts -gpu-module-to-binary=format=fatbin
# executed command: /vol/worker/mlir-nvidia/mlir-nvidia-gcc7/llvm.obj/bin/mlir-opt -async-to-async-runtime -async-runtime-ref-counting
# executed command: /vol/worker/mlir-nvidia/mlir-nvidia-gcc7/llvm.obj/bin/mlir-opt -convert-async-to-llvm -convert-func-to-llvm -convert-arith-to-llvm -convert-cf-to-llvm -reconcile-unrealized-casts
# executed command: /vol/worker/mlir-nvidia/mlir-nvidia-gcc7/llvm.obj/bin/mlir-runner --shared-libs=/vol/worker/mlir-nvidia/mlir-nvidia-gcc7/llvm.obj/lib/libmlir_cuda_runtime.so --shared-libs=/vol/worker/mlir-nvidia/mlir-nvidia-gcc7/llvm.obj/lib/libmlir_async_runtime.so --shared-libs=/vol/worker/mlir-nvidia/mlir-nvidia-gcc7/llvm.obj/lib/libmlir_runner_utils.so --entry-point-result=void -O0
# .---command stderr------------
# | 'cuStreamWaitEvent(stream, event, 0)' failed with 'CUDA_ERROR_CONTEXT_IS_DESTROYED'
# | 'cuEventDestroy(event)' failed with 'CUDA_ERROR_CONTEXT_IS_DESTROYED'
# | 'cuStreamWaitEvent(stream, event, 0)' failed with 'CUDA_ERROR_CONTEXT_IS_DESTROYED'
# | 'cuEventDestroy(event)' failed with 'CUDA_ERROR_CONTEXT_IS_DESTROYED'
# | 'cuStreamWaitEvent(stream, event, 0)' failed with 'CUDA_ERROR_CONTEXT_IS_DESTROYED'
# | 'cuStreamWaitEvent(stream, event, 0)' failed with 'CUDA_ERROR_CONTEXT_IS_DESTROYED'
# | 'cuEventDestroy(event)' failed with 'CUDA_ERROR_CONTEXT_IS_DESTROYED'
# | 'cuEventDestroy(event)' failed with 'CUDA_ERROR_CONTEXT_IS_DESTROYED'
# | 'cuEventSynchronize(event)' failed with 'CUDA_ERROR_CONTEXT_IS_DESTROYED'
# | 'cuEventDestroy(event)' failed with 'CUDA_ERROR_CONTEXT_IS_DESTROYED'
# `-----------------------------
# executed command: /vol/worker/mlir-nvidia/mlir-nvidia-gcc7/llvm.obj/bin/FileCheck /vol/worker/mlir-nvidia/mlir-nvidia-gcc7/llvm.src/mlir/test/Integration/GPU/CUDA/async.mlir
# .---command stderr------------
# | /vol/worker/mlir-nvidia/mlir-nvidia-gcc7/llvm.src/mlir/test/Integration/GPU/CUDA/async.mlir:68:12: error: CHECK: expected string not found in input
# |  // CHECK: [84, 84]
# |            ^
# | <stdin>:1:1: note: scanning from here
# | Unranked Memref base@ = 0x5d3db4eb56c0 rank = 1 offset = 0 sizes = [2] strides = [1] data = 
# | ^
# | <stdin>:2:1: note: possible intended match here
# | [42, 42]
# | ^
# | 
# | Input file: <stdin>
# | Check file: /vol/worker/mlir-nvidia/mlir-nvidia-gcc7/llvm.src/mlir/test/Integration/GPU/CUDA/async.mlir
# | 
# | -dump-input=help explains the following input dump.
# | 
# | Input was:
# | <<<<<<
# |             1: Unranked Memref base@ = 0x5d3db4eb56c0 rank = 1 offset = 0 sizes = [2] strides = [1] data =  
# | check:68'0     X~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ error: no match found
# |             2: [42, 42] 
# | check:68'0     ~~~~~~~~~
# | check:68'1     ?         possible intended match
...

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Projects

None yet

Development

Successfully merging this pull request may close these issues.

4 participants