Skip to content

Commit

Permalink
[RISCV] Add DAG combine to pull xor with 1 through select idiom that …
Browse files Browse the repository at this point in the history
…uses czero_eqz/nez.

If we are selecting between two setccs that need to be legalized
with xor, the select will be legalized first. Detect this pattern
so we can pull the xor through to expose it to additional
optimizations.

We could generalize this to other operations, but those normally
get handled in DAG combine before select legalization.

Reviewed By: asb

Differential Revision: https://reviews.llvm.org/D156159
  • Loading branch information
topperc committed Jul 25, 2023
1 parent b34a8b3 commit f6dc75c
Show file tree
Hide file tree
Showing 2 changed files with 61 additions and 24 deletions.
45 changes: 45 additions & 0 deletions llvm/lib/Target/RISCV/RISCVISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10786,6 +10786,42 @@ static SDValue performANDCombine(SDNode *N,
return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ true, Subtarget);
}

// Hoist a common xor with 1 out of a select idiom built from
// czero_eqz/czero_nez. Matches
//   (or (czero_eqz (xor A, 1), C), (czero_nez (xor B, 1), C))
// and produces
//   (xor (or (czero_eqz A, C), (czero_nez B, C)), 1)
// N0 is only matched as the CZERO_EQZ side and N1 as the CZERO_NEZ side.
// Returns an empty SDValue if the pattern is not found.
// FIXME: Generalize to other binary operators with same operand.
static SDValue combineOrOfCZERO(SDNode *N, SDValue N0, SDValue N1,
                                SelectionDAG &DAG) {
  assert(N->getOpcode() == ISD::OR && "Unexpected opcode");

  // The OR must combine a CZERO_EQZ with a CZERO_NEZ, and neither of those
  // nodes may have any other user.
  const bool IsCZeroPair = N0.getOpcode() == RISCVISD::CZERO_EQZ &&
                           N1.getOpcode() == RISCVISD::CZERO_NEZ;
  if (!IsCZeroPair || !N0.hasOneUse() || !N1.hasOneUse())
    return SDValue();

  // Both halves have to be controlled by the very same condition value.
  SDValue Cond = N0.getOperand(1);
  if (N1.getOperand(1) != Cond)
    return SDValue();

  SDValue EqzSrc = N0.getOperand(0);
  SDValue NezSrc = N1.getOperand(0);

  // Each selected value must itself be an xor. Check the opcodes before
  // touching operand 1 of either node.
  if (EqzSrc.getOpcode() != ISD::XOR || NezSrc.getOpcode() != ISD::XOR)
    return SDValue();

  // The xors must share their second operand, and it must be the constant 1.
  SDValue XorRHS = EqzSrc.getOperand(1);
  if (XorRHS != NezSrc.getOperand(1) || !isOneConstant(XorRHS))
    return SDValue();

  // Only rewrite when the xors have no other users.
  // NOTE(review): presumably so the original xors become dead after the
  // rewrite instead of being duplicated -- confirm.
  if (!EqzSrc.hasOneUse() || !NezSrc.hasOneUse())
    return SDValue();

  SDLoc DL(N);
  EVT VT = N->getValueType(0);

  // Rebuild the idiom on the un-xored inputs, then apply the xor once on the
  // combined result.
  SDValue InnerEqz =
      DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, EqzSrc.getOperand(0), Cond);
  SDValue InnerNez =
      DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, NezSrc.getOperand(0), Cond);
  SDValue Merged = DAG.getNode(ISD::OR, DL, VT, InnerEqz, InnerNez);
  return DAG.getNode(ISD::XOR, DL, VT, Merged, XorRHS);
}

static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
const RISCVSubtarget &Subtarget) {
SelectionDAG &DAG = DCI.DAG;
Expand All @@ -10797,6 +10833,15 @@ static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
if (SDValue V = combineDeMorganOfBoolean(N, DAG))
return V;

// Look for Or of CZERO_EQZ/NEZ with same condition which is the select idiom.
// We may be able to pull a common operation out of the true and false value.
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
if (SDValue V = combineOrOfCZERO(N, N0, N1, DAG))
return V;
if (SDValue V = combineOrOfCZERO(N, N1, N0, DAG))
return V;

// fold (or (select cond, 0, y), x) ->
// (select cond, x, (or x, y))
return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
Expand Down
40 changes: 16 additions & 24 deletions llvm/test/CodeGen/RISCV/condops.ll
Original file line number Diff line number Diff line change
Expand Up @@ -1305,17 +1305,15 @@ define i64 @setge(i64 %a, i64 %b, i64 %rs1, i64 %rs2) {
; RV32ZICOND: # %bb.0:
; RV32ZICOND-NEXT: xor t0, a1, a3
; RV32ZICOND-NEXT: sltu a0, a0, a2
; RV32ZICOND-NEXT: xori a0, a0, 1
; RV32ZICOND-NEXT: czero.nez a0, a0, t0
; RV32ZICOND-NEXT: slt a1, a1, a3
; RV32ZICOND-NEXT: xori a1, a1, 1
; RV32ZICOND-NEXT: czero.eqz a1, a1, t0
; RV32ZICOND-NEXT: or a1, a1, a0
; RV32ZICOND-NEXT: czero.nez a0, a6, a1
; RV32ZICOND-NEXT: czero.eqz a2, a4, a1
; RV32ZICOND-NEXT: czero.eqz a0, a6, a1
; RV32ZICOND-NEXT: czero.nez a2, a4, a1
; RV32ZICOND-NEXT: or a0, a2, a0
; RV32ZICOND-NEXT: czero.nez a2, a7, a1
; RV32ZICOND-NEXT: czero.eqz a1, a5, a1
; RV32ZICOND-NEXT: czero.eqz a2, a7, a1
; RV32ZICOND-NEXT: czero.nez a1, a5, a1
; RV32ZICOND-NEXT: or a1, a1, a2
; RV32ZICOND-NEXT: ret
;
Expand Down Expand Up @@ -1449,17 +1447,15 @@ define i64 @setle(i64 %a, i64 %b, i64 %rs1, i64 %rs2) {
; RV32ZICOND: # %bb.0:
; RV32ZICOND-NEXT: xor t0, a1, a3
; RV32ZICOND-NEXT: sltu a0, a2, a0
; RV32ZICOND-NEXT: xori a0, a0, 1
; RV32ZICOND-NEXT: czero.nez a0, a0, t0
; RV32ZICOND-NEXT: slt a1, a3, a1
; RV32ZICOND-NEXT: xori a1, a1, 1
; RV32ZICOND-NEXT: czero.eqz a1, a1, t0
; RV32ZICOND-NEXT: or a1, a1, a0
; RV32ZICOND-NEXT: czero.nez a0, a6, a1
; RV32ZICOND-NEXT: czero.eqz a2, a4, a1
; RV32ZICOND-NEXT: czero.eqz a0, a6, a1
; RV32ZICOND-NEXT: czero.nez a2, a4, a1
; RV32ZICOND-NEXT: or a0, a2, a0
; RV32ZICOND-NEXT: czero.nez a2, a7, a1
; RV32ZICOND-NEXT: czero.eqz a1, a5, a1
; RV32ZICOND-NEXT: czero.eqz a2, a7, a1
; RV32ZICOND-NEXT: czero.nez a1, a5, a1
; RV32ZICOND-NEXT: or a1, a1, a2
; RV32ZICOND-NEXT: ret
;
Expand Down Expand Up @@ -1593,17 +1589,15 @@ define i64 @setuge(i64 %a, i64 %b, i64 %rs1, i64 %rs2) {
; RV32ZICOND: # %bb.0:
; RV32ZICOND-NEXT: xor t0, a1, a3
; RV32ZICOND-NEXT: sltu a0, a0, a2
; RV32ZICOND-NEXT: xori a0, a0, 1
; RV32ZICOND-NEXT: czero.nez a0, a0, t0
; RV32ZICOND-NEXT: sltu a1, a1, a3
; RV32ZICOND-NEXT: xori a1, a1, 1
; RV32ZICOND-NEXT: czero.eqz a1, a1, t0
; RV32ZICOND-NEXT: or a1, a1, a0
; RV32ZICOND-NEXT: czero.nez a0, a6, a1
; RV32ZICOND-NEXT: czero.eqz a2, a4, a1
; RV32ZICOND-NEXT: czero.eqz a0, a6, a1
; RV32ZICOND-NEXT: czero.nez a2, a4, a1
; RV32ZICOND-NEXT: or a0, a2, a0
; RV32ZICOND-NEXT: czero.nez a2, a7, a1
; RV32ZICOND-NEXT: czero.eqz a1, a5, a1
; RV32ZICOND-NEXT: czero.eqz a2, a7, a1
; RV32ZICOND-NEXT: czero.nez a1, a5, a1
; RV32ZICOND-NEXT: or a1, a1, a2
; RV32ZICOND-NEXT: ret
;
Expand Down Expand Up @@ -1737,17 +1731,15 @@ define i64 @setule(i64 %a, i64 %b, i64 %rs1, i64 %rs2) {
; RV32ZICOND: # %bb.0:
; RV32ZICOND-NEXT: xor t0, a1, a3
; RV32ZICOND-NEXT: sltu a0, a2, a0
; RV32ZICOND-NEXT: xori a0, a0, 1
; RV32ZICOND-NEXT: czero.nez a0, a0, t0
; RV32ZICOND-NEXT: sltu a1, a3, a1
; RV32ZICOND-NEXT: xori a1, a1, 1
; RV32ZICOND-NEXT: czero.eqz a1, a1, t0
; RV32ZICOND-NEXT: or a1, a1, a0
; RV32ZICOND-NEXT: czero.nez a0, a6, a1
; RV32ZICOND-NEXT: czero.eqz a2, a4, a1
; RV32ZICOND-NEXT: czero.eqz a0, a6, a1
; RV32ZICOND-NEXT: czero.nez a2, a4, a1
; RV32ZICOND-NEXT: or a0, a2, a0
; RV32ZICOND-NEXT: czero.nez a2, a7, a1
; RV32ZICOND-NEXT: czero.eqz a1, a5, a1
; RV32ZICOND-NEXT: czero.eqz a2, a7, a1
; RV32ZICOND-NEXT: czero.nez a1, a5, a1
; RV32ZICOND-NEXT: or a1, a1, a2
; RV32ZICOND-NEXT: ret
;
Expand Down

0 comments on commit f6dc75c

Please sign in to comment.