[RISCV] Fold (select setcc, setcc, setcc) into and/or instructions
This patch folds `(select setcc, setcc, setcc)` into and/or instructions when truev or falsev is equal to, or is the inverse of, condv.

(select x, x, y) -> x | y https://alive2.llvm.org/ce/z/36Ud3Z
(select !x, x, y) -> x & y https://alive2.llvm.org/ce/z/mYYoGF
(select x, y, x) -> x & y https://alive2.llvm.org/ce/z/MAZ--X
(select !x, y, x) -> x | y https://alive2.llvm.org/ce/z/ct7By5
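
These folds are plain Boolean identities on the i1 results of the setcc nodes. A minimal standalone C++ sketch (illustrative only, not part of the patch) that checks all four exhaustively, with `c` standing for the condv result and `y` for the unrelated setcc result:

```cpp
#include <cassert>

int main() {
  // Enumerate every combination of the two i1 inputs.
  for (int ci = 0; ci < 2; ++ci) {
    for (int yi = 0; yi < 2; ++yi) {
      bool c = ci, y = yi;
      assert((c ? c : y) == (c | y));  // (select x, x, y)  -> x | y
      assert((!c ? c : y) == (c & y)); // (select !x, x, y) -> x & y
      assert((c ? y : c) == (c & y));  // (select x, y, x)  -> x & y
      assert((!c ? y : c) == (c | y)); // (select !x, y, x) -> x | y
    }
  }
  return 0;
}
```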

This is a follow-up improvement to D150177, which optimizes the code generated for signed truncation check patterns when Zbb is not available.

Reviewed By: craig.topper

Differential Revision: https://reviews.llvm.org/D150286
dtcxzyw committed May 12, 2023
1 parent 478739b commit af161ff
Showing 6 changed files with 306 additions and 343 deletions.
48 changes: 48 additions & 0 deletions llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -5349,6 +5349,32 @@ SDValue RISCVTargetLowering::lowerGlobalTLSAddress(SDValue Op,
  return Addr;
}

// Return true if Val is equal to (setcc LHS, RHS, CC).
// Return false if Val is the inverse of (setcc LHS, RHS, CC).
// Otherwise, return std::nullopt.
static std::optional<bool> matchSetCC(SDValue LHS, SDValue RHS,
                                      ISD::CondCode CC, SDValue Val) {
  assert(Val->getOpcode() == ISD::SETCC);
  SDValue LHS2 = Val.getOperand(0);
  SDValue RHS2 = Val.getOperand(1);
  ISD::CondCode CC2 = cast<CondCodeSDNode>(Val.getOperand(2))->get();

  if (LHS == LHS2 && RHS == RHS2) {
    if (CC == CC2)
      return true;
    if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
      return false;
  } else if (LHS == RHS2 && RHS == LHS2) {
    CC2 = ISD::getSetCCSwappedOperands(CC2);
    if (CC == CC2)
      return true;
    if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
      return false;
  }

  return std::nullopt;
}
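// Illustrative example (not part of this patch): with LHS = a, RHS = b and
// CC = ISD::SETLT,
//   matchSetCC(a, b, SETLT, (setcc b, a, SETGT)) returns true  (swapped form),
//   matchSetCC(a, b, SETLT, (setcc a, b, SETGE)) returns false (inverse of CC),
//   matchSetCC(a, b, SETLT, (setcc c, d, SETEQ)) returns std::nullopt.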

static SDValue combineSelectToBinOp(SDNode *N, SelectionDAG &DAG,
                                    const RISCVSubtarget &Subtarget) {
  SDValue CondV = N->getOperand(0);
@@ -5383,6 +5409,28 @@ static SDValue combineSelectToBinOp(SDNode *N, SelectionDAG &DAG,
    }
  }

  // Try to fold (select (setcc lhs, rhs, cc), truev, falsev) into bitwise ops
  // when both truev and falsev are also setcc.
  if (CondV.getOpcode() == ISD::SETCC && TrueV.getOpcode() == ISD::SETCC &&
      FalseV.getOpcode() == ISD::SETCC) {
    SDValue LHS = CondV.getOperand(0);
    SDValue RHS = CondV.getOperand(1);
    ISD::CondCode CC = cast<CondCodeSDNode>(CondV.getOperand(2))->get();

    // (select x, x, y) -> x | y
    // (select !x, x, y) -> x & y
    if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, TrueV)) {
      return DAG.getNode(*MatchResult ? ISD::OR : ISD::AND, DL, VT, TrueV,
                         FalseV);
    }
    // (select x, y, x) -> x & y
    // (select !x, y, x) -> x | y
    if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, FalseV)) {
      return DAG.getNode(*MatchResult ? ISD::AND : ISD::OR, DL, VT, TrueV,
                         FalseV);
    }
  }

  return SDValue();
}

18 changes: 6 additions & 12 deletions llvm/test/CodeGen/RISCV/forced-atomics.ll
@@ -2700,26 +2700,20 @@ define i64 @rmw64_umax_seq_cst(ptr %p) nounwind {
; RV32-NEXT: call __atomic_compare_exchange_8@plt
; RV32-NEXT: lw a1, 4(sp)
; RV32-NEXT: lw a4, 0(sp)
; RV32-NEXT: bnez a0, .LBB51_6
; RV32-NEXT: bnez a0, .LBB51_4
; RV32-NEXT: .LBB51_2: # %atomicrmw.start
; RV32-NEXT: # =>This Inner Loop Header: Depth=1
; RV32-NEXT: beqz a1, .LBB51_4
; RV32-NEXT: # %bb.3: # %atomicrmw.start
; RV32-NEXT: # in Loop: Header=BB51_2 Depth=1
; RV32-NEXT: snez a0, a1
; RV32-NEXT: sltiu a2, a4, 2
; RV32-NEXT: xori a2, a2, 1
; RV32-NEXT: or a0, a2, a0
; RV32-NEXT: mv a2, a4
; RV32-NEXT: bnez a0, .LBB51_1
; RV32-NEXT: j .LBB51_5
; RV32-NEXT: .LBB51_4: # in Loop: Header=BB51_2 Depth=1
; RV32-NEXT: sltiu a0, a4, 2
; RV32-NEXT: xori a0, a0, 1
; RV32-NEXT: mv a2, a4
; RV32-NEXT: bnez a0, .LBB51_1
; RV32-NEXT: .LBB51_5: # %atomicrmw.start
; RV32-NEXT: # %bb.3: # %atomicrmw.start
; RV32-NEXT: # in Loop: Header=BB51_2 Depth=1
; RV32-NEXT: li a2, 1
; RV32-NEXT: j .LBB51_1
; RV32-NEXT: .LBB51_6: # %atomicrmw.end
; RV32-NEXT: .LBB51_4: # %atomicrmw.end
; RV32-NEXT: mv a0, a4
; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
220 changes: 96 additions & 124 deletions llvm/test/CodeGen/RISCV/fpclamptosat.ll
@@ -1320,44 +1320,37 @@ define i64 @ustest_f64i64(double %x) {
; RV32IF-NEXT: .LBB20_2:
; RV32IF-NEXT: seqz a2, a0
; RV32IF-NEXT: .LBB20_3: # %entry
; RV32IF-NEXT: lw a3, 12(sp)
; RV32IF-NEXT: xori a4, a0, 1
; RV32IF-NEXT: or a4, a4, a1
; RV32IF-NEXT: seqz a4, a4
; RV32IF-NEXT: addi a4, a4, -1
; RV32IF-NEXT: and a2, a4, a2
; RV32IF-NEXT: neg a4, a2
; RV32IF-NEXT: bnez a2, .LBB20_5
; RV32IF-NEXT: xori a3, a0, 1
; RV32IF-NEXT: or a3, a3, a1
; RV32IF-NEXT: seqz a3, a3
; RV32IF-NEXT: addi a3, a3, -1
; RV32IF-NEXT: and a3, a3, a2
; RV32IF-NEXT: neg a2, a3
; RV32IF-NEXT: bnez a3, .LBB20_5
; RV32IF-NEXT: # %bb.4: # %entry
; RV32IF-NEXT: li a0, 1
; RV32IF-NEXT: .LBB20_5: # %entry
; RV32IF-NEXT: lw a5, 8(sp)
; RV32IF-NEXT: and a2, a4, a1
; RV32IF-NEXT: and a1, a4, a3
; RV32IF-NEXT: beqz a2, .LBB20_8
; RV32IF-NEXT: lw a3, 8(sp)
; RV32IF-NEXT: lw a4, 12(sp)
; RV32IF-NEXT: and a5, a2, a1
; RV32IF-NEXT: beqz a5, .LBB20_7
; RV32IF-NEXT: # %bb.6: # %entry
; RV32IF-NEXT: sgtz a3, a2
; RV32IF-NEXT: and a4, a4, a5
; RV32IF-NEXT: bnez a1, .LBB20_9
; RV32IF-NEXT: sgtz a1, a5
; RV32IF-NEXT: j .LBB20_8
; RV32IF-NEXT: .LBB20_7:
; RV32IF-NEXT: snez a5, a4
; RV32IF-NEXT: or a0, a0, a2
; RV32IF-NEXT: snez a1, a0
; RV32IF-NEXT: .LBB20_8: # %entry
; RV32IF-NEXT: and a4, a2, a4
; RV32IF-NEXT: or a0, a0, a5
; RV32IF-NEXT: and a2, a2, a3
; RV32IF-NEXT: bnez a0, .LBB20_10
; RV32IF-NEXT: j .LBB20_11
; RV32IF-NEXT: .LBB20_8:
; RV32IF-NEXT: snez a3, a0
; RV32IF-NEXT: and a4, a4, a5
; RV32IF-NEXT: beqz a1, .LBB20_7
; RV32IF-NEXT: .LBB20_9: # %entry
; RV32IF-NEXT: snez a5, a1
; RV32IF-NEXT: or a0, a0, a2
; RV32IF-NEXT: beqz a0, .LBB20_11
; RV32IF-NEXT: # %bb.9:
; RV32IF-NEXT: or a0, a2, a4
; RV32IF-NEXT: snez a1, a0
; RV32IF-NEXT: .LBB20_10: # %entry
; RV32IF-NEXT: mv a5, a3
; RV32IF-NEXT: .LBB20_11: # %entry
; RV32IF-NEXT: neg a2, a5
; RV32IF-NEXT: and a0, a2, a4
; RV32IF-NEXT: and a1, a2, a1
; RV32IF-NEXT: neg a1, a1
; RV32IF-NEXT: and a0, a1, a2
; RV32IF-NEXT: and a1, a1, a4
; RV32IF-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32IF-NEXT: addi sp, sp, 32
; RV32IF-NEXT: ret
@@ -1406,44 +1399,37 @@ define i64 @ustest_f64i64(double %x) {
; RV32IFD-NEXT: .LBB20_2:
; RV32IFD-NEXT: seqz a2, a0
; RV32IFD-NEXT: .LBB20_3: # %entry
; RV32IFD-NEXT: lw a3, 12(sp)
; RV32IFD-NEXT: xori a4, a0, 1
; RV32IFD-NEXT: or a4, a4, a1
; RV32IFD-NEXT: seqz a4, a4
; RV32IFD-NEXT: addi a4, a4, -1
; RV32IFD-NEXT: and a2, a4, a2
; RV32IFD-NEXT: neg a4, a2
; RV32IFD-NEXT: bnez a2, .LBB20_5
; RV32IFD-NEXT: xori a3, a0, 1
; RV32IFD-NEXT: or a3, a3, a1
; RV32IFD-NEXT: seqz a3, a3
; RV32IFD-NEXT: addi a3, a3, -1
; RV32IFD-NEXT: and a3, a3, a2
; RV32IFD-NEXT: neg a2, a3
; RV32IFD-NEXT: bnez a3, .LBB20_5
; RV32IFD-NEXT: # %bb.4: # %entry
; RV32IFD-NEXT: li a0, 1
; RV32IFD-NEXT: .LBB20_5: # %entry
; RV32IFD-NEXT: lw a5, 8(sp)
; RV32IFD-NEXT: and a2, a4, a1
; RV32IFD-NEXT: and a1, a4, a3
; RV32IFD-NEXT: beqz a2, .LBB20_8
; RV32IFD-NEXT: lw a3, 8(sp)
; RV32IFD-NEXT: lw a4, 12(sp)
; RV32IFD-NEXT: and a5, a2, a1
; RV32IFD-NEXT: beqz a5, .LBB20_7
; RV32IFD-NEXT: # %bb.6: # %entry
; RV32IFD-NEXT: sgtz a3, a2
; RV32IFD-NEXT: and a4, a4, a5
; RV32IFD-NEXT: bnez a1, .LBB20_9
; RV32IFD-NEXT: sgtz a1, a5
; RV32IFD-NEXT: j .LBB20_8
; RV32IFD-NEXT: .LBB20_7:
; RV32IFD-NEXT: snez a5, a4
; RV32IFD-NEXT: or a0, a0, a2
; RV32IFD-NEXT: snez a1, a0
; RV32IFD-NEXT: .LBB20_8: # %entry
; RV32IFD-NEXT: and a4, a2, a4
; RV32IFD-NEXT: or a0, a0, a5
; RV32IFD-NEXT: and a2, a2, a3
; RV32IFD-NEXT: bnez a0, .LBB20_10
; RV32IFD-NEXT: j .LBB20_11
; RV32IFD-NEXT: .LBB20_8:
; RV32IFD-NEXT: snez a3, a0
; RV32IFD-NEXT: and a4, a4, a5
; RV32IFD-NEXT: beqz a1, .LBB20_7
; RV32IFD-NEXT: .LBB20_9: # %entry
; RV32IFD-NEXT: snez a5, a1
; RV32IFD-NEXT: or a0, a0, a2
; RV32IFD-NEXT: beqz a0, .LBB20_11
; RV32IFD-NEXT: # %bb.9:
; RV32IFD-NEXT: or a0, a2, a4
; RV32IFD-NEXT: snez a1, a0
; RV32IFD-NEXT: .LBB20_10: # %entry
; RV32IFD-NEXT: mv a5, a3
; RV32IFD-NEXT: .LBB20_11: # %entry
; RV32IFD-NEXT: neg a2, a5
; RV32IFD-NEXT: and a0, a2, a4
; RV32IFD-NEXT: and a1, a2, a1
; RV32IFD-NEXT: neg a1, a1
; RV32IFD-NEXT: and a0, a1, a2
; RV32IFD-NEXT: and a1, a1, a4
; RV32IFD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32IFD-NEXT: addi sp, sp, 32
; RV32IFD-NEXT: ret
@@ -1602,44 +1588,37 @@ define i64 @ustest_f32i64(float %x) {
; RV32-NEXT: .LBB23_2:
; RV32-NEXT: seqz a2, a0
; RV32-NEXT: .LBB23_3: # %entry
; RV32-NEXT: lw a3, 12(sp)
; RV32-NEXT: xori a4, a0, 1
; RV32-NEXT: or a4, a4, a1
; RV32-NEXT: seqz a4, a4
; RV32-NEXT: addi a4, a4, -1
; RV32-NEXT: and a2, a4, a2
; RV32-NEXT: neg a4, a2
; RV32-NEXT: bnez a2, .LBB23_5
; RV32-NEXT: xori a3, a0, 1
; RV32-NEXT: or a3, a3, a1
; RV32-NEXT: seqz a3, a3
; RV32-NEXT: addi a3, a3, -1
; RV32-NEXT: and a3, a3, a2
; RV32-NEXT: neg a2, a3
; RV32-NEXT: bnez a3, .LBB23_5
; RV32-NEXT: # %bb.4: # %entry
; RV32-NEXT: li a0, 1
; RV32-NEXT: .LBB23_5: # %entry
; RV32-NEXT: lw a5, 8(sp)
; RV32-NEXT: and a2, a4, a1
; RV32-NEXT: and a1, a4, a3
; RV32-NEXT: beqz a2, .LBB23_8
; RV32-NEXT: lw a3, 8(sp)
; RV32-NEXT: lw a4, 12(sp)
; RV32-NEXT: and a5, a2, a1
; RV32-NEXT: beqz a5, .LBB23_7
; RV32-NEXT: # %bb.6: # %entry
; RV32-NEXT: sgtz a3, a2
; RV32-NEXT: and a4, a4, a5
; RV32-NEXT: bnez a1, .LBB23_9
; RV32-NEXT: sgtz a1, a5
; RV32-NEXT: j .LBB23_8
; RV32-NEXT: .LBB23_7:
; RV32-NEXT: snez a5, a4
; RV32-NEXT: or a0, a0, a2
; RV32-NEXT: snez a1, a0
; RV32-NEXT: .LBB23_8: # %entry
; RV32-NEXT: and a4, a2, a4
; RV32-NEXT: or a0, a0, a5
; RV32-NEXT: and a2, a2, a3
; RV32-NEXT: bnez a0, .LBB23_10
; RV32-NEXT: j .LBB23_11
; RV32-NEXT: .LBB23_8:
; RV32-NEXT: snez a3, a0
; RV32-NEXT: and a4, a4, a5
; RV32-NEXT: beqz a1, .LBB23_7
; RV32-NEXT: .LBB23_9: # %entry
; RV32-NEXT: snez a5, a1
; RV32-NEXT: or a0, a0, a2
; RV32-NEXT: beqz a0, .LBB23_11
; RV32-NEXT: # %bb.9:
; RV32-NEXT: or a0, a2, a4
; RV32-NEXT: snez a1, a0
; RV32-NEXT: .LBB23_10: # %entry
; RV32-NEXT: mv a5, a3
; RV32-NEXT: .LBB23_11: # %entry
; RV32-NEXT: neg a2, a5
; RV32-NEXT: and a0, a2, a4
; RV32-NEXT: and a1, a2, a1
; RV32-NEXT: neg a1, a1
; RV32-NEXT: and a0, a1, a2
; RV32-NEXT: and a1, a1, a4
; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32-NEXT: addi sp, sp, 32
; RV32-NEXT: ret
@@ -1865,44 +1844,37 @@ define i64 @ustest_f16i64(half %x) {
; RV32-NEXT: .LBB26_2:
; RV32-NEXT: seqz a2, a0
; RV32-NEXT: .LBB26_3: # %entry
; RV32-NEXT: lw a3, 12(sp)
; RV32-NEXT: xori a4, a0, 1
; RV32-NEXT: or a4, a4, a1
; RV32-NEXT: seqz a4, a4
; RV32-NEXT: addi a4, a4, -1
; RV32-NEXT: and a2, a4, a2
; RV32-NEXT: neg a4, a2
; RV32-NEXT: bnez a2, .LBB26_5
; RV32-NEXT: xori a3, a0, 1
; RV32-NEXT: or a3, a3, a1
; RV32-NEXT: seqz a3, a3
; RV32-NEXT: addi a3, a3, -1
; RV32-NEXT: and a3, a3, a2
; RV32-NEXT: neg a2, a3
; RV32-NEXT: bnez a3, .LBB26_5
; RV32-NEXT: # %bb.4: # %entry
; RV32-NEXT: li a0, 1
; RV32-NEXT: .LBB26_5: # %entry
; RV32-NEXT: lw a5, 8(sp)
; RV32-NEXT: and a2, a4, a1
; RV32-NEXT: and a1, a4, a3
; RV32-NEXT: beqz a2, .LBB26_8
; RV32-NEXT: lw a3, 8(sp)
; RV32-NEXT: lw a4, 12(sp)
; RV32-NEXT: and a5, a2, a1
; RV32-NEXT: beqz a5, .LBB26_7
; RV32-NEXT: # %bb.6: # %entry
; RV32-NEXT: sgtz a3, a2
; RV32-NEXT: and a4, a4, a5
; RV32-NEXT: bnez a1, .LBB26_9
; RV32-NEXT: sgtz a1, a5
; RV32-NEXT: j .LBB26_8
; RV32-NEXT: .LBB26_7:
; RV32-NEXT: snez a5, a4
; RV32-NEXT: or a0, a0, a2
; RV32-NEXT: snez a1, a0
; RV32-NEXT: .LBB26_8: # %entry
; RV32-NEXT: and a4, a2, a4
; RV32-NEXT: or a0, a0, a5
; RV32-NEXT: and a2, a2, a3
; RV32-NEXT: bnez a0, .LBB26_10
; RV32-NEXT: j .LBB26_11
; RV32-NEXT: .LBB26_8:
; RV32-NEXT: snez a3, a0
; RV32-NEXT: and a4, a4, a5
; RV32-NEXT: beqz a1, .LBB26_7
; RV32-NEXT: .LBB26_9: # %entry
; RV32-NEXT: snez a5, a1
; RV32-NEXT: or a0, a0, a2
; RV32-NEXT: beqz a0, .LBB26_11
; RV32-NEXT: # %bb.9:
; RV32-NEXT: or a0, a2, a4
; RV32-NEXT: snez a1, a0
; RV32-NEXT: .LBB26_10: # %entry
; RV32-NEXT: mv a5, a3
; RV32-NEXT: .LBB26_11: # %entry
; RV32-NEXT: neg a2, a5
; RV32-NEXT: and a0, a2, a4
; RV32-NEXT: and a1, a2, a1
; RV32-NEXT: neg a1, a1
; RV32-NEXT: and a0, a1, a2
; RV32-NEXT: and a1, a1, a4
; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32-NEXT: addi sp, sp, 32
; RV32-NEXT: ret