-
Notifications
You must be signed in to change notification settings - Fork 15.5k
[DAG] Fold logic of zero-checks to multiplication for MinSize #171805
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
|
@llvm/pr-subscribers-llvm-selectiondag @llvm/pr-subscribers-backend-risc-v Author: None (TelGome) Changes: Addresses #164501. Full diff: https://github.com/llvm/llvm-project/pull/171805.diff 2 Files Affected:
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
old mode 100644
new mode 100755
index 6a99d4e29b64f..7861f0937459d
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -631,6 +631,7 @@ namespace {
SDValue foldAddToAvg(SDNode *N, const SDLoc &DL);
SDValue foldSubToAvg(SDNode *N, const SDLoc &DL);
+ SDValue foldLogicSetCCToMul(SDNode *N, const SDLoc &DL);
SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
unsigned HiOp);
@@ -6648,6 +6649,59 @@ static unsigned getMinMaxOpcodeForFP(SDValue Operand1, SDValue Operand2,
return ISD::DELETED_NODE;
}
+// Fold a pair of zero-checks into one multiply-and-compare when the function
+// is optimized for minimum size (-Oz):
+//   (X == 0) || (Y == 0) --> (X * Y) == 0
+//   (X != 0) && (Y != 0) --> (X * Y) != 0
+// The rewrite is only valid when X * Y cannot wrap, i.e. when the maximum
+// number of active bits of X and Y together fits in the value's bit width;
+// otherwise two non-zero inputs could multiply to zero.
+//
+// N is the OR/AND node being visited and DL its debug location. Returns the
+// replacement SETCC, or an empty SDValue when the pattern does not apply.
+SDValue DAGCombiner::foldLogicSetCCToMul(SDNode *N, const SDLoc &DL) {
+  if (OptLevel == CodeGenOptLevel::None ||
+      !DAG.getMachineFunction().getFunction().hasMinSize())
+    return SDValue();
+
+  // OR combines "== 0" checks, AND combines "!= 0" checks.
+  ISD::CondCode ExpectedCC;
+  switch (N->getOpcode()) {
+  case ISD::OR:
+    ExpectedCC = ISD::SETEQ;
+    break;
+  case ISD::AND:
+    ExpectedCC = ISD::SETNE;
+    break;
+  default:
+    return SDValue();
+  }
+
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+  if (N0.getOpcode() != ISD::SETCC || N1.getOpcode() != ISD::SETCC)
+    return SDValue();
+
+  // A compare with another user stays alive after the fold, so the rewrite
+  // would add a multiply instead of removing a node.
+  if (!N0.hasOneUse() || !N1.hasOneUse())
+    return SDValue();
+
+  ISD::CondCode CC0 = cast<CondCodeSDNode>(N0.getOperand(2))->get();
+  ISD::CondCode CC1 = cast<CondCodeSDNode>(N1.getOperand(2))->get();
+  if (CC0 != ExpectedCC || CC1 != ExpectedCC)
+    return SDValue();
+
+  // Both compares must be against the constant zero.
+  if (!isNullConstant(N0.getOperand(1)) || !isNullConstant(N1.getOperand(1)))
+    return SDValue();
+
+  SDValue A = N0.getOperand(0);
+  SDValue B = N1.getOperand(0);
+  EVT VT = A.getValueType();
+  if (VT != B.getValueType() || !VT.isScalarInteger())
+    return SDValue();
+
+  // A multiply that has to be expanded (for example into a libcall) is
+  // unlikely to be smaller than two compares and a logic op.
+  if (!DAG.getTargetLoweringInfo().isOperationLegalOrCustom(ISD::MUL, VT))
+    return SDValue();
+
+  unsigned BitWidth = A.getValueSizeInBits();
+  unsigned MaxProductBits = DAG.computeKnownBits(A).countMaxActiveBits() +
+                            DAG.computeKnownBits(B).countMaxActiveBits();
+  if (MaxProductBits > BitWidth)
+    return SDValue();
+
+  SDNodeFlags Flags;
+  Flags.setNoUnsignedWrap(true);
+  // The product is below 2^MaxProductBits. It is only guaranteed to stay in
+  // the non-negative signed range when that bound is at most
+  // 2^(BitWidth - 1), so nsw must not be claimed when the operands may use
+  // every bit of the type (e.g. 0xFFFF * 0xFFFF in i32).
+  Flags.setNoSignedWrap(MaxProductBits < BitWidth);
+
+  SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, A, B, Flags);
+  return DAG.getSetCC(DL, N->getValueType(0), Mul, DAG.getConstant(0, DL, VT),
+                      ExpectedCC);
+}
+
static SDValue foldAndOrOfSETCC(SDNode *LogicOp, SelectionDAG &DAG) {
using AndOrSETCCFoldKind = TargetLowering::AndOrSETCCFoldKind;
assert(
@@ -7555,6 +7609,9 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(BitWidth)))
return DAG.getConstant(0, DL, VT);
+ if (SDValue R = foldLogicSetCCToMul(N, DL))
+ return R;
+
if (SDValue R = foldAndOrOfSETCC(N, DAG))
return R;
@@ -8520,6 +8577,9 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue()))
return N1;
+ if (SDValue R = foldLogicSetCCToMul(N, DL))
+ return R;
+
if (SDValue R = foldAndOrOfSETCC(N, DAG))
return R;
diff --git a/llvm/test/CodeGen/RISCV/fold-zero-check-minsize.ll b/llvm/test/CodeGen/RISCV/fold-zero-check-minsize.ll
new file mode 100755
index 0000000000000..19fc9fb544fb4
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/fold-zero-check-minsize.ll
@@ -0,0 +1,57 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv64 -mattr=+m -verify-machineinstrs < %s \
+; RUN: | FileCheck %s
+
+; Positive case: minsize function with two zero-extended i16 arguments.
+; The OR of two "== 0" compares is expected to become a single mul followed
+; by seqz.
+define i1 @foldLogicSetCCToMul0(i16 zeroext %a, i16 zeroext %b) minsize {
+; CHECK-LABEL: foldLogicSetCCToMul0:
+; CHECK: # %bb.0:
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: seqz a0, a0
+; CHECK-NEXT: ret
+entry:
+ %cmp1 = icmp eq i16 %a, 0
+ %cmp2 = icmp eq i16 %b, 0
+ %or = or i1 %cmp1, %cmp2
+ ret i1 %or
+}
+
+; Positive case: minsize function with two zero-extended i16 arguments.
+; The AND of two "!= 0" compares is expected to become a single mul followed
+; by snez.
+define i1 @foldLogicSetCCToMul1(i16 zeroext %a, i16 zeroext %b) minsize {
+; CHECK-LABEL: foldLogicSetCCToMul1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: snez a0, a0
+; CHECK-NEXT: ret
+entry:
+ %cmp1 = icmp ne i16 %a, 0
+ %cmp2 = icmp ne i16 %b, 0
+ %and = and i1 %cmp1, %cmp2
+ ret i1 %and
+}
+
+; Negative case: minsize function with plain i64 arguments. The expected
+; output keeps both seqz compares joined by or; no mul is formed.
+; NOTE(review): presumably because nothing bounds the operands' active bits,
+; so the product could wrap -- confirm against the combine's known-bits check.
+define i1 @foldLogicSetCCToMul2(i64 %a, i64 %b) minsize {
+; CHECK-LABEL: foldLogicSetCCToMul2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: seqz a0, a0
+; CHECK-NEXT: seqz a1, a1
+; CHECK-NEXT: or a0, a0, a1
+; CHECK-NEXT: ret
+entry:
+ %cmp1 = icmp eq i64 %a, 0
+ %cmp2 = icmp eq i64 %b, 0
+ %or = or i1 %cmp1, %cmp2
+ ret i1 %or
+}
+
+; Negative case: same body as foldLogicSetCCToMul0 but without the minsize
+; attribute. The expected output keeps both seqz compares joined by or; no mul
+; is formed.
+define i1 @foldLogicSetCCToMul3(i16 zeroext %a, i16 zeroext %b) {
+; CHECK-LABEL: foldLogicSetCCToMul3:
+; CHECK: # %bb.0:
+; CHECK-NEXT: seqz a0, a0
+; CHECK-NEXT: seqz a1, a1
+; CHECK-NEXT: or a0, a0, a1
+; CHECK-NEXT: ret
+entry:
+ %cmp1 = icmp eq i16 %a, 0
+ %cmp2 = icmp eq i16 %b, 0
+ %or = or i1 %cmp1, %cmp2
+ ret i1 %or
+}
\ No newline at end of file
|
lukel97
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should the title say [DAGCombine]?
arsenm
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Needs negative tests with mismatched compare operand and opcode, and non-0 constants
Yes, that makes sense. The issue is labeled |
I’ve uploaded a commit to address the mentioned issues. Please review when you have a moment. |
dtcxzyw
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do we have a TLI hook querying whether the target supports predicated instructions?
This optimization looks less profitable on ARM64 since it has conditional compares.
Addresses #164501