-
Notifications
You must be signed in to change notification settings - Fork 10.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[DAG] Matched FixedWidth pattern for ISD::AVGFLOORU #84903
Conversation
@llvm/pr-subscribers-llvm-selectiondag Author: Shourya Goel (Sh0g0-1758) ChangesFixes: #84749 NOTE: The Tests are not written properly as of now, please suggest changes in them. Full diff: https://github.com/llvm/llvm-project/pull/84903.diff 2 Files Affected:
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 5476ef87971436..f6132e9627bf8e 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -2826,6 +2826,36 @@ SDValue DAGCombiner::visitADDLike(SDNode *N) {
return SDValue();
}
+// Attempt to form ext(avgflooru(A, B)) from add(and(A, B), lshr(xor(A, B), 1))
+static SDValue combineFixedwidthToAVG(SDNode *N, SelectionDAG &DAG) {
+ assert(N->getOpcode() == ISD::ADD && "ADD node is required here");
+ SDValue And = N->getOperand(0);
+ SDValue Lshr = N->getOperand(1);
+ if (And.getOpcode() != ISD::AND || Lshr.getOpcode() != ISD::SRL)
+ return SDValue();
+ SDValue Xor = Lshr.getOperand(0);
+ if (Xor.getOpcode() != ISD::XOR)
+ return SDValue();
+ SDValue And1 = And.getOperand(0);
+ SDValue And2 = And.getOperand(1);
+ SDValue Xor1 = Xor.getOperand(0);
+ SDValue Xor2 = Xor.getOperand(1);
+ if(Xor1 != And1 && Xor2 != And2)
+ return SDValue();
+ // Is the right shift using an immediate value of 1?
+ ConstantSDNode *N1C = isConstOrConstSplat(Lshr.getOperand(1));
+ if (!N1C || N1C->getAPIntValue() != 1)
+ return SDValue();
+ EVT VT = And.getValueType();
+ SDLoc DL(N);
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (!TLI.isOperationLegalOrCustom(ISD::AVGFLOORU, VT))
+ return SDValue();
+ return DAG.getNode(ISD::AVGFLOORU, DL, VT,
+ DAG.getExtOrTrunc(false, And1, DL, VT),
+ DAG.getExtOrTrunc(false, And2, DL, VT));
+}
+
SDValue DAGCombiner::visitADD(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -2841,6 +2871,10 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
if (SDValue V = foldAddSubOfSignBit(N, DAG))
return V;
+ // Try to match AVG fixedwidth pattern
+ if (SDValue V = combineFixedwidthToAVG(N, DAG))
+ return V;
+
// fold (a+b) -> (a|b) iff a and b share no bits.
if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
DAG.haveNoCommonBitsSet(N0, N1))
diff --git a/llvm/test/CodeGen/AArch64/dag-fixedwidth.ll b/llvm/test/CodeGen/AArch64/dag-fixedwidth.ll
new file mode 100644
index 00000000000000..7000da89900ec3
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/dag-fixedwidth.ll
@@ -0,0 +1,11 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=aarch64 | FileCheck %s
+
+define i4 @fixedwidth(i4 %a0, i4 %a1) {
+; CHECK-LABEL: fixedwidth:
+ %and = and i4 %a0, %a1
+ %xor = xor i4 %a0, %a1
+ %srl = lshr i4 %xor, 1
+ %res = add i4 %and, %srl
+ ret i4 %res
+}
\ No newline at end of file
|
@llvm/pr-subscribers-backend-aarch64 Author: Shourya Goel (Sh0g0-1758) ChangesFixes: #84749 NOTE: The Tests are not written properly as of now, please suggest changes in them. Full diff: https://github.com/llvm/llvm-project/pull/84903.diff 2 Files Affected:
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 5476ef87971436..f6132e9627bf8e 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -2826,6 +2826,36 @@ SDValue DAGCombiner::visitADDLike(SDNode *N) {
return SDValue();
}
+// Attempt to form ext(avgflooru(A, B)) from add(and(A, B), lshr(xor(A, B), 1))
+static SDValue combineFixedwidthToAVG(SDNode *N, SelectionDAG &DAG) {
+ assert(N->getOpcode() == ISD::ADD && "ADD node is required here");
+ SDValue And = N->getOperand(0);
+ SDValue Lshr = N->getOperand(1);
+ if (And.getOpcode() != ISD::AND || Lshr.getOpcode() != ISD::SRL)
+ return SDValue();
+ SDValue Xor = Lshr.getOperand(0);
+ if (Xor.getOpcode() != ISD::XOR)
+ return SDValue();
+ SDValue And1 = And.getOperand(0);
+ SDValue And2 = And.getOperand(1);
+ SDValue Xor1 = Xor.getOperand(0);
+ SDValue Xor2 = Xor.getOperand(1);
+ if(Xor1 != And1 && Xor2 != And2)
+ return SDValue();
+ // Is the right shift using an immediate value of 1?
+ ConstantSDNode *N1C = isConstOrConstSplat(Lshr.getOperand(1));
+ if (!N1C || N1C->getAPIntValue() != 1)
+ return SDValue();
+ EVT VT = And.getValueType();
+ SDLoc DL(N);
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (!TLI.isOperationLegalOrCustom(ISD::AVGFLOORU, VT))
+ return SDValue();
+ return DAG.getNode(ISD::AVGFLOORU, DL, VT,
+ DAG.getExtOrTrunc(false, And1, DL, VT),
+ DAG.getExtOrTrunc(false, And2, DL, VT));
+}
+
SDValue DAGCombiner::visitADD(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -2841,6 +2871,10 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
if (SDValue V = foldAddSubOfSignBit(N, DAG))
return V;
+ // Try to match AVG fixedwidth pattern
+ if (SDValue V = combineFixedwidthToAVG(N, DAG))
+ return V;
+
// fold (a+b) -> (a|b) iff a and b share no bits.
if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
DAG.haveNoCommonBitsSet(N0, N1))
diff --git a/llvm/test/CodeGen/AArch64/dag-fixedwidth.ll b/llvm/test/CodeGen/AArch64/dag-fixedwidth.ll
new file mode 100644
index 00000000000000..7000da89900ec3
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/dag-fixedwidth.ll
@@ -0,0 +1,11 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=aarch64 | FileCheck %s
+
+define i4 @fixedwidth(i4 %a0, i4 %a1) {
+; CHECK-LABEL: fixedwidth:
+ %and = and i4 %a0, %a1
+ %xor = xor i4 %a0, %a1
+ %srl = lshr i4 %xor, 1
+ %res = add i4 %and, %srl
+ ret i4 %res
+}
\ No newline at end of file
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
// Is the right shift using an immediate value of 1? | ||
ConstantSDNode *N1C = isConstOrConstSplat(Lshr.getOperand(1)); | ||
if (!N1C || N1C->getAPIntValue() != 1) | ||
return SDValue(); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We should be able to vastly simplify this matching code by using the sd_match pattern helper - #84759 will introduce this very soon, but if you're willing to try it you can just add the include and using lines from the patch you should be able to use it here?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
#84759 has now been committed if you want to try it
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think I would much rather do it as a separate issue after implementing the fixedwidthpatterns for both floor and ceil. But if you want it to be part of this PR only, please do let me know.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think using sd_match is the way to go from the start
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Sure thing. Please push the code for m_And / mXor / m_Or
and I will update accordingly.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
saw the updates, updating it.
assert(N->getOpcode() == ISD::ADD && "ADD node is required here"); | ||
SDValue And = N->getOperand(0); | ||
SDValue Lshr = N->getOperand(1); | ||
if (And.getOpcode() != ISD::AND || Lshr.getOpcode() != ISD::SRL) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ideally the operands of the Add
would be treated commutatively - either could be the and
and either could be the srl
. Same for the and
/xor
args below.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Using sd_match would handle commutative matching for us
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is the kind of thing I had in mind:
if (TLI.isOperationLegal(ISD::AVGFLOORU, VT)) {
SDValue A, B;
if (sd_match(N, m_Add(m_And(m_Value(A), m_Value(B)),
m_Srl(m_Xor(m_Deferred(A), m_Deferred(B)),
m_SpecificInt(1))))) {
return DAG.getNode(ISD::AVGFLOORU, DL, VT, A, B);
}
}
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
But since m_And
and m_Xor
are not available in SDPatternMatch
, we will have to use llvm::PatternMatch
for them. But I don't think if this will work. This is what I get finally and it crashes the build process.
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDValue N0 = N->getOperand(0);
EVT VT = N0.getValueType();
SDLoc DL(N);
if (TLI.isOperationLegal(ISD::AVGFLOORU, VT)) {
SDValue A, B;
if (sd_match(N, llvm::SDPatternMatch::m_Add(llvm::PatternMatch::m_And(m_Value(A), m_Value(B)),
llvm::SDPatternMatch::m_Srl(llvm::PatternMatch::m_Xor(m_Deferred(A), m_Deferred(B)),
m_SpecificInt(1))))) {
return DAG.getNode(ISD::AVGFLOORU, DL, VT, A, B);
}
}
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I have added the code for commutative matching, but not using the method as suggested by @RKSimon.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'll get m_And / mXor / m_Or added shortly
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Right, sure thing. Then we can use the shortened version as part of this PR itself.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Modified the code accordingly.
@RKSimon, I did some fiddling around with the codebase and I now have the correct implementation with the tests passing. |
EVT VT = And1.getValueType(); | ||
EVT NVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits()); | ||
if (VT.isVector()) | ||
VT = EVT::getVectorVT(*DAG.getContext(), NVT, VT.getVectorElementCount()); | ||
else | ||
VT = NVT; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I feel like all of this should be VT = Or1.getValueType(), and it shouldn't need to adjust the original type.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Or1.getValueType()
? Yes, that is what fixedwidth means and was also pointed out by RKSimon but the tests don't pass unless I adjust the type.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I wasn't sure what Fixedwidth was referring to in the title. Usually in LLVM FixedWidth refers to vectors which are not scalable, but it would seem this fold should be able to apply fine to scalable types too.
Currently this code will take a vector type (say v4i32), create an integer type of the size of the original vector (a i128), and then convert it to a vector type with the same number of elements (a v4i128). This won't be legal (and won't match the original types), so the transform won't be being performed where it should. I believe it should be fine to just use the original VT from the existing operations, as they are all the same size and the same size as the node we want to produce.
; CHECK-NEXT: and v2.16b, v0.16b, v1.16b | ||
; CHECK-NEXT: eor v0.16b, v0.16b, v1.16b | ||
; CHECK-NEXT: usra v2.4s, v0.4s, #1 | ||
; CHECK-NEXT: mov v0.16b, v2.16b |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Any idea why this is not producing a single uhadd instruction?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I just ran the automation script for the tests.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Let me rephrase that :) - this test case should be creating an uhadd (which is the aarch64 spelling for AVGFLOORU). Currently it's just producing the same as without the patch. I think it's because of the vector type check.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, indeed but whenever I run the update_llc_test_checks.py on the test case :
define <4 x i32> @uhadd_fixedwidth_v4i32(<4 x i32> %a0, <4 x i32> %a1) {
%and = and <4 x i32> %a0, %a1
%xor = xor <4 x i32> %a0, %a1
%srl = lshr <4 x i32> %xor, <i32 1,i32 1,i32 1,i32 1>
%res = add <4 x i32> %and, %srl
ret <4 x i32> %res
}
I get the same check assertions for both when I add the change in VT to vector type and when I don't. The only difference is that in the former, the tests pass. Is there something else that needs to be done to generate the correct assertions or is there some change in the original DAGCombiner.cpp required?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ah, I wrote the correct check assertions manually now with the original value type and the checks are passing now.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think the automation script update_llc_test_checks.py
needs to be changed?
@RKSimon, @davemgreen, I have updated the tests and modified the pattern matching with sd_pattern match. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks, this LGTM now if others agree.
LLVM tends to prefer early exits to nested if's, but in this case if you are adding AVGCEIL in another patch then it could be handled in this same function, where the if's make sense.
Are any further changes needed? Apart from the merging of AVGCEIL with this once this is submitted. |
Fixes: #84749