-
Notifications
You must be signed in to change notification settings - Fork 15.5k
[DAG] foldShiftToAvg - Fixes avgceil[su] pattern matching for sub+xor form #169199
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
|
@llvm/pr-subscribers-llvm-selectiondag Author: Lauren (laurenmchin) Changes: Fixes regression where avgceil[su] patterns fail to match when AArch64 canonicalizes `(add (add x, y), 1)` to `(sub x, (xor y, -1))`. Addresses the remaining regression in [#147946](https://github.com/llvm/llvm-project/issues/147946). Full diff: https://github.com/llvm/llvm-project/pull/169199.diff 1 File Affected:
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 6b79dbb46cadc..e874b4d1e59de 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -11943,28 +11943,128 @@ SDValue DAGCombiner::foldShiftToAvg(SDNode *N, const SDLoc &DL) {
return SDValue();
EVT VT = N->getValueType(0);
- bool IsUnsigned = Opcode == ISD::SRL;
+ SDValue N0 = N->getOperand(0);
- // Captured values.
- SDValue A, B, Add;
+ if (!isOnesOrOnesSplat(N->getOperand(1)))
+ return SDValue();
- // Match floor average as it is common to both floor/ceil avgs.
+ EVT TruncVT = VT;
+ SDNode *TruncNode = nullptr;
+
+ // We need the correct type to check for avgceil/floor support.
+ if (N->hasOneUse() && N->user_begin()->getOpcode() == ISD::TRUNCATE) {
+ TruncNode = *N->user_begin();
+ TruncVT = TruncNode->getValueType(0);
+ }
+
+ // NarrowVT is used to detect whether we're working with sign-extended values.
+ EVT NarrowVT = VT;
+ SDValue N00 = N0.getOperand(0);
+
+ // Extract narrow type from SIGN_EXTEND_INREG. For SRL, require the narrow
+ // type to be legal to ensure correct width avg operations.
+ if (N00.getOpcode() == ISD::SIGN_EXTEND_INREG) {
+ NarrowVT = cast<VTSDNode>(N0->getOperand(0)->getOperand(1))->getVT();
+ if (Opcode == ISD::SRL && !TLI.isTypeLegal(NarrowVT))
+ return SDValue();
+ }
+
+ unsigned FloorISD = 0;
+ unsigned CeilISD = 0;
+ bool IsUnsigned = false;
+
+ // Decide whether signed or unsigned.
+ switch (Opcode) {
+ case ISD::SRA:
+ FloorISD = ISD::AVGFLOORS;
+ break;
+ case ISD::SRL:
+ IsUnsigned = true;
+ if (TruncNode &&
+ (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::SUB)) {
+ // Use signed avg for SRL of sign-extended values when truncating.
+ SDValue N01 = N0.getOperand(1);
+ if ((N00.getOpcode() == ISD::SIGN_EXTEND_INREG ||
+ N00.getOpcode() == ISD::SIGN_EXTEND) ||
+ (N01.getOpcode() == ISD::SIGN_EXTEND_INREG ||
+ N01.getOpcode() == ISD::SIGN_EXTEND))
+ IsUnsigned = false;
+ }
+ FloorISD = (IsUnsigned ? ISD::AVGFLOORU : ISD::AVGFLOORS);
+ break;
+ default:
+ return SDValue();
+ }
+
+ CeilISD = (IsUnsigned ? ISD::AVGCEILU : ISD::AVGCEILS);
+
+ // Without truncation, require target support for both averaging operations.
+ // We check FloorISD at VT (generated type), CeilISD at TruncVT (final type).
+ if ((!TruncNode && !TLI.isOperationLegalOrCustom(FloorISD, VT)) ||
+ (!TruncNode && !TLI.isOperationLegalOrCustom(CeilISD, TruncVT)))
+ return SDValue();
+
+ SDValue X, Y, Sub, Xor;
+
+ // fold (sr[al] (sub x, (xor y, -1)), 1) -> (avgceil[su] x, y)
if (sd_match(N, m_BinOp(Opcode,
- m_AllOf(m_Value(Add), m_Add(m_Value(A), m_Value(B))),
+ m_AllOf(m_Value(Sub),
+ m_Sub(m_Value(X),
+ m_AllOf(m_Value(Xor),
+ m_Xor(m_Value(Y), m_Value())))),
m_One()))) {
- // Decide whether signed or unsigned.
- unsigned FloorISD = IsUnsigned ? ISD::AVGFLOORU : ISD::AVGFLOORS;
- if (!hasOperation(FloorISD, VT))
- return SDValue();
- // Can't optimize adds that may wrap.
- if ((IsUnsigned && !Add->getFlags().hasNoUnsignedWrap()) ||
- (!IsUnsigned && !Add->getFlags().hasNoSignedWrap()))
- return SDValue();
+ ConstantSDNode *C = isConstOrConstSplat(Xor.getOperand(1),
+ /*AllowUndefs=*/false,
+ /*AllowTruncation=*/true);
+ if (C && C->getAPIntValue().trunc(VT.getScalarSizeInBits()).isAllOnes()) {
+ // Don't fold extended inputs with truncation on fixed vectors > 128b
+ if (TruncNode && VT.isFixedLengthVector() && VT.getSizeInBits() > 128) {
+ if (X.getOpcode() == ISD::SIGN_EXTEND ||
+ X.getOpcode() == ISD::ZERO_EXTEND ||
+ Y.getOpcode() == ISD::SIGN_EXTEND ||
+ Y.getOpcode() == ISD::ZERO_EXTEND)
+ return SDValue();
+ }
+
+ if (!TruncNode) {
+ // Without truncation, require no-wrap flags for safe narrowing.
+ const SDNodeFlags &Flags = Sub->getFlags();
+ if ((!IsUnsigned && (Opcode == ISD::SRA && VT == NarrowVT) &&
+ !Flags.hasNoSignedWrap()) ||
+ (IsUnsigned && !Flags.hasNoUnsignedWrap()))
+ return SDValue();
+ }
- return DAG.getNode(FloorISD, DL, N->getValueType(0), {A, B});
+ // Require avgceil[su] support at the final type:
+ // - with truncation: build at VT, visitTRUNCATE completes the fold
+ // - without truncation: build directly at VT (where TruncVT == VT).
+ if (TLI.isOperationLegalOrCustom(CeilISD, TruncVT))
+ return DAG.getNode(CeilISD, DL, VT, Y, X);
+ }
}
+ // Captured values.
+ SDValue A, B, Add;
+
+ // Match floor average as it is common to both floor/ceil avgs.
+ // fold (sr[al] (add a, b), 1) -> avgfloor[su](a, b)
+ if (!sd_match(N, m_BinOp(Opcode,
+ m_AllOf(m_Value(Add), m_Add(m_Value(A), m_Value(B))),
+ m_One())))
+ return SDValue();
+
+ if (TruncNode && VT.isFixedLengthVector() && VT.getSizeInBits() > 128)
+ return SDValue();
+
+ // Can't optimize adds that may wrap.
+ if ((IsUnsigned && !Add->getFlags().hasNoUnsignedWrap()) ||
+ (!IsUnsigned && !Add->getFlags().hasNoSignedWrap()))
+ return SDValue();
+
+ EVT TargetVT = TruncNode ? TruncVT : VT;
+ if (TLI.isOperationLegalOrCustom(FloorISD, TargetVT))
+ return DAG.getNode(FloorISD, DL, N->getValueType(0), A, B);
return SDValue();
}
|
b342abf to
5c33d48
Compare
🐧 Linux x64 Test Results
Failed Tests (click on a test name to see its output) LLVMLLVM.CodeGen/RISCV/intrinsic-cttz-elts-vscale.llLLVM.CodeGen/RISCV/rvv/bitreverse-vp.llLLVM.CodeGen/RISCV/rvv/bswap-vp.llLLVM.CodeGen/RISCV/rvv/combine-reduce-add-to-vcpop.llLLVM.CodeGen/RISCV/rvv/dont-sink-splat-operands.llLLVM.CodeGen/RISCV/rvv/extract-subvector.llLLVM.CodeGen/RISCV/rvv/fixed-vectors-shuffle-reverse.llLLVM.CodeGen/RISCV/rvv/insert-subvector.llLLVM.CodeGen/RISCV/rvv/legalize-load-sdnode.llLLVM.CodeGen/RISCV/rvv/named-vector-shuffle-reverse.llLLVM.CodeGen/RISCV/rvv/pr107950.llLLVM.CodeGen/RISCV/rvv/riscv-codegenprepare-asm.llLLVM.CodeGen/RISCV/rvv/setcc-fp-vp.llLLVM.CodeGen/RISCV/rvv/setcc-int-vp.llLLVM.CodeGen/RISCV/rvv/sink-splat-operands.llLLVM.CodeGen/RISCV/rvv/vandn-sdnode.llLLVM.CodeGen/RISCV/rvv/vector-deinterleave-fixed.llLLVM.CodeGen/RISCV/rvv/vector-deinterleave-load.llLLVM.CodeGen/RISCV/rvv/vector-deinterleave.llLLVM.CodeGen/RISCV/rvv/vector-interleave-fixed.llLLVM.CodeGen/RISCV/rvv/vector-interleave-store.llLLVM.CodeGen/RISCV/rvv/vector-interleave.llLLVM.CodeGen/RISCV/rvv/vector-splice.llLLVM.CodeGen/RISCV/rvv/vfptoi-sdnode.llLLVM.CodeGen/RISCV/rvv/vp-vector-interleaved-access.llLLVM.CodeGen/RISCV/rvv/vreductions-fp-sdnode.llLLVM.CodeGen/RISCV/rvv/vreductions-fp-vp.llIf these failures are unrelated to your changes (for example tests are broken or flaky at HEAD), please open an issue at https://github.com/llvm/llvm-project/issues and add the |
5c33d48 to
74ee978
Compare
… form Fixes regression where avgceil[su] patterns fail to match when AArch64 canonicalizes `(add (add x, y), 1)` to `(sub x, (xor y, -1))`, causing SVE/SVE2 test failures. Addresses the remaining regression in [#147946](https://github.com/llvm/llvm-project/issues/147946)
74ee978 to
5f7647e
Compare
You can test this locally with the following command: git-clang-format --diff origin/main HEAD --extensions cpp -- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp --diff_from_common_commit
View the diff from clang-format here.diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index d9bbed3a3..1073cb425 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -12029,9 +12029,10 @@ SDValue DAGCombiner::foldShiftToAvg(SDNode *N, const SDLoc &DL) {
// legal.
// // If not, bail out to prevent incorrect folding at the wider type.
// // This ensures operations like srhadd are generated at the correct
-// width. if (N00.getOpcode() == ISD::SIGN_EXTEND_INREG) { NarrowVT =
-// cast<VTSDNode>(N0->getOperand(0)->getOperand(1))->getVT(); if (Opcode ==
-// ISD::SRL && !TLI.isTypeLegal(NarrowVT)) return SDValue();
+// width. if (N00.getOpcode() == ISD::SIGN_EXTEND_INREG) {
+// NarrowVT = cast<VTSDNode>(N0->getOperand(0)->getOperand(1))->getVT();
+// if (Opcode == ISD::SRL && !TLI.isTypeLegal(NarrowVT))
+// return SDValue();
// }
// unsigned FloorISD = 0;
@@ -12084,7 +12085,8 @@ SDValue DAGCombiner::foldShiftToAvg(SDNode *N, const SDLoc &DL) {
// if (isAllOnesOrAllOnesSplat(Xor.getOperand(1)) ||
// (ISD::isConstantSplatVector(Xor.getOperand(1).getNode(),
-// SplatVal) && SplatVal.trunc(VT.getScalarSizeInBits()).isAllOnes())) {
+// SplatVal) &&
+// SplatVal.trunc(VT.getScalarSizeInBits()).isAllOnes())) {
// // - Can't fold if either op is
// sign/zero-extended for SRL, as SRL
// // is unsigned, and shadd patterns are handled
@@ -12095,19 +12097,23 @@ SDValue DAGCombiner::foldShiftToAvg(SDNode *N, const SDLoc &DL) {
// // into a series of EXTRACT_SUBVECTORs.
// Folding each subvector does not
// // necessarily preserve semantics so they
-// cannot be folded here. if (TruncNode && VT.isFixedLengthVector()) { if
-// (X.getOpcode() == ISD::SIGN_EXTEND || X.getOpcode() == ISD::ZERO_EXTEND ||
-// Y.getOpcode() ==
-// ISD::SIGN_EXTEND || Y.getOpcode() == ISD::ZERO_EXTEND || VT.getSizeInBits() >
-// 128) return SDValue();
+// cannot be folded here. if (TruncNode &&
+// VT.isFixedLengthVector()) { if
+// (X.getOpcode() == ISD::SIGN_EXTEND ||
+// X.getOpcode() == ISD::ZERO_EXTEND || Y.getOpcode() == ISD::SIGN_EXTEND ||
+// Y.getOpcode() == ISD::ZERO_EXTEND || VT.getSizeInBits() > 128)
+// return SDValue();
// }
// // If there is no truncate user, ensure the
// relevant no wrap flag is on
// // the sub so that narrowing the widened result
-// is defined. if (Opcode == ISD::SRA && VT == NarrowVT) { if (!IsUnsigned &&
-// !Sub->getFlags().hasNoSignedWrap()) return SDValue(); } else if (IsUnsigned
-// && !Sub->getFlags().hasNoUnsignedWrap()) return SDValue();
+// is defined. if (Opcode == ISD::SRA && VT ==
+// NarrowVT) { if (!IsUnsigned &&
+// !Sub->getFlags().hasNoSignedWrap())
+// return SDValue(); } else if (IsUnsigned
+// && !Sub->getFlags().hasNoUnsignedWrap())
+// return SDValue();
// // Only fold if the target supports avgceil[su]
// at the truncated type:
@@ -12126,7 +12132,8 @@ SDValue DAGCombiner::foldShiftToAvg(SDNode *N, const SDLoc &DL) {
// support for the VT at the
// // final observable type (TruncVT or VT).
// if (TLI.isOperationLegalOrCustom(CeilISD,
-// TruncVT)) return DAG.getNode(CeilISD, DL, VT, Y, X);
+// TruncVT)) return DAG.getNode(CeilISD, DL,
+// VT, Y, X);
// }
// }
@@ -12214,8 +12221,8 @@ SDValue DAGCombiner::foldShiftToAvg(SDNode *N, const SDLoc &DL) {
// if ((N00.getOpcode() == ISD::SIGN_EXTEND_INREG ||
// N00.getOpcode() == ISD::SIGN_EXTEND) ||
// (N01.getOpcode() ==
-// ISD::SIGN_EXTEND_INREG || N01.getOpcode() == ISD::SIGN_EXTEND)) IsUnsigned =
-// false;
+// ISD::SIGN_EXTEND_INREG || N01.getOpcode()
+// == ISD::SIGN_EXTEND)) IsUnsigned = false;
// }
// FloorISD = (IsUnsigned ? ISD::AVGFLOORU : ISD::AVGFLOORS);
|
Fixes regression where avgceil[su] patterns fail to match when AArch64 canonicalizes
`(add (add x, y), 1)` to `(sub x, (xor y, -1))`, causing SVE/SVE2 test failures for DAG topological sorting. Addresses the remaining regression in #147946