Skip to content

Commit

Permalink
[DAG] foldABSToABD - add support for abs(sub(sign_extend_inreg(),sign_extend_inreg())) patterns
Browse files Browse the repository at this point in the history

Partial fix for ABDS regressions on D152928
  • Loading branch information
RKSimon committed Nov 15, 2023
1 parent 9180b9f commit de41396
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 6 deletions.
17 changes: 13 additions & 4 deletions llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
Expand Up @@ -10905,9 +10905,12 @@ SDValue DAGCombiner::foldABSToABD(SDNode *N) {
Op1 = AbsOp1.getOperand(1);

unsigned Opc0 = Op0.getOpcode();

// Check if the operands of the sub are (zero|sign)-extended.
// TODO: Should we use ValueTracking instead?
if (Opc0 != Op1.getOpcode() ||
(Opc0 != ISD::ZERO_EXTEND && Opc0 != ISD::SIGN_EXTEND)) {
(Opc0 != ISD::ZERO_EXTEND && Opc0 != ISD::SIGN_EXTEND &&
Opc0 != ISD::SIGN_EXTEND_INREG)) {
// fold (abs (sub nsw x, y)) -> abds(x, y)
if (AbsOp1->getFlags().hasNoSignedWrap() && hasOperation(ISD::ABDS, VT) &&
TLI.preferABDSToABSWithNSW(VT)) {
Expand All @@ -10917,9 +10920,15 @@ SDValue DAGCombiner::foldABSToABD(SDNode *N) {
return SDValue();
}

EVT VT0 = Op0.getOperand(0).getValueType();
EVT VT1 = Op1.getOperand(0).getValueType();
unsigned ABDOpcode = (Opc0 == ISD::SIGN_EXTEND) ? ISD::ABDS : ISD::ABDU;
EVT VT0, VT1;
if (Opc0 == ISD::SIGN_EXTEND_INREG) {
VT0 = cast<VTSDNode>(Op0.getOperand(1))->getVT();
VT1 = cast<VTSDNode>(Op1.getOperand(1))->getVT();
} else {
VT0 = Op0.getOperand(0).getValueType();
VT1 = Op1.getOperand(0).getValueType();
}
unsigned ABDOpcode = (Opc0 == ISD::ZERO_EXTEND) ? ISD::ABDU : ISD::ABDS;

// fold abs(sext(x) - sext(y)) -> zext(abds(x, y))
// fold abs(zext(x) - zext(y)) -> zext(abdu(x, y))
Expand Down
3 changes: 1 addition & 2 deletions llvm/test/CodeGen/Thumb2/mve-vabdus.ll
Expand Up @@ -40,8 +40,7 @@ define arm_aapcs_vfpcc <4 x i8> @vabd_v4s8(<4 x i8> %src1, <4 x i8> %src2) {
; CHECK-NEXT: vmovlb.s8 q0, q0
; CHECK-NEXT: vmovlb.s16 q1, q1
; CHECK-NEXT: vmovlb.s16 q0, q0
; CHECK-NEXT: vsub.i32 q0, q0, q1
; CHECK-NEXT: vabs.s32 q0, q0
; CHECK-NEXT: vabd.s32 q0, q0, q1
; CHECK-NEXT: bx lr
%sextsrc1 = sext <4 x i8> %src1 to <4 x i16>
%sextsrc2 = sext <4 x i8> %src2 to <4 x i16>
Expand Down

0 comments on commit de41396

Please sign in to comment.