Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[DAG] computeKnownBits - add ISD::ABDU/ISD::ABDS handling #84905 #88253

Merged
merged 3 commits into llvm:main
Apr 12, 2024

Conversation

SahilPatidar
Copy link
Contributor

Resolve #84905

@SahilPatidar
Copy link
Contributor Author

@RKSimon, let me know whether the tests make sense to you.

@RKSimon RKSimon changed the title add ISD::ABDU/ISD::ABDS handling #84905 [DAG] computeKnownBits - add ISD::ABDU/ISD::ABDS handling #84905 Apr 10, 2024
@RKSimon
Copy link
Collaborator

RKSimon commented Apr 10, 2024

Please can you add the new tests as the first commit in the patch, and the handling (and test changes) as the second commit, so that the diff shows the effect of the change?

@RKSimon RKSimon added the llvm:SelectionDAG SelectionDAGISel as well label Apr 10, 2024
@llvmbot
Copy link
Collaborator

llvmbot commented Apr 10, 2024

@llvm/pr-subscribers-backend-aarch64

@llvm/pr-subscribers-llvm-selectiondag

Author: None (SahilPatidar)

Changes

Resolve #84905


Full diff: https://github.com/llvm/llvm-project/pull/88253.diff

2 Files Affected:

  • (modified) llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp (+12)
  • (modified) llvm/test/CodeGen/AArch64/arm64-neon-aba-abd.ll (+153)
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 1dd0fa49a460f8..a3953c2f29ffeb 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -3397,6 +3397,18 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
     Known = KnownBits::mulhs(Known, Known2);
     break;
   }
+  case ISD::ABDU: {
+    Known = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
+    Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+    Known = KnownBits::abdu(Known, Known2);
+    break;
+  }
+  case ISD::ABDS: {
+    Known = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
+    Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+    Known = KnownBits::abds(Known, Known2);
+    break;
+  }
   case ISD::UMUL_LOHI: {
     assert((Op.getResNo() == 0 || Op.getResNo() == 1) && "Unknown result");
     Known = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
diff --git a/llvm/test/CodeGen/AArch64/arm64-neon-aba-abd.ll b/llvm/test/CodeGen/AArch64/arm64-neon-aba-abd.ll
index a13eac9b0a5e65..6d5a8ad0989719 100644
--- a/llvm/test/CodeGen/AArch64/arm64-neon-aba-abd.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-neon-aba-abd.ll
@@ -291,3 +291,156 @@ define <2 x double> @test_fabd_v2f64(<2 x double> %lhs, <2 x double> %rhs) {
   %abd = call <2 x double> @llvm.aarch64.neon.fabd.v2f64(<2 x double> %lhs, <2 x double> %rhs)
   ret <2 x double> %abd
 }
+
+define <8 x i16> @test_uabd_knownbits_vec8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
+; CHECK-LABEL: test_uabd_knownbits_vec8i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi v2.8h, #15
+; CHECK-NEXT:    and v0.16b, v0.16b, v2.16b
+; CHECK-NEXT:    and v1.16b, v1.16b, v2.16b
+; CHECK-NEXT:    uabd v0.8h, v0.8h, v1.8h
+; CHECK-NEXT:    rev64 v0.8h, v0.8h
+; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    ret
+  %and1 = and <8 x i16> %lhs, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+  %and2 = and <8 x i16> %rhs, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+  %uabd = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> %and1, <8 x i16> %and2)
+  %suff = shufflevector <8 x i16> %uabd, <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+  %res = and <8 x i16> %suff, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+  ret <8 x i16> %res
+}
+
+define <4 x i32> @knownbits_uabd_mask_and_shuffle_lshr(<4 x i32> %a0, <4 x i32> %a1) {
+; CHECK-LABEL: knownbits_uabd_mask_and_shuffle_lshr:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi v0.2d, #0000000000000000
+; CHECK-NEXT:    ushr v0.4s, v0.4s, #17
+; CHECK-NEXT:    ret
+  %1 = and <4 x i32> %a0, <i32 65535, i32 65535, i32 65535, i32 65535>
+  %2 = and <4 x i32> %a1, <i32 65535, i32 65535, i32 65535, i32 65535>
+  %3 = call <4 x i32> @llvm.aarch64.neon.uabd.v4i32(<4 x i32> %1, <4 x i32> %2)
+  %4 = shufflevector <4 x i32> %3, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+  %5 = lshr <4 x i32> %4, <i32 17, i32 17, i32 17, i32 17>
+  ret <4 x i32> %5
+}
+
+define <4 x i32> @knownbits_mask_and_shuffle_lshr(<4 x i32> %a0, <4 x i32> %a1) {
+; CHECK-LABEL: knownbits_mask_and_shuffle_lshr:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi v0.2d, #0000000000000000
+; CHECK-NEXT:    ret
+  %1 = and <4 x i32> %a0, <i32 32767, i32 32767, i32 32767, i32 32767>
+  %2 = and <4 x i32> %a1, <i32 32767, i32 32767, i32 32767, i32 32767>
+  %3 = call <4 x i32> @llvm.aarch64.neon.uabd.v4i32(<4 x i32> %1, <4 x i32> %2)
+  %4 = shufflevector <4 x i32> %3, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %5 = lshr <4 x i32> %4, <i32 17, i32 17, i32 17, i32 17>
+  ret <4 x i32> %5
+}
+
+define <4 x i32> @test_sabd_knownbits_vec4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
+; CHECK-LABEL: test_sabd_knownbits_vec4i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, .LCPI31_0
+; CHECK-NEXT:    adrp x9, .LCPI31_1
+; CHECK-NEXT:    movi v4.2d, #0x0000ff000000ff
+; CHECK-NEXT:    ldr q2, [x8, :lo12:.LCPI31_0]
+; CHECK-NEXT:    ldr q3, [x9, :lo12:.LCPI31_1]
+; CHECK-NEXT:    and v0.16b, v0.16b, v2.16b
+; CHECK-NEXT:    and v1.16b, v1.16b, v3.16b
+; CHECK-NEXT:    sub v0.4s, v4.4s, v0.4s
+; CHECK-NEXT:    sub v1.4s, v4.4s, v1.4s
+; CHECK-NEXT:    sabd v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    mov v0.s[1], v0.s[0]
+; CHECK-NEXT:    trn2 v0.4s, v0.4s, v0.4s
+; CHECK-NEXT:    and v0.16b, v0.16b, v4.16b
+; CHECK-NEXT:    ret
+  %and1 = and <4 x i32> %lhs, <i32 255, i32 -1, i32 -1, i32 255>
+  %and2 = and <4 x i32> %rhs, <i32 255, i32 255, i32 -1, i32 -1>
+  %sub1 = sub <4 x i32> <i32 255, i32 255, i32 255, i32 255>, %and1
+  %sub2 = sub <4 x i32> <i32 255, i32 255, i32 255, i32 255>, %and2
+  %abd = call <4 x i32> @llvm.aarch64.neon.sabd.v4i32(<4 x i32> %sub1, <4 x i32> %sub2)
+  %s = shufflevector <4 x i32> %abd, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
+  %4 = and <4 x i32> %s, <i32 255, i32 255, i32 255, i32 255>
+  ret <4 x i32> %4
+}
+
+define <4 x i32> @knownbits_sabd_and_mask_sub(<4 x i16> %a0) {
+; CHECK-LABEL: knownbits_sabd_and_mask_sub:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    neg v1.4h, v0.4h
+; CHECK-NEXT:    sabd v0.4h, v0.4h, v1.4h
+; CHECK-NEXT:    sshll v0.4s, v0.4h, #0
+; CHECK-NEXT:    ret
+  %1 = and <4 x i16> %a0, <i16 255, i16 255, i16 255, i16 255>
+  %2 = sub <4 x i16> zeroinitializer, %a0
+  %3 = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %a0, <4 x i16> %2)
+  %4 = sext <4 x i16> %3 to <4 x i32>
+  ret <4 x i32> %4
+}
+
+define <4 x i32> @knownbits_uabd_and_mask_sub(<4 x i32> %a0, <4 x i32> %a1) {
+; CHECK-LABEL: knownbits_uabd_and_mask_sub:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi v2.2d, #0xffffffffffffffff
+; CHECK-NEXT:    movi v3.2d, #0x0000ff000000ff
+; CHECK-NEXT:    sub v0.4s, v0.4s, v2.4s
+; CHECK-NEXT:    sub v1.4s, v1.4s, v2.4s
+; CHECK-NEXT:    and v0.16b, v0.16b, v3.16b
+; CHECK-NEXT:    and v1.16b, v1.16b, v3.16b
+; CHECK-NEXT:    uabd v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    rev64 v0.4s, v0.4s
+; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    ret
+  %1 = sub <4 x i32> %a0, <i32 255, i32 255, i32 255, i32 255>
+  %2 = sub <4 x i32> %a1, <i32 255, i32 255, i32 255, i32 255>
+  %3 = and <4 x i32> %1, <i32 255, i32 255, i32 255, i32 255>
+  %4 = and <4 x i32> %2, <i32 255, i32 255, i32 255, i32 255>
+  %5 = call <4 x i32> @llvm.aarch64.neon.uabd.v4i32(<4 x i32> %3, <4 x i32> %4)
+  %6 = shufflevector <4 x i32> %5, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+  ret <4 x i32> %6
+}
+
+define <4 x i32> @knownbits_uabd_and_lshr_mask(<4 x i32> %a0, <4 x i32> %a1) {
+; CHECK-LABEL: knownbits_uabd_and_lshr_mask:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi v2.4s, #3, msl #8
+; CHECK-NEXT:    and v0.16b, v0.16b, v2.16b
+; CHECK-NEXT:    and v1.16b, v1.16b, v2.16b
+; CHECK-NEXT:    ushr v0.4s, v0.4s, #4
+; CHECK-NEXT:    ushr v1.4s, v1.4s, #4
+; CHECK-NEXT:    uabd v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    rev64 v0.4s, v0.4s
+; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    ret
+  %1 = and <4 x i32> %a0, <i32 1023, i32 1023, i32 1023, i32 1023>
+  %2 = and <4 x i32> %a1, <i32 1023, i32 1023, i32 1023, i32 1023>
+  %3 = lshr <4 x i32> %1, <i32 4, i32 4, i32 4, i32 4>
+  %4 = lshr <4 x i32> %2, <i32 4, i32 4, i32 4, i32 4>
+  %5 = call <4 x i32> @llvm.aarch64.neon.uabd.v4i32(<4 x i32> %3, <4 x i32> %4)
+  %6 = shufflevector <4 x i32> %5, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+  ret <4 x i32> %6
+}
+
+define <4 x i32> @knownbits_sabd_and_mask(<4 x i32> %a0, <4 x i32> %a1) {
+; CHECK-LABEL: knownbits_sabd_and_mask:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, .LCPI35_0
+; CHECK-NEXT:    adrp x9, .LCPI35_1
+; CHECK-NEXT:    ldr q2, [x8, :lo12:.LCPI35_0]
+; CHECK-NEXT:    ldr q3, [x9, :lo12:.LCPI35_1]
+; CHECK-NEXT:    and v0.16b, v0.16b, v2.16b
+; CHECK-NEXT:    and v1.16b, v1.16b, v3.16b
+; CHECK-NEXT:    shl v0.4s, v0.4s, #4
+; CHECK-NEXT:    shl v1.4s, v1.4s, #4
+; CHECK-NEXT:    uabd v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    mov v0.s[1], v0.s[0]
+; CHECK-NEXT:    trn2 v0.4s, v0.4s, v0.4s
+; CHECK-NEXT:    ret
+  %1 = and <4 x i32> %a0, <i32 1023, i32 -1, i32 1023, i32 1023>
+  %2 = and <4 x i32> %a1, <i32 1023, i32 1023, i32 -1, i32 1023>
+  %3 = shl <4 x i32> %1, <i32 4, i32 4, i32 4, i32 4>
+  %4 = shl <4 x i32> %2, <i32 4, i32 4, i32 4, i32 4>
+  %5 = call <4 x i32> @llvm.aarch64.neon.uabd.v4i32(<4 x i32> %3, <4 x i32> %4)
+  %6 = shufflevector <4 x i32> %5, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
+  ret <4 x i32> %6
+}

@SahilPatidar
Copy link
Contributor Author

@RKSimon, I checked that out! I made separate commits and changed some test cases.

@RKSimon RKSimon marked this pull request as ready for review April 12, 2024 09:57
Copy link
Collaborator

@RKSimon RKSimon left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please can you look at the failing AArch64 test:

Failed Tests (1):
  LLVM :: CodeGen/AArch64/abd-combine.ll

Copy link
Collaborator

@RKSimon RKSimon left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM

@RKSimon RKSimon merged commit ab037c4 into llvm:main Apr 12, 2024
3 of 4 checks passed
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
backend:AArch64 llvm:SelectionDAG SelectionDAGISel as well
Projects
None yet
Development

Successfully merging this pull request may close these issues.

[DAG] SelectionDAG::computeKnownBits - add ISD::ABDU/ISD::ABDS handling
3 participants