From 7b285ae0e8e65f4dd12a2d734b5700710e7d9c24 Mon Sep 17 00:00:00 2001
From: Paul Walker
Date: Sat, 18 Jun 2022 13:36:22 +0100
Subject: [PATCH] [SVE] Lower "unpredicated" sabd/uabd intrinsics to ISD::ABDS/U.

This enables an existing transformation that, when combined with an add,
will emit saba/uaba instructions.

Differential Revision: https://reviews.llvm.org/D128198
---
 .../Target/AArch64/AArch64ISelLowering.cpp |   4 +
 llvm/test/CodeGen/AArch64/sve-aba.ll       | 107 +++++++++++++++++-
 2 files changed, 107 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index c48244e78ad5a..61da79f8fbe3d 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -16286,6 +16286,10 @@ static SDValue performIntrinsicCombine(SDNode *N,
     return convertMergedOpToPredOp(N, ISD::XOR, DAG, true);
   case Intrinsic::aarch64_sve_orr:
     return convertMergedOpToPredOp(N, ISD::OR, DAG, true);
+  case Intrinsic::aarch64_sve_sabd:
+    return convertMergedOpToPredOp(N, ISD::ABDS, DAG, true);
+  case Intrinsic::aarch64_sve_uabd:
+    return convertMergedOpToPredOp(N, ISD::ABDU, DAG, true);
   case Intrinsic::aarch64_sve_sqadd:
     return convertMergedOpToPredOp(N, ISD::SADDSAT, DAG, true);
   case Intrinsic::aarch64_sve_sqsub:
diff --git a/llvm/test/CodeGen/AArch64/sve-aba.ll b/llvm/test/CodeGen/AArch64/sve-aba.ll
index cf7da62bbcf00..f3953c98bd668 100644
--- a/llvm/test/CodeGen/AArch64/sve-aba.ll
+++ b/llvm/test/CodeGen/AArch64/sve-aba.ll
@@ -36,6 +36,17 @@ define <vscale x 16 x i8> @saba_b_promoted_ops(<vscale x 16 x i8> %a, <vscale x
   ret <vscale x 16 x i8> %add
 }
 
+define <vscale x 16 x i8> @saba_b_from_sabd(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) #0 {
+; CHECK-LABEL: saba_b_from_sabd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    saba z0.b, z1.b, z2.b
+; CHECK-NEXT:    ret
+  %1 = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+  %2 = call <vscale x 16 x i8> @llvm.aarch64.sve.sabd.nxv16i8(<vscale x 16 x i1> %1, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c)
+  %3 = add <vscale x 16 x i8> %2, %a
+  ret <vscale x 16 x i8> %3
+}
+
 define <vscale x 8 x i16> @saba_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) #0 {
 ; CHECK-LABEL: saba_h:
 ; CHECK:       // %bb.0:
@@ -66,6 +77,17 @@ define <vscale x 8 x i16> @saba_h_promoted_ops(<vscale x 8 x i16> %a, <vscale x
   ret <vscale x 8 x i16> %add
 }
 
+define <vscale x 8 x i16> @saba_h_from_sabd(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) #0 {
+; CHECK-LABEL: saba_h_from_sabd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    saba z0.h, z1.h, z2.h
+; CHECK-NEXT:    ret
+  %1 = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+  %2 = call <vscale x 8 x i16> @llvm.aarch64.sve.sabd.nxv8i16(<vscale x 8 x i1> %1, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c)
+  %3 = add <vscale x 8 x i16> %2, %a
+  ret <vscale x 8 x i16> %3
+}
+
 define <vscale x 4 x i32> @saba_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) #0 {
 ; CHECK-LABEL: saba_s:
 ; CHECK:       // %bb.0:
@@ -96,6 +118,17 @@ define <vscale x 4 x i32> @saba_s_promoted_ops(<vscale x 4 x i32> %a, <vscale x
   ret <vscale x 4 x i32> %add
 }
 
+define <vscale x 4 x i32> @saba_s_from_sabd(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) #0 {
+; CHECK-LABEL: saba_s_from_sabd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    saba z0.s, z1.s, z2.s
+; CHECK-NEXT:    ret
+  %1 = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+  %2 = call <vscale x 4 x i32> @llvm.aarch64.sve.sabd.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c)
+  %3 = add <vscale x 4 x i32> %2, %a
+  ret <vscale x 4 x i32> %3
+}
+
 define <vscale x 2 x i64> @saba_d(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c) #0 {
 ; CHECK-LABEL: saba_d:
 ; CHECK:       // %bb.0:
@@ -126,6 +159,17 @@ define <vscale x 2 x i64> @saba_d_promoted_ops(<vscale x 2 x i64> %a, <vscale x
   ret <vscale x 2 x i64> %add
 }
 
+define <vscale x 2 x i64> @saba_d_from_sabd(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c) #0 {
+; CHECK-LABEL: saba_d_from_sabd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    saba z0.d, z1.d, z2.d
+; CHECK-NEXT:    ret
+  %1 = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+  %2 = call <vscale x 2 x i64> @llvm.aarch64.sve.sabd.nxv2i64(<vscale x 2 x i1> %1, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c)
+  %3 = add <vscale x 2 x i64> %2, %a
+  ret <vscale x 2 x i64> %3
+}
+
 ;
 ; UABA
 ;
@@ -159,6 +203,17 @@ define <vscale x 16 x i8> @uaba_b_promoted_ops(<vscale x 16 x i8> %a, <vscale x
   ret <vscale x 16 x i8> %add
 }
 
+define <vscale x 16 x i8> @uaba_b_from_uabd(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) #0 {
+; CHECK-LABEL: uaba_b_from_uabd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uaba z0.b, z1.b, z2.b
+; CHECK-NEXT:    ret
+  %1 = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+  %2 = call <vscale x 16 x i8> @llvm.aarch64.sve.uabd.nxv16i8(<vscale x 16 x i1> %1, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c)
+  %3 = add <vscale x 16 x i8> %2, %a
+  ret <vscale x 16 x i8> %3
+}
+
 define <vscale x 8 x i16> @uaba_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) #0 {
 ; CHECK-LABEL: uaba_h:
 ; CHECK:       // %bb.0:
@@ -188,6 +243,17 @@ define <vscale x 8 x i16> @uaba_h_promoted_ops(<vscale x 8 x i16> %a, <vscale x
   ret <vscale x 8 x i16> %add
 }
 
+define <vscale x 8 x i16> @uaba_h_from_uabd(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) #0 {
+; CHECK-LABEL: uaba_h_from_uabd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uaba z0.h, z1.h, z2.h
+; CHECK-NEXT:    ret
+  %1 = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+  %2 = call <vscale x 8 x i16> @llvm.aarch64.sve.uabd.nxv8i16(<vscale x 8 x i1> %1, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c)
+  %3 = add <vscale x 8 x i16> %2, %a
+  ret <vscale x 8 x i16> %3
+}
+
 define <vscale x 4 x i32> @uaba_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) #0 {
 ; CHECK-LABEL: uaba_s:
 ; CHECK:       // %bb.0:
@@ -217,6 +283,17 @@ define <vscale x 4 x i32> @uaba_s_promoted_ops(<vscale x 4 x i32> %a, <vscale x
   ret <vscale x 4 x i32> %add
 }
 
+define <vscale x 4 x i32> @uaba_s_from_uabd(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) #0 {
+; CHECK-LABEL: uaba_s_from_uabd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uaba z0.s, z1.s, z2.s
+; CHECK-NEXT:    ret
+  %1 = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+  %2 = call <vscale x 4 x i32> @llvm.aarch64.sve.uabd.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c)
+  %3 = add <vscale x 4 x i32> %2, %a
+  ret <vscale x 4 x i32> %3
+}
+
 define <vscale x 2 x i64> @uaba_d(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c) #0 {
 ; CHECK-LABEL: uaba_d:
 ; CHECK:       // %bb.0:
@@ -246,6 +323,17 @@ define <vscale x 2 x i64> @uaba_d_promoted_ops(<vscale x 2 x i64> %a, <vscale x
   ret <vscale x 2 x i64> %add
 }
 
+define <vscale x 2 x i64> @uaba_d_from_uabd(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c) #0 {
+; CHECK-LABEL: uaba_d_from_uabd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uaba z0.d, z1.d, z2.d
+; CHECK-NEXT:    ret
+  %1 = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+  %2 = call <vscale x 2 x i64> @llvm.aarch64.sve.uabd.nxv2i64(<vscale x 2 x i1> %1, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c)
+  %3 = add <vscale x 2 x i64> %2, %a
+  ret <vscale x 2 x i64> %3
+}
+
 ; A variant of uaba_s but with the add operands switched.
 define <vscale x 4 x i32> @uaba_s_commutative(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) #0 {
 ; CHECK-LABEL: uaba_s_commutative:
@@ -262,16 +350,27 @@ define <vscale x 4 x i32> @uaba_s_commutative(<vscale x 4 x i32> %a, <vscale x
 }
 
 declare <vscale x 16 x i8> @llvm.abs.nxv16i8(<vscale x 16 x i8>, i1)
-
 declare <vscale x 8 x i16> @llvm.abs.nxv8i16(<vscale x 8 x i16>, i1)
 declare <vscale x 16 x i16> @llvm.abs.nxv16i16(<vscale x 16 x i16>, i1)
-
 declare <vscale x 4 x i32> @llvm.abs.nxv4i32(<vscale x 4 x i32>, i1)
 declare <vscale x 8 x i32> @llvm.abs.nxv8i32(<vscale x 8 x i32>, i1)
-
 declare <vscale x 2 x i64> @llvm.abs.nxv2i64(<vscale x 2 x i64>, i1)
 declare <vscale x 4 x i64> @llvm.abs.nxv4i64(<vscale x 4 x i64>, i1)
-
 declare <vscale x 2 x i128> @llvm.abs.nxv2i128(<vscale x 2 x i128>, i1)
 
+declare <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32)
+declare <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32)
+declare <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32)
+declare <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.sabd.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.sabd.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.sabd.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.sabd.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.uabd.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.uabd.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.uabd.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.uabd.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+
 attributes #0 = { "target-features"="+neon,+sve,+sve2" }
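Note (illustrative, not part of the patch): convertMergedOpToPredOp is a pre-existing helper in AArch64ISelLowering.cpp, and the two new cases above simply route the sve.sabd/sve.uabd intrinsics through it. The sketch below, under assumed names (lowerAllActiveAbdIntrinsic) and a deliberately simplified predicate check, shows the shape of that lowering: when the governing predicate of the intrinsic call is an all-active ptrue, the call is rebuilt as a generic unpredicated ISD::ABDS/ISD::ABDU node, which the existing abd-plus-add folds can then turn into saba/uaba.

// Illustrative sketch only -- not the actual convertMergedOpToPredOp.
// Context: this would live inside llvm/lib/Target/AArch64/AArch64ISelLowering.cpp,
// which already pulls in the SelectionDAG and AArch64-specific headers used here.
static SDValue lowerAllActiveAbdIntrinsic(SDNode *N, unsigned Opc,
                                          SelectionDAG &DAG) {
  // For an ISD::INTRINSIC_WO_CHAIN node, operand 0 is the intrinsic ID,
  // operand 1 the governing predicate, and operands 2/3 the data operands.
  SDValue Pg = N->getOperand(1);
  SDValue Op1 = N->getOperand(2);
  SDValue Op2 = N->getOperand(3);

  // Simplified "all active" test: an AArch64ISD::PTRUE with the `all` pattern.
  // The in-tree code uses a more general isAllActivePredicate-style check.
  if (Pg.getOpcode() != AArch64ISD::PTRUE ||
      Pg.getConstantOperandVal(0) != AArch64SVEPredPattern::all)
    return SDValue();

  // Drop the predicate and emit the unpredicated node (ISD::ABDS or ISD::ABDU).
  // Existing combines/patterns for add(x, abd(y, z)) can now select saba/uaba.
  return DAG.getNode(Opc, SDLoc(N), N->getValueType(0), Op1, Op2);
}

The point of lowering to the generic ISD::ABDS/ABDU nodes, rather than matching the intrinsic form directly, is that the already-present abd+add handling applies unchanged, which is exactly what the new *_from_sabd/*_from_uabd tests check.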