diff --git a/llvm/test/CodeGen/AArch64/sve-cntp-combine-i32.ll b/llvm/test/CodeGen/AArch64/sve-cntp-combine-i32.ll
new file mode 100644
index 0000000000000..859c0ffad4c72
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-cntp-combine-i32.ll
@@ -0,0 +1,330 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s | FileCheck %s
+
+target triple = "aarch64-unknown-linux-gnu"
+
+; INCP
+
+define i32 @cntp_add_all_active_nxv16i1(i32 %x, <vscale x 16 x i1> %pg) #0 {
+; CHECK-LABEL: cntp_add_all_active_nxv16i1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p1.b
+; CHECK-NEXT:    cntp x8, p1, p0.b
+; CHECK-NEXT:    add w0, w8, w0
+; CHECK-NEXT:    ret
+  %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+  %2 = tail call i64 @llvm.aarch64.sve.cntp.nxv16i1(<vscale x 16 x i1> %1, <vscale x 16 x i1> %pg)
+  %3 = trunc i64 %2 to i32
+  %add = add i32 %3, %x
+  ret i32 %add
+}
+
+define i32 @cntp_add_all_active_nxv8i1(i32 %x, <vscale x 8 x i1> %pg) #0 {
+; CHECK-LABEL: cntp_add_all_active_nxv8i1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p1.h
+; CHECK-NEXT:    cntp x8, p1, p0.h
+; CHECK-NEXT:    add w0, w8, w0
+; CHECK-NEXT:    ret
+  %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+  %2 = tail call i64 @llvm.aarch64.sve.cntp.nxv8i1(<vscale x 8 x i1> %1, <vscale x 8 x i1> %pg)
+  %3 = trunc i64 %2 to i32
+  %add = add i32 %3, %x
+  ret i32 %add
+}
+
+define i32 @cntp_add_all_active_nxv4i1(i32 %x, <vscale x 4 x i1> %pg) #0 {
+; CHECK-LABEL: cntp_add_all_active_nxv4i1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p1.s
+; CHECK-NEXT:    cntp x8, p1, p0.s
+; CHECK-NEXT:    add w0, w8, w0
+; CHECK-NEXT:    ret
+  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+  %2 = tail call i64 @llvm.aarch64.sve.cntp.nxv4i1(<vscale x 4 x i1> %1, <vscale x 4 x i1> %pg)
+  %3 = trunc i64 %2 to i32
+  %add = add i32 %3, %x
+  ret i32 %add
+}
+
+define i32 @cntp_add_all_active_nxv2i1(i32 %x, <vscale x 2 x i1> %pg) #0 {
+; CHECK-LABEL: cntp_add_all_active_nxv2i1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p1.d
+; CHECK-NEXT:    cntp x8, p1, p0.d
+; CHECK-NEXT:    add w0, w8, w0
+; CHECK-NEXT:    ret
+  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+  %2 = tail call i64 @llvm.aarch64.sve.cntp.nxv2i1(<vscale x 2 x i1> %1, <vscale x 2 x i1> %pg)
+  %3 = trunc i64 %2 to i32
+  %add = add i32 %3, %x
+  ret i32 %add
+}
+
+define i32 @cntp_add_all_active_nxv8i1_via_cast(i32 %x, <vscale x 8 x i1> %pg) #0 {
+; CHECK-LABEL: cntp_add_all_active_nxv8i1_via_cast:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p1.b
+; CHECK-NEXT:    cntp x8, p1, p0.h
+; CHECK-NEXT:    add w0, w8, w0
+; CHECK-NEXT:    ret
+  %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+  %2 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %1)
+  %3 = tail call i64 @llvm.aarch64.sve.cntp.nxv8i1(<vscale x 8 x i1> %2, <vscale x 8 x i1> %pg)
+  %4 = trunc i64 %3 to i32
+  %add = add i32 %4, %x
+  ret i32 %add
+}
+
+define i64 @cntp_add_all_active_nxv2i1_multiuse(i32 %x, i64 %y, <vscale x 2 x i1> %pg) #0 {
+; CHECK-LABEL: cntp_add_all_active_nxv2i1_multiuse:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p1.d
+; CHECK-NEXT:    cntp x8, p1, p0.d
+; CHECK-NEXT:    add w9, w8, w0
+; CHECK-NEXT:    madd x0, x8, x1, x9
+; CHECK-NEXT:    ret
+  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+  %2 = tail call i64 @llvm.aarch64.sve.cntp.nxv2i1(<vscale x 2 x i1> %1, <vscale x 2 x i1> %pg)
+  %3 = trunc i64 %2 to i32
+  %add = add i32 %3, %x
+  %add.ext = zext i32 %add to i64
+  %mul = mul i64 %2, %y
+  %res = add i64 %add.ext, %mul
+  ret i64 %res
+}
+
+define i32 @cntp_add_same_active_nxv16i1(i32 %x, <vscale x 16 x i1> %pg) #0 {
+; CHECK-LABEL: cntp_add_same_active_nxv16i1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cntp x8, p0, p0.b
+; CHECK-NEXT:    add w0, w8, w0
+; CHECK-NEXT:    ret
+  %1 = tail call i64 @llvm.aarch64.sve.cntp.nxv16i1(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %pg)
+  %2 = trunc i64 %1 to i32
+  %add = add i32 %2, %x
+  ret i32 %add
+}
+
+define i32 @cntp_add_same_active_nxv8i1(i32 %x, <vscale x 8 x i1> %pg) #0 {
+; CHECK-LABEL: cntp_add_same_active_nxv8i1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cntp x8, p0, p0.h
+; CHECK-NEXT:    add w0, w8, w0
+; CHECK-NEXT:    ret
+  %1 = tail call i64 @llvm.aarch64.sve.cntp.nxv8i1(<vscale x 8 x i1> %pg, <vscale x 8 x i1> %pg)
+  %2 = trunc i64 %1 to i32
+  %add = add i32 %2, %x
+  ret i32 %add
+}
+
+define i32 @cntp_add_same_active_nxv4i1(i32 %x, <vscale x 4 x i1> %pg) #0 {
+; CHECK-LABEL: cntp_add_same_active_nxv4i1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cntp x8, p0, p0.s
+; CHECK-NEXT:    add w0, w8, w0
+; CHECK-NEXT:    ret
+  %1 = tail call i64 @llvm.aarch64.sve.cntp.nxv4i1(<vscale x 4 x i1> %pg, <vscale x 4 x i1> %pg)
+  %2 = trunc i64 %1 to i32
+  %add = add i32 %2, %x
+  ret i32 %add
+}
+
+define i32 @cntp_add_same_active_nxv2i1(i32 %x, <vscale x 2 x i1> %pg) #0 {
+; CHECK-LABEL: cntp_add_same_active_nxv2i1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cntp x8, p0, p0.d
+; CHECK-NEXT:    add w0, w8, w0
+; CHECK-NEXT:    ret
+  %1 = tail call i64 @llvm.aarch64.sve.cntp.nxv2i1(<vscale x 2 x i1> %pg, <vscale x 2 x i1> %pg)
+  %2 = trunc i64 %1 to i32
+  %add = add i32 %2, %x
+  ret i32 %add
+}
+
+define i64 @cntp_add_same_active_nxv2i1_multiuse(i32 %x, i64 %y, <vscale x 2 x i1> %pg) #0 {
+; CHECK-LABEL: cntp_add_same_active_nxv2i1_multiuse:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cntp x8, p0, p0.d
+; CHECK-NEXT:    add w9, w8, w0
+; CHECK-NEXT:    madd x0, x8, x1, x9
+; CHECK-NEXT:    ret
+  %1 = tail call i64 @llvm.aarch64.sve.cntp.nxv2i1(<vscale x 2 x i1> %pg, <vscale x 2 x i1> %pg)
+  %2 = trunc i64 %1 to i32
+  %add = add i32 %2, %x
+  %add.ext = zext i32 %add to i64
+  %mul = mul i64 %1, %y
+  %res = add i64 %add.ext, %mul
+  ret i64 %res
+}
+
+; DECP
+
+define i32 @cntp_sub_all_active_nxv16i1(i32 %x, <vscale x 16 x i1> %pg) #0 {
+; CHECK-LABEL: cntp_sub_all_active_nxv16i1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p1.b
+; CHECK-NEXT:    cntp x8, p1, p0.b
+; CHECK-NEXT:    sub w0, w0, w8
+; CHECK-NEXT:    ret
+  %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+  %2 = tail call i64 @llvm.aarch64.sve.cntp.nxv16i1(<vscale x 16 x i1> %1, <vscale x 16 x i1> %pg)
+  %3 = trunc i64 %2 to i32
+  %sub = sub i32 %x, %3
+  ret i32 %sub
+}
+
+define i32 @cntp_sub_all_active_nxv8i1(i32 %x, <vscale x 8 x i1> %pg) #0 {
+; CHECK-LABEL: cntp_sub_all_active_nxv8i1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p1.h
+; CHECK-NEXT:    cntp x8, p1, p0.h
+; CHECK-NEXT:    sub w0, w0, w8
+; CHECK-NEXT:    ret
+  %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+  %2 = tail call i64 @llvm.aarch64.sve.cntp.nxv8i1(<vscale x 8 x i1> %1, <vscale x 8 x i1> %pg)
+  %3 = trunc i64 %2 to i32
+  %sub = sub i32 %x, %3
+  ret i32 %sub
+}
+
+define i32 @cntp_sub_all_active_nxv4i1(i32 %x, <vscale x 4 x i1> %pg) #0 {
+; CHECK-LABEL: cntp_sub_all_active_nxv4i1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p1.s
+; CHECK-NEXT:    cntp x8, p1, p0.s
+; CHECK-NEXT:    sub w0, w0, w8
+; CHECK-NEXT:    ret
+  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+  %2 = tail call i64 @llvm.aarch64.sve.cntp.nxv4i1(<vscale x 4 x i1> %1, <vscale x 4 x i1> %pg)
+  %3 = trunc i64 %2 to i32
+  %sub = sub i32 %x, %3
+  ret i32 %sub
+}
+
+define i32 @cntp_sub_all_active_nxv2i1(i32 %x, <vscale x 2 x i1> %pg) #0 {
+; CHECK-LABEL: cntp_sub_all_active_nxv2i1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p1.d
+; CHECK-NEXT:    cntp x8, p1, p0.d
+; CHECK-NEXT:    sub w0, w0, w8
+; CHECK-NEXT:    ret
+  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+  %2 = tail call i64 @llvm.aarch64.sve.cntp.nxv2i1(<vscale x 2 x i1> %1, <vscale x 2 x i1> %pg)
+  %3 = trunc i64 %2 to i32
+  %sub = sub i32 %x, %3
+  ret i32 %sub
+}
+
+define i32 @cntp_sub_all_active_nxv8i1_via_cast(i32 %x, <vscale x 8 x i1> %pg) #0 {
+; CHECK-LABEL: cntp_sub_all_active_nxv8i1_via_cast:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p1.b
+; CHECK-NEXT:    cntp x8, p1, p0.h
+; CHECK-NEXT:    sub w0, w0, w8
+; CHECK-NEXT:    ret
+  %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+  %2 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %1)
+  %3 = tail call i64 @llvm.aarch64.sve.cntp.nxv8i1(<vscale x 8 x i1> %2, <vscale x 8 x i1> %pg)
+  %4 = trunc i64 %3 to i32
+  %sub = sub i32 %x, %4
+  ret i32 %sub
+}
+
+define i64 @cntp_sub_all_active_nxv2i1_multiuse(i32 %x, i64 %y, <vscale x 2 x i1> %pg) #0 {
+; CHECK-LABEL: cntp_sub_all_active_nxv2i1_multiuse:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p1.d
+; CHECK-NEXT:    cntp x8, p1, p0.d
+; CHECK-NEXT:    sub w9, w8, w0
+; CHECK-NEXT:    madd x0, x8, x1, x9
+; CHECK-NEXT:    ret
+  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+  %2 = tail call i64 @llvm.aarch64.sve.cntp.nxv2i1(<vscale x 2 x i1> %1, <vscale x 2 x i1> %pg)
+  %3 = trunc i64 %2 to i32
+  %sub = sub i32 %3, %x
+  %sub.ext = zext i32 %sub to i64
+  %mul = mul i64 %2, %y
+  %res = add i64 %sub.ext, %mul
+  ret i64 %res
+}
+
+define i32 @cntp_sub_same_active_nxv16i1(i32 %x, <vscale x 16 x i1> %pg) #0 {
+; CHECK-LABEL: cntp_sub_same_active_nxv16i1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cntp x8, p0, p0.b
+; CHECK-NEXT:    sub w0, w0, w8
+; CHECK-NEXT:    ret
+  %1 = tail call i64 @llvm.aarch64.sve.cntp.nxv16i1(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %pg)
+  %2 = trunc i64 %1 to i32
+  %sub = sub i32 %x, %2
+  ret i32 %sub
+}
+
+define i32 @cntp_sub_same_active_nxv8i1(i32 %x, <vscale x 8 x i1> %pg) #0 {
+; CHECK-LABEL: cntp_sub_same_active_nxv8i1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cntp x8, p0, p0.h
+; CHECK-NEXT:    sub w0, w0, w8
+; CHECK-NEXT:    ret
+  %1 = tail call i64 @llvm.aarch64.sve.cntp.nxv8i1(<vscale x 8 x i1> %pg, <vscale x 8 x i1> %pg)
+  %2 = trunc i64 %1 to i32
+  %sub = sub i32 %x, %2
+  ret i32 %sub
+}
+
+define i32 @cntp_sub_same_active_nxv4i1(i32 %x, <vscale x 4 x i1> %pg) #0 {
+; CHECK-LABEL: cntp_sub_same_active_nxv4i1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cntp x8, p0, p0.s
+; CHECK-NEXT:    sub w0, w0, w8
+; CHECK-NEXT:    ret
+  %1 = tail call i64 @llvm.aarch64.sve.cntp.nxv4i1(<vscale x 4 x i1> %pg, <vscale x 4 x i1> %pg)
+  %2 = trunc i64 %1 to i32
+  %sub = sub i32 %x, %2
+  ret i32 %sub
+}
+
+define i32 @cntp_sub_same_active_nxv2i1(i32 %x, <vscale x 2 x i1> %pg) #0 {
+; CHECK-LABEL: cntp_sub_same_active_nxv2i1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cntp x8, p0, p0.d
+; CHECK-NEXT:    sub w0, w0, w8
+; CHECK-NEXT:    ret
+  %1 = tail call i64 @llvm.aarch64.sve.cntp.nxv2i1(<vscale x 2 x i1> %pg, <vscale x 2 x i1> %pg)
+  %2 = trunc i64 %1 to i32
+  %sub = sub i32 %x, %2
+  ret i32 %sub
+}
+
+define i64 @cntp_sub_same_active_nxv2i1_multiuse(i32 %x, i64 %y, <vscale x 2 x i1> %pg) #0 {
+; CHECK-LABEL: cntp_sub_same_active_nxv2i1_multiuse:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cntp x8, p0, p0.d
+; CHECK-NEXT:    sub w9, w8, w0
+; CHECK-NEXT:    madd x0, x8, x1, x9
+; CHECK-NEXT:    ret
+  %1 = tail call i64 @llvm.aarch64.sve.cntp.nxv2i1(<vscale x 2 x i1> %pg, <vscale x 2 x i1> %pg)
+  %2 = trunc i64 %1 to i32
+  %sub = sub i32 %2, %x
+  %sub.ext = zext i32 %sub to i64
+  %mul = mul i64 %1, %y
+  %res = add i64 %sub.ext, %mul
+  ret i64 %res
+}
+
+declare <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1>)
+declare <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1>)
+declare <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1>)
+
+declare <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32)
+declare <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32)
+declare <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32)
+declare <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32)
+
+declare i64 @llvm.aarch64.sve.cntp.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>)
+declare i64 @llvm.aarch64.sve.cntp.nxv8i1(<vscale x 8 x i1>, <vscale x 8 x i1>)
+declare i64 @llvm.aarch64.sve.cntp.nxv4i1(<vscale x 4 x i1>, <vscale x 4 x i1>)
+declare i64 @llvm.aarch64.sve.cntp.nxv2i1(<vscale x 2 x i1>, <vscale x 2 x i1>)
+
+attributes #0 = { "target-features"="+sve" }
diff --git a/llvm/test/CodeGen/AArch64/sve-cntp-combine.ll b/llvm/test/CodeGen/AArch64/sve-cntp-combine-i64.ll
similarity index 96%
rename from llvm/test/CodeGen/AArch64/sve-cntp-combine.ll
rename to llvm/test/CodeGen/AArch64/sve-cntp-combine-i64.ll
index eb5122b51f227..bf4cad0bb33cf 100644
--- a/llvm/test/CodeGen/AArch64/sve-cntp-combine.ll
+++ b/llvm/test/CodeGen/AArch64/sve-cntp-combine-i64.ll
@@ -61,8 +61,8 @@ define i64 @cntp_add_all_active_nxv8i1_via_cast(i64 %x, <vscale x 8 x i1> %pg) #
   ret i64 %add
 }
 
-define i64 @cntp_add_all_active_nxv2i1_oneuse(i64 %x, <vscale x 2 x i1> %pg) #0 {
-; CHECK-LABEL: cntp_add_all_active_nxv2i1_oneuse:
+define i64 @cntp_add_all_active_nxv2i1_multiuse(i64 %x, <vscale x 2 x i1> %pg) #0 {
+; CHECK-LABEL: cntp_add_all_active_nxv2i1_multiuse:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p1.d
 ; CHECK-NEXT:    cntp x8, p1, p0.d
@@ -117,8 +117,8 @@ define i64 @cntp_add_same_active_nxv2i1(i64 %x, <vscale x 2 x i1> %pg) #0 {
   ret i64 %add
 }
 
-define i64 @cntp_add_same_active_nxv2i1_oneuse(i64 %x, <vscale x 2 x i1> %pg) #0 {
-; CHECK-LABEL: cntp_add_same_active_nxv2i1_oneuse:
+define i64 @cntp_add_same_active_nxv2i1_multiuse(i64 %x, <vscale x 2 x i1> %pg) #0 {
+; CHECK-LABEL: cntp_add_same_active_nxv2i1_multiuse:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    cntp x8, p0, p0.d
 ; CHECK-NEXT:    add x9, x8, x0
@@ -199,9 +199,9 @@ define i64 @cntp_sub_all_active_nxv2i1_multiuse(i64 %x, <vscale x 2 x i1> %pg) #
 ; CHECK-NEXT:    ret
   %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
   %2 = tail call i64 @llvm.aarch64.sve.cntp.nxv2i1(<vscale x 2 x i1> %1, <vscale x 2 x i1> %pg)
-  %add = sub i64 %2, %x
+  %sub = sub i64 %2, %x
   %mul = mul i64 %2, %x
-  %res = add i64 %add, %mul
+  %res = add i64 %sub, %mul
   ret i64 %res
 }
 
@@ -253,9 +253,9 @@ define i64 @cntp_sub_same_active_nxv2i1_multiuse(i64 %x, <vscale x 2 x i1> %pg)
 ; CHECK-NEXT:    madd x0, x8, x0, x9
 ; CHECK-NEXT:    ret
   %1 = tail call i64 @llvm.aarch64.sve.cntp.nxv2i1(<vscale x 2 x i1> %pg, <vscale x 2 x i1> %pg)
-  %add = sub i64 %1, %x
+  %sub = sub i64 %1, %x
   %mul = mul i64 %1, %x
-  %res = add i64 %add, %mul
+  %res = add i64 %sub, %mul
   ret i64 %res
 }
 