[SVE] Extend incp/decp testing to cover 32-bit use cases.
1 parent 6e127c6 · commit c9eec3b
Showing 2 changed files with 338 additions and 8 deletions.
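The new test file (shown below) extends the incp/decp combine coverage to 32-bit use cases: each function feeds the truncated result of llvm.aarch64.sve.cntp into an i32 add or sub. As the file header notes, the CHECK lines are autogenerated by utils/update_llc_test_checks.py, so they can be regenerated if the expected codegen changes. A typical invocation is sketched here; the llc binary and test path are placeholders, since the diff view does not show the file name:

# Example only: adjust the llc binary and test path to your checkout.
llvm/utils/update_llc_test_checks.py --llc-binary build/bin/llc \
  llvm/test/CodeGen/AArch64/<new-test>.ll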
@@ -0,0 +1,330 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s | FileCheck %s

target triple = "aarch64-unknown-linux-gnu"

; INCP

define i32 @cntp_add_all_active_nxv16i1(i32 %x, <vscale x 16 x i1> %pg) #0 {
; CHECK-LABEL: cntp_add_all_active_nxv16i1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p1.b
; CHECK-NEXT:    cntp x8, p1, p0.b
; CHECK-NEXT:    add w0, w8, w0
; CHECK-NEXT:    ret
  %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %2 = tail call i64 @llvm.aarch64.sve.cntp.nxv16i1(<vscale x 16 x i1> %1, <vscale x 16 x i1> %pg)
  %3 = trunc i64 %2 to i32
  %add = add i32 %3, %x
  ret i32 %add
}

define i32 @cntp_add_all_active_nxv8i1(i32 %x, <vscale x 8 x i1> %pg) #0 {
; CHECK-LABEL: cntp_add_all_active_nxv8i1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p1.h
; CHECK-NEXT:    cntp x8, p1, p0.h
; CHECK-NEXT:    add w0, w8, w0
; CHECK-NEXT:    ret
  %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %2 = tail call i64 @llvm.aarch64.sve.cntp.nxv8i1(<vscale x 8 x i1> %1, <vscale x 8 x i1> %pg)
  %3 = trunc i64 %2 to i32
  %add = add i32 %3, %x
  ret i32 %add
}

define i32 @cntp_add_all_active_nxv4i1(i32 %x, <vscale x 4 x i1> %pg) #0 {
; CHECK-LABEL: cntp_add_all_active_nxv4i1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p1.s
; CHECK-NEXT:    cntp x8, p1, p0.s
; CHECK-NEXT:    add w0, w8, w0
; CHECK-NEXT:    ret
  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %2 = tail call i64 @llvm.aarch64.sve.cntp.nxv4i1(<vscale x 4 x i1> %1, <vscale x 4 x i1> %pg)
  %3 = trunc i64 %2 to i32
  %add = add i32 %3, %x
  ret i32 %add
}

define i32 @cntp_add_all_active_nxv2i1(i32 %x, <vscale x 2 x i1> %pg) #0 {
; CHECK-LABEL: cntp_add_all_active_nxv2i1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p1.d
; CHECK-NEXT:    cntp x8, p1, p0.d
; CHECK-NEXT:    add w0, w8, w0
; CHECK-NEXT:    ret
  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %2 = tail call i64 @llvm.aarch64.sve.cntp.nxv2i1(<vscale x 2 x i1> %1, <vscale x 2 x i1> %pg)
  %3 = trunc i64 %2 to i32
  %add = add i32 %3, %x
  ret i32 %add
}

define i32 @cntp_add_all_active_nxv8i1_via_cast(i32 %x, <vscale x 8 x i1> %pg) #0 {
; CHECK-LABEL: cntp_add_all_active_nxv8i1_via_cast:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p1.b
; CHECK-NEXT:    cntp x8, p1, p0.h
; CHECK-NEXT:    add w0, w8, w0
; CHECK-NEXT:    ret
  %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %2 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %1)
  %3 = tail call i64 @llvm.aarch64.sve.cntp.nxv8i1(<vscale x 8 x i1> %2, <vscale x 8 x i1> %pg)
  %4 = trunc i64 %3 to i32
  %add = add i32 %4, %x
  ret i32 %add
}

define i64 @cntp_add_all_active_nxv2i1_multiuse(i32 %x, i64 %y, <vscale x 2 x i1> %pg) #0 {
; CHECK-LABEL: cntp_add_all_active_nxv2i1_multiuse:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p1.d
; CHECK-NEXT:    cntp x8, p1, p0.d
; CHECK-NEXT:    add w9, w8, w0
; CHECK-NEXT:    madd x0, x8, x1, x9
; CHECK-NEXT:    ret
  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %2 = tail call i64 @llvm.aarch64.sve.cntp.nxv2i1(<vscale x 2 x i1> %1, <vscale x 2 x i1> %pg)
  %3 = trunc i64 %2 to i32
  %add = add i32 %3, %x
  %add.ext = zext i32 %add to i64
  %mul = mul i64 %2, %y
  %res = add i64 %add.ext, %mul
  ret i64 %res
}

define i32 @cntp_add_same_active_nxv16i1(i32 %x, <vscale x 16 x i1> %pg) #0 {
; CHECK-LABEL: cntp_add_same_active_nxv16i1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cntp x8, p0, p0.b
; CHECK-NEXT:    add w0, w8, w0
; CHECK-NEXT:    ret
  %1 = tail call i64 @llvm.aarch64.sve.cntp.nxv16i1(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %pg)
  %2 = trunc i64 %1 to i32
  %add = add i32 %2, %x
  ret i32 %add
}

define i32 @cntp_add_same_active_nxv8i1(i32 %x, <vscale x 8 x i1> %pg) #0 {
; CHECK-LABEL: cntp_add_same_active_nxv8i1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cntp x8, p0, p0.h
; CHECK-NEXT:    add w0, w8, w0
; CHECK-NEXT:    ret
  %1 = tail call i64 @llvm.aarch64.sve.cntp.nxv8i1(<vscale x 8 x i1> %pg, <vscale x 8 x i1> %pg)
  %2 = trunc i64 %1 to i32
  %add = add i32 %2, %x
  ret i32 %add
}

define i32 @cntp_add_same_active_nxv4i1(i32 %x, <vscale x 4 x i1> %pg) #0 {
; CHECK-LABEL: cntp_add_same_active_nxv4i1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cntp x8, p0, p0.s
; CHECK-NEXT:    add w0, w8, w0
; CHECK-NEXT:    ret
  %1 = tail call i64 @llvm.aarch64.sve.cntp.nxv4i1(<vscale x 4 x i1> %pg, <vscale x 4 x i1> %pg)
  %2 = trunc i64 %1 to i32
  %add = add i32 %2, %x
  ret i32 %add
}

define i32 @cntp_add_same_active_nxv2i1(i32 %x, <vscale x 2 x i1> %pg) #0 {
; CHECK-LABEL: cntp_add_same_active_nxv2i1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cntp x8, p0, p0.d
; CHECK-NEXT:    add w0, w8, w0
; CHECK-NEXT:    ret
  %1 = tail call i64 @llvm.aarch64.sve.cntp.nxv2i1(<vscale x 2 x i1> %pg, <vscale x 2 x i1> %pg)
  %2 = trunc i64 %1 to i32
  %add = add i32 %2, %x
  ret i32 %add
}

define i64 @cntp_add_same_active_nxv2i1_multiuse(i32 %x, i64 %y, <vscale x 2 x i1> %pg) #0 {
; CHECK-LABEL: cntp_add_same_active_nxv2i1_multiuse:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cntp x8, p0, p0.d
; CHECK-NEXT:    add w9, w8, w0
; CHECK-NEXT:    madd x0, x8, x1, x9
; CHECK-NEXT:    ret
  %1 = tail call i64 @llvm.aarch64.sve.cntp.nxv2i1(<vscale x 2 x i1> %pg, <vscale x 2 x i1> %pg)
  %2 = trunc i64 %1 to i32
  %add = add i32 %2, %x
  %add.ext = zext i32 %add to i64
  %mul = mul i64 %1, %y
  %res = add i64 %add.ext, %mul
  ret i64 %res
}

; DECP

define i32 @cntp_sub_all_active_nxv16i1(i32 %x, <vscale x 16 x i1> %pg) #0 {
; CHECK-LABEL: cntp_sub_all_active_nxv16i1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p1.b
; CHECK-NEXT:    cntp x8, p1, p0.b
; CHECK-NEXT:    sub w0, w0, w8
; CHECK-NEXT:    ret
  %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %2 = tail call i64 @llvm.aarch64.sve.cntp.nxv16i1(<vscale x 16 x i1> %1, <vscale x 16 x i1> %pg)
  %3 = trunc i64 %2 to i32
  %sub = sub i32 %x, %3
  ret i32 %sub
}

define i32 @cntp_sub_all_active_nxv8i1(i32 %x, <vscale x 8 x i1> %pg) #0 {
; CHECK-LABEL: cntp_sub_all_active_nxv8i1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p1.h
; CHECK-NEXT:    cntp x8, p1, p0.h
; CHECK-NEXT:    sub w0, w0, w8
; CHECK-NEXT:    ret
  %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %2 = tail call i64 @llvm.aarch64.sve.cntp.nxv8i1(<vscale x 8 x i1> %1, <vscale x 8 x i1> %pg)
  %3 = trunc i64 %2 to i32
  %sub = sub i32 %x, %3
  ret i32 %sub
}

define i32 @cntp_sub_all_active_nxv4i1(i32 %x, <vscale x 4 x i1> %pg) #0 {
; CHECK-LABEL: cntp_sub_all_active_nxv4i1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p1.s
; CHECK-NEXT:    cntp x8, p1, p0.s
; CHECK-NEXT:    sub w0, w0, w8
; CHECK-NEXT:    ret
  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %2 = tail call i64 @llvm.aarch64.sve.cntp.nxv4i1(<vscale x 4 x i1> %1, <vscale x 4 x i1> %pg)
  %3 = trunc i64 %2 to i32
  %sub = sub i32 %x, %3
  ret i32 %sub
}

define i32 @cntp_sub_all_active_nxv2i1(i32 %x, <vscale x 2 x i1> %pg) #0 {
; CHECK-LABEL: cntp_sub_all_active_nxv2i1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p1.d
; CHECK-NEXT:    cntp x8, p1, p0.d
; CHECK-NEXT:    sub w0, w0, w8
; CHECK-NEXT:    ret
  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %2 = tail call i64 @llvm.aarch64.sve.cntp.nxv2i1(<vscale x 2 x i1> %1, <vscale x 2 x i1> %pg)
  %3 = trunc i64 %2 to i32
  %sub = sub i32 %x, %3
  ret i32 %sub
}

define i32 @cntp_sub_all_active_nxv8i1_via_cast(i32 %x, <vscale x 8 x i1> %pg) #0 {
; CHECK-LABEL: cntp_sub_all_active_nxv8i1_via_cast:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p1.b
; CHECK-NEXT:    cntp x8, p1, p0.h
; CHECK-NEXT:    sub w0, w0, w8
; CHECK-NEXT:    ret
  %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %2 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %1)
  %3 = tail call i64 @llvm.aarch64.sve.cntp.nxv8i1(<vscale x 8 x i1> %2, <vscale x 8 x i1> %pg)
  %4 = trunc i64 %3 to i32
  %sub = sub i32 %x, %4
  ret i32 %sub
}

define i64 @cntp_sub_all_active_nxv2i1_multiuse(i32 %x, i64 %y, <vscale x 2 x i1> %pg) #0 {
; CHECK-LABEL: cntp_sub_all_active_nxv2i1_multiuse:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p1.d
; CHECK-NEXT:    cntp x8, p1, p0.d
; CHECK-NEXT:    sub w9, w8, w0
; CHECK-NEXT:    madd x0, x8, x1, x9
; CHECK-NEXT:    ret
  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %2 = tail call i64 @llvm.aarch64.sve.cntp.nxv2i1(<vscale x 2 x i1> %1, <vscale x 2 x i1> %pg)
  %3 = trunc i64 %2 to i32
  %sub = sub i32 %3, %x
  %sub.ext = zext i32 %sub to i64
  %mul = mul i64 %2, %y
  %res = add i64 %sub.ext, %mul
  ret i64 %res
}

define i32 @cntp_sub_same_active_nxv16i1(i32 %x, <vscale x 16 x i1> %pg) #0 {
; CHECK-LABEL: cntp_sub_same_active_nxv16i1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cntp x8, p0, p0.b
; CHECK-NEXT:    sub w0, w0, w8
; CHECK-NEXT:    ret
  %1 = tail call i64 @llvm.aarch64.sve.cntp.nxv16i1(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %pg)
  %2 = trunc i64 %1 to i32
  %sub = sub i32 %x, %2
  ret i32 %sub
}

define i32 @cntp_sub_same_active_nxv8i1(i32 %x, <vscale x 8 x i1> %pg) #0 {
; CHECK-LABEL: cntp_sub_same_active_nxv8i1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cntp x8, p0, p0.h
; CHECK-NEXT:    sub w0, w0, w8
; CHECK-NEXT:    ret
  %1 = tail call i64 @llvm.aarch64.sve.cntp.nxv8i1(<vscale x 8 x i1> %pg, <vscale x 8 x i1> %pg)
  %2 = trunc i64 %1 to i32
  %sub = sub i32 %x, %2
  ret i32 %sub
}

define i32 @cntp_sub_same_active_nxv4i1(i32 %x, <vscale x 4 x i1> %pg) #0 {
; CHECK-LABEL: cntp_sub_same_active_nxv4i1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cntp x8, p0, p0.s
; CHECK-NEXT:    sub w0, w0, w8
; CHECK-NEXT:    ret
  %1 = tail call i64 @llvm.aarch64.sve.cntp.nxv4i1(<vscale x 4 x i1> %pg, <vscale x 4 x i1> %pg)
  %2 = trunc i64 %1 to i32
  %sub = sub i32 %x, %2
  ret i32 %sub
}

define i32 @cntp_sub_same_active_nxv2i1(i32 %x, <vscale x 2 x i1> %pg) #0 {
; CHECK-LABEL: cntp_sub_same_active_nxv2i1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cntp x8, p0, p0.d
; CHECK-NEXT:    sub w0, w0, w8
; CHECK-NEXT:    ret
  %1 = tail call i64 @llvm.aarch64.sve.cntp.nxv2i1(<vscale x 2 x i1> %pg, <vscale x 2 x i1> %pg)
  %2 = trunc i64 %1 to i32
  %sub = sub i32 %x, %2
  ret i32 %sub
}

define i64 @cntp_sub_same_active_nxv2i1_multiuse(i32 %x, i64 %y, <vscale x 2 x i1> %pg) #0 {
; CHECK-LABEL: cntp_sub_same_active_nxv2i1_multiuse:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cntp x8, p0, p0.d
; CHECK-NEXT:    sub w9, w8, w0
; CHECK-NEXT:    madd x0, x8, x1, x9
; CHECK-NEXT:    ret
  %1 = tail call i64 @llvm.aarch64.sve.cntp.nxv2i1(<vscale x 2 x i1> %pg, <vscale x 2 x i1> %pg)
  %2 = trunc i64 %1 to i32
  %sub = sub i32 %2, %x
  %sub.ext = zext i32 %sub to i64
  %mul = mul i64 %1, %y
  %res = add i64 %sub.ext, %mul
  ret i64 %res
}

declare <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1>)
declare <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1>)
declare <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1>)

declare <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32)
declare <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32)
declare <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32)
declare <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32)

declare i64 @llvm.aarch64.sve.cntp.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>)
declare i64 @llvm.aarch64.sve.cntp.nxv8i1(<vscale x 8 x i1>, <vscale x 8 x i1>)
declare i64 @llvm.aarch64.sve.cntp.nxv4i1(<vscale x 4 x i1>, <vscale x 4 x i1>)
declare i64 @llvm.aarch64.sve.cntp.nxv2i1(<vscale x 2 x i1>, <vscale x 2 x i1>)

attributes #0 = { "target-features"="+sve" }