-
Notifications
You must be signed in to change notification settings - Fork 15.7k
[AArch64] Enable subreg liveness tracking for streaming functions. #174189
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: users/sdesmalen-arm/srlt-mitigation-add-impdef
Are you sure you want to change the base?
[AArch64] Enable subreg liveness tracking for streaming functions. #174189
Conversation
Most uses of subreg liveness tracking will be in streaming SME2 functions, where it enables the use of both the strided and contiguous forms of the multi-vector LD1 instructions; see #123081 for details.
|
This is a stacked PR. See other PRs below: |
|
@llvm/pr-subscribers-backend-aarch64 Author: Sander de Smalen (sdesmalen-arm) Changes: Most use of subreg liveness tracking will be for streaming SME2 functions where it can use the strided- and contiguous form of the multi-vector LD1, see #123081 for details. Patch is 963.67 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/174189.diff 100 Files Affected:
diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
index 1737a0c1529b4..194d1d94e0b25 100644
--- a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
@@ -417,7 +417,17 @@ AArch64Subtarget::AArch64Subtarget(const Triple &TT, StringRef CPU,
if (ReservedRegNames.count("X29") || ReservedRegNames.count("FP"))
ReserveXRegisterForRA.set(29);
- EnableSubregLiveness = EnableSubregLivenessTracking.getValue();
+ // To benefit from SME2's strided-register multi-vector load/store
+ // instructions we'll need to enable subreg liveness. Our longer
+ // term aim is to make this the default, regardless of streaming
+ // mode, but there are still some outstanding issues, see:
+ // https://github.com/llvm/llvm-project/pull/174188
+ // and:
+ // https://github.com/llvm/llvm-project/pull/168353
+ if (IsStreaming)
+ EnableSubregLiveness = true;
+ else
+ EnableSubregLiveness = EnableSubregLivenessTracking.getValue();
}
const CallLowering *AArch64Subtarget::getCallLowering() const {
diff --git a/llvm/test/CodeGen/AArch64/active_lane_mask.ll b/llvm/test/CodeGen/AArch64/active_lane_mask.ll
index b77e90f6fdc45..05d083a654cf6 100644
--- a/llvm/test/CodeGen/AArch64/active_lane_mask.ll
+++ b/llvm/test/CodeGen/AArch64/active_lane_mask.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s --check-prefixes=CHECK,CHECK-SVE
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme -force-streaming < %s | FileCheck %s --check-prefixes=CHECK,CHECK-STREAMING
+; RUN: llc -enable-subreg-liveness -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s --check-prefixes=CHECK,CHECK-SVE
+; RUN: llc -enable-subreg-liveness -mtriple=aarch64-linux-gnu -mattr=+sme -force-streaming < %s | FileCheck %s --check-prefixes=CHECK,CHECK-STREAMING
; == Scalable ==
@@ -209,7 +209,6 @@ define <16 x i1> @lane_mask_v16i1_i32(i32 %index, i32 %TC) {
; CHECK: // %bb.0:
; CHECK-NEXT: whilelo p0.b, w0, w1
; CHECK-NEXT: mov z0.b, p0/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
%active.lane.mask = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 %index, i32 %TC)
ret <16 x i1> %active.lane.mask
@@ -220,7 +219,6 @@ define <8 x i1> @lane_mask_v8i1_i32(i32 %index, i32 %TC) {
; CHECK: // %bb.0:
; CHECK-NEXT: whilelo p0.b, w0, w1
; CHECK-NEXT: mov z0.b, p0/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
%active.lane.mask = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 %index, i32 %TC)
ret <8 x i1> %active.lane.mask
@@ -231,7 +229,6 @@ define <4 x i1> @lane_mask_v4i1_i32(i32 %index, i32 %TC) {
; CHECK: // %bb.0:
; CHECK-NEXT: whilelo p0.h, w0, w1
; CHECK-NEXT: mov z0.h, p0/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
%active.lane.mask = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %TC)
ret <4 x i1> %active.lane.mask
@@ -242,7 +239,6 @@ define <2 x i1> @lane_mask_v2i1_i32(i32 %index, i32 %TC) {
; CHECK: // %bb.0:
; CHECK-NEXT: whilelo p0.s, w0, w1
; CHECK-NEXT: mov z0.s, p0/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
%active.lane.mask = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i32(i32 %index, i32 %TC)
ret <2 x i1> %active.lane.mask
@@ -253,7 +249,6 @@ define <16 x i1> @lane_mask_v16i1_i64(i64 %index, i64 %TC) {
; CHECK: // %bb.0:
; CHECK-NEXT: whilelo p0.b, x0, x1
; CHECK-NEXT: mov z0.b, p0/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
%active.lane.mask = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i64(i64 %index, i64 %TC)
ret <16 x i1> %active.lane.mask
@@ -264,7 +259,6 @@ define <8 x i1> @lane_mask_v8i1_i64(i64 %index, i64 %TC) {
; CHECK: // %bb.0:
; CHECK-NEXT: whilelo p0.b, x0, x1
; CHECK-NEXT: mov z0.b, p0/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
%active.lane.mask = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i64(i64 %index, i64 %TC)
ret <8 x i1> %active.lane.mask
@@ -275,7 +269,6 @@ define <4 x i1> @lane_mask_v4i1_i64(i64 %index, i64 %TC) {
; CHECK: // %bb.0:
; CHECK-NEXT: whilelo p0.h, x0, x1
; CHECK-NEXT: mov z0.h, p0/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
%active.lane.mask = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 %index, i64 %TC)
ret <4 x i1> %active.lane.mask
@@ -286,7 +279,6 @@ define <2 x i1> @lane_mask_v2i1_i64(i64 %index, i64 %TC) {
; CHECK: // %bb.0:
; CHECK-NEXT: whilelo p0.s, x0, x1
; CHECK-NEXT: mov z0.s, p0/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
%active.lane.mask = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i64(i64 %index, i64 %TC)
ret <2 x i1> %active.lane.mask
@@ -313,7 +305,6 @@ define <16 x i1> @lane_mask_v16i1_i8(i8 %index, i8 %TC) {
; CHECK-STREAMING-NEXT: mov z1.b, w1
; CHECK-STREAMING-NEXT: cmphi p0.b, p0/z, z1.b, z0.b
; CHECK-STREAMING-NEXT: mov z0.b, p0/z, #-1 // =0xffffffffffffffff
-; CHECK-STREAMING-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-STREAMING-NEXT: ret
%active.lane.mask = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i8(i8 %index, i8 %TC)
ret <16 x i1> %active.lane.mask
@@ -340,7 +331,6 @@ define <8 x i1> @lane_mask_v8i1_i8(i8 %index, i8 %TC) {
; CHECK-STREAMING-NEXT: mov z1.b, w1
; CHECK-STREAMING-NEXT: cmphi p0.b, p0/z, z1.b, z0.b
; CHECK-STREAMING-NEXT: mov z0.b, p0/z, #-1 // =0xffffffffffffffff
-; CHECK-STREAMING-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-STREAMING-NEXT: ret
%active.lane.mask = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i8(i8 %index, i8 %TC)
ret <8 x i1> %active.lane.mask
@@ -372,7 +362,6 @@ define <4 x i1> @lane_mask_v4i1_i8(i8 %index, i8 %TC) {
; CHECK-STREAMING-NEXT: and z1.h, z1.h, #0xff
; CHECK-STREAMING-NEXT: cmphi p0.h, p0/z, z1.h, z0.h
; CHECK-STREAMING-NEXT: mov z0.h, p0/z, #-1 // =0xffffffffffffffff
-; CHECK-STREAMING-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-STREAMING-NEXT: ret
%active.lane.mask = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i8(i8 %index, i8 %TC)
ret <4 x i1> %active.lane.mask
@@ -402,7 +391,6 @@ define <2 x i1> @lane_mask_v2i1_i8(i8 %index, i8 %TC) {
; CHECK-STREAMING-NEXT: umin z0.s, z0.s, #255
; CHECK-STREAMING-NEXT: cmphi p0.s, p0/z, z1.s, z0.s
; CHECK-STREAMING-NEXT: mov z0.s, p0/z, #-1 // =0xffffffffffffffff
-; CHECK-STREAMING-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-STREAMING-NEXT: ret
%active.lane.mask = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i8(i8 %index, i8 %TC)
ret <2 x i1> %active.lane.mask
@@ -465,7 +453,6 @@ define <8 x i1> @lane_mask_v8i1_imm3() {
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.b, vl3
; CHECK-NEXT: mov z0.b, p0/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
entry:
%active.lane.mask = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i64(i64 0, i64 3)
diff --git a/llvm/test/CodeGen/AArch64/fp8-sve-cvtn.ll b/llvm/test/CodeGen/AArch64/fp8-sve-cvtn.ll
index e42f2b1cfba48..b735a52e49d63 100644
--- a/llvm/test/CodeGen/AArch64/fp8-sve-cvtn.ll
+++ b/llvm/test/CodeGen/AArch64/fp8-sve-cvtn.ll
@@ -1,15 +1,13 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc -mattr=+sve2,+fp8 < %s | FileCheck %s
-; RUN: llc -mattr=+sve,+sme2,+fp8 < %s | FileCheck %s
-; RUN: llc -mattr=+sme2,+fp8 --force-streaming < %s | FileCheck %s
+; RUN: llc -mattr=+sve2,+fp8 -enable-subreg-liveness=true < %s | FileCheck %s
+; RUN: llc -mattr=+sve,+sme2,+fp8 -enable-subreg-liveness=true < %s | FileCheck %s
+; RUN: llc -mattr=+sme2,+fp8 --force-streaming -enable-subreg-liveness=true < %s | FileCheck %s
target triple = "aarch64-linux"
define <vscale x 16 x i8> @cvtn_bf16(<vscale x 8 x bfloat> %s1, <vscale x 8 x bfloat> %s2) {
; CHECK-LABEL: cvtn_bf16:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: bfcvtn z0.b, { z0.h, z1.h }
; CHECK-NEXT: ret
%r = call <vscale x 16 x i8> @llvm.aarch64.sve.fp8.cvtn.nxv8bf16(<vscale x 8 x bfloat> %s1, <vscale x 8 x bfloat> %s2)
@@ -19,8 +17,6 @@ define <vscale x 16 x i8> @cvtn_bf16(<vscale x 8 x bfloat> %s1, <vscale x 8 x bf
define <vscale x 16 x i8> @cvtn_f16(<vscale x 8 x half> %s1, <vscale x 8 x half> %s2) {
; CHECK-LABEL: cvtn_f16:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: fcvtn z0.b, { z0.h, z1.h }
; CHECK-NEXT: ret
%r = call <vscale x 16 x i8> @llvm.aarch64.sve.fp8.cvtn.nxv8f16(<vscale x 8 x half> %s1, <vscale x 8 x half> %s2)
@@ -30,8 +26,6 @@ define <vscale x 16 x i8> @cvtn_f16(<vscale x 8 x half> %s1, <vscale x 8 x half>
define <vscale x 16 x i8> @cvtnb_f32(<vscale x 4 x float> %s1, <vscale x 4 x float> %s2) {
; CHECK-LABEL: cvtnb_f32:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: fcvtnb z0.b, { z0.s, z1.s }
; CHECK-NEXT: ret
%r = call <vscale x 16 x i8> @llvm.aarch64.sve.fp8.cvtnb.nxv4f32(<vscale x 4 x float> %s1, <vscale x 4 x float> %s2)
diff --git a/llvm/test/CodeGen/AArch64/get-active-lane-mask-extract.ll b/llvm/test/CodeGen/AArch64/get-active-lane-mask-extract.ll
index e2c861b40e706..aa0b934151fef 100644
--- a/llvm/test/CodeGen/AArch64/get-active-lane-mask-extract.ll
+++ b/llvm/test/CodeGen/AArch64/get-active-lane-mask-extract.ll
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
-; RUN: llc -mattr=+sve < %s | FileCheck %s -check-prefix CHECK-SVE
-; RUN: llc -mattr=+sve2p1 < %s | FileCheck %s -check-prefix CHECK-SVE2p1-SME2 -check-prefix CHECK-SVE2p1
-; RUN: llc -mattr=+sve -mattr=+sme2 -force-streaming < %s | FileCheck %s -check-prefix CHECK-SVE2p1-SME2 -check-prefix CHECK-SME2
+; RUN: llc -enable-subreg-liveness -mattr=+sve < %s | FileCheck %s -check-prefix CHECK-SVE
+; RUN: llc -enable-subreg-liveness -mattr=+sve2p1 < %s | FileCheck %s -check-prefix CHECK-SVE2p1-SME2 -check-prefix CHECK-SVE2p1
+; RUN: llc -enable-subreg-liveness -mattr=+sve -mattr=+sme2 -force-streaming < %s | FileCheck %s -check-prefix CHECK-SVE2p1-SME2 -check-prefix CHECK-SME2
target triple = "aarch64-linux"
; Test combining of getActiveLaneMask with a pair of extract_vector operations.
@@ -181,8 +181,6 @@ define void @test_fixed_extract(i64 %i, i64 %n) #0 {
; CHECK-SVE-NEXT: fmov s0, w8
; CHECK-SVE-NEXT: mov v0.s[1], v1.s[1]
; CHECK-SVE-NEXT: ext z1.b, z1.b, z1.b, #8
-; CHECK-SVE-NEXT: // kill: def $d0 killed $d0 killed $q0
-; CHECK-SVE-NEXT: // kill: def $d1 killed $d1 killed $z1
; CHECK-SVE-NEXT: b use
;
; CHECK-SVE2p1-LABEL: test_fixed_extract:
@@ -193,8 +191,6 @@ define void @test_fixed_extract(i64 %i, i64 %n) #0 {
; CHECK-SVE2p1-NEXT: fmov s0, w8
; CHECK-SVE2p1-NEXT: mov v0.s[1], v1.s[1]
; CHECK-SVE2p1-NEXT: ext z1.b, z1.b, z1.b, #8
-; CHECK-SVE2p1-NEXT: // kill: def $d0 killed $d0 killed $q0
-; CHECK-SVE2p1-NEXT: // kill: def $d1 killed $d1 killed $z1
; CHECK-SVE2p1-NEXT: b use
;
; CHECK-SME2-LABEL: test_fixed_extract:
@@ -205,9 +201,7 @@ define void @test_fixed_extract(i64 %i, i64 %n) #0 {
; CHECK-SME2-NEXT: fmov s2, w8
; CHECK-SME2-NEXT: mov z0.s, z1.s[1]
; CHECK-SME2-NEXT: ext z1.b, z1.b, z1.b, #8
-; CHECK-SME2-NEXT: // kill: def $d1 killed $d1 killed $z1
; CHECK-SME2-NEXT: zip1 z0.s, z2.s, z0.s
-; CHECK-SME2-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-SME2-NEXT: b use
%r = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 %i, i64 %n)
%v0 = call <2 x i1> @llvm.vector.extract.v2i1.nxv4i1.i64(<vscale x 4 x i1> %r, i64 0)
diff --git a/llvm/test/CodeGen/AArch64/intrinsic-cttz-elts-sve.ll b/llvm/test/CodeGen/AArch64/intrinsic-cttz-elts-sve.ll
index ca16df3c09ade..49a0086a7be54 100644
--- a/llvm/test/CodeGen/AArch64/intrinsic-cttz-elts-sve.ll
+++ b/llvm/test/CodeGen/AArch64/intrinsic-cttz-elts-sve.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s --check-prefixes=CHECK,NONSTREAMING
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme -force-streaming < %s | FileCheck %s --check-prefixes=CHECK,STREAMING
+; RUN: llc -enable-subreg-liveness -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s --check-prefixes=CHECK,NONSTREAMING
+; RUN: llc -enable-subreg-liveness -mtriple=aarch64-linux-gnu -mattr=+sme -force-streaming < %s | FileCheck %s --check-prefixes=CHECK,STREAMING
; WITH VSCALE RANGE
@@ -145,7 +145,6 @@ define i32 @ctz_nxv2i1(<vscale x 2 x i1> %a) {
; CHECK-NEXT: ptrue p1.d
; CHECK-NEXT: brkb p0.b, p1/z, p0.b
; CHECK-NEXT: cntp x0, p0, p0.d
-; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT: ret
%res = call i32 @llvm.experimental.cttz.elts.i32.nxv2i1(<vscale x 2 x i1> %a, i1 0)
ret i32 %res
@@ -157,7 +156,6 @@ define i32 @ctz_nxv2i1_poison(<vscale x 2 x i1> %a) {
; CHECK-NEXT: ptrue p1.d
; CHECK-NEXT: brkb p0.b, p1/z, p0.b
; CHECK-NEXT: cntp x0, p0, p0.d
-; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT: ret
%res = call i32 @llvm.experimental.cttz.elts.i32.nxv2i1(<vscale x 2 x i1> %a, i1 1)
ret i32 %res
@@ -179,10 +177,8 @@ define i32 @add_i32_ctz_nxv2i1_poison(<vscale x 2 x i1> %a, i32 %b) {
; CHECK-LABEL: add_i32_ctz_nxv2i1_poison:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.d
-; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT: brkb p0.b, p1/z, p0.b
; CHECK-NEXT: incp x0, p0.d
-; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT: ret
%res = call i64 @llvm.experimental.cttz.elts.i64.nxv2i1(<vscale x 2 x i1> %a, i1 1)
%trunc = trunc i64 %res to i32
@@ -196,7 +192,6 @@ define i32 @ctz_nxv4i1(<vscale x 4 x i1> %a) {
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: brkb p0.b, p1/z, p0.b
; CHECK-NEXT: cntp x0, p0, p0.s
-; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT: ret
%res = call i32 @llvm.experimental.cttz.elts.i32.nxv4i1(<vscale x 4 x i1> %a, i1 0)
ret i32 %res
@@ -208,7 +203,6 @@ define i32 @ctz_nxv4i1_poison(<vscale x 4 x i1> %a) {
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: brkb p0.b, p1/z, p0.b
; CHECK-NEXT: cntp x0, p0, p0.s
-; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT: ret
%res = call i32 @llvm.experimental.cttz.elts.i32.nxv4i1(<vscale x 4 x i1> %a, i1 1)
ret i32 %res
@@ -230,10 +224,8 @@ define i32 @add_i32_ctz_nxv4i1_poison(<vscale x 4 x i1> %a, i32 %b) {
; CHECK-LABEL: add_i32_ctz_nxv4i1_poison:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.s
-; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT: brkb p0.b, p1/z, p0.b
; CHECK-NEXT: incp x0, p0.s
-; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT: ret
%res = call i64 @llvm.experimental.cttz.elts.i64.nxv4i1(<vscale x 4 x i1> %a, i1 1)
%trunc = trunc i64 %res to i32
@@ -247,7 +239,6 @@ define i32 @ctz_nxv8i1(<vscale x 8 x i1> %a) {
; CHECK-NEXT: ptrue p1.h
; CHECK-NEXT: brkb p0.b, p1/z, p0.b
; CHECK-NEXT: cntp x0, p0, p0.h
-; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT: ret
%res = call i32 @llvm.experimental.cttz.elts.i32.nxv8i1(<vscale x 8 x i1> %a, i1 0)
ret i32 %res
@@ -259,7 +250,6 @@ define i32 @ctz_nxv8i1_poison(<vscale x 8 x i1> %a) {
; CHECK-NEXT: ptrue p1.h
; CHECK-NEXT: brkb p0.b, p1/z, p0.b
; CHECK-NEXT: cntp x0, p0, p0.h
-; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT: ret
%res = call i32 @llvm.experimental.cttz.elts.i32.nxv8i1(<vscale x 8 x i1> %a, i1 1)
ret i32 %res
@@ -281,10 +271,8 @@ define i32 @add_i32_ctz_nxv8i1_poison(<vscale x 8 x i1> %a, i32 %b) {
; CHECK-LABEL: add_i32_ctz_nxv8i1_poison:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.h
-; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT: brkb p0.b, p1/z, p0.b
; CHECK-NEXT: incp x0, p0.h
-; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT: ret
%res = call i64 @llvm.experimental.cttz.elts.i64.nxv8i1(<vscale x 8 x i1> %a, i1 1)
%trunc = trunc i64 %res to i32
@@ -298,7 +286,6 @@ define i32 @ctz_nxv16i1(<vscale x 16 x i1> %a) {
; CHECK-NEXT: ptrue p1.b
; CHECK-NEXT: brkb p0.b, p1/z, p0.b
; CHECK-NEXT: cntp x0, p0, p0.b
-; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT: ret
%res = call i32 @llvm.experimental.cttz.elts.i32.nxv16i1(<vscale x 16 x i1> %a, i1 0)
ret i32 %res
@@ -310,7 +297,6 @@ define i32 @ctz_nxv16i1_poison(<vscale x 16 x i1> %a) {
; CHECK-NEXT: ptrue p1.b
; CHECK-NEXT: brkb p0.b, p1/z, p0.b
; CHECK-NEXT: cntp x0, p0, p0.b
-; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT: ret
%res = call i32 @llvm.experimental.cttz.elts.i32.nxv16i1(<vscale x 16 x i1> %a, i1 1)
ret i32 %res
@@ -323,7 +309,6 @@ define i32 @ctz_and_nxv16i1(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vsca
; CHECK-NEXT: ptrue p1.b
; CHECK-NEXT: brkb p0.b, p1/z, p0.b
; CHECK-NEXT: cntp x0, p0, p0.b
-; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT: ret
%cmp = icmp ne <vscale x 16 x i8> %a, %b
%select = select <vscale x 16 x i1> %pg, <vscale x 16 x i1> %cmp, <vscale x 16 x i1> zeroinitializer
@@ -348,10 +333,8 @@ define i32 @add_i32_ctz_nxv16i1_poison(<vscale x 16 x i1> %a, i32 %b) {
; CHECK-LABEL: add_i32_ctz_nxv16i1_poison:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.b
-; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT: brkb p0.b, p1/z, p0.b
; CHECK-NEXT: incp x0, p0.b
-; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT: ret
%res = call i64 @llvm.experimental.cttz.elts.i64.nxv16i1(<vscale x 16 x i1> %a, i1 1)
%trunc = trunc i64 %res to i32
@@ -370,20 +353,17 @@ define i32 @ctz_v16i1(<16 x i1> %a) {
; NONSTREAMING-NEXT: cmpne p0.b, p0/z, z0.b, #0
; NONSTREAMING-NEXT: brkb p0.b, p1/z, p0.b
; NONSTREAMING-NEXT: cntp x0, p0, p0.b
-; NONSTREAMING-NEXT: // kill: def $w0 killed $w0 killed $x0
; NONSTREAMING-NEXT: ret
;
; STREAMING-LABEL: ctz_v16i1:
; STREAMING: // %bb.0:
-; STREAMING-NEXT: // kill: def $q0 killed $q0 def $z0
-; STREAMING-NEXT: ptrue p0.b, vl16
; STREAMING-NEXT: lsl z0.b, z0.b, #7
+; STREAMING-NEXT: ptrue p0.b, vl16
; STREAMING-NEXT: ptrue p1.b
; STREAMING-NEXT: asr z0.b, z0.b, #7
; STREAMING-NEXT: cmpne p0.b, p0/z, z0.b, #0
; STREAMING-NEXT: brkb p0.b, p1/z, p0.b
; STREAMING-NEXT: cntp x0, p0, p0.b
-; STREAMING-NEXT: // kill: def $w0 killed $w0 killed $x0
; STREAMING-NEXT: ret
%res = call i32 @llvm.experimental.cttz.elts.i32.v16i1(<16 x i1> %a, i1 0)
ret i32 %res
@@ -398,20 +378,17 @@ define i32 @ctz_v16i1_poison(<16 x i1> %a) {
; NONSTREAMING-NEXT: cmpne p0.b, p0/z, z0.b, #0
; NONSTREAMING-NEXT: brkb p0.b, p1/z, p0.b
; NONSTREAMING-NEXT: cntp x0, p0, p0.b
-; NONSTREAMING-NEXT: // kill: def $w0 killed $w0 killed $x0
; NONSTREAMING-NEXT: ret
;
; STREAMING-LABEL: ctz_v16i1_poison:
; STREAMING: // %bb.0:
-; STREAMING-NEXT: // kill: def $q0 killed $q0 def $z0
-; STREAMING-NEXT: ptrue p0.b, vl16
; STREAMING-NEXT: lsl z0.b, z0.b, #7
+; STREAMING-NEXT: ptrue p0.b, vl16
; STREAMING-NEXT: ptrue p1.b
; STREAMING-NEXT: asr z0.b, z0.b, #7
; STREAMING-NEXT: cmpne p0.b, p...
[truncated]
|
🐧 Linux x64 Test Results
All executed tests passed, but another part of the build failed. Click on a failure below to see the details. lib/Target/AArch64/CMakeFiles/LLVMAArch64CodeGen.dir/AArch64SRLTDefineSuperRegs.cpp.o If these failures are unrelated to your changes (for example, tests are broken or flaky at HEAD), please open an issue at https://github.com/llvm/llvm-project/issues and add the |
Most uses of subreg liveness tracking will be in streaming SME2 functions, where it enables the use of both the strided and contiguous forms of the multi-vector LD1 instructions; see #123081 for details.