91 changes: 46 additions & 45 deletions llvm/test/CodeGen/AArch64/intrinsic-cttz-elts-sve.ll
@@ -1,3 +1,4 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme < %s | FileCheck %s

@@ -31,28 +32,28 @@ define i32 @ctz_nxv32i1(<vscale x 32 x i1> %a) #0 {
; CHECK-NEXT: neg x8, x8
; CHECK-NEXT: punpklo p3.h, p1.b
; CHECK-NEXT: rdvl x9, #2
; CHECK-NEXT: punpkhi p0.h, p0.b
; CHECK-NEXT: mov z1.h, w8
; CHECK-NEXT: rdvl x8, #-1
; CHECK-NEXT: punpkhi p1.h, p1.b
; CHECK-NEXT: punpkhi p0.h, p0.b
; CHECK-NEXT: mov z2.h, w8
; CHECK-NEXT: inch z0.h, all, mul #4
; CHECK-NEXT: punpkhi p1.h, p1.b
; CHECK-NEXT: mov z3.h, p2/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: ptrue p2.h
; CHECK-NEXT: inch z0.h, all, mul #4
; CHECK-NEXT: mov z5.h, p3/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: add z1.h, z0.h, z1.h
; CHECK-NEXT: add z4.h, z0.h, z2.h
; CHECK-NEXT: mov z6.h, p0/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: mov z7.h, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: add z1.h, z0.h, z1.h
; CHECK-NEXT: add z4.h, z0.h, z2.h
; CHECK-NEXT: and z0.d, z0.d, z3.d
; CHECK-NEXT: add z2.h, z1.h, z2.h
; CHECK-NEXT: and z3.d, z4.d, z5.d
; CHECK-NEXT: and z1.d, z1.d, z6.d
; CHECK-NEXT: and z2.d, z2.d, z7.d
; CHECK-NEXT: umax z0.h, p2/m, z0.h, z3.h
; CHECK-NEXT: umax z1.h, p2/m, z1.h, z2.h
; CHECK-NEXT: umax z0.h, p2/m, z0.h, z1.h
; CHECK-NEXT: umaxv h0, p2, z0.h
; CHECK-NEXT: umax z0.h, p0/m, z0.h, z3.h
; CHECK-NEXT: umax z1.h, p0/m, z1.h, z2.h
; CHECK-NEXT: umax z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: umaxv h0, p0, z0.h
; CHECK-NEXT: fmov w8, s0
; CHECK-NEXT: sub w8, w9, w8
; CHECK-NEXT: and w0, w8, #0xffff
@@ -65,12 +66,12 @@ define i32 @ctz_nxv4i32(<vscale x 4 x i32> %a) #0 {
; CHECK-LABEL: ctz_nxv4i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: index z1.s, #0, #-1
; CHECK-NEXT: cntw x9
; CHECK-NEXT: incw z1.s
; CHECK-NEXT: cmpne p1.s, p0/z, z0.s, #0
; CHECK-NEXT: mov z0.s, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: and z0.d, z1.d, z0.d
; CHECK-NEXT: index z0.s, #0, #-1
; CHECK-NEXT: mov z1.s, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: incw z0.s
; CHECK-NEXT: and z0.d, z0.d, z1.d
; CHECK-NEXT: and z0.s, z0.s, #0xff
; CHECK-NEXT: umaxv s0, p0, z0.s
; CHECK-NEXT: fmov w8, s0
Expand All @@ -87,38 +88,38 @@ define i64 @vscale_4096(<vscale x 16 x i8> %a) #1 {
; CHECK-LABEL: vscale_4096:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: index z1.s, #0, #-1
; CHECK-NEXT: cntw x8
; CHECK-NEXT: cnth x9
; CHECK-NEXT: neg x8, x8
; CHECK-NEXT: mov z1.s, w8
; CHECK-NEXT: neg x8, x9
; CHECK-NEXT: rdvl x9, #1
; CHECK-NEXT: mov z2.s, w8
; CHECK-NEXT: cmpne p0.b, p0/z, z0.b, #0
; CHECK-NEXT: index z0.s, #0, #-1
; CHECK-NEXT: mov z0.s, w8
; CHECK-NEXT: cnth x8
; CHECK-NEXT: neg x8, x8
; CHECK-NEXT: incw z1.s, all, mul #4
; CHECK-NEXT: mov z2.s, w8
; CHECK-NEXT: punpklo p1.h, p0.b
; CHECK-NEXT: punpkhi p0.h, p0.b
; CHECK-NEXT: incw z0.s, all, mul #4
; CHECK-NEXT: add z1.s, z0.s, z1.s
; CHECK-NEXT: add z5.s, z0.s, z2.s
; CHECK-NEXT: add z0.s, z1.s, z0.s
; CHECK-NEXT: add z4.s, z1.s, z2.s
; CHECK-NEXT: punpkhi p2.h, p1.b
; CHECK-NEXT: punpkhi p3.h, p0.b
; CHECK-NEXT: punpklo p0.h, p0.b
; CHECK-NEXT: add z2.s, z1.s, z2.s
; CHECK-NEXT: punpklo p1.h, p1.b
; CHECK-NEXT: mov z3.s, p2/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: ptrue p2.s
; CHECK-NEXT: mov z4.s, p3/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: add z2.s, z0.s, z2.s
; CHECK-NEXT: punpklo p1.h, p1.b
; CHECK-NEXT: mov z5.s, p3/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: mov z6.s, p0/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: mov z7.s, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: and z1.d, z1.d, z3.d
; CHECK-NEXT: and z2.d, z2.d, z4.d
; CHECK-NEXT: and z3.d, z5.d, z6.d
; CHECK-NEXT: and z0.d, z0.d, z7.d
; CHECK-NEXT: umax z1.s, p2/m, z1.s, z2.s
; CHECK-NEXT: umax z0.s, p2/m, z0.s, z3.s
; CHECK-NEXT: umax z0.s, p2/m, z0.s, z1.s
; CHECK-NEXT: umaxv s0, p2, z0.s
; CHECK-NEXT: and z0.d, z0.d, z3.d
; CHECK-NEXT: and z2.d, z2.d, z5.d
; CHECK-NEXT: and z3.d, z4.d, z6.d
; CHECK-NEXT: and z1.d, z1.d, z7.d
; CHECK-NEXT: umax z0.s, p0/m, z0.s, z2.s
; CHECK-NEXT: umax z1.s, p0/m, z1.s, z3.s
; CHECK-NEXT: umax z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: umaxv s0, p0, z0.s
; CHECK-NEXT: fmov w8, s0
; CHECK-NEXT: sub w0, w9, w8
; CHECK-NEXT: ret
@@ -130,21 +131,21 @@ define i64 @vscale_4096_poison(<vscale x 16 x i8> %a) #1 {
; CHECK-LABEL: vscale_4096_poison:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: index z1.h, #0, #-1
; CHECK-NEXT: cnth x8
; CHECK-NEXT: rdvl x9, #1
; CHECK-NEXT: neg x8, x8
; CHECK-NEXT: mov z1.h, w8
; CHECK-NEXT: rdvl x9, #1
; CHECK-NEXT: cmpne p0.b, p0/z, z0.b, #0
; CHECK-NEXT: index z0.h, #0, #-1
; CHECK-NEXT: mov z0.h, w8
; CHECK-NEXT: inch z1.h, all, mul #2
; CHECK-NEXT: punpkhi p1.h, p0.b
; CHECK-NEXT: punpklo p0.h, p0.b
; CHECK-NEXT: inch z0.h, all, mul #2
; CHECK-NEXT: add z1.h, z0.h, z1.h
; CHECK-NEXT: add z0.h, z1.h, z0.h
; CHECK-NEXT: mov z2.h, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: mov z3.h, p0/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: and z1.d, z1.d, z2.d
; CHECK-NEXT: and z0.d, z0.d, z3.d
; CHECK-NEXT: and z0.d, z0.d, z2.d
; CHECK-NEXT: and z1.d, z1.d, z3.d
; CHECK-NEXT: umax z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: umaxv h0, p0, z0.h
; CHECK-NEXT: fmov w8, s0
Expand All @@ -161,16 +162,16 @@ define i32 @ctz_nxv8i1_no_range(<vscale x 8 x i1> %a) {
; CHECK-LABEL: ctz_nxv8i1_no_range:
; CHECK: // %bb.0:
; CHECK-NEXT: index z0.s, #0, #-1
; CHECK-NEXT: punpklo p1.h, p0.b
; CHECK-NEXT: cntw x8
; CHECK-NEXT: punpkhi p0.h, p0.b
; CHECK-NEXT: punpklo p1.h, p0.b
; CHECK-NEXT: neg x8, x8
; CHECK-NEXT: punpkhi p0.h, p0.b
; CHECK-NEXT: cnth x9
; CHECK-NEXT: mov z1.s, w8
; CHECK-NEXT: incw z0.s, all, mul #2
; CHECK-NEXT: mov z2.s, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: mov z3.s, p0/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: incw z0.s, all, mul #2
; CHECK-NEXT: add z1.s, z0.s, z1.s
; CHECK-NEXT: and z0.d, z0.d, z2.d
; CHECK-NEXT: and z1.d, z1.d, z3.d
@@ -212,8 +213,8 @@ define i32 @ctz_nxv16i1_poison(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a) {
define i32 @ctz_and_nxv16i1(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: ctz_and_nxv16i1:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.b
; CHECK-NEXT: cmpne p0.b, p0/z, z0.b, z1.b
; CHECK-NEXT: ptrue p1.b
; CHECK-NEXT: brkb p0.b, p1/z, p0.b
; CHECK-NEXT: cntp x0, p0, p0.b
; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
1,132 changes: 566 additions & 566 deletions llvm/test/CodeGen/AArch64/itofp.ll


6 changes: 3 additions & 3 deletions llvm/test/CodeGen/AArch64/ldexp.ll
@@ -4,9 +4,9 @@
define double @testExp(double %val, i32 %a) {
; CHECK-LABEL: testExp:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT: sxtw x8, w0
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: fmov d1, x8
; CHECK-NEXT: fscale z0.d, p0/m, z0.d, z1.d
@@ -22,8 +22,8 @@ declare double @ldexp(double, i32) memory(none)
define float @testExpf(float %val, i32 %a) {
; CHECK-LABEL: testExpf:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: fmov s1, w0
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: // kill: def $s0 killed $s0 def $z0
; CHECK-NEXT: fscale z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0
@@ -49,9 +49,9 @@ declare fp128 @ldexpl(fp128, i32) memory(none)
define half @testExpf16(half %val, i32 %a) {
; CHECK-LABEL: testExpf16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: fcvt s0, h0
; CHECK-NEXT: fmov s1, w0
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: fscale z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: fcvt h0, s0
; CHECK-NEXT: ret
43 changes: 21 additions & 22 deletions llvm/test/CodeGen/AArch64/llvm-ir-to-intrinsic.ll
@@ -10,9 +10,9 @@ define <vscale x 16 x i8> @sdiv_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
; CHECK: // %bb.0:
; CHECK-NEXT: sunpkhi z2.h, z1.b
; CHECK-NEXT: sunpkhi z3.h, z0.b
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: sunpklo z1.h, z1.b
; CHECK-NEXT: sunpklo z0.h, z0.b
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: sunpkhi z4.s, z2.h
; CHECK-NEXT: sunpkhi z5.s, z3.h
; CHECK-NEXT: sunpklo z2.s, z2.h
@@ -36,11 +36,11 @@ define <vscale x 8 x i16> @sdiv_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
define <vscale x 8 x i16> @sdiv_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: sdiv_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: sunpkhi z2.s, z1.h
; CHECK-NEXT: sunpkhi z3.s, z0.h
; CHECK-NEXT: sunpklo z1.s, z1.h
; CHECK-NEXT: sunpklo z0.s, z0.h
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: sdivr z2.s, p0/m, z2.s, z3.s
; CHECK-NEXT: sdiv z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: uzp1 z0.h, z0.h, z2.h
@@ -140,9 +140,9 @@ define <vscale x 16 x i8> @srem_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
define <vscale x 8 x i16> @srem_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: srem_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: sunpkhi z2.s, z1.h
; CHECK-NEXT: sunpkhi z3.s, z0.h
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: sunpklo z4.s, z0.h
; CHECK-NEXT: sdivr z2.s, p0/m, z2.s, z3.s
; CHECK-NEXT: sunpklo z3.s, z1.h
@@ -188,9 +188,9 @@ define <vscale x 16 x i8> @udiv_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
; CHECK: // %bb.0:
; CHECK-NEXT: uunpkhi z2.h, z1.b
; CHECK-NEXT: uunpkhi z3.h, z0.b
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: uunpklo z1.h, z1.b
; CHECK-NEXT: uunpklo z0.h, z0.b
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: uunpkhi z4.s, z2.h
; CHECK-NEXT: uunpkhi z5.s, z3.h
; CHECK-NEXT: uunpklo z2.s, z2.h
@@ -214,11 +214,11 @@ define <vscale x 16 x i8> @udiv_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
define <vscale x 8 x i16> @udiv_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: udiv_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: uunpkhi z2.s, z1.h
; CHECK-NEXT: uunpkhi z3.s, z0.h
; CHECK-NEXT: uunpklo z1.s, z1.h
; CHECK-NEXT: uunpklo z0.s, z0.h
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: udivr z2.s, p0/m, z2.s, z3.s
; CHECK-NEXT: udiv z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: uzp1 z0.h, z0.h, z2.h
@@ -261,9 +261,9 @@ define <vscale x 8 x i32> @udiv_split_i32(<vscale x 8 x i32> %a, <vscale x 8 x i
define <vscale x 2 x i32> @udiv_widen_i32(<vscale x 2 x i32> %a, <vscale x 2 x i32> %b) {
; CHECK-LABEL: udiv_widen_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: and z1.d, z1.d, #0xffffffff
; CHECK-NEXT: and z0.d, z0.d, #0xffffffff
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: udiv z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: ret
%div = udiv <vscale x 2 x i32> %a, %b
@@ -319,9 +319,9 @@ define <vscale x 16 x i8> @urem_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
define <vscale x 8 x i16> @urem_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: urem_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: uunpkhi z2.s, z1.h
; CHECK-NEXT: uunpkhi z3.s, z0.h
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: uunpklo z4.s, z0.h
; CHECK-NEXT: udivr z2.s, p0/m, z2.s, z3.s
; CHECK-NEXT: uunpklo z3.s, z1.h
@@ -558,9 +558,9 @@ define <vscale x 4 x i64> @umin_split_i64(<vscale x 4 x i64> %a, <vscale x 4 x i
define <vscale x 8 x i8> @umin_promote_i8(<vscale x 8 x i8> %a, <vscale x 8 x i8> %b) {
; CHECK-LABEL: umin_promote_i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: and z1.h, z1.h, #0xff
; CHECK-NEXT: and z0.h, z0.h, #0xff
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: umin z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: ret
%cmp = icmp ult <vscale x 8 x i8> %a, %b
@@ -704,9 +704,9 @@ define <vscale x 16 x i16> @umax_split_i16(<vscale x 16 x i16> %a, <vscale x 16
define <vscale x 2 x i32> @umax_promote_i32(<vscale x 2 x i32> %a, <vscale x 2 x i32> %b) {
; CHECK-LABEL: umax_promote_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: and z1.d, z1.d, #0xffffffff
; CHECK-NEXT: and z0.d, z0.d, #0xffffffff
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: umax z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: ret
%cmp = icmp ugt <vscale x 2 x i32> %a, %b
@@ -883,8 +883,8 @@ define <vscale x 4 x i64> @lsl_split_i64(<vscale x 4 x i64> %a, <vscale x 4 x i6
define <vscale x 4 x i16> @lsl_promote_i16(<vscale x 4 x i16> %a, <vscale x 4 x i16> %b){
; CHECK-LABEL: lsl_promote_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: and z1.s, z1.s, #0xffff
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: lsl z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: ret
%shl = shl <vscale x 4 x i16> %a, %b
@@ -982,9 +982,9 @@ define <vscale x 2 x i64> @lsr_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
define <vscale x 8 x i8> @lsr_promote_i8(<vscale x 8 x i8> %a, <vscale x 8 x i8> %b){
; CHECK-LABEL: lsr_promote_i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: and z1.h, z1.h, #0xff
; CHECK-NEXT: and z0.h, z0.h, #0xff
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: lsr z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: ret
%shr = lshr <vscale x 8 x i8> %a, %b
@@ -1081,10 +1081,10 @@ declare <vscale x 2 x i64> @llvm.fshr.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x
define <vscale x 2 x i64> @fshl_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c){
; CHECK-LABEL: fshl_i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov z3.d, #63 // =0x3f
; CHECK-NEXT: mov z4.d, z2.d
; CHECK-NEXT: lsr z1.d, z1.d, #1
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: bic z2.d, z3.d, z2.d
; CHECK-NEXT: and z4.d, z4.d, #0x3f
; CHECK-NEXT: lsl z0.d, p0/m, z0.d, z4.d
@@ -1098,17 +1098,16 @@ define <vscale x 2 x i64> @fshl_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b
define <vscale x 4 x i64> @fshl_illegal_i64(<vscale x 4 x i64> %a, <vscale x 4 x i64> %b, <vscale x 4 x i64> %c){
; CHECK-LABEL: fshl_illegal_i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov z6.d, #63 // =0x3f
; CHECK-NEXT: mov z7.d, z4.d
; CHECK-NEXT: lsr z2.d, z2.d, #1
; CHECK-NEXT: lsr z3.d, z3.d, #1
; CHECK-NEXT: bic z4.d, z6.d, z4.d
; CHECK-NEXT: and z7.d, z7.d, #0x3f
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: bic z7.d, z6.d, z4.d
; CHECK-NEXT: and z4.d, z4.d, #0x3f
; CHECK-NEXT: bic z6.d, z6.d, z5.d
; CHECK-NEXT: and z5.d, z5.d, #0x3f
; CHECK-NEXT: lsl z0.d, p0/m, z0.d, z7.d
; CHECK-NEXT: lsr z2.d, p0/m, z2.d, z4.d
; CHECK-NEXT: lsl z0.d, p0/m, z0.d, z4.d
; CHECK-NEXT: lsr z2.d, p0/m, z2.d, z7.d
; CHECK-NEXT: lsr z3.d, p0/m, z3.d, z6.d
; CHECK-NEXT: lsl z1.d, p0/m, z1.d, z5.d
; CHECK-NEXT: orr z0.d, z0.d, z2.d
@@ -1121,9 +1120,9 @@ define <vscale x 4 x i64> @fshl_illegal_i64(<vscale x 4 x i64> %a, <vscale x 4 x
define <vscale x 2 x i64> @fshl_rot_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b){
; CHECK-LABEL: fshl_rot_i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov z2.d, z1.d
; CHECK-NEXT: subr z1.d, z1.d, #0 // =0x0
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: and z2.d, z2.d, #0x3f
; CHECK-NEXT: and z1.d, z1.d, #0x3f
; CHECK-NEXT: lslr z2.d, p0/m, z2.d, z0.d
@@ -1138,11 +1137,11 @@ define <vscale x 2 x i64> @fshl_rot_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64
define <vscale x 4 x i64> @fshl_rot_illegal_i64(<vscale x 4 x i64> %a, <vscale x 4 x i64> %b){
; CHECK-LABEL: fshl_rot_illegal_i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov z4.d, z2.d
; CHECK-NEXT: subr z2.d, z2.d, #0 // =0x0
; CHECK-NEXT: mov z5.d, z3.d
; CHECK-NEXT: subr z3.d, z3.d, #0 // =0x0
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: and z4.d, z4.d, #0x3f
; CHECK-NEXT: and z2.d, z2.d, #0x3f
; CHECK-NEXT: and z5.d, z5.d, #0x3f
@@ -1175,10 +1174,10 @@ define <vscale x 2 x i64> @fshl_rot_const_i64(<vscale x 2 x i64> %a){
define <vscale x 2 x i64> @fshr_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c){
; CHECK-LABEL: fshr_i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov z3.d, #63 // =0x3f
; CHECK-NEXT: mov z4.d, z2.d
; CHECK-NEXT: lsl z0.d, z0.d, #1
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: bic z2.d, z3.d, z2.d
; CHECK-NEXT: and z4.d, z4.d, #0x3f
; CHECK-NEXT: lsr z1.d, p0/m, z1.d, z4.d
@@ -1192,9 +1191,9 @@ define <vscale x 2 x i64> @fshr_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b
define <vscale x 2 x i64> @fshr_rot_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b){
; CHECK-LABEL: fshr_rot_i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov z2.d, z1.d
; CHECK-NEXT: subr z1.d, z1.d, #0 // =0x0
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: and z2.d, z2.d, #0x3f
; CHECK-NEXT: and z1.d, z1.d, #0x3f
; CHECK-NEXT: lsrr z2.d, p0/m, z2.d, z0.d
28 changes: 14 additions & 14 deletions llvm/test/CodeGen/AArch64/load-insert-zero.ll
@@ -469,18 +469,18 @@ define void @predictor_4x4_neon(ptr nocapture noundef writeonly %0, i64 noundef
; CHECK-NEXT: lsr w8, w8, #24
; CHECK-NEXT: uaddl v0.8h, v0.8b, v1.8b
; CHECK-NEXT: urhadd v1.8b, v1.8b, v2.8b
; CHECK-NEXT: str s1, [x0]
; CHECK-NEXT: add v0.8h, v0.8h, v3.8h
; CHECK-NEXT: dup v3.8b, w8
; CHECK-NEXT: str s1, [x0]
; CHECK-NEXT: lsl x8, x1, #1
; CHECK-NEXT: rshrn v0.8b, v0.8h, #2
; CHECK-NEXT: zip1 v2.2s, v1.2s, v3.2s
; CHECK-NEXT: str s0, [x0, x1]
; CHECK-NEXT: zip1 v3.2s, v0.2s, v3.2s
; CHECK-NEXT: ext v2.8b, v2.8b, v0.8b, #1
; CHECK-NEXT: ext v1.8b, v3.8b, v0.8b, #1
; CHECK-NEXT: str s2, [x0, x8]
; CHECK-NEXT: add x8, x8, x1
; CHECK-NEXT: ext v1.8b, v3.8b, v0.8b, #1
; CHECK-NEXT: str s1, [x0, x8]
; CHECK-NEXT: ret
%5 = load i32, ptr %2, align 4
@@ -608,9 +608,9 @@ define void @predictor_4x4_neon_new(ptr nocapture noundef writeonly %0, i64 noun
define <vscale x 8 x i8> @loadnxv8i8(ptr %p) {
; CHECK-LABEL: loadnxv8i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl1
; CHECK-NEXT: mov z0.h, #0 // =0x0
; CHECK-NEXT: ldrb w8, [x0]
; CHECK-NEXT: ptrue p0.h, vl1
; CHECK-NEXT: mov z0.h, p0/m, w8
; CHECK-NEXT: ret
%l = load i8, ptr %p
@@ -631,9 +631,9 @@ define <vscale x 16 x i8> @loadnxv16i8(ptr %p) {
define <vscale x 4 x i16> @loadnxv4i16(ptr %p) {
; CHECK-LABEL: loadnxv4i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl1
; CHECK-NEXT: mov z0.s, #0 // =0x0
; CHECK-NEXT: ldrh w8, [x0]
; CHECK-NEXT: ptrue p0.s, vl1
; CHECK-NEXT: mov z0.s, p0/m, w8
; CHECK-NEXT: ret
%l = load i16, ptr %p
@@ -654,9 +654,9 @@ define <vscale x 8 x i16> @loadnxv8i16(ptr %p) {
define <vscale x 2 x i32> @loadnxv2i32(ptr %p) {
; CHECK-LABEL: loadnxv2i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl1
; CHECK-NEXT: mov z0.d, #0 // =0x0
; CHECK-NEXT: ldr w8, [x0]
; CHECK-NEXT: ptrue p0.d, vl1
; CHECK-NEXT: mov z0.d, p0/m, x8
; CHECK-NEXT: ret
%l = load i32, ptr %p
@@ -688,9 +688,9 @@ define <vscale x 2 x i64> @loadnxv2i64(ptr %p) {
define <vscale x 4 x half> @loadnxv4f16(ptr %p) {
; CHECK-LABEL: loadnxv4f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: mov w8, wzr
; CHECK-NEXT: index z0.s, #0, #1
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: mov z1.s, w8
; CHECK-NEXT: cmpeq p0.s, p0/z, z0.s, z1.s
; CHECK-NEXT: mov z0.h, #0 // =0x0
@@ -715,9 +715,9 @@ define <vscale x 8 x half> @loadnxv8f16(ptr %p) {
define <vscale x 4 x bfloat> @loadnxv4bf16(ptr %p) {
; CHECK-LABEL: loadnxv4bf16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: mov w8, wzr
; CHECK-NEXT: index z0.s, #0, #1
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: mov z1.s, w8
; CHECK-NEXT: cmpeq p0.s, p0/z, z0.s, z1.s
; CHECK-NEXT: mov z0.h, #0 // =0x0
@@ -742,9 +742,9 @@ define <vscale x 8 x bfloat> @loadnxv8bf16(ptr %p) {
define <vscale x 2 x float> @loadnxv2f32(ptr %p) {
; CHECK-LABEL: loadnxv2f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov x8, xzr
; CHECK-NEXT: index z0.d, #0, #1
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov z1.d, x8
; CHECK-NEXT: cmpeq p0.d, p0/z, z0.d, z1.d
; CHECK-NEXT: mov z0.s, #0 // =0x0
@@ -782,9 +782,9 @@ define <vscale x 2 x double> @loadnxv2f64(ptr %p) {
define <vscale x 8 x i8> @loadnxv8i8_offset(ptr %p) {
; CHECK-LABEL: loadnxv8i8_offset:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl1
; CHECK-NEXT: mov z0.h, #0 // =0x0
; CHECK-NEXT: ldrb w8, [x0, #1]
; CHECK-NEXT: ptrue p0.h, vl1
; CHECK-NEXT: mov z0.h, p0/m, w8
; CHECK-NEXT: ret
%g = getelementptr inbounds i8, ptr %p, i64 1
@@ -807,9 +807,9 @@ define <vscale x 16 x i8> @loadnxv16i8_offset(ptr %p) {
define <vscale x 4 x i16> @loadnxv4i16_offset(ptr %p) {
; CHECK-LABEL: loadnxv4i16_offset:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl1
; CHECK-NEXT: mov z0.s, #0 // =0x0
; CHECK-NEXT: ldurh w8, [x0, #1]
; CHECK-NEXT: ptrue p0.s, vl1
; CHECK-NEXT: mov z0.s, p0/m, w8
; CHECK-NEXT: ret
%g = getelementptr inbounds i8, ptr %p, i64 1
@@ -832,9 +832,9 @@ define <vscale x 8 x i16> @loadnxv8i16_offset(ptr %p) {
define <vscale x 2 x i32> @loadnxv2i32_offset(ptr %p) {
; CHECK-LABEL: loadnxv2i32_offset:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl1
; CHECK-NEXT: mov z0.d, #0 // =0x0
; CHECK-NEXT: ldur w8, [x0, #1]
; CHECK-NEXT: ptrue p0.d, vl1
; CHECK-NEXT: mov z0.d, p0/m, x8
; CHECK-NEXT: ret
%g = getelementptr inbounds i8, ptr %p, i64 1
@@ -869,9 +869,9 @@ define <vscale x 2 x i64> @loadnxv2i64_offset(ptr %p) {
define <vscale x 4 x half> @loadnxv4f16_offset(ptr %p) {
; CHECK-LABEL: loadnxv4f16_offset:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: mov w8, wzr
; CHECK-NEXT: index z0.s, #0, #1
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: mov z1.s, w8
; CHECK-NEXT: cmpeq p0.s, p0/z, z0.s, z1.s
; CHECK-NEXT: mov z0.h, #0 // =0x0
@@ -898,9 +898,9 @@ define <vscale x 8 x half> @loadnxv8f16_offset(ptr %p) {
define <vscale x 4 x bfloat> @loadnxv4bf16_offset(ptr %p) {
; CHECK-LABEL: loadnxv4bf16_offset:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: mov w8, wzr
; CHECK-NEXT: index z0.s, #0, #1
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: mov z1.s, w8
; CHECK-NEXT: cmpeq p0.s, p0/z, z0.s, z1.s
; CHECK-NEXT: mov z0.h, #0 // =0x0
@@ -927,9 +927,9 @@ define <vscale x 8 x bfloat> @loadnxv8bf16_offset(ptr %p) {
define <vscale x 2 x float> @loadnxv2f32_offset(ptr %p) {
; CHECK-LABEL: loadnxv2f32_offset:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov x8, xzr
; CHECK-NEXT: index z0.d, #0, #1
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov z1.d, x8
; CHECK-NEXT: cmpeq p0.d, p0/z, z0.d, z1.d
; CHECK-NEXT: mov z0.s, #0 // =0x0
48 changes: 24 additions & 24 deletions llvm/test/CodeGen/AArch64/logic-shift.ll
@@ -34,9 +34,9 @@ define i32 @or_lshr_commute1(i32 %x0, i32 %x1, i32 %y, i32 %z) {
define <8 x i16> @or_lshr_commute2(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %y, <8 x i16> %z) {
; CHECK-LABEL: or_lshr_commute2:
; CHECK: // %bb.0:
; CHECK-NEXT: neg v2.8h, v2.8h
; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ushl v0.8h, v0.8h, v2.8h
; CHECK-NEXT: neg v1.8h, v2.8h
; CHECK-NEXT: ushl v0.8h, v0.8h, v1.8h
; CHECK-NEXT: orr v0.16b, v0.16b, v3.16b
; CHECK-NEXT: ret
%sh1 = lshr <8 x i16> %x0, %y
@@ -49,9 +49,9 @@ define <2 x i64> @or_lshr_commute3(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %y, <2 x i64> %z) {
define <2 x i64> @or_lshr_commute3(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %y, <2 x i64> %z) {
; CHECK-LABEL: or_lshr_commute3:
; CHECK: // %bb.0:
; CHECK-NEXT: neg v2.2d, v2.2d
; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ushl v0.2d, v0.2d, v2.2d
; CHECK-NEXT: neg v1.2d, v2.2d
; CHECK-NEXT: ushl v0.2d, v0.2d, v1.2d
; CHECK-NEXT: orr v0.16b, v0.16b, v3.16b
; CHECK-NEXT: ret
%sh1 = lshr <2 x i64> %x0, %y
@@ -94,9 +94,9 @@ define i64 @or_ashr_commute1(i64 %x0, i64 %x1, i64 %y, i64 %z) {
define <4 x i32> @or_ashr_commute2(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %y, <4 x i32> %z) {
; CHECK-LABEL: or_ashr_commute2:
; CHECK: // %bb.0:
; CHECK-NEXT: neg v2.4s, v2.4s
; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
; CHECK-NEXT: sshl v0.4s, v0.4s, v2.4s
; CHECK-NEXT: neg v1.4s, v2.4s
; CHECK-NEXT: sshl v0.4s, v0.4s, v1.4s
; CHECK-NEXT: orr v0.16b, v0.16b, v3.16b
; CHECK-NEXT: ret
%sh1 = ashr <4 x i32> %x0, %y
@@ -109,9 +109,9 @@ define <16 x i8> @or_ashr_commute3(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %y, <16 x i8> %z) {
define <16 x i8> @or_ashr_commute3(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %y, <16 x i8> %z) {
; CHECK-LABEL: or_ashr_commute3:
; CHECK: // %bb.0:
; CHECK-NEXT: neg v2.16b, v2.16b
; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
; CHECK-NEXT: sshl v0.16b, v0.16b, v2.16b
; CHECK-NEXT: neg v1.16b, v2.16b
; CHECK-NEXT: sshl v0.16b, v0.16b, v1.16b
; CHECK-NEXT: orr v0.16b, v0.16b, v3.16b
; CHECK-NEXT: ret
%sh1 = ashr <16 x i8> %x0, %y
@@ -262,9 +262,9 @@ define i32 @xor_lshr_commute1(i32 %x0, i32 %x1, i32 %y, i32 %z) {
define <8 x i16> @xor_lshr_commute2(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %y, <8 x i16> %z) {
; CHECK-LABEL: xor_lshr_commute2:
; CHECK: // %bb.0:
; CHECK-NEXT: neg v2.8h, v2.8h
; CHECK-NEXT: eor v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ushl v0.8h, v0.8h, v2.8h
; CHECK-NEXT: neg v1.8h, v2.8h
; CHECK-NEXT: ushl v0.8h, v0.8h, v1.8h
; CHECK-NEXT: eor v0.16b, v0.16b, v3.16b
; CHECK-NEXT: ret
%sh1 = lshr <8 x i16> %x0, %y
@@ -277,9 +277,9 @@ define <2 x i64> @xor_lshr_commute3(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %y, <2 x i64> %z) {
define <2 x i64> @xor_lshr_commute3(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %y, <2 x i64> %z) {
; CHECK-LABEL: xor_lshr_commute3:
; CHECK: // %bb.0:
; CHECK-NEXT: neg v2.2d, v2.2d
; CHECK-NEXT: eor v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ushl v0.2d, v0.2d, v2.2d
; CHECK-NEXT: neg v1.2d, v2.2d
; CHECK-NEXT: ushl v0.2d, v0.2d, v1.2d
; CHECK-NEXT: eor v0.16b, v0.16b, v3.16b
; CHECK-NEXT: ret
%sh1 = lshr <2 x i64> %x0, %y
@@ -322,9 +322,9 @@ define i64 @xor_ashr_commute1(i64 %x0, i64 %x1, i64 %y, i64 %z) {
define <4 x i32> @xor_ashr_commute2(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %y, <4 x i32> %z) {
; CHECK-LABEL: xor_ashr_commute2:
; CHECK: // %bb.0:
; CHECK-NEXT: neg v2.4s, v2.4s
; CHECK-NEXT: eor v0.16b, v0.16b, v1.16b
; CHECK-NEXT: sshl v0.4s, v0.4s, v2.4s
; CHECK-NEXT: neg v1.4s, v2.4s
; CHECK-NEXT: sshl v0.4s, v0.4s, v1.4s
; CHECK-NEXT: eor v0.16b, v0.16b, v3.16b
; CHECK-NEXT: ret
%sh1 = ashr <4 x i32> %x0, %y
@@ -337,9 +337,9 @@ define <16 x i8> @xor_ashr_commute3(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %y, <16 x i8> %z) {
define <16 x i8> @xor_ashr_commute3(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %y, <16 x i8> %z) {
; CHECK-LABEL: xor_ashr_commute3:
; CHECK: // %bb.0:
; CHECK-NEXT: neg v2.16b, v2.16b
; CHECK-NEXT: eor v0.16b, v0.16b, v1.16b
; CHECK-NEXT: sshl v0.16b, v0.16b, v2.16b
; CHECK-NEXT: neg v1.16b, v2.16b
; CHECK-NEXT: sshl v0.16b, v0.16b, v1.16b
; CHECK-NEXT: eor v0.16b, v0.16b, v3.16b
; CHECK-NEXT: ret
%sh1 = ashr <16 x i8> %x0, %y
@@ -490,9 +490,9 @@ define i32 @and_lshr_commute1(i32 %x0, i32 %x1, i32 %y, i32 %z) {
define <8 x i16> @and_lshr_commute2(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %y, <8 x i16> %z) {
; CHECK-LABEL: and_lshr_commute2:
; CHECK: // %bb.0:
; CHECK-NEXT: neg v2.8h, v2.8h
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ushl v0.8h, v0.8h, v2.8h
; CHECK-NEXT: neg v1.8h, v2.8h
; CHECK-NEXT: ushl v0.8h, v0.8h, v1.8h
; CHECK-NEXT: and v0.16b, v0.16b, v3.16b
; CHECK-NEXT: ret
%sh1 = lshr <8 x i16> %x0, %y
@@ -505,9 +505,9 @@ define <2 x i64> @and_lshr_commute3(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %y, <2 x i64> %z) {
define <2 x i64> @and_lshr_commute3(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %y, <2 x i64> %z) {
; CHECK-LABEL: and_lshr_commute3:
; CHECK: // %bb.0:
; CHECK-NEXT: neg v2.2d, v2.2d
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ushl v0.2d, v0.2d, v2.2d
; CHECK-NEXT: neg v1.2d, v2.2d
; CHECK-NEXT: ushl v0.2d, v0.2d, v1.2d
; CHECK-NEXT: and v0.16b, v0.16b, v3.16b
; CHECK-NEXT: ret
%sh1 = lshr <2 x i64> %x0, %y
@@ -550,9 +550,9 @@ define i64 @and_ashr_commute1(i64 %x0, i64 %x1, i64 %y, i64 %z) {
define <4 x i32> @and_ashr_commute2(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %y, <4 x i32> %z) {
; CHECK-LABEL: and_ashr_commute2:
; CHECK: // %bb.0:
; CHECK-NEXT: neg v2.4s, v2.4s
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-NEXT: sshl v0.4s, v0.4s, v2.4s
; CHECK-NEXT: neg v1.4s, v2.4s
; CHECK-NEXT: sshl v0.4s, v0.4s, v1.4s
; CHECK-NEXT: and v0.16b, v0.16b, v3.16b
; CHECK-NEXT: ret
%sh1 = ashr <4 x i32> %x0, %y
@@ -565,9 +565,9 @@ define <16 x i8> @and_ashr_commute3(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %y, <16 x i8> %z) {
define <16 x i8> @and_ashr_commute3(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %y, <16 x i8> %z) {
; CHECK-LABEL: and_ashr_commute3:
; CHECK: // %bb.0:
; CHECK-NEXT: neg v2.16b, v2.16b
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-NEXT: sshl v0.16b, v0.16b, v2.16b
; CHECK-NEXT: neg v1.16b, v2.16b
; CHECK-NEXT: sshl v0.16b, v0.16b, v1.16b
; CHECK-NEXT: and v0.16b, v0.16b, v3.16b
; CHECK-NEXT: ret
%sh1 = ashr <16 x i8> %x0, %y
76 changes: 38 additions & 38 deletions llvm/test/CodeGen/AArch64/named-vector-shuffles-sve.ll
@@ -258,10 +258,10 @@ define <vscale x 2 x i1> @splice_nxv2i1_idx(<vscale x 2 x i1> %a, <vscale x 2 x
; CHECK: // %bb.0:
; CHECK-NEXT: mov z0.d, p1/z, #1 // =0x1
; CHECK-NEXT: mov z1.d, p0/z, #1 // =0x1
; CHECK-NEXT: ptrue p2.d
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: ext z1.b, z1.b, z0.b, #8
; CHECK-NEXT: and z1.d, z1.d, #0x1
; CHECK-NEXT: cmpne p0.d, p2/z, z1.d, #0
; CHECK-NEXT: cmpne p0.d, p0/z, z1.d, #0
; CHECK-NEXT: ret
%res = call <vscale x 2 x i1> @llvm.experimental.vector.splice.nxv2i1(<vscale x 2 x i1> %a, <vscale x 2 x i1> %b, i32 1)
ret <vscale x 2 x i1> %res
@@ -273,10 +273,10 @@ define <vscale x 4 x i1> @splice_nxv4i1_idx(<vscale x 4 x i1> %a, <vscale x 4 x
; CHECK: // %bb.0:
; CHECK-NEXT: mov z0.s, p1/z, #1 // =0x1
; CHECK-NEXT: mov z1.s, p0/z, #1 // =0x1
; CHECK-NEXT: ptrue p2.s
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: ext z1.b, z1.b, z0.b, #8
; CHECK-NEXT: and z1.s, z1.s, #0x1
; CHECK-NEXT: cmpne p0.s, p2/z, z1.s, #0
; CHECK-NEXT: cmpne p0.s, p0/z, z1.s, #0
; CHECK-NEXT: ret
%res = call <vscale x 4 x i1> @llvm.experimental.vector.splice.nxv4i1(<vscale x 4 x i1> %a, <vscale x 4 x i1> %b, i32 2)
ret <vscale x 4 x i1> %res
@@ -288,10 +288,10 @@ define <vscale x 8 x i1> @splice_nxv8i1_idx(<vscale x 8 x i1> %a, <vscale x 8 x
; CHECK: // %bb.0:
; CHECK-NEXT: mov z0.h, p1/z, #1 // =0x1
; CHECK-NEXT: mov z1.h, p0/z, #1 // =0x1
; CHECK-NEXT: ptrue p2.h
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: ext z1.b, z1.b, z0.b, #8
; CHECK-NEXT: and z1.h, z1.h, #0x1
; CHECK-NEXT: cmpne p0.h, p2/z, z1.h, #0
; CHECK-NEXT: cmpne p0.h, p0/z, z1.h, #0
; CHECK-NEXT: ret
%res = call <vscale x 8 x i1> @llvm.experimental.vector.splice.nxv8i1(<vscale x 8 x i1> %a, <vscale x 8 x i1> %b, i32 4)
ret <vscale x 8 x i1> %res
@@ -303,10 +303,10 @@ define <vscale x 16 x i1> @splice_nxv16i1_idx(<vscale x 1
; CHECK: // %bb.0:
; CHECK-NEXT: mov z0.b, p1/z, #1 // =0x1
; CHECK-NEXT: mov z1.b, p0/z, #1 // =0x1
; CHECK-NEXT: ptrue p2.b
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: ext z1.b, z1.b, z0.b, #8
; CHECK-NEXT: and z1.b, z1.b, #0x1
; CHECK-NEXT: cmpne p0.b, p2/z, z1.b, #0
; CHECK-NEXT: cmpne p0.b, p0/z, z1.b, #0
; CHECK-NEXT: ret
%res = call <vscale x 16 x i1> @llvm.experimental.vector.splice.nxv16i1(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b, i32 8)
ret <vscale x 16 x i1> %res
@@ -350,16 +350,16 @@ define <vscale x 16 x float> @splice_nxv16f32_16(<vscale x 16 x float> %a, <vsca
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-8
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: rdvl x8, #1
; CHECK-NEXT: mov w9, #16 // =0x10
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: sub x8, x8, #1
; CHECK-NEXT: cmp x8, #16
; CHECK-NEXT: st1w { z3.s }, p0, [sp, #3, mul vl]
; CHECK-NEXT: csel x8, x8, x9, lo
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: add x10, x9, x8, lsl #2
; CHECK-NEXT: st1w { z3.s }, p0, [sp, #3, mul vl]
; CHECK-NEXT: st1w { z2.s }, p0, [sp, #2, mul vl]
; CHECK-NEXT: add x10, x9, x8, lsl #2
; CHECK-NEXT: st1w { z1.s }, p0, [sp, #1, mul vl]
; CHECK-NEXT: st1w { z0.s }, p0, [sp]
; CHECK-NEXT: st1w { z7.s }, p0, [sp, #7, mul vl]
@@ -452,16 +452,16 @@ define <vscale x 16 x i8> @splice_nxv16i8_neg17(<vscale x 16 x i8> %a, <vscale x
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-2
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: rdvl x8, #1
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: mov w9, #17 // =0x11
; CHECK-NEXT: mov x10, sp
; CHECK-NEXT: cmp x8, #17
; CHECK-NEXT: add x10, x10, x8
; CHECK-NEXT: csel x8, x8, x9, lo
; CHECK-NEXT: sub x8, x10, x8
; CHECK-NEXT: mov x10, sp
; CHECK-NEXT: csel x9, x8, x9, lo
; CHECK-NEXT: add x8, x10, x8
; CHECK-NEXT: st1b { z0.b }, p0, [sp]
; CHECK-NEXT: st1b { z1.b }, p0, [sp, #1, mul vl]
; CHECK-NEXT: sub x8, x8, x9
; CHECK-NEXT: ld1b { z0.b }, p0/z, [x8]
; CHECK-NEXT: addvl sp, sp, #2
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
@@ -497,16 +497,16 @@ define <vscale x 8 x i16> @splice_nxv8i16_neg9(<vscale x 8 x i16> %a, <vscale x
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-2
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: rdvl x8, #1
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: mov w9, #18 // =0x12
; CHECK-NEXT: mov x10, sp
; CHECK-NEXT: cmp x8, #18
; CHECK-NEXT: add x10, x10, x8
; CHECK-NEXT: csel x8, x8, x9, lo
; CHECK-NEXT: sub x8, x10, x8
; CHECK-NEXT: mov x10, sp
; CHECK-NEXT: csel x9, x8, x9, lo
; CHECK-NEXT: add x8, x10, x8
; CHECK-NEXT: st1h { z0.h }, p0, [sp]
; CHECK-NEXT: st1h { z1.h }, p0, [sp, #1, mul vl]
; CHECK-NEXT: sub x8, x8, x9
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x8]
; CHECK-NEXT: addvl sp, sp, #2
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
@@ -608,16 +608,16 @@ define <vscale x 8 x half> @splice_nxv8f16_neg9(<vscale x 8 x half> %a, <vscale
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-2
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: rdvl x8, #1
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: mov w9, #18 // =0x12
; CHECK-NEXT: mov x10, sp
; CHECK-NEXT: cmp x8, #18
; CHECK-NEXT: add x10, x10, x8
; CHECK-NEXT: csel x8, x8, x9, lo
; CHECK-NEXT: sub x8, x10, x8
; CHECK-NEXT: mov x10, sp
; CHECK-NEXT: csel x9, x8, x9, lo
; CHECK-NEXT: add x8, x10, x8
; CHECK-NEXT: st1h { z0.h }, p0, [sp]
; CHECK-NEXT: st1h { z1.h }, p0, [sp, #1, mul vl]
; CHECK-NEXT: sub x8, x8, x9
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x8]
; CHECK-NEXT: addvl sp, sp, #2
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
@@ -699,9 +699,9 @@ define <vscale x 2 x i1> @splice_nxv2i1(<vscale x 2 x i1> %a, <vscale x 2 x i1>
; CHECK-NEXT: ptrue p2.d, vl1
; CHECK-NEXT: mov z0.d, p1/z, #1 // =0x1
; CHECK-NEXT: mov z1.d, p0/z, #1 // =0x1
; CHECK-NEXT: rev p0.d, p2.d
; CHECK-NEXT: splice z1.d, p0, z1.d, z0.d
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: rev p2.d, p2.d
; CHECK-NEXT: splice z1.d, p2, z1.d, z0.d
; CHECK-NEXT: and z1.d, z1.d, #0x1
; CHECK-NEXT: cmpne p0.d, p0/z, z1.d, #0
; CHECK-NEXT: ret
@@ -716,9 +716,9 @@ define <vscale x 4 x i1> @splice_nxv4i1(<vscale x 4 x i1> %a, <vscale x 4 x i1>
; CHECK-NEXT: ptrue p2.s, vl1
; CHECK-NEXT: mov z0.s, p1/z, #1 // =0x1
; CHECK-NEXT: mov z1.s, p0/z, #1 // =0x1
; CHECK-NEXT: rev p0.s, p2.s
; CHECK-NEXT: splice z1.s, p0, z1.s, z0.s
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: rev p2.s, p2.s
; CHECK-NEXT: splice z1.s, p2, z1.s, z0.s
; CHECK-NEXT: and z1.s, z1.s, #0x1
; CHECK-NEXT: cmpne p0.s, p0/z, z1.s, #0
; CHECK-NEXT: ret
@@ -733,9 +733,9 @@ define <vscale x 8 x i1> @splice_nxv8i1(<vscale x 8 x i1> %a, <vscale x 8 x i1>
; CHECK-NEXT: ptrue p2.h, vl1
; CHECK-NEXT: mov z0.h, p1/z, #1 // =0x1
; CHECK-NEXT: mov z1.h, p0/z, #1 // =0x1
; CHECK-NEXT: rev p0.h, p2.h
; CHECK-NEXT: splice z1.h, p0, z1.h, z0.h
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: rev p2.h, p2.h
; CHECK-NEXT: splice z1.h, p2, z1.h, z0.h
; CHECK-NEXT: and z1.h, z1.h, #0x1
; CHECK-NEXT: cmpne p0.h, p0/z, z1.h, #0
; CHECK-NEXT: ret
@@ -750,9 +750,9 @@ define <vscale x 16 x i1> @splice_nxv16i1(<vscale x 16 x
; CHECK-NEXT: ptrue p2.b, vl1
; CHECK-NEXT: mov z0.b, p1/z, #1 // =0x1
; CHECK-NEXT: mov z1.b, p0/z, #1 // =0x1
; CHECK-NEXT: rev p0.b, p2.b
; CHECK-NEXT: splice z1.b, p0, z1.b, z0.b
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: rev p2.b, p2.b
; CHECK-NEXT: splice z1.b, p2, z1.b, z0.b
; CHECK-NEXT: and z1.b, z1.b, #0x1
; CHECK-NEXT: cmpne p0.b, p0/z, z1.b, #0
; CHECK-NEXT: ret
@@ -783,8 +783,8 @@ define <vscale x 8 x i32> @splice_nxv8i32(<vscale x 8 x i
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: add x8, x9, x8
; CHECK-NEXT: mov x9, #-8 // =0xfffffffffffffff8
; CHECK-NEXT: sub x10, x8, #32
; CHECK-NEXT: st1w { z1.s }, p0, [sp, #1, mul vl]
; CHECK-NEXT: sub x10, x8, #32
; CHECK-NEXT: st1w { z0.s }, p0, [sp]
; CHECK-NEXT: st1w { z3.s }, p0, [sp, #3, mul vl]
; CHECK-NEXT: st1w { z2.s }, p0, [sp, #2, mul vl]
@@ -803,16 +803,16 @@ define <vscale x 16 x float> @splice_nxv16f32_neg17(<vscale x 16 x float> %a, <v
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-8
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: rdvl x8, #4
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: mov w9, #68 // =0x44
; CHECK-NEXT: mov x10, sp
; CHECK-NEXT: cmp x8, #68
; CHECK-NEXT: mov x10, sp
; CHECK-NEXT: csel x9, x8, x9, lo
; CHECK-NEXT: add x8, x10, x8
; CHECK-NEXT: sub x8, x8, x9
; CHECK-NEXT: st1w { z3.s }, p0, [sp, #3, mul vl]
; CHECK-NEXT: st1w { z2.s }, p0, [sp, #2, mul vl]
; CHECK-NEXT: sub x8, x8, x9
; CHECK-NEXT: st1w { z1.s }, p0, [sp, #1, mul vl]
; CHECK-NEXT: st1w { z0.s }, p0, [sp]
; CHECK-NEXT: st1w { z7.s }, p0, [sp, #7, mul vl]
758 changes: 379 additions & 379 deletions llvm/test/CodeGen/AArch64/neon-dotreduce.ll


12 changes: 6 additions & 6 deletions llvm/test/CodeGen/AArch64/neon-extadd.ll
@@ -515,15 +515,15 @@ define <20 x i32> @v20(<20 x i8> %s0, <20 x i8> %s1) {
; CHECK-NEXT: mov v0.b[6], w6
; CHECK-NEXT: ld1 { v1.b }[7], [x9]
; CHECK-NEXT: uaddl v2.8h, v3.8b, v2.8b
; CHECK-NEXT: ushll v3.4s, v4.4h, #0
; CHECK-NEXT: mov v0.b[7], w7
; CHECK-NEXT: ushll2 v3.4s, v2.8h, #0
; CHECK-NEXT: ushll v2.4s, v2.4h, #0
; CHECK-NEXT: uaddl v0.8h, v0.8b, v1.8b
; CHECK-NEXT: ushll v1.4s, v4.4h, #0
; CHECK-NEXT: stp q3, q1, [x8, #48]
; CHECK-NEXT: ushll2 v1.4s, v0.8h, #0
; CHECK-NEXT: ushll2 v1.4s, v2.8h, #0
; CHECK-NEXT: ushll v2.4s, v2.4h, #0
; CHECK-NEXT: stp q1, q3, [x8, #48]
; CHECK-NEXT: ushll2 v3.4s, v0.8h, #0
; CHECK-NEXT: ushll v0.4s, v0.4h, #0
; CHECK-NEXT: stp q1, q2, [x8, #16]
; CHECK-NEXT: stp q3, q2, [x8, #16]
; CHECK-NEXT: str q0, [x8]
; CHECK-NEXT: ret
entry:
24 changes: 12 additions & 12 deletions llvm/test/CodeGen/AArch64/neon-shift-neg.ll
@@ -375,8 +375,8 @@ entry:
define <vscale x 2 x i64> @shrn64x2(<vscale x 2 x i64> %a, i64 %b) {
; CHECK-LABEL: shrn64x2:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: neg x8, x0
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov z1.d, x8
; CHECK-NEXT: asr z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: ret
@@ -391,8 +391,8 @@ entry:
define <vscale x 4 x i32> @shrn32x4(<vscale x 4 x i32> %a, i32 %b) {
; CHECK-LABEL: shrn32x4:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: neg w8, w0
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: mov z1.s, w8
; CHECK-NEXT: asr z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: ret
@@ -407,8 +407,8 @@ entry:
define <vscale x 8 x i16> @shrn16x8(<vscale x 8 x i16> %a, i16 %b) {
; CHECK-LABEL: shrn16x8:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: neg w8, w0
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: mov z1.h, w8
; CHECK-NEXT: asr z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: ret
@@ -423,8 +423,8 @@ entry:
define <vscale x 16 x i8> @shrn8x16(<vscale x 16 x i8> %a, i8 %b) {
; CHECK-LABEL: shrn8x16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: neg w8, w0
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: mov z1.b, w8
; CHECK-NEXT: asr z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT: ret
@@ -439,8 +439,8 @@ entry:
define <vscale x 2 x i64> @lshrn64x2(<vscale x 2 x i64> %a, i64 %b) {
; CHECK-LABEL: lshrn64x2:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: neg x8, x0
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov z1.d, x8
; CHECK-NEXT: lsr z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: ret
@@ -455,8 +455,8 @@ entry:
define <vscale x 4 x i32> @lshrn32x4(<vscale x 4 x i32> %a, i32 %b) {
; CHECK-LABEL: lshrn32x4:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: neg w8, w0
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: mov z1.s, w8
; CHECK-NEXT: lsr z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: ret
@@ -471,8 +471,8 @@ entry:
define <vscale x 8 x i16> @lshrn16x8(<vscale x 8 x i16> %a, i16 %b) {
; CHECK-LABEL: lshrn16x8:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: neg w8, w0
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: mov z1.h, w8
; CHECK-NEXT: lsr z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: ret
@@ -487,8 +487,8 @@ entry:
define <vscale x 16 x i8> @lshrn8x16(<vscale x 16 x i8> %a, i8 %b) {
; CHECK-LABEL: lshrn8x16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: neg w8, w0
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: mov z1.b, w8
; CHECK-NEXT: lsr z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT: ret
@@ -503,8 +503,8 @@ entry:
define <vscale x 2 x i64> @shln64x2(<vscale x 2 x i64> %a, i64 %b) {
; CHECK-LABEL: shln64x2:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: neg x8, x0
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov z1.d, x8
; CHECK-NEXT: lsl z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: ret
@@ -519,8 +519,8 @@ entry:
define <vscale x 4 x i32> @shln32x4(<vscale x 4 x i32> %a, i32 %b) {
; CHECK-LABEL: shln32x4:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: neg w8, w0
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: mov z1.s, w8
; CHECK-NEXT: lsl z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: ret
@@ -535,8 +535,8 @@ entry:
define <vscale x 8 x i16> @shln16x8(<vscale x 8 x i16> %a, i16 %b) {
; CHECK-LABEL: shln16x8:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: neg w8, w0
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: mov z1.h, w8
; CHECK-NEXT: lsl z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: ret
@@ -551,8 +551,8 @@ entry:
define <vscale x 16 x i8> @shln8x16(<vscale x 16 x i8> %a, i8 %b) {
; CHECK-LABEL: shln8x16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: neg w8, w0
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: mov z1.b, w8
; CHECK-NEXT: lsl z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT: ret
22 changes: 11 additions & 11 deletions llvm/test/CodeGen/AArch64/predicated-add-sub.ll
@@ -83,11 +83,11 @@ define <vscale x 2 x i64> @zext.add.2xi64(<vscale x 2 x i64> %a, <vscale x 2 x i
define <vscale x 8 x i32> @zext.add.8xi32(<vscale x 8 x i32> %a, <vscale x 8 x i1> %v) #0 {
; CHECK-LABEL: zext.add.8xi32:
; CHECK: // %bb.0:
; CHECK-NEXT: punpkhi p1.h, p0.b
; CHECK-NEXT: mov z2.s, #1 // =0x1
; CHECK-NEXT: punpkhi p1.h, p0.b
; CHECK-NEXT: punpklo p0.h, p0.b
; CHECK-NEXT: add z1.s, p1/m, z1.s, z2.s
; CHECK-NEXT: add z0.s, p0/m, z0.s, z2.s
; CHECK-NEXT: add z1.s, p1/m, z1.s, z2.s
; CHECK-NEXT: ret
%extend = zext <vscale x 8 x i1> %v to <vscale x 8 x i32>
%result = add <vscale x 8 x i32> %a, %extend
@@ -103,8 +103,8 @@ define <vscale x 16 x i32> @zext.add.16xi32(<vscale x 16 x i32> %a, <vscale x 16
; CHECK-NEXT: punpkhi p2.h, p1.b
; CHECK-NEXT: punpklo p1.h, p1.b
; CHECK-NEXT: punpklo p3.h, p0.b
; CHECK-NEXT: punpkhi p0.h, p0.b
; CHECK-NEXT: add z3.s, p2/m, z3.s, z4.s
; CHECK-NEXT: punpkhi p0.h, p0.b
; CHECK-NEXT: add z2.s, p1/m, z2.s, z4.s
; CHECK-NEXT: add z0.s, p3/m, z0.s, z4.s
; CHECK-NEXT: add z1.s, p0/m, z1.s, z4.s
@@ -194,8 +194,8 @@ define <vscale x 2 x i64> @zext.sub.2xi64(<vscale x 2 x i64> %a, <vscale x 2 x i
define <vscale x 8 x i32> @zext.sub.8xi32(<vscale x 8 x i32> %a, <vscale x 8 x i1> %v) #0 {
; CHECK-LABEL: zext.sub.8xi32:
; CHECK: // %bb.0:
; CHECK-NEXT: punpklo p1.h, p0.b
; CHECK-NEXT: mov z2.s, #-1 // =0xffffffffffffffff
; CHECK-NEXT: punpklo p1.h, p0.b
; CHECK-NEXT: punpkhi p0.h, p0.b
; CHECK-NEXT: add z0.s, p1/m, z0.s, z2.s
; CHECK-NEXT: add z1.s, p0/m, z1.s, z2.s
@@ -214,8 +214,8 @@ define <vscale x 16 x i32> @zext.sub.16xi32(<vscale x 16 x i32> %a, <vscale x 16
; CHECK-NEXT: punpklo p2.h, p1.b
; CHECK-NEXT: punpkhi p1.h, p1.b
; CHECK-NEXT: punpklo p3.h, p0.b
; CHECK-NEXT: punpkhi p0.h, p0.b
; CHECK-NEXT: add z0.s, p2/m, z0.s, z4.s
; CHECK-NEXT: punpkhi p0.h, p0.b
; CHECK-NEXT: add z1.s, p1/m, z1.s, z4.s
; CHECK-NEXT: add z2.s, p3/m, z2.s, z4.s
; CHECK-NEXT: add z3.s, p0/m, z3.s, z4.s
@@ -305,11 +305,11 @@ define <vscale x 2 x i64> @sext.add.2xi64(<vscale x 2 x i64> %a, <vscale x 2 x i
define <vscale x 8 x i32> @sext.add.8xi32(<vscale x 8 x i32> %a, <vscale x 8 x i1> %v) #0 {
; CHECK-LABEL: sext.add.8xi32:
; CHECK: // %bb.0:
; CHECK-NEXT: punpkhi p1.h, p0.b
; CHECK-NEXT: mov z2.s, #-1 // =0xffffffffffffffff
; CHECK-NEXT: punpkhi p1.h, p0.b
; CHECK-NEXT: punpklo p0.h, p0.b
; CHECK-NEXT: add z1.s, p1/m, z1.s, z2.s
; CHECK-NEXT: add z0.s, p0/m, z0.s, z2.s
; CHECK-NEXT: add z1.s, p1/m, z1.s, z2.s
; CHECK-NEXT: ret
%extend = sext <vscale x 8 x i1> %v to <vscale x 8 x i32>
%result = add <vscale x 8 x i32> %a, %extend
@@ -325,8 +325,8 @@ define <vscale x 16 x i32> @sext.add.16xi32(<vscale x 16 x i32> %a, <vscale x 16
; CHECK-NEXT: punpkhi p2.h, p1.b
; CHECK-NEXT: punpklo p1.h, p1.b
; CHECK-NEXT: punpklo p3.h, p0.b
; CHECK-NEXT: punpkhi p0.h, p0.b
; CHECK-NEXT: add z3.s, p2/m, z3.s, z4.s
; CHECK-NEXT: punpkhi p0.h, p0.b
; CHECK-NEXT: add z2.s, p1/m, z2.s, z4.s
; CHECK-NEXT: add z0.s, p3/m, z0.s, z4.s
; CHECK-NEXT: add z1.s, p0/m, z1.s, z4.s
@@ -416,11 +416,11 @@ define <vscale x 2 x i64> @sext.sub.2xi64(<vscale x 2 x i64> %a, <vscale x 2 x i
define <vscale x 8 x i32> @sext.sub.8xi32(<vscale x 8 x i32> %a, <vscale x 8 x i1> %v) #0 {
; CHECK-LABEL: sext.sub.8xi32:
; CHECK: // %bb.0:
; CHECK-NEXT: punpkhi p1.h, p0.b
; CHECK-NEXT: mov z2.s, #-1 // =0xffffffffffffffff
; CHECK-NEXT: punpkhi p1.h, p0.b
; CHECK-NEXT: punpklo p0.h, p0.b
; CHECK-NEXT: sub z1.s, p1/m, z1.s, z2.s
; CHECK-NEXT: sub z0.s, p0/m, z0.s, z2.s
; CHECK-NEXT: sub z1.s, p1/m, z1.s, z2.s
; CHECK-NEXT: ret
%extend = sext <vscale x 8 x i1> %v to <vscale x 8 x i32>
%result = sub <vscale x 8 x i32> %a, %extend
@@ -436,8 +436,8 @@ define <vscale x 16 x i32> @sext.sub.16xi32(<vscale x 16 x i32> %a, <vscale x 16
; CHECK-NEXT: punpkhi p2.h, p1.b
; CHECK-NEXT: punpklo p1.h, p1.b
; CHECK-NEXT: punpklo p3.h, p0.b
; CHECK-NEXT: punpkhi p0.h, p0.b
; CHECK-NEXT: sub z3.s, p2/m, z3.s, z4.s
; CHECK-NEXT: punpkhi p0.h, p0.b
; CHECK-NEXT: sub z2.s, p1/m, z2.s, z4.s
; CHECK-NEXT: sub z0.s, p3/m, z0.s, z4.s
; CHECK-NEXT: sub z1.s, p0/m, z1.s, z4.s
262 changes: 129 additions & 133 deletions llvm/test/CodeGen/AArch64/ragreedy-local-interval-cost.ll
@@ -28,197 +28,193 @@ define dso_local void @run_test() local_unnamed_addr uwtable {
; CHECK-NEXT: .cfi_offset b13, -80
; CHECK-NEXT: .cfi_offset b14, -88
; CHECK-NEXT: .cfi_offset b15, -96
; CHECK-NEXT: movi v1.2d, #0000000000000000
; CHECK-NEXT: // implicit-def: $q6
; CHECK-NEXT: movi v2.2d, #0000000000000000
; CHECK-NEXT: // implicit-def: $q1
; CHECK-NEXT: mov x8, xzr
; CHECK-NEXT: mov x9, xzr
; CHECK-NEXT: adrp x10, B+48
; CHECK-NEXT: add x10, x10, :lo12:B+48
; CHECK-NEXT: adrp x11, A
; CHECK-NEXT: add x11, x11, :lo12:A
; CHECK-NEXT: // kill: killed $q6
; CHECK-NEXT: // implicit-def: $q6
; CHECK-NEXT: // kill: killed $q1
; CHECK-NEXT: // implicit-def: $q1
; CHECK-NEXT: // implicit-def: $q0
; CHECK-NEXT: // implicit-def: $q2
; CHECK-NEXT: // implicit-def: $q3
; CHECK-NEXT: // implicit-def: $q4
; CHECK-NEXT: // implicit-def: $q5
; CHECK-NEXT: // implicit-def: $q7
; CHECK-NEXT: // implicit-def: $q16
; CHECK-NEXT: // implicit-def: $q17
; CHECK-NEXT: // implicit-def: $q10
; CHECK-NEXT: // implicit-def: $q17
; CHECK-NEXT: // implicit-def: $q6
; CHECK-NEXT: // implicit-def: $q18
; CHECK-NEXT: // implicit-def: $q19
; CHECK-NEXT: // implicit-def: $q20
; CHECK-NEXT: // implicit-def: $q21
; CHECK-NEXT: // implicit-def: $q22
; CHECK-NEXT: // implicit-def: $q23
; CHECK-NEXT: // implicit-def: $q24
; CHECK-NEXT: // implicit-def: $q25
; CHECK-NEXT: // implicit-def: $q9
; CHECK-NEXT: // implicit-def: $q27
; CHECK-NEXT: // implicit-def: $q26
; CHECK-NEXT: // implicit-def: $q12
; CHECK-NEXT: // implicit-def: $q28
; CHECK-NEXT: // implicit-def: $q30
; CHECK-NEXT: // implicit-def: $q18
; CHECK-NEXT: // implicit-def: $q14
; CHECK-NEXT: // implicit-def: $q15
; CHECK-NEXT: // implicit-def: $q29
; CHECK-NEXT: // implicit-def: $q30
; CHECK-NEXT: // implicit-def: $q11
; CHECK-NEXT: // implicit-def: $q31
; CHECK-NEXT: // implicit-def: $q12
; CHECK-NEXT: // implicit-def: $q13
; CHECK-NEXT: // implicit-def: $q11
; CHECK-NEXT: // kill: killed $q6
; CHECK-NEXT: // implicit-def: $q6
; CHECK-NEXT: // kill: killed $q6
; CHECK-NEXT: // kill: killed $q1
; CHECK-NEXT: // implicit-def: $q1
; CHECK-NEXT: // kill: killed $q1
; CHECK-NEXT: .LBB0_1: // %for.cond1.preheader
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: str q14, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT: ldr q14, [x8]
; CHECK-NEXT: mov x12, xzr
; CHECK-NEXT: str q18, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT: ldr x14, [x12]
; CHECK-NEXT: ldr q15, [x12]
; CHECK-NEXT: add x7, x11, x8
; CHECK-NEXT: stp q29, q15, [sp] // 32-byte Folded Spill
; CHECK-NEXT: add x19, x11, x8
; CHECK-NEXT: fmov x15, d14
; CHECK-NEXT: mov x16, v14.d[1]
; CHECK-NEXT: ldr q18, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT: fmov x18, d15
; CHECK-NEXT: mov x13, v15.d[1]
; CHECK-NEXT: ldr x5, [x8]
; CHECK-NEXT: ldr q15, [x12]
; CHECK-NEXT: ldr q14, [x10], #64
; CHECK-NEXT: ldr x7, [x7, #128]
; CHECK-NEXT: mov v8.16b, v28.16b
; CHECK-NEXT: fmov x13, d15
; CHECK-NEXT: mov x18, v15.d[1]
; CHECK-NEXT: mov v28.16b, v24.16b
; CHECK-NEXT: mul x17, x15, x14
; CHECK-NEXT: mov v6.16b, v0.16b
; CHECK-NEXT: mov v9.16b, v27.16b
; CHECK-NEXT: mov x12, v14.d[1]
; CHECK-NEXT: fmov x4, d14
; CHECK-NEXT: mov v27.16b, v23.16b
; CHECK-NEXT: mov v24.16b, v20.16b
; CHECK-NEXT: mov v20.16b, v17.16b
; CHECK-NEXT: mov v17.16b, v5.16b
; CHECK-NEXT: mul x1, x16, x14
; CHECK-NEXT: mov v23.16b, v19.16b
; CHECK-NEXT: mov v19.16b, v7.16b
; CHECK-NEXT: mov v7.16b, v2.16b
; CHECK-NEXT: stp q26, q31, [sp] // 32-byte Folded Spill
; CHECK-NEXT: mov v31.16b, v22.16b
; CHECK-NEXT: mul x0, x18, x14
; CHECK-NEXT: mov v26.16b, v10.16b
; CHECK-NEXT: mov v22.16b, v5.16b
; CHECK-NEXT: ldr q5, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT: ldr x5, [x8]
; CHECK-NEXT: ldr x19, [x19, #128]
; CHECK-NEXT: mov v29.16b, v21.16b
; CHECK-NEXT: mov v21.16b, v0.16b
; CHECK-NEXT: mul x0, x13, x14
; CHECK-NEXT: mov v25.16b, v6.16b
; CHECK-NEXT: mov v6.16b, v2.16b
; CHECK-NEXT: fmov d15, x17
; CHECK-NEXT: mov v5.16b, v1.16b
; CHECK-NEXT: mov v8.16b, v20.16b
; CHECK-NEXT: mul x2, x13, x14
; CHECK-NEXT: mov v20.16b, v16.16b
; CHECK-NEXT: mov v16.16b, v3.16b
; CHECK-NEXT: mov v10.16b, v21.16b
; CHECK-NEXT: mov v21.16b, v17.16b
; CHECK-NEXT: mov v17.16b, v4.16b
; CHECK-NEXT: mov v26.16b, v22.16b
; CHECK-NEXT: mov v22.16b, v18.16b
; CHECK-NEXT: mul x2, x18, x14
; CHECK-NEXT: mov v18.16b, v7.16b
; CHECK-NEXT: mov v7.16b, v3.16b
; CHECK-NEXT: mov v16.16b, v4.16b
; CHECK-NEXT: add x8, x8, #8
; CHECK-NEXT: add x9, x9, #1
; CHECK-NEXT: mov v15.d[1], x1
; CHECK-NEXT: mul x3, x12, x14
; CHECK-NEXT: add x8, x8, #8
; CHECK-NEXT: fmov d14, x0
; CHECK-NEXT: cmp x8, #64
; CHECK-NEXT: add x9, x9, #1
; CHECK-NEXT: fmov d14, x0
; CHECK-NEXT: mul x14, x4, x14
; CHECK-NEXT: add v18.2d, v18.2d, v15.2d
; CHECK-NEXT: mul x19, x15, x5
; CHECK-NEXT: add v5.2d, v5.2d, v15.2d
; CHECK-NEXT: mul x20, x15, x5
; CHECK-NEXT: mov v14.d[1], x2
; CHECK-NEXT: mul x15, x15, x7
; CHECK-NEXT: mul x15, x15, x19
; CHECK-NEXT: fmov d0, x14
; CHECK-NEXT: str q18, [sp, #64] // 16-byte Folded Spill
; CHECK-NEXT: ldp q18, q15, [sp, #32] // 32-byte Folded Reload
; CHECK-NEXT: mul x6, x16, x5
; CHECK-NEXT: fmov d1, x19
; CHECK-NEXT: str q5, [sp, #64] // 16-byte Folded Spill
; CHECK-NEXT: ldr q5, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT: mul x21, x13, x19
; CHECK-NEXT: add v5.2d, v5.2d, v14.2d
; CHECK-NEXT: fmov d3, x20
; CHECK-NEXT: mul x7, x16, x5
; CHECK-NEXT: mov v0.d[1], x3
; CHECK-NEXT: mul x16, x16, x7
; CHECK-NEXT: fmov d2, x15
; CHECK-NEXT: add v15.2d, v15.2d, v14.2d
; CHECK-NEXT: mul x21, x18, x7
; CHECK-NEXT: mov v1.d[1], x6
; CHECK-NEXT: mul x0, x4, x7
; CHECK-NEXT: str q15, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT: add v15.2d, v11.2d, v14.2d
; CHECK-NEXT: mov v2.d[1], x16
; CHECK-NEXT: ldr q11, [sp, #80] // 16-byte Folded Reload
; CHECK-NEXT: mul x20, x13, x7
; CHECK-NEXT: fmov d3, x21
; CHECK-NEXT: add v11.2d, v11.2d, v0.2d
; CHECK-NEXT: add v12.2d, v12.2d, v1.2d
; CHECK-NEXT: mul x22, x12, x7
; CHECK-NEXT: fmov d4, x0
; CHECK-NEXT: add v18.2d, v18.2d, v2.2d
; CHECK-NEXT: mov v2.16b, v7.16b
; CHECK-NEXT: mul x14, x18, x5
; CHECK-NEXT: mov v7.16b, v19.16b
; CHECK-NEXT: mov v19.16b, v23.16b
; CHECK-NEXT: mov v3.d[1], x20
; CHECK-NEXT: mov v23.16b, v27.16b
; CHECK-NEXT: add v27.2d, v9.2d, v1.2d
; CHECK-NEXT: mul x15, x4, x5
; CHECK-NEXT: str q11, [sp, #80] // 16-byte Folded Spill
; CHECK-NEXT: mov v11.16b, v15.16b
; CHECK-NEXT: mov v4.d[1], x22
; CHECK-NEXT: add v19.2d, v19.2d, v1.2d
; CHECK-NEXT: add v7.2d, v7.2d, v1.2d
; CHECK-NEXT: fmov d1, x15
; CHECK-NEXT: mul x16, x16, x19
; CHECK-NEXT: str q5, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT: add v5.2d, v13.2d, v14.2d
; CHECK-NEXT: fmov d2, x21
; CHECK-NEXT: ldr q13, [sp, #80] // 16-byte Folded Reload
; CHECK-NEXT: mul x6, x18, x5
; CHECK-NEXT: ldp q15, q14, [sp, #16] // 32-byte Folded Reload
; CHECK-NEXT: mov v3.d[1], x7
; CHECK-NEXT: add v13.2d, v13.2d, v0.2d
; CHECK-NEXT: mul x18, x18, x19
; CHECK-NEXT: mov v1.d[1], x16
; CHECK-NEXT: mul x22, x4, x19
; CHECK-NEXT: str q13, [sp, #80] // 16-byte Folded Spill
; CHECK-NEXT: mov v13.16b, v5.16b
; CHECK-NEXT: mov v5.16b, v17.16b
; CHECK-NEXT: mov v17.16b, v20.16b
; CHECK-NEXT: mov v20.16b, v24.16b
; CHECK-NEXT: mul x13, x13, x5
; CHECK-NEXT: add v23.2d, v23.2d, v1.2d
; CHECK-NEXT: add v1.2d, v5.2d, v1.2d
; CHECK-NEXT: fmov d14, x14
; CHECK-NEXT: add v30.2d, v30.2d, v3.2d
; CHECK-NEXT: mov v3.16b, v16.16b
; CHECK-NEXT: mov v24.16b, v28.16b
; CHECK-NEXT: add v11.2d, v11.2d, v3.2d
; CHECK-NEXT: mov v2.d[1], x18
; CHECK-NEXT: add v15.2d, v15.2d, v1.2d
; CHECK-NEXT: add v27.2d, v27.2d, v3.2d
; CHECK-NEXT: mul x17, x12, x19
; CHECK-NEXT: add v23.2d, v23.2d, v3.2d
; CHECK-NEXT: add v19.2d, v19.2d, v3.2d
; CHECK-NEXT: fmov d4, x22
; CHECK-NEXT: add v10.2d, v10.2d, v3.2d
; CHECK-NEXT: mul x14, x4, x5
; CHECK-NEXT: fmov d0, x13
; CHECK-NEXT: add v14.2d, v14.2d, v2.2d
; CHECK-NEXT: add v2.2d, v6.2d, v3.2d
; CHECK-NEXT: mul x12, x12, x5
; CHECK-NEXT: mov v16.16b, v20.16b
; CHECK-NEXT: mov v5.16b, v22.16b
; CHECK-NEXT: fmov d0, x15
; CHECK-NEXT: add v28.2d, v28.2d, v4.2d
; CHECK-NEXT: mov v4.16b, v17.16b
; CHECK-NEXT: mov v17.16b, v21.16b
; CHECK-NEXT: mov v21.16b, v10.16b
; CHECK-NEXT: mov v10.16b, v26.16b
; CHECK-NEXT: mov v14.d[1], x13
; CHECK-NEXT: mov v22.16b, v31.16b
; CHECK-NEXT: ldp q26, q31, [sp] // 32-byte Folded Reload
; CHECK-NEXT: mov v0.d[1], x12
; CHECK-NEXT: add v13.2d, v13.2d, v14.2d
; CHECK-NEXT: add v31.2d, v31.2d, v14.2d
; CHECK-NEXT: add v26.2d, v26.2d, v14.2d
; CHECK-NEXT: add v24.2d, v24.2d, v14.2d
; CHECK-NEXT: add v22.2d, v22.2d, v14.2d
; CHECK-NEXT: add v20.2d, v8.2d, v14.2d
; CHECK-NEXT: add v10.2d, v10.2d, v14.2d
; CHECK-NEXT: add v16.2d, v16.2d, v14.2d
; CHECK-NEXT: add v5.2d, v5.2d, v14.2d
; CHECK-NEXT: add v3.2d, v3.2d, v14.2d
; CHECK-NEXT: add v2.2d, v2.2d, v14.2d
; CHECK-NEXT: add v29.2d, v29.2d, v0.2d
; CHECK-NEXT: add v25.2d, v25.2d, v0.2d
; CHECK-NEXT: add v21.2d, v21.2d, v0.2d
; CHECK-NEXT: mov v3.16b, v7.16b
; CHECK-NEXT: mov v7.16b, v18.16b
; CHECK-NEXT: mov v4.d[1], x17
; CHECK-NEXT: mov v18.16b, v22.16b
; CHECK-NEXT: mov v0.d[1], x6
; CHECK-NEXT: fmov d1, x14
; CHECK-NEXT: add v28.2d, v8.2d, v4.2d
; CHECK-NEXT: mov v1.d[1], x12
; CHECK-NEXT: add v31.2d, v31.2d, v0.2d
; CHECK-NEXT: add v30.2d, v30.2d, v0.2d
; CHECK-NEXT: add v12.2d, v12.2d, v0.2d
; CHECK-NEXT: add v24.2d, v24.2d, v0.2d
; CHECK-NEXT: add v22.2d, v26.2d, v0.2d
; CHECK-NEXT: add v20.2d, v20.2d, v0.2d
; CHECK-NEXT: add v18.2d, v18.2d, v0.2d
; CHECK-NEXT: add v17.2d, v17.2d, v0.2d
; CHECK-NEXT: add v4.2d, v4.2d, v0.2d
; CHECK-NEXT: add v0.2d, v6.2d, v0.2d
; CHECK-NEXT: add v7.2d, v7.2d, v0.2d
; CHECK-NEXT: add v4.2d, v16.2d, v0.2d
; CHECK-NEXT: add v3.2d, v3.2d, v0.2d
; CHECK-NEXT: mov v0.16b, v21.16b
; CHECK-NEXT: mov v21.16b, v29.16b
; CHECK-NEXT: ldr q29, [sp] // 16-byte Folded Reload
; CHECK-NEXT: add v9.2d, v9.2d, v1.2d
; CHECK-NEXT: add v6.2d, v25.2d, v1.2d
; CHECK-NEXT: add v5.2d, v5.2d, v1.2d
; CHECK-NEXT: add v29.2d, v29.2d, v1.2d
; CHECK-NEXT: add v21.2d, v21.2d, v1.2d
; CHECK-NEXT: add v0.2d, v0.2d, v1.2d
; CHECK-NEXT: b.ne .LBB0_1
; CHECK-NEXT: // %bb.2: // %for.cond.cleanup
; CHECK-NEXT: ldr q6, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT: ldr q1, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT: adrp x8, C
; CHECK-NEXT: add x8, x8, :lo12:C
; CHECK-NEXT: stp q12, q31, [x8, #80]
; CHECK-NEXT: stp q11, q30, [x8, #80]
; CHECK-NEXT: ldp x20, x19, [sp, #176] // 16-byte Folded Reload
; CHECK-NEXT: str q6, [x8]
; CHECK-NEXT: ldr q6, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT: str q29, [x8, #112]
; CHECK-NEXT: str q1, [x8]
; CHECK-NEXT: ldr q1, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT: stp q15, q14, [x8, #144]
; CHECK-NEXT: ldp x22, x21, [sp, #160] // 16-byte Folded Reload
; CHECK-NEXT: stp q6, q11, [x8, #16]
; CHECK-NEXT: ldr q6, [sp, #80] // 16-byte Folded Reload
; CHECK-NEXT: stp q18, q30, [x8, #144]
; CHECK-NEXT: ldp d9, d8, [sp, #144] // 16-byte Folded Reload
; CHECK-NEXT: stp q6, q13, [x8, #48]
; CHECK-NEXT: stp q1, q13, [x8, #16]
; CHECK-NEXT: ldr q1, [sp, #80] // 16-byte Folded Reload
; CHECK-NEXT: stp q28, q12, [x8, #176]
; CHECK-NEXT: ldp d13, d12, [sp, #112] // 16-byte Folded Reload
; CHECK-NEXT: stp q28, q26, [x8, #176]
; CHECK-NEXT: stp q1, q31, [x8, #48]
; CHECK-NEXT: ldp d15, d14, [sp, #96] // 16-byte Folded Reload
; CHECK-NEXT: stp q19, q10, [x8, #336]
; CHECK-NEXT: stp q9, q24, [x8, #240]
; CHECK-NEXT: ldp d9, d8, [sp, #144] // 16-byte Folded Reload
; CHECK-NEXT: stp q19, q18, [x8, #336]
; CHECK-NEXT: stp q10, q7, [x8, #400]
; CHECK-NEXT: ldp d11, d10, [sp, #128] // 16-byte Folded Reload
; CHECK-NEXT: str q29, [x8, #112]
; CHECK-NEXT: str q27, [x8, #208]
; CHECK-NEXT: stp q25, q24, [x8, #240]
; CHECK-NEXT: stp q23, q22, [x8, #272]
; CHECK-NEXT: stp q21, q20, [x8, #304]
; CHECK-NEXT: stp q17, q16, [x8, #368]
; CHECK-NEXT: stp q7, q5, [x8, #400]
; CHECK-NEXT: stp q4, q3, [x8, #432]
; CHECK-NEXT: stp q1, q2, [x8, #464]
; CHECK-NEXT: stp q6, q17, [x8, #368]
; CHECK-NEXT: stp q5, q4, [x8, #432]
; CHECK-NEXT: stp q2, q3, [x8, #464]
; CHECK-NEXT: str q0, [x8, #496]
; CHECK-NEXT: add sp, sp, #192
; CHECK-NEXT: .cfi_def_cfa_offset 0
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AArch64/rcpc3-sve.ll
@@ -8,8 +8,8 @@
define hidden <vscale x 2 x i64> @test_load_sve_lane0(ptr nocapture noundef readonly %a, <vscale x 2 x i64> noundef %b) local_unnamed_addr {
; CHECK-LABEL: test_load_sve_lane0:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl1
; CHECK-NEXT: ldapr x8, [x0]
; CHECK-NEXT: ptrue p0.d, vl1
; CHECK-NEXT: mov z0.d, p0/m, x8
; CHECK-NEXT: ret
%1 = load atomic i64, ptr %a acquire, align 8
@@ -20,9 +20,9 @@ define hidden <vscale x 2 x i64> @test_load_sve_lane0(ptr nocapture noundef read
define hidden <vscale x 2 x i64> @test_load_sve_lane1(ptr nocapture noundef readonly %a, <vscale x 2 x i64> noundef %b) local_unnamed_addr {
; CHECK-LABEL: test_load_sve_lane1:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov w8, #1 // =0x1
; CHECK-NEXT: index z1.d, #0, #1
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov z2.d, x8
; CHECK-NEXT: ldapr x8, [x0]
; CHECK-NEXT: cmpeq p0.d, p0/z, z1.d, z2.d
6 changes: 3 additions & 3 deletions llvm/test/CodeGen/AArch64/reassocmls.ll
@@ -79,7 +79,7 @@ define i64 @mls_i64_C(i64 %a, i64 %b, i64 %c, i64 %d, i64 %e) {
; CHECK-LABEL: mls_i64_C:
; CHECK: // %bb.0:
; CHECK-NEXT: mul x8, x2, x1
; CHECK-NEXT: mov w9, #10
; CHECK-NEXT: mov w9, #10 // =0xa
; CHECK-NEXT: madd x8, x4, x3, x8
; CHECK-NEXT: sub x0, x9, x8
; CHECK-NEXT: ret
@@ -290,9 +290,9 @@ define <vscale x 8 x i16> @smlsl_nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i8
define <vscale x 8 x i16> @umlsl_nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i8> %b, <vscale x 8 x i8> %c, <vscale x 8 x i8> %d, <vscale x 8 x i8> %e) {
; CHECK-LABEL: umlsl_nxv8i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: and z3.h, z3.h, #0xff
; CHECK-NEXT: and z4.h, z4.h, #0xff
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: and z1.h, z1.h, #0xff
; CHECK-NEXT: and z2.h, z2.h, #0xff
; CHECK-NEXT: mls z0.h, p0/m, z4.h, z3.h
@@ -326,8 +326,8 @@ define <vscale x 8 x i16> @mls_nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16>
define <vscale x 8 x i16> @mla_nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c, <vscale x 8 x i16> %d, <vscale x 8 x i16> %e) {
; CHECK-LABEL: mla_nxv8i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: mul z1.h, z2.h, z1.h
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: mla z1.h, p0/m, z4.h, z3.h
; CHECK-NEXT: add z0.h, z1.h, z0.h
; CHECK-NEXT: ret
356 changes: 178 additions & 178 deletions llvm/test/CodeGen/AArch64/reduce-shuffle.ll

Large diffs are not rendered by default.

8 changes: 4 additions & 4 deletions llvm/test/CodeGen/AArch64/sat-add.ll
@@ -346,9 +346,9 @@ define <16 x i8> @unsigned_sat_constant_v16i8_using_min(<16 x i8> %x) {
; CHECK-LABEL: unsigned_sat_constant_v16i8_using_min:
; CHECK: // %bb.0:
; CHECK-NEXT: movi v1.16b, #213
; CHECK-NEXT: movi v2.16b, #42
; CHECK-NEXT: umin v0.16b, v0.16b, v1.16b
; CHECK-NEXT: movi v1.16b, #42
; CHECK-NEXT: add v0.16b, v0.16b, v1.16b
; CHECK-NEXT: add v0.16b, v0.16b, v2.16b
; CHECK-NEXT: ret
%c = icmp ult <16 x i8> %x, <i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43>
%s = select <16 x i1> %c, <16 x i8> %x, <16 x i8> <i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43>
@@ -384,9 +384,9 @@ define <8 x i16> @unsigned_sat_constant_v8i16_using_min(<8 x i16> %x) {
; CHECK-LABEL: unsigned_sat_constant_v8i16_using_min:
; CHECK: // %bb.0:
; CHECK-NEXT: mvni v1.8h, #42
; CHECK-NEXT: movi v2.8h, #42
; CHECK-NEXT: umin v0.8h, v0.8h, v1.8h
; CHECK-NEXT: movi v1.8h, #42
; CHECK-NEXT: add v0.8h, v0.8h, v1.8h
; CHECK-NEXT: add v0.8h, v0.8h, v2.8h
; CHECK-NEXT: ret
%c = icmp ult <8 x i16> %x, <i16 -43, i16 -43, i16 -43, i16 -43, i16 -43, i16 -43, i16 -43, i16 -43>
%s = select <8 x i1> %c, <8 x i16> %x, <8 x i16> <i16 -43, i16 -43, i16 -43, i16 -43, i16 -43, i16 -43, i16 -43, i16 -43>
36 changes: 18 additions & 18 deletions llvm/test/CodeGen/AArch64/sext.ll
@@ -245,15 +245,15 @@ define <3 x i32> @sext_v3i8_v3i32(<3 x i8> %a) {
; CHECK-GI-LABEL: sext_v3i8_v3i32:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: mov w8, #24 // =0x18
; CHECK-GI-NEXT: fmov s1, w0
; CHECK-GI-NEXT: fmov s0, w8
; CHECK-GI-NEXT: mov v1.s[1], w1
; CHECK-GI-NEXT: mov v0.s[1], w8
; CHECK-GI-NEXT: mov v1.s[2], w2
; CHECK-GI-NEXT: mov v0.s[2], w8
; CHECK-GI-NEXT: neg v2.4s, v0.4s
; CHECK-GI-NEXT: ushl v0.4s, v1.4s, v0.4s
; CHECK-GI-NEXT: sshl v0.4s, v0.4s, v2.4s
; CHECK-GI-NEXT: fmov s0, w0
; CHECK-GI-NEXT: fmov s1, w8
; CHECK-GI-NEXT: mov v0.s[1], w1
; CHECK-GI-NEXT: mov v1.s[1], w8
; CHECK-GI-NEXT: mov v0.s[2], w2
; CHECK-GI-NEXT: mov v1.s[2], w8
; CHECK-GI-NEXT: ushl v0.4s, v0.4s, v1.4s
; CHECK-GI-NEXT: neg v1.4s, v1.4s
; CHECK-GI-NEXT: sshl v0.4s, v0.4s, v1.4s
; CHECK-GI-NEXT: ret
entry:
%c = sext <3 x i8> %a to <3 x i32>
@@ -408,15 +408,15 @@ define <3 x i32> @sext_v3i10_v3i32(<3 x i10> %a) {
; CHECK-GI-LABEL: sext_v3i10_v3i32:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: mov w8, #22 // =0x16
; CHECK-GI-NEXT: fmov s1, w0
; CHECK-GI-NEXT: fmov s0, w8
; CHECK-GI-NEXT: mov v1.s[1], w1
; CHECK-GI-NEXT: mov v0.s[1], w8
; CHECK-GI-NEXT: mov v1.s[2], w2
; CHECK-GI-NEXT: mov v0.s[2], w8
; CHECK-GI-NEXT: neg v2.4s, v0.4s
; CHECK-GI-NEXT: ushl v0.4s, v1.4s, v0.4s
; CHECK-GI-NEXT: sshl v0.4s, v0.4s, v2.4s
; CHECK-GI-NEXT: fmov s0, w0
; CHECK-GI-NEXT: fmov s1, w8
; CHECK-GI-NEXT: mov v0.s[1], w1
; CHECK-GI-NEXT: mov v1.s[1], w8
; CHECK-GI-NEXT: mov v0.s[2], w2
; CHECK-GI-NEXT: mov v1.s[2], w8
; CHECK-GI-NEXT: ushl v0.4s, v0.4s, v1.4s
; CHECK-GI-NEXT: neg v1.4s, v1.4s
; CHECK-GI-NEXT: sshl v0.4s, v0.4s, v1.4s
; CHECK-GI-NEXT: ret
entry:
%c = sext <3 x i10> %a to <3 x i32>
24 changes: 12 additions & 12 deletions llvm/test/CodeGen/AArch64/sink-addsub-of-const.ll
@@ -158,8 +158,8 @@ define i32 @sink_sub_from_const_to_sub2(i32 %a, i32 %b) {
define <4 x i32> @vec_sink_add_of_const_to_add0(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: vec_sink_add_of_const_to_add0:
; CHECK: // %bb.0:
; CHECK-NEXT: add v0.4s, v0.4s, v1.4s
; CHECK-NEXT: adrp x8, .LCPI12_0
; CHECK-NEXT: add v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI12_0]
; CHECK-NEXT: add v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
@@ -170,8 +170,8 @@ define <4 x i32> @vec_sink_add_of_const_to_add0(<4 x i32> %a, <4 x i32> %b) {
define <4 x i32> @vec_sink_add_of_const_to_add1(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: vec_sink_add_of_const_to_add1:
; CHECK: // %bb.0:
; CHECK-NEXT: add v0.4s, v0.4s, v1.4s
; CHECK-NEXT: adrp x8, .LCPI13_0
; CHECK-NEXT: add v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI13_0]
; CHECK-NEXT: add v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
@@ -186,8 +186,8 @@ define <4 x i32> @vec_sink_add_of_const_to_add1(<4 x i32> %a, <4 x i32> %b) {
define <4 x i32> @vec_sink_sub_of_const_to_add0(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: vec_sink_sub_of_const_to_add0:
; CHECK: // %bb.0:
; CHECK-NEXT: add v0.4s, v0.4s, v1.4s
; CHECK-NEXT: adrp x8, .LCPI14_0
; CHECK-NEXT: add v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI14_0]
; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
@@ -198,8 +198,8 @@ define <4 x i32> @vec_sink_sub_of_const_to_add0(<4 x i32> %a, <4 x i32> %b) {
define <4 x i32> @vec_sink_sub_of_const_to_add1(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: vec_sink_sub_of_const_to_add1:
; CHECK: // %bb.0:
; CHECK-NEXT: add v0.4s, v0.4s, v1.4s
; CHECK-NEXT: adrp x8, .LCPI15_0
; CHECK-NEXT: add v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI15_0]
; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
@@ -214,8 +214,8 @@ define <4 x i32> @vec_sink_sub_of_const_to_add1(<4 x i32> %a, <4 x i32> %b) {
define <4 x i32> @vec_sink_sub_from_const_to_add0(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: vec_sink_sub_from_const_to_add0:
; CHECK: // %bb.0:
; CHECK-NEXT: sub v0.4s, v1.4s, v0.4s
; CHECK-NEXT: adrp x8, .LCPI16_0
; CHECK-NEXT: sub v0.4s, v1.4s, v0.4s
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI16_0]
; CHECK-NEXT: add v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
@@ -226,8 +226,8 @@ define <4 x i32> @vec_sink_sub_from_const_to_add0(<4 x i32> %a, <4 x i32> %b) {
define <4 x i32> @vec_sink_sub_from_const_to_add1(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: vec_sink_sub_from_const_to_add1:
; CHECK: // %bb.0:
; CHECK-NEXT: sub v0.4s, v1.4s, v0.4s
; CHECK-NEXT: adrp x8, .LCPI17_0
; CHECK-NEXT: sub v0.4s, v1.4s, v0.4s
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI17_0]
; CHECK-NEXT: add v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
@@ -242,8 +242,8 @@ define <4 x i32> @vec_sink_sub_from_const_to_add1(<4 x i32> %a, <4 x i32> %b) {
define <4 x i32> @vec_sink_add_of_const_to_sub(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: vec_sink_add_of_const_to_sub:
; CHECK: // %bb.0:
; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s
; CHECK-NEXT: adrp x8, .LCPI18_0
; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI18_0]
; CHECK-NEXT: add v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
@@ -254,8 +254,8 @@ define <4 x i32> @vec_sink_add_of_const_to_sub(<4 x i32> %a, <4 x i32> %b) {
define <4 x i32> @vec_sink_add_of_const_to_sub2(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: vec_sink_add_of_const_to_sub2:
; CHECK: // %bb.0:
; CHECK-NEXT: sub v0.4s, v1.4s, v0.4s
; CHECK-NEXT: adrp x8, .LCPI19_0
; CHECK-NEXT: sub v0.4s, v1.4s, v0.4s
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI19_0]
; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
@@ -270,8 +270,8 @@ define <4 x i32> @vec_sink_add_of_const_to_sub2(<4 x i32> %a, <4 x i32> %b) {
define <4 x i32> @vec_sink_sub_of_const_to_sub(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: vec_sink_sub_of_const_to_sub:
; CHECK: // %bb.0:
; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s
; CHECK-NEXT: adrp x8, .LCPI20_0
; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI20_0]
; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
@@ -282,8 +282,8 @@ define <4 x i32> @vec_sink_sub_of_const_to_sub(<4 x i32> %a, <4 x i32> %b) {
define <4 x i32> @vec_sink_sub_of_const_to_sub2(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: vec_sink_sub_of_const_to_sub2:
; CHECK: // %bb.0:
; CHECK-NEXT: sub v0.4s, v1.4s, v0.4s
; CHECK-NEXT: adrp x8, .LCPI21_0
; CHECK-NEXT: sub v0.4s, v1.4s, v0.4s
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI21_0]
; CHECK-NEXT: add v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
@@ -298,8 +298,8 @@ define <4 x i32> @vec_sink_sub_of_const_to_sub2(<4 x i32> %a, <4 x i32> %b) {
define <4 x i32> @vec_sink_sub_from_const_to_sub(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: vec_sink_sub_from_const_to_sub:
; CHECK: // %bb.0:
; CHECK-NEXT: add v0.4s, v0.4s, v1.4s
; CHECK-NEXT: adrp x8, .LCPI22_0
; CHECK-NEXT: add v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI22_0]
; CHECK-NEXT: sub v0.4s, v1.4s, v0.4s
; CHECK-NEXT: ret
@@ -310,8 +310,8 @@ define <4 x i32> @vec_sink_sub_from_const_to_sub(<4 x i32> %a, <4 x i32> %b) {
define <4 x i32> @vec_sink_sub_from_const_to_sub2(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: vec_sink_sub_from_const_to_sub2:
; CHECK: // %bb.0:
; CHECK-NEXT: add v0.4s, v0.4s, v1.4s
; CHECK-NEXT: adrp x8, .LCPI23_0
; CHECK-NEXT: add v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI23_0]
; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret

@@ -151,8 +151,8 @@ define <2 x double> @streaming_compatible_with_neon_vectors(<2 x double> %arg) "
; CHECK-NEXT: // %bb.3:
; CHECK-NEXT: smstart sm
; CHECK-NEXT: .LBB4_4:
; CHECK-NEXT: ptrue p0.d, vl2
; CHECK-NEXT: add x8, sp, #16
; CHECK-NEXT: ptrue p0.d, vl2
; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; CHECK-NEXT: ldr z1, [x8] // 16-byte Folded Reload
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
12 changes: 6 additions & 6 deletions llvm/test/CodeGen/AArch64/sme2-intrinsics-fp-dots.ll
@@ -26,16 +26,16 @@ define void @fdot_multi_za32_f16_vg1x2(i32 %slice, <vscale x 16 x i8> %unused, <
define void @fdot_multi_za32_f16_vg1x4(i32 %slice, <vscale x 16 x i8> %unused, <vscale x 8 x half> %zn0, <vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2, <vscale x 8 x half> %zn3,
; CHECK-LABEL: fdot_multi_za32_f16_vg1x4:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: mov z26.d, z7.d
; CHECK-NEXT: mov z31.d, z4.d
; CHECK-NEXT: mov z25.d, z6.d
; CHECK-NEXT: mov w8, w0
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: mov z25.d, z6.d
; CHECK-NEXT: mov z30.d, z3.d
; CHECK-NEXT: mov z24.d, z5.d
; CHECK-NEXT: mov z29.d, z2.d
; CHECK-NEXT: mov z28.d, z1.d
; CHECK-NEXT: ld1h { z27.h }, p0/z, [x1]
; CHECK-NEXT: mov z28.d, z1.d
; CHECK-NEXT: fdot za.s[w8, 0, vgx4], { z28.h - z31.h }, { z24.h - z27.h }
; CHECK-NEXT: fdot za.s[w8, 7, vgx4], { z28.h - z31.h }, { z24.h - z27.h }
; CHECK-NEXT: ret
@@ -71,16 +71,16 @@ define void @bfdot_multi_za32_bf16_vg1x2(i32 %slice, <vscale x 16 x i8> %unused,
define void @fdot_multi_za32_bf16_vg1x4(i32 %slice, <vscale x 16 x i8> %unused, <vscale x 8 x bfloat> %zn0, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zn2, <vscale x 8 x bfloat> %zn3,
; CHECK-LABEL: fdot_multi_za32_bf16_vg1x4:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: mov z26.d, z7.d
; CHECK-NEXT: mov z31.d, z4.d
; CHECK-NEXT: mov z25.d, z6.d
; CHECK-NEXT: mov w8, w0
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: mov z25.d, z6.d
; CHECK-NEXT: mov z30.d, z3.d
; CHECK-NEXT: mov z24.d, z5.d
; CHECK-NEXT: mov z29.d, z2.d
; CHECK-NEXT: mov z28.d, z1.d
; CHECK-NEXT: ld1h { z27.h }, p0/z, [x1]
; CHECK-NEXT: mov z28.d, z1.d
; CHECK-NEXT: bfdot za.s[w8, 0, vgx4], { z28.h - z31.h }, { z24.h - z27.h }
; CHECK-NEXT: bfdot za.s[w8, 7, vgx4], { z28.h - z31.h }, { z24.h - z27.h }
; CHECK-NEXT: ret
42 changes: 21 additions & 21 deletions llvm/test/CodeGen/AArch64/sme2-intrinsics-int-dots.ll
@@ -26,16 +26,16 @@ define void @udot_multi_za32_u16_vg1x2(i32 %slice, <vscale x 16 x i8> %unused, <
define void @udot_multi_za32_u16_vg1x4(i32 %slice, <vscale x 16 x i8> %unused, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zn3,
; CHECK-LABEL: udot_multi_za32_u16_vg1x4:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: mov z26.d, z7.d
; CHECK-NEXT: mov z31.d, z4.d
; CHECK-NEXT: mov z25.d, z6.d
; CHECK-NEXT: mov w8, w0
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: mov z25.d, z6.d
; CHECK-NEXT: mov z30.d, z3.d
; CHECK-NEXT: mov z24.d, z5.d
; CHECK-NEXT: mov z29.d, z2.d
; CHECK-NEXT: mov z28.d, z1.d
; CHECK-NEXT: ld1h { z27.h }, p0/z, [x1]
; CHECK-NEXT: mov z28.d, z1.d
; CHECK-NEXT: udot za.s[w8, 0, vgx4], { z28.h - z31.h }, { z24.h - z27.h }
; CHECK-NEXT: udot za.s[w8, 7, vgx4], { z28.h - z31.h }, { z24.h - z27.h }
; CHECK-NEXT: ret
@@ -68,16 +68,16 @@ define void @udot_multi_za32_u8_vg1x2(i32 %slice, <vscale x 16 x i8> %unused, <v
define void @udot_multi_za32_u8_vg1x4(i32 %slice, <vscale x 16 x i8> %unused, <vscale x 16 x i8> %zn0, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zn3,
; CHECK-LABEL: udot_multi_za32_u8_vg1x4:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: mov z26.d, z7.d
; CHECK-NEXT: mov z31.d, z4.d
; CHECK-NEXT: mov z25.d, z6.d
; CHECK-NEXT: mov w8, w0
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: mov z25.d, z6.d
; CHECK-NEXT: mov z30.d, z3.d
; CHECK-NEXT: mov z24.d, z5.d
; CHECK-NEXT: mov z29.d, z2.d
; CHECK-NEXT: mov z28.d, z1.d
; CHECK-NEXT: ld1b { z27.b }, p0/z, [x1]
; CHECK-NEXT: mov z28.d, z1.d
; CHECK-NEXT: udot za.s[w8, 0, vgx4], { z28.b - z31.b }, { z24.b - z27.b }
; CHECK-NEXT: udot za.s[w8, 7, vgx4], { z28.b - z31.b }, { z24.b - z27.b }
; CHECK-NEXT: ret
@@ -110,16 +110,16 @@ define void @udot_multi_za64_u16_vg1x2(i32 %slice, <vscale x 16 x i8> %unused, <
define void @udot_multi_za64_u16_vg1x4(i32 %slice, <vscale x 16 x i8> %unused, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zn3,
; CHECK-LABEL: udot_multi_za64_u16_vg1x4:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: mov z26.d, z7.d
; CHECK-NEXT: mov z31.d, z4.d
; CHECK-NEXT: mov z25.d, z6.d
; CHECK-NEXT: mov w8, w0
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: mov z25.d, z6.d
; CHECK-NEXT: mov z30.d, z3.d
; CHECK-NEXT: mov z24.d, z5.d
; CHECK-NEXT: mov z29.d, z2.d
; CHECK-NEXT: mov z28.d, z1.d
; CHECK-NEXT: ld1h { z27.h }, p0/z, [x1]
; CHECK-NEXT: mov z28.d, z1.d
; CHECK-NEXT: udot za.d[w8, 0, vgx4], { z28.h - z31.h }, { z24.h - z27.h }
; CHECK-NEXT: udot za.d[w8, 7, vgx4], { z28.h - z31.h }, { z24.h - z27.h }
; CHECK-NEXT: ret
@@ -152,16 +152,16 @@ define void @usdot_multi_za32_u8_vg1x2(i32 %slice, <vscale x 16 x i8> %unused, <
define void @usdot_multi_za32_u8_vg1x4(i32 %slice, <vscale x 16 x i8> %unused, <vscale x 16 x i8> %zn0, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zn3,
; CHECK-LABEL: usdot_multi_za32_u8_vg1x4:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: mov z26.d, z7.d
; CHECK-NEXT: mov z31.d, z4.d
; CHECK-NEXT: mov z25.d, z6.d
; CHECK-NEXT: mov w8, w0
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: mov z25.d, z6.d
; CHECK-NEXT: mov z30.d, z3.d
; CHECK-NEXT: mov z24.d, z5.d
; CHECK-NEXT: mov z29.d, z2.d
; CHECK-NEXT: mov z28.d, z1.d
; CHECK-NEXT: ld1b { z27.b }, p0/z, [x1]
; CHECK-NEXT: mov z28.d, z1.d
; CHECK-NEXT: usdot za.s[w8, 0, vgx4], { z28.b - z31.b }, { z24.b - z27.b }
; CHECK-NEXT: usdot za.s[w8, 7, vgx4], { z28.b - z31.b }, { z24.b - z27.b }
; CHECK-NEXT: ret
@@ -197,16 +197,16 @@ define void @sdot_multi_za32_u16_vg1x2(i32 %slice, <vscale x 16 x i8> %unused, <
define void @sdot_multi_za32_u16_vg1x4(i32 %slice, <vscale x 16 x i8> %unused, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zn3,
; CHECK-LABEL: sdot_multi_za32_u16_vg1x4:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: mov z26.d, z7.d
; CHECK-NEXT: mov z31.d, z4.d
; CHECK-NEXT: mov z25.d, z6.d
; CHECK-NEXT: mov w8, w0
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: mov z25.d, z6.d
; CHECK-NEXT: mov z30.d, z3.d
; CHECK-NEXT: mov z24.d, z5.d
; CHECK-NEXT: mov z29.d, z2.d
; CHECK-NEXT: mov z28.d, z1.d
; CHECK-NEXT: ld1h { z27.h }, p0/z, [x1]
; CHECK-NEXT: mov z28.d, z1.d
; CHECK-NEXT: sdot za.s[w8, 0, vgx4], { z28.h - z31.h }, { z24.h - z27.h }
; CHECK-NEXT: sdot za.s[w8, 7, vgx4], { z28.h - z31.h }, { z24.h - z27.h }
; CHECK-NEXT: ret
@@ -239,16 +239,16 @@ define void @sdot_multi_za32_u8_vg1x2(i32 %slice, <vscale x 16 x i8> %unused, <v
define void @sdot_multi_za32_u8_vg1x4(i32 %slice, <vscale x 16 x i8> %unused, <vscale x 16 x i8> %zn0, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zn3,
; CHECK-LABEL: sdot_multi_za32_u8_vg1x4:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: mov z26.d, z7.d
; CHECK-NEXT: mov z31.d, z4.d
; CHECK-NEXT: mov z25.d, z6.d
; CHECK-NEXT: mov w8, w0
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: mov z25.d, z6.d
; CHECK-NEXT: mov z30.d, z3.d
; CHECK-NEXT: mov z24.d, z5.d
; CHECK-NEXT: mov z29.d, z2.d
; CHECK-NEXT: mov z28.d, z1.d
; CHECK-NEXT: ld1b { z27.b }, p0/z, [x1]
; CHECK-NEXT: mov z28.d, z1.d
; CHECK-NEXT: sdot za.s[w8, 0, vgx4], { z28.b - z31.b }, { z24.b - z27.b }
; CHECK-NEXT: sdot za.s[w8, 7, vgx4], { z28.b - z31.b }, { z24.b - z27.b }
; CHECK-NEXT: ret
@@ -281,16 +281,16 @@ define void @sdot_multi_za64_u16_vg1x2(i32 %slice, <vscale x 16 x i8> %unused, <
define void @sdot_multi_za64_u16_vg1x4(i32 %slice, <vscale x 16 x i8> %unused, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zn3,
; CHECK-LABEL: sdot_multi_za64_u16_vg1x4:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: mov z26.d, z7.d
; CHECK-NEXT: mov z31.d, z4.d
; CHECK-NEXT: mov z25.d, z6.d
; CHECK-NEXT: mov w8, w0
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: mov z25.d, z6.d
; CHECK-NEXT: mov z30.d, z3.d
; CHECK-NEXT: mov z24.d, z5.d
; CHECK-NEXT: mov z29.d, z2.d
; CHECK-NEXT: mov z28.d, z1.d
; CHECK-NEXT: ld1h { z27.h }, p0/z, [x1]
; CHECK-NEXT: mov z28.d, z1.d
; CHECK-NEXT: sdot za.d[w8, 0, vgx4], { z28.h - z31.h }, { z24.h - z27.h }
; CHECK-NEXT: sdot za.d[w8, 7, vgx4], { z28.h - z31.h }, { z24.h - z27.h }
; CHECK-NEXT: ret
305 changes: 139 additions & 166 deletions llvm/test/CodeGen/AArch64/sme2-intrinsics-max.ll

Large diffs are not rendered by default.

163 changes: 68 additions & 95 deletions llvm/test/CodeGen/AArch64/sme2-intrinsics-min.ll

Large diffs are not rendered by default.

54 changes: 27 additions & 27 deletions llvm/test/CodeGen/AArch64/sme2-intrinsics-mlall.ll
@@ -142,16 +142,16 @@ define void @multi_vector_mul_add_multi_long_vg4x2_s16(i32 %slice, <vscale x 8 x
define void @multi_vector_mul_add_multi_long_vg4x4_s8(i32 %slice, <vscale x 16 x i8> %dummy, <vscale x 16 x i8> %zn0, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zn3, <vscale x 16 x i8> %zm0, <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2, <vscale x 16 x i8> %zm3) {
; CHECK-LABEL: multi_vector_mul_add_multi_long_vg4x4_s8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: mov z26.d, z7.d
; CHECK-NEXT: mov z31.d, z4.d
; CHECK-NEXT: mov z25.d, z6.d
; CHECK-NEXT: mov w8, w0
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: mov z25.d, z6.d
; CHECK-NEXT: mov z30.d, z3.d
; CHECK-NEXT: mov z24.d, z5.d
; CHECK-NEXT: mov z29.d, z2.d
; CHECK-NEXT: mov z28.d, z1.d
; CHECK-NEXT: ld1b { z27.b }, p0/z, [x1]
; CHECK-NEXT: mov z28.d, z1.d
; CHECK-NEXT: smlall za.s[w8, 0:3, vgx4], { z28.b - z31.b }, { z24.b - z27.b }
; CHECK-NEXT: smlall za.s[w8, 4:7, vgx4], { z28.b - z31.b }, { z24.b - z27.b }
; CHECK-NEXT: ret
@@ -164,16 +164,16 @@ define void @multi_vector_mul_add_multi_long_vg4x4_s8(i32 %slice, <vscale x 16 x
define void @multi_vector_mul_add_multi_long_vg4x4_s16(i32 %slice, <vscale x 8 x i16> %dummy, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zn3, <vscale x 8 x i16> %zm0, <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2, <vscale x 8 x i16> %zm3) {
; CHECK-LABEL: multi_vector_mul_add_multi_long_vg4x4_s16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: mov z26.d, z7.d
; CHECK-NEXT: mov z31.d, z4.d
; CHECK-NEXT: mov z25.d, z6.d
; CHECK-NEXT: mov w8, w0
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: mov z25.d, z6.d
; CHECK-NEXT: mov z30.d, z3.d
; CHECK-NEXT: mov z24.d, z5.d
; CHECK-NEXT: mov z29.d, z2.d
; CHECK-NEXT: mov z28.d, z1.d
; CHECK-NEXT: ld1h { z27.h }, p0/z, [x1]
; CHECK-NEXT: mov z28.d, z1.d
; CHECK-NEXT: smlall za.d[w8, 0:3, vgx4], { z28.h - z31.h }, { z24.h - z27.h }
; CHECK-NEXT: smlall za.d[w8, 4:7, vgx4], { z28.h - z31.h }, { z24.h - z27.h }
; CHECK-NEXT: ret
@@ -418,16 +418,16 @@ define void @multi_vector_mul_add_multi_long_vg4x2_u16(i32 %slice, <vscale x 8 x
define void @multi_vector_mul_add_multi_long_vg4x4_u8(i32 %slice, <vscale x 16 x i8> %dummy, <vscale x 16 x i8> %zn0, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zn3, <vscale x 16 x i8> %zm0, <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2, <vscale x 16 x i8> %zm3) {
; CHECK-LABEL: multi_vector_mul_add_multi_long_vg4x4_u8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: mov z26.d, z7.d
; CHECK-NEXT: mov z31.d, z4.d
; CHECK-NEXT: mov z25.d, z6.d
; CHECK-NEXT: mov w8, w0
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: mov z25.d, z6.d
; CHECK-NEXT: mov z30.d, z3.d
; CHECK-NEXT: mov z24.d, z5.d
; CHECK-NEXT: mov z29.d, z2.d
; CHECK-NEXT: mov z28.d, z1.d
; CHECK-NEXT: ld1b { z27.b }, p0/z, [x1]
; CHECK-NEXT: mov z28.d, z1.d
; CHECK-NEXT: umlall za.s[w8, 0:3, vgx4], { z28.b - z31.b }, { z24.b - z27.b }
; CHECK-NEXT: umlall za.s[w8, 4:7, vgx4], { z28.b - z31.b }, { z24.b - z27.b }
; CHECK-NEXT: ret
@@ -440,16 +440,16 @@ define void @multi_vector_mul_add_multi_long_vg4x4_u8(i32 %slice, <vscale x 16 x
define void @multi_vector_mul_add_multi_long_vg4x4_u16(i32 %slice, <vscale x 8 x i16> %dummy, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zn3, <vscale x 8 x i16> %zm0, <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2, <vscale x 8 x i16> %zm3) {
; CHECK-LABEL: multi_vector_mul_add_multi_long_vg4x4_u16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: mov z26.d, z7.d
; CHECK-NEXT: mov z31.d, z4.d
; CHECK-NEXT: mov z25.d, z6.d
; CHECK-NEXT: mov w8, w0
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: mov z25.d, z6.d
; CHECK-NEXT: mov z30.d, z3.d
; CHECK-NEXT: mov z24.d, z5.d
; CHECK-NEXT: mov z29.d, z2.d
; CHECK-NEXT: mov z28.d, z1.d
; CHECK-NEXT: ld1h { z27.h }, p0/z, [x1]
; CHECK-NEXT: mov z28.d, z1.d
; CHECK-NEXT: umlall za.d[w8, 0:3, vgx4], { z28.h - z31.h }, { z24.h - z27.h }
; CHECK-NEXT: umlall za.d[w8, 4:7, vgx4], { z28.h - z31.h }, { z24.h - z27.h }
; CHECK-NEXT: ret
@@ -694,16 +694,16 @@ define void @multi_vector_mul_sub_multi_long_vg4x2_s16(i32 %slice, <vscale x 8 x
define void @multi_vector_mul_sub_multi_long_vg4x4_s8(i32 %slice, <vscale x 16 x i8> %dummy, <vscale x 16 x i8> %zn0, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zn3, <vscale x 16 x i8> %zm0, <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2, <vscale x 16 x i8> %zm3) {
; CHECK-LABEL: multi_vector_mul_sub_multi_long_vg4x4_s8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: mov z26.d, z7.d
; CHECK-NEXT: mov z31.d, z4.d
; CHECK-NEXT: mov z25.d, z6.d
; CHECK-NEXT: mov w8, w0
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: mov z25.d, z6.d
; CHECK-NEXT: mov z30.d, z3.d
; CHECK-NEXT: mov z24.d, z5.d
; CHECK-NEXT: mov z29.d, z2.d
; CHECK-NEXT: mov z28.d, z1.d
; CHECK-NEXT: ld1b { z27.b }, p0/z, [x1]
; CHECK-NEXT: mov z28.d, z1.d
; CHECK-NEXT: smlsll za.s[w8, 0:3, vgx4], { z28.b - z31.b }, { z24.b - z27.b }
; CHECK-NEXT: smlsll za.s[w8, 4:7, vgx4], { z28.b - z31.b }, { z24.b - z27.b }
; CHECK-NEXT: ret
@@ -716,16 +716,16 @@ define void @multi_vector_mul_sub_multi_long_vg4x4_s8(i32 %slice, <vscale x 16 x
define void @multi_vector_mul_sub_multi_long_vg4x4_s16(i32 %slice, <vscale x 8 x i16> %dummy, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zn3, <vscale x 8 x i16> %zm0, <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2, <vscale x 8 x i16> %zm3) {
; CHECK-LABEL: multi_vector_mul_sub_multi_long_vg4x4_s16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: mov z26.d, z7.d
; CHECK-NEXT: mov z31.d, z4.d
; CHECK-NEXT: mov z25.d, z6.d
; CHECK-NEXT: mov w8, w0
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: mov z25.d, z6.d
; CHECK-NEXT: mov z30.d, z3.d
; CHECK-NEXT: mov z24.d, z5.d
; CHECK-NEXT: mov z29.d, z2.d
; CHECK-NEXT: mov z28.d, z1.d
; CHECK-NEXT: ld1h { z27.h }, p0/z, [x1]
; CHECK-NEXT: mov z28.d, z1.d
; CHECK-NEXT: smlsll za.d[w8, 0:3, vgx4], { z28.h - z31.h }, { z24.h - z27.h }
; CHECK-NEXT: smlsll za.d[w8, 4:7, vgx4], { z28.h - z31.h }, { z24.h - z27.h }
; CHECK-NEXT: ret
@@ -970,16 +970,16 @@ define void @multi_vector_mul_sub_multi_long_vg4x2_u16(i32 %slice, <vscale x 8 x
define void @multi_vector_mul_sub_multi_long_vg4x4_u8(i32 %slice, <vscale x 16 x i8> %dummy, <vscale x 16 x i8> %zn0, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zn3, <vscale x 16 x i8> %zm0, <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2, <vscale x 16 x i8> %zm3) {
; CHECK-LABEL: multi_vector_mul_sub_multi_long_vg4x4_u8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: mov z26.d, z7.d
; CHECK-NEXT: mov z31.d, z4.d
; CHECK-NEXT: mov z25.d, z6.d
; CHECK-NEXT: mov w8, w0
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: mov z25.d, z6.d
; CHECK-NEXT: mov z30.d, z3.d
; CHECK-NEXT: mov z24.d, z5.d
; CHECK-NEXT: mov z29.d, z2.d
; CHECK-NEXT: mov z28.d, z1.d
; CHECK-NEXT: ld1b { z27.b }, p0/z, [x1]
; CHECK-NEXT: mov z28.d, z1.d
; CHECK-NEXT: umlsll za.s[w8, 0:3, vgx4], { z28.b - z31.b }, { z24.b - z27.b }
; CHECK-NEXT: umlsll za.s[w8, 4:7, vgx4], { z28.b - z31.b }, { z24.b - z27.b }
; CHECK-NEXT: ret
@@ -992,16 +992,16 @@ define void @multi_vector_mul_sub_multi_long_vg4x4_u8(i32 %slice, <vscale x 16 x
define void @multi_vector_mul_sub_multi_long_vg4x4_u16(i32 %slice, <vscale x 8 x i16> %dummy, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zn3, <vscale x 8 x i16> %zm0, <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2, <vscale x 8 x i16> %zm3) {
; CHECK-LABEL: multi_vector_mul_sub_multi_long_vg4x4_u16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: mov z26.d, z7.d
; CHECK-NEXT: mov z31.d, z4.d
; CHECK-NEXT: mov z25.d, z6.d
; CHECK-NEXT: mov w8, w0
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: mov z25.d, z6.d
; CHECK-NEXT: mov z30.d, z3.d
; CHECK-NEXT: mov z24.d, z5.d
; CHECK-NEXT: mov z29.d, z2.d
; CHECK-NEXT: mov z28.d, z1.d
; CHECK-NEXT: ld1h { z27.h }, p0/z, [x1]
; CHECK-NEXT: mov z28.d, z1.d
; CHECK-NEXT: umlsll za.d[w8, 0:3, vgx4], { z28.h - z31.h }, { z24.h - z27.h }
; CHECK-NEXT: umlsll za.d[w8, 4:7, vgx4], { z28.h - z31.h }, { z24.h - z27.h }
; CHECK-NEXT: ret
@@ -1275,16 +1275,16 @@ define void @multi_vector_mul_add_multi_unsigned_long_vg4x2_u8(i32 %slice, <vsca
define void @multi_vector_mul_add_multi_unsigned_long_vg4x4_u8(i32 %slice, <vscale x 16 x i8> %dummy, <vscale x 16 x i8> %zn0, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zn3, <vscale x 16 x i8> %zm0, <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2, <vscale x 16 x i8> %zm3) {
; CHECK-LABEL: multi_vector_mul_add_multi_unsigned_long_vg4x4_u8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: mov z26.d, z7.d
; CHECK-NEXT: mov z31.d, z4.d
; CHECK-NEXT: mov z25.d, z6.d
; CHECK-NEXT: mov w8, w0
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: mov z25.d, z6.d
; CHECK-NEXT: mov z30.d, z3.d
; CHECK-NEXT: mov z24.d, z5.d
; CHECK-NEXT: mov z29.d, z2.d
; CHECK-NEXT: mov z28.d, z1.d
; CHECK-NEXT: ld1b { z27.b }, p0/z, [x1]
; CHECK-NEXT: mov z28.d, z1.d
; CHECK-NEXT: usmlall za.s[w8, 0:3, vgx4], { z28.b - z31.b }, { z24.b - z27.b }
; CHECK-NEXT: usmlall za.s[w8, 4:7, vgx4], { z28.b - z31.b }, { z24.b - z27.b }
; CHECK-NEXT: ret