21 changes: 9 additions & 12 deletions llvm/test/CodeGen/AArch64/sve-fixed-length-masked-scatter.ll
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --function masked_scatter_v8i8,masked_scatter_v8i16,masked_scatter_v8i32,masked_scatter_v8i64 --prefix VBITS_EQ_256
; RUN: llc -aarch64-sve-vector-bits-min=128 < %s | FileCheck %s -check-prefix=NO_SVE
; RUN: llc -aarch64-sve-vector-bits-min=256 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_EQ_256
; RUN: llc -aarch64-sve-vector-bits-min=384 < %s | FileCheck %s -check-prefixes=CHECK
Expand Down Expand Up @@ -85,9 +86,9 @@ define void @masked_scatter_v8i8(<8 x i8>* %a, <8 x i8*>* %b) #0 {
; VBITS_EQ_256-NEXT: shl v2.4h, v2.4h, #8
; VBITS_EQ_256-NEXT: shl v1.4h, v1.4h, #8
; VBITS_EQ_256-NEXT: uunpklo z0.s, z0.h
; VBITS_EQ_256-NEXT: uunpklo z0.d, z0.s
; VBITS_EQ_256-NEXT: sshr v2.4h, v2.4h, #8
; VBITS_EQ_256-NEXT: sshr v1.4h, v1.4h, #8
; VBITS_EQ_256-NEXT: uunpklo z0.d, z0.s
; VBITS_EQ_256-NEXT: sunpklo z2.s, z2.h
; VBITS_EQ_256-NEXT: sunpklo z1.s, z1.h
; VBITS_EQ_256-NEXT: sunpklo z2.d, z2.s
Expand All @@ -99,7 +100,6 @@ define void @masked_scatter_v8i8(<8 x i8>* %a, <8 x i8*>* %b) #0 {
; VBITS_EQ_256-NEXT: st1b { z1.d }, p1, [z4.d]
; VBITS_EQ_256-NEXT: st1b { z0.d }, p0, [z3.d]
; VBITS_EQ_256-NEXT: ret
;
; VBITS_GE_512-LABEL: masked_scatter_v8i8:
; VBITS_GE_512: // %bb.0:
; VBITS_GE_512-NEXT: ldr d0, [x0]
Expand All @@ -108,8 +108,8 @@ define void @masked_scatter_v8i8(<8 x i8>* %a, <8 x i8*>* %b) #0 {
; VBITS_GE_512-NEXT: cmeq v2.8b, v0.8b, #0
; VBITS_GE_512-NEXT: uunpklo z0.h, z0.b
; VBITS_GE_512-NEXT: uunpklo z0.s, z0.h
; VBITS_GE_512-NEXT: uunpklo z0.d, z0.s
; VBITS_GE_512-NEXT: sunpklo z2.h, z2.b
; VBITS_GE_512-NEXT: uunpklo z0.d, z0.s
; VBITS_GE_512-NEXT: sunpklo z2.s, z2.h
; VBITS_GE_512-NEXT: sunpklo z2.d, z2.s
; VBITS_GE_512-NEXT: cmpne p0.d, p0/z, z2.d, #0
Expand All @@ -131,8 +131,8 @@ define void @masked_scatter_v16i8(<16 x i8>* %a, <16 x i8*>* %b) #0 {
; VBITS_GE_1024-NEXT: cmeq v2.16b, v0.16b, #0
; VBITS_GE_1024-NEXT: uunpklo z0.h, z0.b
; VBITS_GE_1024-NEXT: uunpklo z0.s, z0.h
; VBITS_GE_1024-NEXT: uunpklo z0.d, z0.s
; VBITS_GE_1024-NEXT: sunpklo z2.h, z2.b
; VBITS_GE_1024-NEXT: uunpklo z0.d, z0.s
; VBITS_GE_1024-NEXT: sunpklo z2.s, z2.h
; VBITS_GE_1024-NEXT: sunpklo z2.d, z2.s
; VBITS_GE_1024-NEXT: cmpne p0.d, p0/z, z2.d, #0
Expand Down Expand Up @@ -226,8 +226,8 @@ define void @masked_scatter_v8i16(<8 x i16>* %a, <8 x i16*>* %b) #0 {
; VBITS_EQ_256-NEXT: ld1d { z4.d }, p0/z, [x1, x8, lsl #3]
; VBITS_EQ_256-NEXT: ext v3.16b, v0.16b, v0.16b, #8
; VBITS_EQ_256-NEXT: uunpklo z0.s, z0.h
; VBITS_EQ_256-NEXT: uunpklo z0.d, z0.s
; VBITS_EQ_256-NEXT: sunpklo z2.s, z1.h
; VBITS_EQ_256-NEXT: uunpklo z0.d, z0.s
; VBITS_EQ_256-NEXT: ext v1.16b, v1.16b, v1.16b, #8
; VBITS_EQ_256-NEXT: sunpklo z2.d, z2.s
; VBITS_EQ_256-NEXT: cmpne p1.d, p0/z, z2.d, #0
Expand All @@ -240,7 +240,6 @@ define void @masked_scatter_v8i16(<8 x i16>* %a, <8 x i16*>* %b) #0 {
; VBITS_EQ_256-NEXT: uunpklo z1.d, z3.s
; VBITS_EQ_256-NEXT: st1h { z1.d }, p0, [z4.d]
; VBITS_EQ_256-NEXT: ret
;
; VBITS_GE_512-LABEL: masked_scatter_v8i16:
; VBITS_GE_512: // %bb.0:
; VBITS_GE_512-NEXT: ldr q0, [x0]
Expand Down Expand Up @@ -369,7 +368,6 @@ define void @masked_scatter_v8i32(<8 x i32>* %a, <8 x i32*>* %b) #0 {
; VBITS_EQ_256-NEXT: st1w { z1.d }, p0, [z3.d]
; VBITS_EQ_256-NEXT: st1w { z0.d }, p1, [z2.d]
; VBITS_EQ_256-NEXT: ret
;
; VBITS_GE_512-LABEL: masked_scatter_v8i32:
; VBITS_GE_512: // %bb.0:
; VBITS_GE_512-NEXT: ptrue p0.s, vl8
Expand Down Expand Up @@ -455,10 +453,10 @@ define void @masked_scatter_v2i64(<2 x i64>* %a, <2 x i64*>* %b) #0 {
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ptrue p0.d, vl2
; CHECK-NEXT: ldr q2, [x1]
; CHECK-NEXT: cmeq v1.2d, v0.2d, #0
; CHECK-NEXT: cmpne p0.d, p0/z, z1.d, #0
; CHECK-NEXT: st1d { z0.d }, p0, [z2.d]
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: cmeq v2.2d, v0.2d, #0
; CHECK-NEXT: cmpne p0.d, p0/z, z2.d, #0
; CHECK-NEXT: st1d { z0.d }, p0, [z1.d]
; CHECK-NEXT: ret
%vals = load <2 x i64>, <2 x i64>* %a
%ptrs = load <2 x i64*>, <2 x i64*>* %b
Expand Down Expand Up @@ -498,7 +496,6 @@ define void @masked_scatter_v8i64(<8 x i64>* %a, <8 x i64*>* %b) #0 {
; VBITS_EQ_256-NEXT: st1d { z1.d }, p0, [z3.d]
; VBITS_EQ_256-NEXT: st1d { z0.d }, p1, [z2.d]
; VBITS_EQ_256-NEXT: ret
;
; VBITS_GE_512-LABEL: masked_scatter_v8i64:
; VBITS_GE_512: // %bb.0:
; VBITS_GE_512-NEXT: ptrue p0.d, vl8
Expand Down
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/AArch64/sve-vscale-attr.ll
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@ define void @func_vscale_none(<16 x i32>* %a, <16 x i32>* %b) #0 {
; CHECK-NOARG-NEXT: ldp q6, q4, [x1]
; CHECK-NOARG-NEXT: stp q0, q1, [x0, #32]
; CHECK-NOARG-NEXT: add v2.4s, v2.4s, v6.4s
; CHECK-NOARG-NEXT: add v3.4s, v3.4s, v4.4s
; CHECK-NOARG-NEXT: stp q2, q3, [x0]
; CHECK-NOARG-NEXT: add v0.4s, v3.4s, v4.4s
; CHECK-NOARG-NEXT: stp q2, q0, [x0]
; CHECK-NOARG-NEXT: ret
;
; CHECK-ARG-LABEL: func_vscale_none:
Expand Down Expand Up @@ -47,8 +47,8 @@ define void @func_vscale1_1(<16 x i32>* %a, <16 x i32>* %b) #1 {
; CHECK-NEXT: ldp q6, q4, [x1]
; CHECK-NEXT: stp q0, q1, [x0, #32]
; CHECK-NEXT: add v2.4s, v2.4s, v6.4s
; CHECK-NEXT: add v3.4s, v3.4s, v4.4s
; CHECK-NEXT: stp q2, q3, [x0]
; CHECK-NEXT: add v0.4s, v3.4s, v4.4s
; CHECK-NEXT: stp q2, q0, [x0]
; CHECK-NEXT: ret
%op1 = load <16 x i32>, <16 x i32>* %a
%op2 = load <16 x i32>, <16 x i32>* %b
Expand Down
24 changes: 12 additions & 12 deletions llvm/test/CodeGen/AArch64/uadd_sat_vec.ll
Original file line number Diff line number Diff line change
Expand Up @@ -97,9 +97,9 @@ define <32 x i16> @v32i16(<32 x i16> %x, <32 x i16> %y) nounwind {
define void @v8i8(<8 x i8>* %px, <8 x i8>* %py, <8 x i8>* %pz) nounwind {
; CHECK-LABEL: v8i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr d1, [x1]
; CHECK-NEXT: uqadd v0.8b, v0.8b, v1.8b
; CHECK-NEXT: ldr d0, [x1]
; CHECK-NEXT: ldr d1, [x0]
; CHECK-NEXT: uqadd v0.8b, v1.8b, v0.8b
; CHECK-NEXT: str d0, [x2]
; CHECK-NEXT: ret
%x = load <8 x i8>, <8 x i8>* %px
Expand Down Expand Up @@ -158,9 +158,9 @@ define void @v2i8(<2 x i8>* %px, <2 x i8>* %py, <2 x i8>* %pz) nounwind {
define void @v4i16(<4 x i16>* %px, <4 x i16>* %py, <4 x i16>* %pz) nounwind {
; CHECK-LABEL: v4i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr d1, [x1]
; CHECK-NEXT: uqadd v0.4h, v0.4h, v1.4h
; CHECK-NEXT: ldr d0, [x1]
; CHECK-NEXT: ldr d1, [x0]
; CHECK-NEXT: uqadd v0.4h, v1.4h, v0.4h
; CHECK-NEXT: str d0, [x2]
; CHECK-NEXT: ret
%x = load <4 x i16>, <4 x i16>* %px
Expand Down Expand Up @@ -225,9 +225,9 @@ define void @v12i16(<12 x i16>* %px, <12 x i16>* %py, <12 x i16>* %pz) nounwind
define void @v1i8(<1 x i8>* %px, <1 x i8>* %py, <1 x i8>* %pz) nounwind {
; CHECK-LABEL: v1i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr b0, [x0]
; CHECK-NEXT: ldr b1, [x1]
; CHECK-NEXT: uqadd v0.8b, v0.8b, v1.8b
; CHECK-NEXT: ldr b0, [x1]
; CHECK-NEXT: ldr b1, [x0]
; CHECK-NEXT: uqadd v0.8b, v1.8b, v0.8b
; CHECK-NEXT: st1 { v0.b }[0], [x2]
; CHECK-NEXT: ret
%x = load <1 x i8>, <1 x i8>* %px
Expand All @@ -240,9 +240,9 @@ define void @v1i8(<1 x i8>* %px, <1 x i8>* %py, <1 x i8>* %pz) nounwind {
define void @v1i16(<1 x i16>* %px, <1 x i16>* %py, <1 x i16>* %pz) nounwind {
; CHECK-LABEL: v1i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr h0, [x0]
; CHECK-NEXT: ldr h1, [x1]
; CHECK-NEXT: uqadd v0.4h, v0.4h, v1.4h
; CHECK-NEXT: ldr h0, [x1]
; CHECK-NEXT: ldr h1, [x0]
; CHECK-NEXT: uqadd v0.4h, v1.4h, v0.4h
; CHECK-NEXT: str h0, [x2]
; CHECK-NEXT: ret
%x = load <1 x i16>, <1 x i16>* %px
Expand Down
20 changes: 10 additions & 10 deletions llvm/test/CodeGen/AArch64/urem-seteq-illegal-types.ll
Original file line number Diff line number Diff line change
Expand Up @@ -67,25 +67,25 @@ define <3 x i1> @test_urem_vec(<3 x i11> %X) nounwind {
; CHECK: // %bb.0:
; CHECK-NEXT: fmov s0, w0
; CHECK-NEXT: adrp x8, .LCPI4_0
; CHECK-NEXT: adrp x9, .LCPI4_1
; CHECK-NEXT: mov v0.h[1], w1
; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI4_0]
; CHECK-NEXT: adrp x8, .LCPI4_1
; CHECK-NEXT: ldr d2, [x9, :lo12:.LCPI4_1]
; CHECK-NEXT: adrp x8, .LCPI4_2
; CHECK-NEXT: mov v0.h[2], w2
; CHECK-NEXT: sub v0.4h, v0.4h, v1.4h
; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI4_1]
; CHECK-NEXT: adrp x8, .LCPI4_2
; CHECK-NEXT: mul v0.4h, v0.4h, v1.4h
; CHECK-NEXT: movi d1, #0x0000000000ffff
; CHECK-NEXT: ldr d3, [x8, :lo12:.LCPI4_2]
; CHECK-NEXT: mul v0.4h, v0.4h, v2.4h
; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI4_2]
; CHECK-NEXT: adrp x8, .LCPI4_3
; CHECK-NEXT: shl v2.4h, v0.4h, #1
; CHECK-NEXT: shl v3.4h, v0.4h, #1
; CHECK-NEXT: bic v0.4h, #248, lsl #8
; CHECK-NEXT: ushl v2.4h, v2.4h, v3.4h
; CHECK-NEXT: ushl v0.4h, v0.4h, v1.4h
; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI4_3]
; CHECK-NEXT: orr v0.8b, v0.8b, v2.8b
; CHECK-NEXT: ushl v1.4h, v3.4h, v2.4h
; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI4_3]
; CHECK-NEXT: orr v0.8b, v0.8b, v1.8b
; CHECK-NEXT: bic v0.4h, #248, lsl #8
; CHECK-NEXT: cmhi v0.4h, v0.4h, v1.4h
; CHECK-NEXT: cmhi v0.4h, v0.4h, v2.4h
; CHECK-NEXT: umov w0, v0.h[0]
; CHECK-NEXT: umov w1, v0.h[1]
; CHECK-NEXT: umov w2, v0.h[2]
Expand Down
204 changes: 102 additions & 102 deletions llvm/test/CodeGen/AArch64/urem-seteq-vec-nonsplat.ll

Large diffs are not rendered by default.

18 changes: 9 additions & 9 deletions llvm/test/CodeGen/AArch64/urem-seteq-vec-nonzero.ll
Original file line number Diff line number Diff line change
Expand Up @@ -54,11 +54,11 @@ define <4 x i1> @t32_6_part0(<4 x i32> %X) nounwind {
; CHECK-NEXT: mov w8, #43690
; CHECK-NEXT: movk w8, #10922, lsl #16
; CHECK-NEXT: mul v0.4s, v0.4s, v1.4s
; CHECK-NEXT: dup v2.4s, w8
; CHECK-NEXT: shl v1.4s, v0.4s, #31
; CHECK-NEXT: ushr v0.4s, v0.4s, #1
; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
; CHECK-NEXT: dup v1.4s, w8
; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s
; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s
; CHECK-NEXT: xtn v0.4h, v0.4s
; CHECK-NEXT: ret
%urem = urem <4 x i32> %X, <i32 6, i32 6, i32 6, i32 6>
Expand All @@ -70,18 +70,18 @@ define <4 x i1> @t32_6_part1(<4 x i32> %X) nounwind {
; CHECK-LABEL: t32_6_part1:
; CHECK: // %bb.0:
; CHECK-NEXT: adrp x8, .LCPI3_0
; CHECK-NEXT: mov w9, #43691
; CHECK-NEXT: movk w9, #43690, lsl #16
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI3_0]
; CHECK-NEXT: mov w8, #43691
; CHECK-NEXT: movk w8, #43690, lsl #16
; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s
; CHECK-NEXT: dup v1.4s, w8
; CHECK-NEXT: adrp x8, .LCPI3_1
; CHECK-NEXT: mul v0.4s, v0.4s, v1.4s
; CHECK-NEXT: dup v2.4s, w9
; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s
; CHECK-NEXT: mul v0.4s, v0.4s, v2.4s
; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI3_1]
; CHECK-NEXT: shl v1.4s, v0.4s, #31
; CHECK-NEXT: ushr v0.4s, v0.4s, #1
; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI3_1]
; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s
; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s
; CHECK-NEXT: xtn v0.4h, v0.4s
; CHECK-NEXT: ret
%urem = urem <4 x i32> %X, <i32 6, i32 6, i32 6, i32 6>
Expand Down
58 changes: 29 additions & 29 deletions llvm/test/CodeGen/AArch64/urem-seteq-vec-splat.ll
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,14 @@ define <4 x i32> @test_urem_odd_25(<4 x i32> %X) nounwind {
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #23593
; CHECK-NEXT: movk w8, #49807, lsl #16
; CHECK-NEXT: movi v2.4s, #1
; CHECK-NEXT: dup v1.4s, w8
; CHECK-NEXT: mov w8, #28835
; CHECK-NEXT: movk w8, #2621, lsl #16
; CHECK-NEXT: mul v0.4s, v0.4s, v1.4s
; CHECK-NEXT: dup v1.4s, w8
; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s
; CHECK-NEXT: movi v1.4s, #1
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-NEXT: and v0.16b, v0.16b, v2.16b
; CHECK-NEXT: ret
%urem = urem <4 x i32> %X, <i32 25, i32 25, i32 25, i32 25>
%cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
Expand All @@ -28,17 +28,17 @@ define <4 x i32> @test_urem_even_100(<4 x i32> %X) nounwind {
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #23593
; CHECK-NEXT: movk w8, #49807, lsl #16
; CHECK-NEXT: movi v3.4s, #1
; CHECK-NEXT: dup v1.4s, w8
; CHECK-NEXT: mov w8, #23592
; CHECK-NEXT: movk w8, #655, lsl #16
; CHECK-NEXT: mul v0.4s, v0.4s, v1.4s
; CHECK-NEXT: dup v2.4s, w8
; CHECK-NEXT: shl v1.4s, v0.4s, #30
; CHECK-NEXT: ushr v0.4s, v0.4s, #2
; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
; CHECK-NEXT: dup v1.4s, w8
; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s
; CHECK-NEXT: movi v1.4s, #1
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s
; CHECK-NEXT: and v0.16b, v0.16b, v3.16b
; CHECK-NEXT: ret
%urem = urem <4 x i32> %X, <i32 100, i32 100, i32 100, i32 100>
%cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
Expand All @@ -53,13 +53,13 @@ define <4 x i32> @test_urem_odd_neg25(<4 x i32> %X) nounwind {
; CHECK-LABEL: test_urem_odd_neg25:
; CHECK: // %bb.0:
; CHECK-NEXT: adrp x8, .LCPI2_0
; CHECK-NEXT: movi v2.4s, #1
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI2_0]
; CHECK-NEXT: adrp x8, .LCPI2_1
; CHECK-NEXT: mul v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI2_1]
; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s
; CHECK-NEXT: movi v1.4s, #1
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-NEXT: and v0.16b, v0.16b, v2.16b
; CHECK-NEXT: ret
%urem = urem <4 x i32> %X, <i32 25, i32 -25, i32 -25, i32 25>
%cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
Expand All @@ -72,16 +72,16 @@ define <4 x i32> @test_urem_even_neg100(<4 x i32> %X) nounwind {
; CHECK-LABEL: test_urem_even_neg100:
; CHECK: // %bb.0:
; CHECK-NEXT: adrp x8, .LCPI3_0
; CHECK-NEXT: movi v3.4s, #1
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI3_0]
; CHECK-NEXT: adrp x8, .LCPI3_1
; CHECK-NEXT: mul v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI3_1]
; CHECK-NEXT: shl v1.4s, v0.4s, #30
; CHECK-NEXT: ushr v0.4s, v0.4s, #2
; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI3_1]
; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s
; CHECK-NEXT: movi v1.4s, #1
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s
; CHECK-NEXT: and v0.16b, v0.16b, v3.16b
; CHECK-NEXT: ret
%urem = urem <4 x i32> %X, <i32 -100, i32 100, i32 -100, i32 100>
%cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
Expand All @@ -98,13 +98,13 @@ define <4 x i32> @test_urem_odd_undef1(<4 x i32> %X) nounwind {
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #34079
; CHECK-NEXT: movk w8, #20971, lsl #16
; CHECK-NEXT: movi v1.4s, #25
; CHECK-NEXT: dup v2.4s, w8
; CHECK-NEXT: umull2 v3.2d, v0.4s, v2.4s
; CHECK-NEXT: umull v2.2d, v0.2s, v2.2s
; CHECK-NEXT: uzp2 v2.4s, v2.4s, v3.4s
; CHECK-NEXT: ushr v2.4s, v2.4s, #3
; CHECK-NEXT: mls v0.4s, v2.4s, v1.4s
; CHECK-NEXT: dup v1.4s, w8
; CHECK-NEXT: umull2 v2.2d, v0.4s, v1.4s
; CHECK-NEXT: umull v1.2d, v0.2s, v1.2s
; CHECK-NEXT: uzp2 v1.4s, v1.4s, v2.4s
; CHECK-NEXT: movi v2.4s, #25
; CHECK-NEXT: ushr v1.4s, v1.4s, #3
; CHECK-NEXT: mls v0.4s, v1.4s, v2.4s
; CHECK-NEXT: movi v1.4s, #1
; CHECK-NEXT: cmeq v0.4s, v0.4s, #0
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
Expand All @@ -120,13 +120,13 @@ define <4 x i32> @test_urem_even_undef1(<4 x i32> %X) nounwind {
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #34079
; CHECK-NEXT: movk w8, #20971, lsl #16
; CHECK-NEXT: movi v1.4s, #100
; CHECK-NEXT: dup v2.4s, w8
; CHECK-NEXT: umull2 v3.2d, v0.4s, v2.4s
; CHECK-NEXT: umull v2.2d, v0.2s, v2.2s
; CHECK-NEXT: uzp2 v2.4s, v2.4s, v3.4s
; CHECK-NEXT: ushr v2.4s, v2.4s, #5
; CHECK-NEXT: mls v0.4s, v2.4s, v1.4s
; CHECK-NEXT: dup v1.4s, w8
; CHECK-NEXT: umull2 v2.2d, v0.4s, v1.4s
; CHECK-NEXT: umull v1.2d, v0.2s, v1.2s
; CHECK-NEXT: uzp2 v1.4s, v1.4s, v2.4s
; CHECK-NEXT: movi v2.4s, #100
; CHECK-NEXT: ushr v1.4s, v1.4s, #5
; CHECK-NEXT: mls v0.4s, v1.4s, v2.4s
; CHECK-NEXT: movi v1.4s, #1
; CHECK-NEXT: cmeq v0.4s, v0.4s, #0
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
Expand Down Expand Up @@ -167,10 +167,10 @@ define <4 x i32> @test_urem_pow2(<4 x i32> %X) nounwind {
; CHECK-LABEL: test_urem_pow2:
; CHECK: // %bb.0:
; CHECK-NEXT: movi v1.4s, #15
; CHECK-NEXT: movi v2.4s, #1
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-NEXT: movi v1.4s, #1
; CHECK-NEXT: cmeq v0.4s, v0.4s, #0
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-NEXT: and v0.16b, v0.16b, v2.16b
; CHECK-NEXT: ret
%urem = urem <4 x i32> %X, <i32 16, i32 16, i32 16, i32 16>
%cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
Expand All @@ -182,8 +182,8 @@ define <4 x i32> @test_urem_pow2(<4 x i32> %X) nounwind {
define <4 x i32> @test_urem_int_min(<4 x i32> %X) nounwind {
; CHECK-LABEL: test_urem_int_min:
; CHECK: // %bb.0:
; CHECK-NEXT: bic v0.4s, #128, lsl #24
; CHECK-NEXT: movi v1.4s, #1
; CHECK-NEXT: bic v0.4s, #128, lsl #24
; CHECK-NEXT: cmeq v0.4s, v0.4s, #0
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ret
Expand Down
6 changes: 3 additions & 3 deletions llvm/test/CodeGen/AArch64/urem-seteq-vec-tautological.ll
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,11 @@ define <4 x i1> @t0_all_tautological(<4 x i32> %X) nounwind {
; CHECK-LABEL: t0_all_tautological:
; CHECK: // %bb.0:
; CHECK-NEXT: adrp x8, .LCPI0_0
; CHECK-NEXT: adrp x9, .LCPI0_1
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI0_0]
; CHECK-NEXT: adrp x8, .LCPI0_1
; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI0_1]
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI0_1]
; CHECK-NEXT: cmeq v0.4s, v0.4s, v1.4s
; CHECK-NEXT: cmeq v0.4s, v0.4s, v2.4s
; CHECK-NEXT: xtn v0.4h, v0.4s
; CHECK-NEXT: ret
%urem = urem <4 x i32> %X, <i32 1, i32 1, i32 2, i32 2>
Expand Down
24 changes: 12 additions & 12 deletions llvm/test/CodeGen/AArch64/usub_sat_vec.ll
Original file line number Diff line number Diff line change
Expand Up @@ -98,9 +98,9 @@ define <32 x i16> @v32i16(<32 x i16> %x, <32 x i16> %y) nounwind {
define void @v8i8(<8 x i8>* %px, <8 x i8>* %py, <8 x i8>* %pz) nounwind {
; CHECK-LABEL: v8i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr d1, [x1]
; CHECK-NEXT: uqsub v0.8b, v0.8b, v1.8b
; CHECK-NEXT: ldr d0, [x1]
; CHECK-NEXT: ldr d1, [x0]
; CHECK-NEXT: uqsub v0.8b, v1.8b, v0.8b
; CHECK-NEXT: str d0, [x2]
; CHECK-NEXT: ret
%x = load <8 x i8>, <8 x i8>* %px
Expand Down Expand Up @@ -155,9 +155,9 @@ define void @v2i8(<2 x i8>* %px, <2 x i8>* %py, <2 x i8>* %pz) nounwind {
define void @v4i16(<4 x i16>* %px, <4 x i16>* %py, <4 x i16>* %pz) nounwind {
; CHECK-LABEL: v4i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr d1, [x1]
; CHECK-NEXT: uqsub v0.4h, v0.4h, v1.4h
; CHECK-NEXT: ldr d0, [x1]
; CHECK-NEXT: ldr d1, [x0]
; CHECK-NEXT: uqsub v0.4h, v1.4h, v0.4h
; CHECK-NEXT: str d0, [x2]
; CHECK-NEXT: ret
%x = load <4 x i16>, <4 x i16>* %px
Expand Down Expand Up @@ -220,9 +220,9 @@ define void @v12i16(<12 x i16>* %px, <12 x i16>* %py, <12 x i16>* %pz) nounwind
define void @v1i8(<1 x i8>* %px, <1 x i8>* %py, <1 x i8>* %pz) nounwind {
; CHECK-LABEL: v1i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr b0, [x0]
; CHECK-NEXT: ldr b1, [x1]
; CHECK-NEXT: uqsub v0.8b, v0.8b, v1.8b
; CHECK-NEXT: ldr b0, [x1]
; CHECK-NEXT: ldr b1, [x0]
; CHECK-NEXT: uqsub v0.8b, v1.8b, v0.8b
; CHECK-NEXT: st1 { v0.b }[0], [x2]
; CHECK-NEXT: ret
%x = load <1 x i8>, <1 x i8>* %px
Expand All @@ -235,9 +235,9 @@ define void @v1i8(<1 x i8>* %px, <1 x i8>* %py, <1 x i8>* %pz) nounwind {
define void @v1i16(<1 x i16>* %px, <1 x i16>* %py, <1 x i16>* %pz) nounwind {
; CHECK-LABEL: v1i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr h0, [x0]
; CHECK-NEXT: ldr h1, [x1]
; CHECK-NEXT: uqsub v0.4h, v0.4h, v1.4h
; CHECK-NEXT: ldr h0, [x1]
; CHECK-NEXT: ldr h1, [x0]
; CHECK-NEXT: uqsub v0.4h, v1.4h, v0.4h
; CHECK-NEXT: str h0, [x2]
; CHECK-NEXT: ret
%x = load <1 x i16>, <1 x i16>* %px
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AArch64/vec_cttz.ll
Original file line number Diff line number Diff line change
Expand Up @@ -85,8 +85,8 @@ define <8 x i16> @cttz_v8i16(<8 x i16> %a) nounwind {
; CHECK-NEXT: movi v1.8h, #1
; CHECK-NEXT: sub v1.8h, v0.8h, v1.8h
; CHECK-NEXT: bic v0.16b, v1.16b, v0.16b
; CHECK-NEXT: movi v1.8h, #16
; CHECK-NEXT: clz v0.8h, v0.8h
; CHECK-NEXT: movi v1.8h, #16
; CHECK-NEXT: sub v0.8h, v1.8h, v0.8h
; CHECK-NEXT: ret
%b = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 true)
Expand All @@ -99,8 +99,8 @@ define <4 x i32> @cttz_v4i32(<4 x i32> %a) nounwind {
; CHECK-NEXT: movi v1.4s, #1
; CHECK-NEXT: sub v1.4s, v0.4s, v1.4s
; CHECK-NEXT: bic v0.16b, v1.16b, v0.16b
; CHECK-NEXT: movi v1.4s, #32
; CHECK-NEXT: clz v0.4s, v0.4s
; CHECK-NEXT: movi v1.4s, #32
; CHECK-NEXT: sub v0.4s, v1.4s, v0.4s
; CHECK-NEXT: ret
%b = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 true)
Expand Down
74 changes: 37 additions & 37 deletions llvm/test/CodeGen/AArch64/vec_uaddo.ll
Original file line number Diff line number Diff line change
Expand Up @@ -52,8 +52,8 @@ define <3 x i32> @uaddo_v3i32(<3 x i32> %a0, <3 x i32> %a1, <3 x i32>* %p2) noun
; CHECK-NEXT: add v1.4s, v0.4s, v1.4s
; CHECK-NEXT: add x8, x0, #8
; CHECK-NEXT: cmhi v0.4s, v0.4s, v1.4s
; CHECK-NEXT: st1 { v1.s }[2], [x8]
; CHECK-NEXT: str d1, [x0]
; CHECK-NEXT: st1 { v1.s }[2], [x8]
; CHECK-NEXT: ret
%t = call {<3 x i32>, <3 x i1>} @llvm.uadd.with.overflow.v3i32(<3 x i32> %a0, <3 x i32> %a1)
%val = extractvalue {<3 x i32>, <3 x i1>} %t, 0
Expand Down Expand Up @@ -84,27 +84,27 @@ define <6 x i32> @uaddo_v6i32(<6 x i32> %a0, <6 x i32> %a1, <6 x i32>* %p2) noun
; CHECK-NEXT: fmov s0, w6
; CHECK-NEXT: fmov s1, w0
; CHECK-NEXT: mov x8, sp
; CHECK-NEXT: add x9, sp, #8
; CHECK-NEXT: ldr s2, [sp, #16]
; CHECK-NEXT: fmov s3, w4
; CHECK-NEXT: add x9, sp, #24
; CHECK-NEXT: add x10, sp, #8
; CHECK-NEXT: mov v0.s[1], w7
; CHECK-NEXT: fmov s3, w4
; CHECK-NEXT: mov v1.s[1], w1
; CHECK-NEXT: ld1 { v2.s }[1], [x9]
; CHECK-NEXT: mov v3.s[1], w5
; CHECK-NEXT: ld1 { v0.s }[2], [x8]
; CHECK-NEXT: add x8, sp, #24
; CHECK-NEXT: mov v1.s[2], w2
; CHECK-NEXT: ld1 { v2.s }[1], [x8]
; CHECK-NEXT: ld1 { v0.s }[3], [x9]
; CHECK-NEXT: mov v1.s[3], w3
; CHECK-NEXT: ldr x8, [sp, #32]
; CHECK-NEXT: add v2.4s, v3.4s, v2.4s
; CHECK-NEXT: add v0.4s, v1.4s, v0.4s
; CHECK-NEXT: cmhi v3.4s, v3.4s, v2.4s
; CHECK-NEXT: ld1 { v0.s }[3], [x10]
; CHECK-NEXT: mov v1.s[3], w3
; CHECK-NEXT: str d2, [x8, #16]
; CHECK-NEXT: cmhi v1.4s, v1.4s, v0.4s
; CHECK-NEXT: str q0, [x8]
; CHECK-NEXT: cmhi v3.4s, v3.4s, v2.4s
; CHECK-NEXT: mov w5, v3.s[1]
; CHECK-NEXT: fmov w4, s3
; CHECK-NEXT: add v0.4s, v1.4s, v0.4s
; CHECK-NEXT: cmhi v1.4s, v1.4s, v0.4s
; CHECK-NEXT: str q0, [x8]
; CHECK-NEXT: mov w1, v1.s[1]
; CHECK-NEXT: mov w2, v1.s[2]
; CHECK-NEXT: mov w3, v1.s[3]
Expand Down Expand Up @@ -141,23 +141,23 @@ define <16 x i32> @uaddo_v16i8(<16 x i8> %a0, <16 x i8> %a1, <16 x i8>* %p2) nou
; CHECK-NEXT: add v4.16b, v0.16b, v1.16b
; CHECK-NEXT: cmhi v0.16b, v0.16b, v4.16b
; CHECK-NEXT: str q4, [x0]
; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
; CHECK-NEXT: zip1 v2.8b, v0.8b, v0.8b
; CHECK-NEXT: zip2 v0.8b, v0.8b, v0.8b
; CHECK-NEXT: zip1 v1.8b, v0.8b, v0.8b
; CHECK-NEXT: zip2 v2.8b, v0.8b, v0.8b
; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8
; CHECK-NEXT: ushll v1.4s, v1.4h, #0
; CHECK-NEXT: ushll v2.4s, v2.4h, #0
; CHECK-NEXT: zip1 v3.8b, v1.8b, v0.8b
; CHECK-NEXT: zip2 v1.8b, v1.8b, v0.8b
; CHECK-NEXT: ushll v0.4s, v0.4h, #0
; CHECK-NEXT: zip1 v3.8b, v0.8b, v0.8b
; CHECK-NEXT: zip2 v5.8b, v0.8b, v0.8b
; CHECK-NEXT: shl v1.4s, v1.4s, #31
; CHECK-NEXT: shl v2.4s, v2.4s, #31
; CHECK-NEXT: cmlt v0.4s, v1.4s, #0
; CHECK-NEXT: cmlt v1.4s, v2.4s, #0
; CHECK-NEXT: ushll v2.4s, v3.4h, #0
; CHECK-NEXT: ushll v3.4s, v5.4h, #0
; CHECK-NEXT: shl v2.4s, v2.4s, #31
; CHECK-NEXT: ushll v3.4s, v3.4h, #0
; CHECK-NEXT: ushll v1.4s, v1.4h, #0
; CHECK-NEXT: shl v5.4s, v0.4s, #31
; CHECK-NEXT: cmlt v0.4s, v2.4s, #0
; CHECK-NEXT: shl v3.4s, v3.4s, #31
; CHECK-NEXT: shl v6.4s, v1.4s, #31
; CHECK-NEXT: cmlt v1.4s, v5.4s, #0
; CHECK-NEXT: cmlt v2.4s, v3.4s, #0
; CHECK-NEXT: cmlt v3.4s, v6.4s, #0
; CHECK-NEXT: cmlt v2.4s, v2.4s, #0
; CHECK-NEXT: cmlt v3.4s, v3.4s, #0
; CHECK-NEXT: ret
%t = call {<16 x i8>, <16 x i1>} @llvm.uadd.with.overflow.v16i8(<16 x i8> %a0, <16 x i8> %a1)
%val = extractvalue {<16 x i8>, <16 x i1>} %t, 0
Expand Down Expand Up @@ -213,26 +213,26 @@ define <4 x i32> @uaddo_v4i24(<4 x i24> %a0, <4 x i24> %a1, <4 x i24>* %p2) noun
; CHECK-NEXT: bic v1.4s, #255, lsl #24
; CHECK-NEXT: bic v0.4s, #255, lsl #24
; CHECK-NEXT: add v0.4s, v0.4s, v1.4s
; CHECK-NEXT: mov v1.16b, v0.16b
; CHECK-NEXT: mov w8, v0.s[3]
; CHECK-NEXT: mov w9, v0.s[2]
; CHECK-NEXT: mov w10, v0.s[1]
; CHECK-NEXT: fmov w11, s0
; CHECK-NEXT: mov v1.16b, v0.16b
; CHECK-NEXT: bic v1.4s, #255, lsl #24
; CHECK-NEXT: sturh w8, [x0, #9]
; CHECK-NEXT: lsr w8, w8, #16
; CHECK-NEXT: cmeq v1.4s, v1.4s, v0.4s
; CHECK-NEXT: strh w9, [x0, #6]
; CHECK-NEXT: sturh w10, [x0, #3]
; CHECK-NEXT: lsr w9, w9, #16
; CHECK-NEXT: strh w11, [x0]
; CHECK-NEXT: cmeq v1.4s, v1.4s, v0.4s
; CHECK-NEXT: lsr w10, w10, #16
; CHECK-NEXT: strb w8, [x0, #11]
; CHECK-NEXT: lsr w8, w10, #16
; CHECK-NEXT: lsr w10, w11, #16
; CHECK-NEXT: strb w9, [x0, #8]
; CHECK-NEXT: lsr w8, w11, #16
; CHECK-NEXT: strh w11, [x0]
; CHECK-NEXT: mvn v0.16b, v1.16b
; CHECK-NEXT: strb w8, [x0, #5]
; CHECK-NEXT: strb w10, [x0, #2]
; CHECK-NEXT: strb w9, [x0, #8]
; CHECK-NEXT: strb w10, [x0, #5]
; CHECK-NEXT: strb w8, [x0, #2]
; CHECK-NEXT: ret
%t = call {<4 x i24>, <4 x i1>} @llvm.uadd.with.overflow.v4i24(<4 x i24> %a0, <4 x i24> %a1)
%val = extractvalue {<4 x i24>, <4 x i1>} %t, 0
Expand All @@ -249,20 +249,20 @@ define <4 x i32> @uaddo_v4i1(<4 x i1> %a0, <4 x i1> %a1, <4 x i1>* %p2) nounwind
; CHECK-NEXT: and v1.8b, v1.8b, v2.8b
; CHECK-NEXT: and v0.8b, v0.8b, v2.8b
; CHECK-NEXT: add v0.4h, v0.4h, v1.4h
; CHECK-NEXT: and v1.8b, v0.8b, v2.8b
; CHECK-NEXT: umov w8, v0.h[1]
; CHECK-NEXT: umov w9, v0.h[2]
; CHECK-NEXT: umov w10, v0.h[0]
; CHECK-NEXT: umov w11, v0.h[3]
; CHECK-NEXT: cmeq v1.4h, v1.4h, v0.4h
; CHECK-NEXT: and v1.8b, v0.8b, v2.8b
; CHECK-NEXT: cmeq v0.4h, v1.4h, v0.4h
; CHECK-NEXT: and w8, w8, #0x1
; CHECK-NEXT: and w9, w9, #0x1
; CHECK-NEXT: mvn v0.8b, v0.8b
; CHECK-NEXT: sshll v0.4s, v0.4h, #0
; CHECK-NEXT: bfi w10, w8, #1, #1
; CHECK-NEXT: mvn v1.8b, v1.8b
; CHECK-NEXT: bfi w10, w9, #2, #1
; CHECK-NEXT: bfi w10, w11, #3, #29
; CHECK-NEXT: and w8, w10, #0xf
; CHECK-NEXT: sshll v0.4s, v1.4h, #0
; CHECK-NEXT: strb w8, [x0]
; CHECK-NEXT: ret
%t = call {<4 x i1>, <4 x i1>} @llvm.uadd.with.overflow.v4i1(<4 x i1> %a0, <4 x i1> %a1)
Expand Down
118 changes: 58 additions & 60 deletions llvm/test/CodeGen/AArch64/vec_umulo.ll
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,8 @@ define <1 x i32> @umulo_v1i32(<1 x i32> %a0, <1 x i32> %a1, <1 x i32>* %p2) noun
; CHECK-NEXT: umull v1.2d, v0.2s, v1.2s
; CHECK-NEXT: shrn v0.2s, v1.2d, #32
; CHECK-NEXT: xtn v1.2s, v1.2d
; CHECK-NEXT: str s1, [x0]
; CHECK-NEXT: cmtst v0.2s, v0.2s, v0.2s
; CHECK-NEXT: str s1, [x0]
; CHECK-NEXT: ret
%t = call {<1 x i32>, <1 x i1>} @llvm.umul.with.overflow.v1i32(<1 x i32> %a0, <1 x i32> %a1)
%val = extractvalue {<1 x i32>, <1 x i1>} %t, 0
Expand All @@ -39,8 +39,8 @@ define <2 x i32> @umulo_v2i32(<2 x i32> %a0, <2 x i32> %a1, <2 x i32>* %p2) noun
; CHECK-NEXT: umull v1.2d, v0.2s, v1.2s
; CHECK-NEXT: shrn v0.2s, v1.2d, #32
; CHECK-NEXT: xtn v1.2s, v1.2d
; CHECK-NEXT: str d1, [x0]
; CHECK-NEXT: cmtst v0.2s, v0.2s, v0.2s
; CHECK-NEXT: str d1, [x0]
; CHECK-NEXT: ret
%t = call {<2 x i32>, <2 x i1>} @llvm.umul.with.overflow.v2i32(<2 x i32> %a0, <2 x i32> %a1)
%val = extractvalue {<2 x i32>, <2 x i1>} %t, 0
Expand Down Expand Up @@ -96,37 +96,37 @@ define <6 x i32> @umulo_v6i32(<6 x i32> %a0, <6 x i32> %a1, <6 x i32>* %p2) noun
; CHECK-NEXT: fmov s0, w6
; CHECK-NEXT: fmov s1, w0
; CHECK-NEXT: mov x8, sp
; CHECK-NEXT: add x9, sp, #8
; CHECK-NEXT: ldr s2, [sp, #16]
; CHECK-NEXT: fmov s3, w4
; CHECK-NEXT: add x9, sp, #24
; CHECK-NEXT: add x10, sp, #8
; CHECK-NEXT: mov v0.s[1], w7
; CHECK-NEXT: fmov s3, w4
; CHECK-NEXT: mov v1.s[1], w1
; CHECK-NEXT: ld1 { v2.s }[1], [x9]
; CHECK-NEXT: mov v3.s[1], w5
; CHECK-NEXT: ld1 { v0.s }[2], [x8]
; CHECK-NEXT: add x8, sp, #24
; CHECK-NEXT: mov v1.s[2], w2
; CHECK-NEXT: ld1 { v2.s }[1], [x8]
; CHECK-NEXT: ld1 { v0.s }[3], [x9]
; CHECK-NEXT: mov v1.s[3], w3
; CHECK-NEXT: ldr x8, [sp, #32]
; CHECK-NEXT: umull2 v6.2d, v3.4s, v2.4s
; CHECK-NEXT: umull2 v4.2d, v3.4s, v2.4s
; CHECK-NEXT: ld1 { v0.s }[3], [x10]
; CHECK-NEXT: mov v1.s[3], w3
; CHECK-NEXT: umull v7.2d, v3.2s, v2.2s
; CHECK-NEXT: umull2 v4.2d, v1.4s, v0.4s
; CHECK-NEXT: umull v5.2d, v1.2s, v0.2s
; CHECK-NEXT: mul v2.4s, v3.4s, v2.4s
; CHECK-NEXT: mul v0.4s, v1.4s, v0.4s
; CHECK-NEXT: uzp2 v4.4s, v5.4s, v4.4s
; CHECK-NEXT: uzp2 v5.4s, v7.4s, v6.4s
; CHECK-NEXT: umull2 v5.2d, v1.4s, v0.4s
; CHECK-NEXT: umull v6.2d, v1.2s, v0.2s
; CHECK-NEXT: uzp2 v4.4s, v7.4s, v4.4s
; CHECK-NEXT: str d2, [x8, #16]
; CHECK-NEXT: str q0, [x8]
; CHECK-NEXT: mul v0.4s, v1.4s, v0.4s
; CHECK-NEXT: uzp2 v5.4s, v6.4s, v5.4s
; CHECK-NEXT: cmtst v4.4s, v4.4s, v4.4s
; CHECK-NEXT: str q0, [x8]
; CHECK-NEXT: cmtst v3.4s, v5.4s, v5.4s
; CHECK-NEXT: mov w1, v4.s[1]
; CHECK-NEXT: mov w2, v4.s[2]
; CHECK-NEXT: mov w3, v4.s[3]
; CHECK-NEXT: mov w5, v3.s[1]
; CHECK-NEXT: fmov w0, s4
; CHECK-NEXT: fmov w4, s3
; CHECK-NEXT: mov w5, v4.s[1]
; CHECK-NEXT: fmov w4, s4
; CHECK-NEXT: mov w1, v3.s[1]
; CHECK-NEXT: mov w2, v3.s[2]
; CHECK-NEXT: mov w3, v3.s[3]
; CHECK-NEXT: fmov w0, s3
; CHECK-NEXT: ret
%t = call {<6 x i32>, <6 x i1>} @llvm.umul.with.overflow.v6i32(<6 x i32> %a0, <6 x i32> %a1)
%val = extractvalue {<6 x i32>, <6 x i1>} %t, 0
Expand Down Expand Up @@ -166,29 +166,27 @@ define <16 x i32> @umulo_v16i8(<16 x i8> %a0, <16 x i8> %a1, <16 x i8>* %p2) nou
; CHECK: // %bb.0:
; CHECK-NEXT: umull2 v2.8h, v0.16b, v1.16b
; CHECK-NEXT: umull v3.8h, v0.8b, v1.8b
; CHECK-NEXT: mul v5.16b, v0.16b, v1.16b
; CHECK-NEXT: uzp2 v2.16b, v3.16b, v2.16b
; CHECK-NEXT: str q5, [x0]
; CHECK-NEXT: cmtst v2.16b, v2.16b, v2.16b
; CHECK-NEXT: ext v3.16b, v2.16b, v2.16b, #8
; CHECK-NEXT: zip1 v4.8b, v2.8b, v0.8b
; CHECK-NEXT: zip2 v2.8b, v2.8b, v0.8b
; CHECK-NEXT: zip1 v5.8b, v3.8b, v0.8b
; CHECK-NEXT: zip2 v3.8b, v3.8b, v0.8b
; CHECK-NEXT: ushll v4.4s, v4.4h, #0
; CHECK-NEXT: ushll v2.4s, v2.4h, #0
; CHECK-NEXT: ushll v5.4s, v5.4h, #0
; CHECK-NEXT: ushll v3.4s, v3.4h, #0
; CHECK-NEXT: shl v4.4s, v4.4s, #31
; CHECK-NEXT: zip1 v3.8b, v2.8b, v0.8b
; CHECK-NEXT: zip2 v4.8b, v2.8b, v0.8b
; CHECK-NEXT: ext v0.16b, v2.16b, v2.16b, #8
; CHECK-NEXT: ushll v1.4s, v3.4h, #0
; CHECK-NEXT: ushll v2.4s, v4.4h, #0
; CHECK-NEXT: zip1 v3.8b, v0.8b, v0.8b
; CHECK-NEXT: zip2 v4.8b, v0.8b, v0.8b
; CHECK-NEXT: shl v1.4s, v1.4s, #31
; CHECK-NEXT: shl v2.4s, v2.4s, #31
; CHECK-NEXT: cmlt v0.4s, v1.4s, #0
; CHECK-NEXT: cmlt v1.4s, v2.4s, #0
; CHECK-NEXT: ushll v2.4s, v3.4h, #0
; CHECK-NEXT: ushll v3.4s, v4.4h, #0
; CHECK-NEXT: shl v2.4s, v2.4s, #31
; CHECK-NEXT: shl v6.4s, v5.4s, #31
; CHECK-NEXT: shl v3.4s, v3.4s, #31
; CHECK-NEXT: cmlt v4.4s, v4.4s, #0
; CHECK-NEXT: cmlt v5.4s, v2.4s, #0
; CHECK-NEXT: cmlt v2.4s, v6.4s, #0
; CHECK-NEXT: cmlt v2.4s, v2.4s, #0
; CHECK-NEXT: cmlt v3.4s, v3.4s, #0
; CHECK-NEXT: mul v6.16b, v0.16b, v1.16b
; CHECK-NEXT: mov v0.16b, v4.16b
; CHECK-NEXT: mov v1.16b, v5.16b
; CHECK-NEXT: str q6, [x0]
; CHECK-NEXT: ret
%t = call {<16 x i8>, <16 x i1>} @llvm.umul.with.overflow.v16i8(<16 x i8> %a0, <16 x i8> %a1)
%val = extractvalue {<16 x i8>, <16 x i1>} %t, 0
Expand Down Expand Up @@ -262,30 +260,30 @@ define <4 x i32> @umulo_v4i24(<4 x i24> %a0, <4 x i24> %a1, <4 x i24>* %p2) noun
; CHECK: // %bb.0:
; CHECK-NEXT: bic v1.4s, #255, lsl #24
; CHECK-NEXT: bic v0.4s, #255, lsl #24
; CHECK-NEXT: umull2 v2.2d, v0.4s, v1.4s
; CHECK-NEXT: umull v3.2d, v0.2s, v1.2s
; CHECK-NEXT: mul v0.4s, v0.4s, v1.4s
; CHECK-NEXT: uzp2 v1.4s, v3.4s, v2.4s
; CHECK-NEXT: ushr v2.4s, v0.4s, #24
; CHECK-NEXT: mov w8, v0.s[3]
; CHECK-NEXT: mov w9, v0.s[2]
; CHECK-NEXT: mov w10, v0.s[1]
; CHECK-NEXT: cmeq v1.4s, v1.4s, #0
; CHECK-NEXT: fmov w11, s0
; CHECK-NEXT: cmtst v2.4s, v2.4s, v2.4s
; CHECK-NEXT: mul v2.4s, v0.4s, v1.4s
; CHECK-NEXT: umull2 v3.2d, v0.4s, v1.4s
; CHECK-NEXT: umull v0.2d, v0.2s, v1.2s
; CHECK-NEXT: mov w8, v2.s[3]
; CHECK-NEXT: mov w10, v2.s[2]
; CHECK-NEXT: mov w11, v2.s[1]
; CHECK-NEXT: ushr v1.4s, v2.4s, #24
; CHECK-NEXT: uzp2 v0.4s, v0.4s, v3.4s
; CHECK-NEXT: fmov w9, s2
; CHECK-NEXT: cmtst v1.4s, v1.4s, v1.4s
; CHECK-NEXT: sturh w8, [x0, #9]
; CHECK-NEXT: lsr w8, w8, #16
; CHECK-NEXT: strh w9, [x0, #6]
; CHECK-NEXT: lsr w9, w9, #16
; CHECK-NEXT: sturh w10, [x0, #3]
; CHECK-NEXT: orn v0.16b, v2.16b, v1.16b
; CHECK-NEXT: strh w10, [x0, #6]
; CHECK-NEXT: lsr w10, w10, #16
; CHECK-NEXT: cmeq v0.4s, v0.4s, #0
; CHECK-NEXT: sturh w11, [x0, #3]
; CHECK-NEXT: lsr w11, w11, #16
; CHECK-NEXT: strb w8, [x0, #11]
; CHECK-NEXT: lsr w8, w10, #16
; CHECK-NEXT: lsr w10, w11, #16
; CHECK-NEXT: strh w11, [x0]
; CHECK-NEXT: strb w9, [x0, #8]
; CHECK-NEXT: strb w8, [x0, #5]
; CHECK-NEXT: strb w10, [x0, #2]
; CHECK-NEXT: lsr w8, w9, #16
; CHECK-NEXT: strh w9, [x0]
; CHECK-NEXT: orn v0.16b, v1.16b, v0.16b
; CHECK-NEXT: strb w10, [x0, #8]
; CHECK-NEXT: strb w11, [x0, #5]
; CHECK-NEXT: strb w8, [x0, #2]
; CHECK-NEXT: ret
%t = call {<4 x i24>, <4 x i1>} @llvm.umul.with.overflow.v4i24(<4 x i24> %a0, <4 x i24> %a1)
%val = extractvalue {<4 x i24>, <4 x i1>} %t, 0
Expand Down
162 changes: 81 additions & 81 deletions llvm/test/CodeGen/AArch64/vecreduce-add.ll
Original file line number Diff line number Diff line change
Expand Up @@ -407,17 +407,17 @@ define i64 @add_v16i8_v16i64_zext(<16 x i8> %x) {
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ushll2 v1.8h, v0.16b, #0
; CHECK-NEXT: ushll v0.8h, v0.8b, #0
; CHECK-NEXT: ushll2 v2.4s, v1.8h, #0
; CHECK-NEXT: ushll v1.4s, v1.4h, #0
; CHECK-NEXT: ushll v2.4s, v1.4h, #0
; CHECK-NEXT: ushll2 v1.4s, v1.8h, #0
; CHECK-NEXT: ushll2 v3.4s, v0.8h, #0
; CHECK-NEXT: ushll v0.4s, v0.4h, #0
; CHECK-NEXT: uaddl2 v4.2d, v3.4s, v2.4s
; CHECK-NEXT: uaddl2 v5.2d, v0.4s, v1.4s
; CHECK-NEXT: uaddl v2.2d, v3.2s, v2.2s
; CHECK-NEXT: uaddl v0.2d, v0.2s, v1.2s
; CHECK-NEXT: add v1.2d, v5.2d, v4.2d
; CHECK-NEXT: add v0.2d, v0.2d, v2.2d
; CHECK-NEXT: uaddl2 v4.2d, v3.4s, v1.4s
; CHECK-NEXT: uaddl2 v5.2d, v0.4s, v2.4s
; CHECK-NEXT: uaddl v1.2d, v3.2s, v1.2s
; CHECK-NEXT: uaddl v0.2d, v0.2s, v2.2s
; CHECK-NEXT: add v2.2d, v5.2d, v4.2d
; CHECK-NEXT: add v0.2d, v0.2d, v1.2d
; CHECK-NEXT: add v0.2d, v0.2d, v2.2d
; CHECK-NEXT: addp d0, v0.2d
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
Expand All @@ -432,17 +432,17 @@ define i64 @add_v16i8_v16i64_sext(<16 x i8> %x) {
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: sshll2 v1.8h, v0.16b, #0
; CHECK-NEXT: sshll v0.8h, v0.8b, #0
; CHECK-NEXT: sshll2 v2.4s, v1.8h, #0
; CHECK-NEXT: sshll v1.4s, v1.4h, #0
; CHECK-NEXT: sshll v2.4s, v1.4h, #0
; CHECK-NEXT: sshll2 v1.4s, v1.8h, #0
; CHECK-NEXT: sshll2 v3.4s, v0.8h, #0
; CHECK-NEXT: sshll v0.4s, v0.4h, #0
; CHECK-NEXT: saddl2 v4.2d, v3.4s, v2.4s
; CHECK-NEXT: saddl2 v5.2d, v0.4s, v1.4s
; CHECK-NEXT: saddl v2.2d, v3.2s, v2.2s
; CHECK-NEXT: saddl v0.2d, v0.2s, v1.2s
; CHECK-NEXT: add v1.2d, v5.2d, v4.2d
; CHECK-NEXT: add v0.2d, v0.2d, v2.2d
; CHECK-NEXT: saddl2 v4.2d, v3.4s, v1.4s
; CHECK-NEXT: saddl2 v5.2d, v0.4s, v2.4s
; CHECK-NEXT: saddl v1.2d, v3.2s, v1.2s
; CHECK-NEXT: saddl v0.2d, v0.2s, v2.2s
; CHECK-NEXT: add v2.2d, v5.2d, v4.2d
; CHECK-NEXT: add v0.2d, v0.2d, v1.2d
; CHECK-NEXT: add v0.2d, v0.2d, v2.2d
; CHECK-NEXT: addp d0, v0.2d
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
Expand Down Expand Up @@ -1029,17 +1029,17 @@ define i64 @add_v16i8_v16i64_acc_zext(<16 x i8> %x, i64 %a) {
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ushll2 v1.8h, v0.16b, #0
; CHECK-NEXT: ushll v0.8h, v0.8b, #0
; CHECK-NEXT: ushll2 v2.4s, v1.8h, #0
; CHECK-NEXT: ushll v1.4s, v1.4h, #0
; CHECK-NEXT: ushll v2.4s, v1.4h, #0
; CHECK-NEXT: ushll2 v1.4s, v1.8h, #0
; CHECK-NEXT: ushll2 v3.4s, v0.8h, #0
; CHECK-NEXT: ushll v0.4s, v0.4h, #0
; CHECK-NEXT: uaddl2 v4.2d, v3.4s, v2.4s
; CHECK-NEXT: uaddl2 v5.2d, v0.4s, v1.4s
; CHECK-NEXT: uaddl v2.2d, v3.2s, v2.2s
; CHECK-NEXT: uaddl v0.2d, v0.2s, v1.2s
; CHECK-NEXT: add v1.2d, v5.2d, v4.2d
; CHECK-NEXT: add v0.2d, v0.2d, v2.2d
; CHECK-NEXT: uaddl2 v4.2d, v3.4s, v1.4s
; CHECK-NEXT: uaddl2 v5.2d, v0.4s, v2.4s
; CHECK-NEXT: uaddl v1.2d, v3.2s, v1.2s
; CHECK-NEXT: uaddl v0.2d, v0.2s, v2.2s
; CHECK-NEXT: add v2.2d, v5.2d, v4.2d
; CHECK-NEXT: add v0.2d, v0.2d, v1.2d
; CHECK-NEXT: add v0.2d, v0.2d, v2.2d
; CHECK-NEXT: addp d0, v0.2d
; CHECK-NEXT: fmov x8, d0
; CHECK-NEXT: add x0, x8, x0
Expand All @@ -1056,17 +1056,17 @@ define i64 @add_v16i8_v16i64_acc_sext(<16 x i8> %x, i64 %a) {
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: sshll2 v1.8h, v0.16b, #0
; CHECK-NEXT: sshll v0.8h, v0.8b, #0
; CHECK-NEXT: sshll2 v2.4s, v1.8h, #0
; CHECK-NEXT: sshll v1.4s, v1.4h, #0
; CHECK-NEXT: sshll v2.4s, v1.4h, #0
; CHECK-NEXT: sshll2 v1.4s, v1.8h, #0
; CHECK-NEXT: sshll2 v3.4s, v0.8h, #0
; CHECK-NEXT: sshll v0.4s, v0.4h, #0
; CHECK-NEXT: saddl2 v4.2d, v3.4s, v2.4s
; CHECK-NEXT: saddl2 v5.2d, v0.4s, v1.4s
; CHECK-NEXT: saddl v2.2d, v3.2s, v2.2s
; CHECK-NEXT: saddl v0.2d, v0.2s, v1.2s
; CHECK-NEXT: add v1.2d, v5.2d, v4.2d
; CHECK-NEXT: add v0.2d, v0.2d, v2.2d
; CHECK-NEXT: saddl2 v4.2d, v3.4s, v1.4s
; CHECK-NEXT: saddl2 v5.2d, v0.4s, v2.4s
; CHECK-NEXT: saddl v1.2d, v3.2s, v1.2s
; CHECK-NEXT: saddl v0.2d, v0.2s, v2.2s
; CHECK-NEXT: add v2.2d, v5.2d, v4.2d
; CHECK-NEXT: add v0.2d, v0.2d, v1.2d
; CHECK-NEXT: add v0.2d, v0.2d, v2.2d
; CHECK-NEXT: addp d0, v0.2d
; CHECK-NEXT: fmov x8, d0
; CHECK-NEXT: add x0, x8, x0
Expand Down Expand Up @@ -1766,29 +1766,29 @@ define i64 @add_pair_v16i8_v16i64_zext(<16 x i8> %x, <16 x i8> %y) {
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ushll2 v2.8h, v0.16b, #0
; CHECK-NEXT: ushll v0.8h, v0.8b, #0
; CHECK-NEXT: ushll2 v3.8h, v1.16b, #0
; CHECK-NEXT: ushll v1.8h, v1.8b, #0
; CHECK-NEXT: ushll2 v4.4s, v2.8h, #0
; CHECK-NEXT: ushll v2.4s, v2.4h, #0
; CHECK-NEXT: ushll2 v5.4s, v0.8h, #0
; CHECK-NEXT: ushll v3.4s, v2.4h, #0
; CHECK-NEXT: ushll2 v2.4s, v2.8h, #0
; CHECK-NEXT: ushll2 v4.4s, v0.8h, #0
; CHECK-NEXT: ushll v0.4s, v0.4h, #0
; CHECK-NEXT: ushll v6.4s, v3.4h, #0
; CHECK-NEXT: ushll v7.4s, v1.4h, #0
; CHECK-NEXT: ushll2 v3.4s, v3.8h, #0
; CHECK-NEXT: ushll2 v1.4s, v1.8h, #0
; CHECK-NEXT: uaddl2 v16.2d, v5.4s, v4.4s
; CHECK-NEXT: uaddl v4.2d, v5.2s, v4.2s
; CHECK-NEXT: uaddl2 v5.2d, v0.4s, v2.4s
; CHECK-NEXT: uaddl v0.2d, v0.2s, v2.2s
; CHECK-NEXT: uaddl2 v2.2d, v1.4s, v3.4s
; CHECK-NEXT: uaddl2 v5.2d, v4.4s, v2.4s
; CHECK-NEXT: uaddl2 v6.2d, v0.4s, v3.4s
; CHECK-NEXT: ushll2 v7.8h, v1.16b, #0
; CHECK-NEXT: ushll v1.8h, v1.8b, #0
; CHECK-NEXT: uaddl v2.2d, v4.2s, v2.2s
; CHECK-NEXT: add v4.2d, v6.2d, v5.2d
; CHECK-NEXT: uaddl v0.2d, v0.2s, v3.2s
; CHECK-NEXT: ushll v3.4s, v7.4h, #0
; CHECK-NEXT: ushll2 v5.4s, v7.8h, #0
; CHECK-NEXT: ushll2 v6.4s, v1.8h, #0
; CHECK-NEXT: ushll v1.4s, v1.4h, #0
; CHECK-NEXT: uaddl2 v7.2d, v6.4s, v5.4s
; CHECK-NEXT: uaddl v5.2d, v6.2s, v5.2s
; CHECK-NEXT: uaddl2 v6.2d, v1.4s, v3.4s
; CHECK-NEXT: uaddl v1.2d, v1.2s, v3.2s
; CHECK-NEXT: uaddl2 v3.2d, v7.4s, v6.4s
; CHECK-NEXT: uaddl v6.2d, v7.2s, v6.2s
; CHECK-NEXT: add v5.2d, v5.2d, v16.2d
; CHECK-NEXT: add v0.2d, v0.2d, v2.2d
; CHECK-NEXT: add v2.2d, v6.2d, v7.2d
; CHECK-NEXT: add v1.2d, v1.2d, v5.2d
; CHECK-NEXT: add v0.2d, v0.2d, v4.2d
; CHECK-NEXT: add v2.2d, v3.2d, v2.2d
; CHECK-NEXT: add v1.2d, v6.2d, v1.2d
; CHECK-NEXT: add v0.2d, v0.2d, v5.2d
; CHECK-NEXT: add v1.2d, v1.2d, v2.2d
; CHECK-NEXT: add v0.2d, v0.2d, v1.2d
; CHECK-NEXT: addp d0, v0.2d
Expand All @@ -1808,29 +1808,29 @@ define i64 @add_pair_v16i8_v16i64_sext(<16 x i8> %x, <16 x i8> %y) {
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: sshll2 v2.8h, v0.16b, #0
; CHECK-NEXT: sshll v0.8h, v0.8b, #0
; CHECK-NEXT: sshll2 v3.8h, v1.16b, #0
; CHECK-NEXT: sshll v1.8h, v1.8b, #0
; CHECK-NEXT: sshll2 v4.4s, v2.8h, #0
; CHECK-NEXT: sshll v2.4s, v2.4h, #0
; CHECK-NEXT: sshll2 v5.4s, v0.8h, #0
; CHECK-NEXT: sshll v3.4s, v2.4h, #0
; CHECK-NEXT: sshll2 v2.4s, v2.8h, #0
; CHECK-NEXT: sshll2 v4.4s, v0.8h, #0
; CHECK-NEXT: sshll v0.4s, v0.4h, #0
; CHECK-NEXT: sshll v6.4s, v3.4h, #0
; CHECK-NEXT: sshll v7.4s, v1.4h, #0
; CHECK-NEXT: sshll2 v3.4s, v3.8h, #0
; CHECK-NEXT: sshll2 v1.4s, v1.8h, #0
; CHECK-NEXT: saddl2 v16.2d, v5.4s, v4.4s
; CHECK-NEXT: saddl v4.2d, v5.2s, v4.2s
; CHECK-NEXT: saddl2 v5.2d, v0.4s, v2.4s
; CHECK-NEXT: saddl v0.2d, v0.2s, v2.2s
; CHECK-NEXT: saddl2 v2.2d, v1.4s, v3.4s
; CHECK-NEXT: saddl2 v5.2d, v4.4s, v2.4s
; CHECK-NEXT: saddl2 v6.2d, v0.4s, v3.4s
; CHECK-NEXT: sshll2 v7.8h, v1.16b, #0
; CHECK-NEXT: sshll v1.8h, v1.8b, #0
; CHECK-NEXT: saddl v2.2d, v4.2s, v2.2s
; CHECK-NEXT: add v4.2d, v6.2d, v5.2d
; CHECK-NEXT: saddl v0.2d, v0.2s, v3.2s
; CHECK-NEXT: sshll v3.4s, v7.4h, #0
; CHECK-NEXT: sshll2 v5.4s, v7.8h, #0
; CHECK-NEXT: sshll2 v6.4s, v1.8h, #0
; CHECK-NEXT: sshll v1.4s, v1.4h, #0
; CHECK-NEXT: saddl2 v7.2d, v6.4s, v5.4s
; CHECK-NEXT: saddl v5.2d, v6.2s, v5.2s
; CHECK-NEXT: saddl2 v6.2d, v1.4s, v3.4s
; CHECK-NEXT: saddl v1.2d, v1.2s, v3.2s
; CHECK-NEXT: saddl2 v3.2d, v7.4s, v6.4s
; CHECK-NEXT: saddl v6.2d, v7.2s, v6.2s
; CHECK-NEXT: add v5.2d, v5.2d, v16.2d
; CHECK-NEXT: add v0.2d, v0.2d, v2.2d
; CHECK-NEXT: add v2.2d, v6.2d, v7.2d
; CHECK-NEXT: add v1.2d, v1.2d, v5.2d
; CHECK-NEXT: add v0.2d, v0.2d, v4.2d
; CHECK-NEXT: add v2.2d, v3.2d, v2.2d
; CHECK-NEXT: add v1.2d, v6.2d, v1.2d
; CHECK-NEXT: add v0.2d, v0.2d, v5.2d
; CHECK-NEXT: add v1.2d, v1.2d, v2.2d
; CHECK-NEXT: add v0.2d, v0.2d, v1.2d
; CHECK-NEXT: addp d0, v0.2d
Expand Down Expand Up @@ -1925,21 +1925,21 @@ entry:
define i64 @add_pair_v4i8_v4i64_sext(<4 x i8> %x, <4 x i8> %y) {
; CHECK-LABEL: add_pair_v4i8_v4i64_sext:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ushll v0.4s, v0.4h, #0
; CHECK-NEXT: ushll v1.4s, v1.4h, #0
; CHECK-NEXT: ushll v2.2d, v0.2s, #0
; CHECK-NEXT: ushll v3.2d, v1.2s, #0
; CHECK-NEXT: ushll2 v0.2d, v0.4s, #0
; CHECK-NEXT: ushll v0.4s, v0.4h, #0
; CHECK-NEXT: ushll v2.2d, v1.2s, #0
; CHECK-NEXT: ushll v3.2d, v0.2s, #0
; CHECK-NEXT: ushll2 v1.2d, v1.4s, #0
; CHECK-NEXT: shl v2.2d, v2.2d, #56
; CHECK-NEXT: ushll2 v0.2d, v0.4s, #0
; CHECK-NEXT: shl v3.2d, v3.2d, #56
; CHECK-NEXT: shl v2.2d, v2.2d, #56
; CHECK-NEXT: shl v0.2d, v0.2d, #56
; CHECK-NEXT: sshr v3.2d, v3.2d, #56
; CHECK-NEXT: shl v1.2d, v1.2d, #56
; CHECK-NEXT: sshr v2.2d, v2.2d, #56
; CHECK-NEXT: sshr v3.2d, v3.2d, #56
; CHECK-NEXT: ssra v2.2d, v0.2d, #56
; CHECK-NEXT: ssra v3.2d, v1.2d, #56
; CHECK-NEXT: add v0.2d, v2.2d, v3.2d
; CHECK-NEXT: ssra v3.2d, v0.2d, #56
; CHECK-NEXT: ssra v2.2d, v1.2d, #56
; CHECK-NEXT: add v0.2d, v3.2d, v2.2d
; CHECK-NEXT: addp d0, v0.2d
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
Expand Down
6 changes: 3 additions & 3 deletions llvm/test/CodeGen/AArch64/vecreduce-and-legalization.ll
Original file line number Diff line number Diff line change
Expand Up @@ -96,9 +96,9 @@ define i8 @test_v3i8(<3 x i8> %a) nounwind {
define i8 @test_v9i8(<9 x i8> %a) nounwind {
; CHECK-LABEL: test_v9i8:
; CHECK: // %bb.0:
; CHECK-NEXT: mov v1.16b, v0.16b
; CHECK-NEXT: mov w8, #-1
; CHECK-NEXT: umov w12, v0.b[4]
; CHECK-NEXT: mov v1.16b, v0.16b
; CHECK-NEXT: mov v1.b[9], w8
; CHECK-NEXT: mov v1.b[10], w8
; CHECK-NEXT: mov v1.b[11], w8
Expand Down Expand Up @@ -129,8 +129,8 @@ define i32 @test_v3i32(<3 x i32> %a) nounwind {
; CHECK: // %bb.0:
; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
; CHECK-NEXT: mov w8, v0.s[1]
; CHECK-NEXT: and v1.8b, v0.8b, v1.8b
; CHECK-NEXT: fmov w9, s1
; CHECK-NEXT: and v0.8b, v0.8b, v1.8b
; CHECK-NEXT: fmov w9, s0
; CHECK-NEXT: and w0, w9, w8
; CHECK-NEXT: ret
%b = call i32 @llvm.vector.reduce.and.v3i32(<3 x i32> %a)
Expand Down
6 changes: 3 additions & 3 deletions llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization.ll
Original file line number Diff line number Diff line change
Expand Up @@ -302,14 +302,14 @@ define half @test_v11f16_ninf(<11 x half> %a) nounwind {
; CHECK-FP-NEXT: // kill: def $h7 killed $h7 def $q7
; CHECK-FP-NEXT: mov v0.h[1], v1.h[0]
; CHECK-FP-NEXT: mvni v1.8h, #4, lsl #8
; CHECK-FP-NEXT: mov v0.h[2], v2.h[0]
; CHECK-FP-NEXT: ld1 { v1.h }[0], [x8]
; CHECK-FP-NEXT: add x8, sp, #8
; CHECK-FP-NEXT: mov v0.h[3], v3.h[0]
; CHECK-FP-NEXT: mov v0.h[2], v2.h[0]
; CHECK-FP-NEXT: ld1 { v1.h }[1], [x8]
; CHECK-FP-NEXT: add x8, sp, #16
; CHECK-FP-NEXT: mov v0.h[4], v4.h[0]
; CHECK-FP-NEXT: mov v0.h[3], v3.h[0]
; CHECK-FP-NEXT: ld1 { v1.h }[2], [x8]
; CHECK-FP-NEXT: mov v0.h[4], v4.h[0]
; CHECK-FP-NEXT: mov v0.h[5], v5.h[0]
; CHECK-FP-NEXT: mov v0.h[6], v6.h[0]
; CHECK-FP-NEXT: mov v0.h[7], v7.h[0]
Expand Down
6 changes: 3 additions & 3 deletions llvm/test/CodeGen/AArch64/vecreduce-fmin-legalization.ll
Original file line number Diff line number Diff line change
Expand Up @@ -302,14 +302,14 @@ define half @test_v11f16_ninf(<11 x half> %a) nounwind {
; CHECK-FP-NEXT: // kill: def $h7 killed $h7 def $q7
; CHECK-FP-NEXT: mov v0.h[1], v1.h[0]
; CHECK-FP-NEXT: mvni v1.8h, #132, lsl #8
; CHECK-FP-NEXT: mov v0.h[2], v2.h[0]
; CHECK-FP-NEXT: ld1 { v1.h }[0], [x8]
; CHECK-FP-NEXT: add x8, sp, #8
; CHECK-FP-NEXT: mov v0.h[3], v3.h[0]
; CHECK-FP-NEXT: mov v0.h[2], v2.h[0]
; CHECK-FP-NEXT: ld1 { v1.h }[1], [x8]
; CHECK-FP-NEXT: add x8, sp, #16
; CHECK-FP-NEXT: mov v0.h[4], v4.h[0]
; CHECK-FP-NEXT: mov v0.h[3], v3.h[0]
; CHECK-FP-NEXT: ld1 { v1.h }[2], [x8]
; CHECK-FP-NEXT: mov v0.h[4], v4.h[0]
; CHECK-FP-NEXT: mov v0.h[5], v5.h[0]
; CHECK-FP-NEXT: mov v0.h[6], v6.h[0]
; CHECK-FP-NEXT: mov v0.h[7], v7.h[0]
Expand Down
74 changes: 37 additions & 37 deletions llvm/test/CodeGen/AArch64/vector-fcopysign.ll
Original file line number Diff line number Diff line change
Expand Up @@ -82,8 +82,8 @@ define <2 x float> @test_copysign_v2f32_v2f32(<2 x float> %a, <2 x float> %b) #0
define <2 x float> @test_copysign_v2f32_v2f64(<2 x float> %a, <2 x double> %b) #0 {
; CHECK-LABEL: test_copysign_v2f32_v2f64:
; CHECK: ; %bb.0:
; CHECK-NEXT: mvni.2s v2, #128, lsl #24
; CHECK-NEXT: fcvtn v1.2s, v1.2d
; CHECK-NEXT: mvni.2s v2, #128, lsl #24
; CHECK-NEXT: bif.8b v0, v1, v2
; CHECK-NEXT: ret
%tmp0 = fptrunc <2 x double> %b to <2 x float>
Expand All @@ -110,9 +110,9 @@ define <4 x float> @test_copysign_v4f32_v4f64(<4 x float> %a, <4 x double> %b) #
; CHECK-LABEL: test_copysign_v4f32_v4f64:
; CHECK: ; %bb.0:
; CHECK-NEXT: fcvtn v1.2s, v1.2d
; CHECK-NEXT: mvni.4s v3, #128, lsl #24
; CHECK-NEXT: fcvtn2 v1.4s, v2.2d
; CHECK-NEXT: bif.16b v0, v1, v3
; CHECK-NEXT: mvni.4s v2, #128, lsl #24
; CHECK-NEXT: bif.16b v0, v1, v2
; CHECK-NEXT: ret
%tmp0 = fptrunc <4 x double> %b to <4 x float>
%r = call <4 x float> @llvm.copysign.v4f32(<4 x float> %a, <4 x float> %tmp0)
Expand Down Expand Up @@ -191,21 +191,21 @@ define <4 x half> @test_copysign_v4f16_v4f16(<4 x half> %a, <4 x half> %b) #0 {
; NOFP16-NEXT: ; kill: def $d0 killed $d0 def $q0
; NOFP16-NEXT: mov h3, v1[1]
; NOFP16-NEXT: mov h4, v0[1]
; NOFP16-NEXT: mvni.4s v2, #128, lsl #24
; NOFP16-NEXT: fcvt s5, h1
; NOFP16-NEXT: fcvt s6, h0
; NOFP16-NEXT: mov h7, v1[2]
; NOFP16-NEXT: mov h16, v0[2]
; NOFP16-NEXT: mvni.4s v2, #128, lsl #24
; NOFP16-NEXT: mov h1, v1[3]
; NOFP16-NEXT: fcvt s3, h3
; NOFP16-NEXT: fcvt s4, h4
; NOFP16-NEXT: mov h1, v1[3]
; NOFP16-NEXT: bit.16b v5, v6, v2
; NOFP16-NEXT: fcvt s6, h7
; NOFP16-NEXT: fcvt s7, h16
; NOFP16-NEXT: fcvt s1, h1
; NOFP16-NEXT: bit.16b v3, v4, v2
; NOFP16-NEXT: mov h4, v0[3]
; NOFP16-NEXT: fcvt h0, s5
; NOFP16-NEXT: fcvt s1, h1
; NOFP16-NEXT: bit.16b v6, v7, v2
; NOFP16-NEXT: fcvt h3, s3
; NOFP16-NEXT: fcvt s4, h4
Expand Down Expand Up @@ -233,9 +233,9 @@ define <4 x half> @test_copysign_v4f16_v4f32(<4 x half> %a, <4 x float> %b) #0 {
; NOFP16-NEXT: fcvtn v1.4h, v1.4s
; NOFP16-NEXT: ; kill: def $d0 killed $d0 def $q0
; NOFP16-NEXT: mov h3, v0[1]
; NOFP16-NEXT: mvni.4s v2, #128, lsl #24
; NOFP16-NEXT: fcvt s5, h0
; NOFP16-NEXT: mov h7, v0[2]
; NOFP16-NEXT: mvni.4s v2, #128, lsl #24
; NOFP16-NEXT: mov h4, v1[1]
; NOFP16-NEXT: fcvt s6, h1
; NOFP16-NEXT: mov h16, v1[2]
Expand Down Expand Up @@ -263,8 +263,8 @@ define <4 x half> @test_copysign_v4f16_v4f32(<4 x half> %a, <4 x float> %b) #0 {
;
; FP16-LABEL: test_copysign_v4f16_v4f32:
; FP16: ; %bb.0:
; FP16-NEXT: mvni.4h v2, #128, lsl #8
; FP16-NEXT: fcvtn v1.4h, v1.4s
; FP16-NEXT: mvni.4h v2, #128, lsl #8
; FP16-NEXT: bif.8b v0, v1, v2
; FP16-NEXT: ret
%tmp0 = fptrunc <4 x float> %b to <4 x half>
Expand All @@ -278,46 +278,46 @@ define <4 x half> @test_copysign_v4f16_v4f64(<4 x half> %a, <4 x double> %b) #0
; NOFP16-NEXT: ; kill: def $d0 killed $d0 def $q0
; NOFP16-NEXT: mov d4, v1[1]
; NOFP16-NEXT: mov h5, v0[1]
; NOFP16-NEXT: mvni.4s v3, #128, lsl #24
; NOFP16-NEXT: fcvt s1, d1
; NOFP16-NEXT: fcvt s6, h0
; NOFP16-NEXT: mov h7, v0[2]
; NOFP16-NEXT: mvni.4s v3, #128, lsl #24
; NOFP16-NEXT: fcvt s4, d4
; NOFP16-NEXT: fcvt s5, h5
; NOFP16-NEXT: bit.16b v1, v6, v3
; NOFP16-NEXT: fcvt s6, d2
; NOFP16-NEXT: fcvt s7, h7
; NOFP16-NEXT: bit.16b v4, v5, v3
; NOFP16-NEXT: mov d2, v2[1]
; NOFP16-NEXT: bit.16b v4, v5, v3
; NOFP16-NEXT: mov h5, v0[3]
; NOFP16-NEXT: fcvt h0, s1
; NOFP16-NEXT: bit.16b v6, v7, v3
; NOFP16-NEXT: fcvt h1, s4
; NOFP16-NEXT: fcvt s2, d2
; NOFP16-NEXT: fcvt h1, s4
; NOFP16-NEXT: fcvt s4, h5
; NOFP16-NEXT: fcvt h5, s6
; NOFP16-NEXT: mov.h v0[1], v1[0]
; NOFP16-NEXT: mov.16b v1, v3
; NOFP16-NEXT: mov.h v0[2], v5[0]
; NOFP16-NEXT: bsl.16b v1, v4, v2
; NOFP16-NEXT: mov.h v0[2], v5[0]
; NOFP16-NEXT: fcvt h1, s1
; NOFP16-NEXT: mov.h v0[3], v1[0]
; NOFP16-NEXT: ; kill: def $d0 killed $d0 killed $q0
; NOFP16-NEXT: ret
;
; FP16-LABEL: test_copysign_v4f16_v4f64:
; FP16: ; %bb.0:
; FP16-NEXT: mov d4, v1[1]
; FP16-NEXT: mov d3, v1[1]
; FP16-NEXT: fcvt h1, d1
; FP16-NEXT: mvni.4h v3, #128, lsl #8
; FP16-NEXT: fcvt h4, d4
; FP16-NEXT: mov.h v1[1], v4[0]
; FP16-NEXT: fcvt h4, d2
; FP16-NEXT: fcvt h3, d3
; FP16-NEXT: mov.h v1[1], v3[0]
; FP16-NEXT: fcvt h3, d2
; FP16-NEXT: mov d2, v2[1]
; FP16-NEXT: mov.h v1[2], v4[0]
; FP16-NEXT: mov.h v1[2], v3[0]
; FP16-NEXT: fcvt h2, d2
; FP16-NEXT: mov.h v1[3], v2[0]
; FP16-NEXT: bif.8b v0, v1, v3
; FP16-NEXT: mvni.4h v2, #128, lsl #8
; FP16-NEXT: bif.8b v0, v1, v2
; FP16-NEXT: ret
%tmp0 = fptrunc <4 x double> %b to <4 x half>
%r = call <4 x half> @llvm.copysign.v4f16(<4 x half> %a, <4 x half> %tmp0)
Expand All @@ -333,33 +333,33 @@ define <8 x half> @test_copysign_v8f16_v8f16(<8 x half> %a, <8 x half> %b) #0 {
; NOFP16: ; %bb.0:
; NOFP16-NEXT: mov h5, v1[1]
; NOFP16-NEXT: mov h6, v0[1]
; NOFP16-NEXT: mvni.4s v3, #128, lsl #24
; NOFP16-NEXT: fcvt s2, h1
; NOFP16-NEXT: fcvt s4, h0
; NOFP16-NEXT: mov h7, v1[2]
; NOFP16-NEXT: mov h16, v0[2]
; NOFP16-NEXT: mvni.4s v3, #128, lsl #24
; NOFP16-NEXT: mov h17, v0[3]
; NOFP16-NEXT: fcvt s5, h5
; NOFP16-NEXT: fcvt s6, h6
; NOFP16-NEXT: mov h17, v0[3]
; NOFP16-NEXT: mov h18, v0[5]
; NOFP16-NEXT: bit.16b v2, v4, v3
; NOFP16-NEXT: mov h4, v1[3]
; NOFP16-NEXT: fcvt s7, h7
; NOFP16-NEXT: fcvt s16, h16
; NOFP16-NEXT: bit.16b v5, v6, v3
; NOFP16-NEXT: fcvt s17, h17
; NOFP16-NEXT: bit.16b v5, v6, v3
; NOFP16-NEXT: mov.16b v6, v3
; NOFP16-NEXT: fcvt s4, h4
; NOFP16-NEXT: fcvt h2, s2
; NOFP16-NEXT: fcvt h5, s5
; NOFP16-NEXT: bsl.16b v6, v16, v7
; NOFP16-NEXT: mov h7, v1[4]
; NOFP16-NEXT: mov h16, v0[4]
; NOFP16-NEXT: fcvt h2, s2
; NOFP16-NEXT: fcvt h5, s5
; NOFP16-NEXT: bit.16b v4, v17, v3
; NOFP16-NEXT: mov h17, v1[5]
; NOFP16-NEXT: mov.h v2[1], v5[0]
; NOFP16-NEXT: fcvt s7, h7
; NOFP16-NEXT: fcvt s16, h16
; NOFP16-NEXT: mov.h v2[1], v5[0]
; NOFP16-NEXT: fcvt h5, s6
; NOFP16-NEXT: fcvt s6, h17
; NOFP16-NEXT: fcvt s17, h18
Expand Down Expand Up @@ -403,11 +403,11 @@ define <8 x half> @test_copysign_v8f16_v8f32(<8 x half> %a, <8 x float> %b) #0 {
; NOFP16: ; %bb.0:
; NOFP16-NEXT: fcvtn v1.4h, v1.4s
; NOFP16-NEXT: fcvtn v2.4h, v2.4s
; NOFP16-NEXT: mvni.4s v3, #128, lsl #24
; NOFP16-NEXT: mov h4, v0[1]
; NOFP16-NEXT: mov h5, v0[4]
; NOFP16-NEXT: fcvt s7, h0
; NOFP16-NEXT: mov h17, v0[2]
; NOFP16-NEXT: mvni.4s v3, #128, lsl #24
; NOFP16-NEXT: mov h6, v1[1]
; NOFP16-NEXT: fcvt s16, h1
; NOFP16-NEXT: fcvt s4, h4
Expand All @@ -425,29 +425,29 @@ define <8 x half> @test_copysign_v8f16_v8f32(<8 x half> %a, <8 x float> %b) #0 {
; NOFP16-NEXT: fcvt h1, s7
; NOFP16-NEXT: mov.16b v7, v3
; NOFP16-NEXT: fcvt h4, s4
; NOFP16-NEXT: bsl.16b v7, v17, v18
; NOFP16-NEXT: fcvt s6, h6
; NOFP16-NEXT: fcvt s16, h16
; NOFP16-NEXT: fcvt h5, s5
; NOFP16-NEXT: bsl.16b v7, v17, v18
; NOFP16-NEXT: mov h17, v0[5]
; NOFP16-NEXT: mov h18, v2[1]
; NOFP16-NEXT: fcvt h5, s5
; NOFP16-NEXT: mov.h v1[1], v4[0]
; NOFP16-NEXT: bif.16b v6, v16, v3
; NOFP16-NEXT: fcvt h4, s7
; NOFP16-NEXT: bif.16b v6, v16, v3
; NOFP16-NEXT: fcvt s7, h17
; NOFP16-NEXT: fcvt s17, h18
; NOFP16-NEXT: mov h16, v2[2]
; NOFP16-NEXT: mov h2, v2[3]
; NOFP16-NEXT: fcvt h6, s6
; NOFP16-NEXT: mov.h v1[2], v4[0]
; NOFP16-NEXT: mov h4, v0[6]
; NOFP16-NEXT: bif.16b v7, v17, v3
; NOFP16-NEXT: fcvt s16, h16
; NOFP16-NEXT: mov h16, v2[2]
; NOFP16-NEXT: fcvt h6, s6
; NOFP16-NEXT: mov h0, v0[7]
; NOFP16-NEXT: fcvt s2, h2
; NOFP16-NEXT: bif.16b v7, v17, v3
; NOFP16-NEXT: mov h2, v2[3]
; NOFP16-NEXT: fcvt s4, h4
; NOFP16-NEXT: fcvt s16, h16
; NOFP16-NEXT: mov.h v1[3], v6[0]
; NOFP16-NEXT: fcvt s0, h0
; NOFP16-NEXT: fcvt s2, h2
; NOFP16-NEXT: bif.16b v4, v16, v3
; NOFP16-NEXT: mov.h v1[4], v5[0]
; NOFP16-NEXT: fcvt h5, s7
Expand All @@ -464,9 +464,9 @@ define <8 x half> @test_copysign_v8f16_v8f32(<8 x half> %a, <8 x float> %b) #0 {
; FP16: ; %bb.0:
; FP16-NEXT: fcvtn v2.4h, v2.4s
; FP16-NEXT: fcvtn v1.4h, v1.4s
; FP16-NEXT: mvni.8h v3, #128, lsl #8
; FP16-NEXT: mov.d v1[1], v2[0]
; FP16-NEXT: bif.16b v0, v1, v3
; FP16-NEXT: mvni.8h v2, #128, lsl #8
; FP16-NEXT: bif.16b v0, v1, v2
; FP16-NEXT: ret
%tmp0 = fptrunc <8 x float> %b to <8 x half>
%r = call <8 x half> @llvm.copysign.v8f16(<8 x half> %a, <8 x half> %tmp0)
Expand Down
30 changes: 15 additions & 15 deletions llvm/test/CodeGen/AArch64/vselect-constants.ll
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,11 @@
define <4 x i32> @sel_C1_or_C2_vec(<4 x i1> %cond) {
; CHECK-LABEL: sel_C1_or_C2_vec:
; CHECK: // %bb.0:
; CHECK-NEXT: ushll v0.4s, v0.4h, #0
; CHECK-NEXT: adrp x8, .LCPI0_0
; CHECK-NEXT: adrp x9, .LCPI0_1
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI0_0]
; CHECK-NEXT: ushll v0.4s, v0.4h, #0
; CHECK-NEXT: shl v0.4s, v0.4s, #31
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI0_0]
; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI0_1]
; CHECK-NEXT: cmlt v0.4s, v0.4s, #0
; CHECK-NEXT: bsl v0.16b, v2.16b, v1.16b
Expand All @@ -29,9 +29,9 @@ define <4 x i32> @cmp_sel_C1_or_C2_vec(<4 x i32> %x, <4 x i32> %y) {
; CHECK-NEXT: adrp x8, .LCPI1_0
; CHECK-NEXT: adrp x9, .LCPI1_1
; CHECK-NEXT: cmeq v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI1_0]
; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI1_1]
; CHECK-NEXT: bsl v0.16b, v2.16b, v1.16b
; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI1_0]
; CHECK-NEXT: ldr q3, [x9, :lo12:.LCPI1_1]
; CHECK-NEXT: bsl v0.16b, v3.16b, v2.16b
; CHECK-NEXT: ret
%cond = icmp eq <4 x i32> %x, %y
%add = select <4 x i1> %cond, <4 x i32> <i32 3000, i32 1, i32 -1, i32 0>, <4 x i32> <i32 42, i32 0, i32 -2, i32 -1>
Expand All @@ -41,11 +41,11 @@ define <4 x i32> @cmp_sel_C1_or_C2_vec(<4 x i32> %x, <4 x i32> %y) {
define <4 x i32> @sel_Cplus1_or_C_vec(<4 x i1> %cond) {
; CHECK-LABEL: sel_Cplus1_or_C_vec:
; CHECK: // %bb.0:
; CHECK-NEXT: ushll v0.4s, v0.4h, #0
; CHECK-NEXT: adrp x8, .LCPI2_0
; CHECK-NEXT: adrp x9, .LCPI2_1
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI2_0]
; CHECK-NEXT: ushll v0.4s, v0.4h, #0
; CHECK-NEXT: shl v0.4s, v0.4s, #31
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI2_0]
; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI2_1]
; CHECK-NEXT: cmlt v0.4s, v0.4s, #0
; CHECK-NEXT: bsl v0.16b, v2.16b, v1.16b
Expand All @@ -60,9 +60,9 @@ define <4 x i32> @cmp_sel_Cplus1_or_C_vec(<4 x i32> %x, <4 x i32> %y) {
; CHECK-NEXT: adrp x8, .LCPI3_0
; CHECK-NEXT: adrp x9, .LCPI3_1
; CHECK-NEXT: cmeq v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI3_0]
; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI3_1]
; CHECK-NEXT: bsl v0.16b, v2.16b, v1.16b
; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI3_0]
; CHECK-NEXT: ldr q3, [x9, :lo12:.LCPI3_1]
; CHECK-NEXT: bsl v0.16b, v3.16b, v2.16b
; CHECK-NEXT: ret
%cond = icmp eq <4 x i32> %x, %y
%add = select <4 x i1> %cond, <4 x i32> <i32 43, i32 1, i32 -1, i32 0>, <4 x i32> <i32 42, i32 0, i32 -2, i32 -1>
Expand All @@ -72,11 +72,11 @@ define <4 x i32> @cmp_sel_Cplus1_or_C_vec(<4 x i32> %x, <4 x i32> %y) {
define <4 x i32> @sel_Cminus1_or_C_vec(<4 x i1> %cond) {
; CHECK-LABEL: sel_Cminus1_or_C_vec:
; CHECK: // %bb.0:
; CHECK-NEXT: ushll v0.4s, v0.4h, #0
; CHECK-NEXT: adrp x8, .LCPI4_0
; CHECK-NEXT: adrp x9, .LCPI4_1
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI4_0]
; CHECK-NEXT: ushll v0.4s, v0.4h, #0
; CHECK-NEXT: shl v0.4s, v0.4s, #31
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI4_0]
; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI4_1]
; CHECK-NEXT: cmlt v0.4s, v0.4s, #0
; CHECK-NEXT: bsl v0.16b, v2.16b, v1.16b
Expand All @@ -91,9 +91,9 @@ define <4 x i32> @cmp_sel_Cminus1_or_C_vec(<4 x i32> %x, <4 x i32> %y) {
; CHECK-NEXT: adrp x8, .LCPI5_0
; CHECK-NEXT: adrp x9, .LCPI5_1
; CHECK-NEXT: cmeq v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI5_0]
; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI5_1]
; CHECK-NEXT: bsl v0.16b, v2.16b, v1.16b
; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI5_0]
; CHECK-NEXT: ldr q3, [x9, :lo12:.LCPI5_1]
; CHECK-NEXT: bsl v0.16b, v3.16b, v2.16b
; CHECK-NEXT: ret
%cond = icmp eq <4 x i32> %x, %y
%add = select <4 x i1> %cond, <4 x i32> <i32 43, i32 1, i32 -1, i32 0>, <4 x i32> <i32 44, i32 2, i32 0, i32 1>
Expand Down
950 changes: 475 additions & 475 deletions llvm/test/tools/llvm-mca/AArch64/Cortex/A55-neon-instructions.s

Large diffs are not rendered by default.