169 changes: 86 additions & 83 deletions llvm/test/CodeGen/AArch64/active_lane_mask.ll
Original file line number Diff line number Diff line change
Expand Up @@ -131,9 +131,9 @@ define <vscale x 2 x i1> @lane_mask_nxv2i1_i8(i8 %index, i8 %TC) {
; CHECK-NEXT: index z0.d, #0, #1
; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT: and x8, x0, #0xff
; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov z1.d, x8
; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
; CHECK-NEXT: and x8, x1, #0xff
; CHECK-NEXT: and z0.d, z0.d, #0xff
; CHECK-NEXT: add z0.d, z0.d, z1.d
Expand All @@ -153,49 +153,51 @@ define <vscale x 32 x i1> @lane_mask_nxv32i1_i32(i32 %index, i32 %TC) {
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-1
; CHECK-NEXT: str p7, [sp, #4, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: str p6, [sp, #5, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: str p5, [sp, #6, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: index z0.s, #0, #1
; CHECK-NEXT: mov z1.s, w0
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: mov z25.s, w1
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: mov z2.d, z0.d
; CHECK-NEXT: mov z3.d, z0.d
; CHECK-NEXT: uqadd z6.s, z0.s, z1.s
; CHECK-NEXT: incw z0.s, all, mul #4
; CHECK-NEXT: incw z2.s
; CHECK-NEXT: incw z3.s, all, mul #2
; CHECK-NEXT: cmphi p2.s, p0/z, z25.s, z6.s
; CHECK-NEXT: uqadd z0.s, z0.s, z1.s
; CHECK-NEXT: cmphi p2.s, p0/z, z25.s, z6.s
; CHECK-NEXT: mov z4.d, z2.d
; CHECK-NEXT: uqadd z5.s, z2.s, z1.s
; CHECK-NEXT: uqadd z7.s, z3.s, z1.s
; CHECK-NEXT: incw z2.s, all, mul #4
; CHECK-NEXT: incw z3.s, all, mul #4
; CHECK-NEXT: cmphi p5.s, p0/z, z25.s, z0.s
; CHECK-NEXT: incw z4.s, all, mul #2
; CHECK-NEXT: cmphi p1.s, p0/z, z25.s, z5.s
; CHECK-NEXT: cmphi p3.s, p0/z, z25.s, z7.s
; CHECK-NEXT: uqadd z2.s, z2.s, z1.s
; CHECK-NEXT: uqadd z3.s, z3.s, z1.s
; CHECK-NEXT: cmphi p1.s, p0/z, z25.s, z5.s
; CHECK-NEXT: cmphi p3.s, p0/z, z25.s, z7.s
; CHECK-NEXT: uqadd z24.s, z4.s, z1.s
; CHECK-NEXT: incw z4.s, all, mul #4
; CHECK-NEXT: uzp1 p1.h, p2.h, p1.h
; CHECK-NEXT: cmphi p6.s, p0/z, z25.s, z2.s
; CHECK-NEXT: cmphi p2.s, p0/z, z25.s, z3.s
; CHECK-NEXT: cmphi p7.s, p0/z, z25.s, z3.s
; CHECK-NEXT: uzp1 p1.h, p2.h, p1.h
; CHECK-NEXT: uqadd z1.s, z4.s, z1.s
; CHECK-NEXT: cmphi p4.s, p0/z, z25.s, z24.s
; CHECK-NEXT: uzp1 p3.h, p3.h, p4.h
; CHECK-NEXT: cmphi p0.s, p0/z, z25.s, z1.s
; CHECK-NEXT: uzp1 p4.h, p5.h, p6.h
; CHECK-NEXT: uzp1 p2.h, p3.h, p4.h
; CHECK-NEXT: uzp1 p3.h, p5.h, p6.h
; CHECK-NEXT: ldr p6, [sp, #5, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: ldr p5, [sp, #6, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: uzp1 p2.h, p2.h, p0.h
; CHECK-NEXT: uzp1 p0.b, p1.b, p3.b
; CHECK-NEXT: uzp1 p1.b, p4.b, p2.b
; CHECK-NEXT: uzp1 p4.h, p7.h, p0.h
; CHECK-NEXT: ldr p7, [sp, #4, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: uzp1 p0.b, p1.b, p2.b
; CHECK-NEXT: uzp1 p1.b, p3.b, p4.b
; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: addvl sp, sp, #1
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
Expand All @@ -208,96 +210,97 @@ define <vscale x 32 x i1> @lane_mask_nxv32i1_i64(i64 %index, i64 %TC) {
; CHECK-LABEL: lane_mask_nxv32i1_i64:
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-1
; CHECK-NEXT: str p10, [sp, #1, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-2
; CHECK-NEXT: str p9, [sp, #2, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: str p8, [sp, #3, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: str p7, [sp, #4, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: str p6, [sp, #5, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: str p5, [sp, #6, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG
; CHECK-NEXT: str z8, [sp, #1, mul vl] // 16-byte Folded Spill
; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 16 * VG
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: index z1.d, #0, #1
; CHECK-NEXT: .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 16 - 8 * VG
; CHECK-NEXT: index z5.d, #0, #1
; CHECK-NEXT: mov z0.d, x0
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov z3.d, x1
; CHECK-NEXT: mov z2.d, z1.d
; CHECK-NEXT: mov z4.d, z1.d
; CHECK-NEXT: mov z6.d, z1.d
; CHECK-NEXT: uqadd z25.d, z1.d, z0.d
; CHECK-NEXT: incd z1.d, all, mul #8
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov z2.d, z5.d
; CHECK-NEXT: mov z1.d, z5.d
; CHECK-NEXT: mov z4.d, z5.d
; CHECK-NEXT: uqadd z25.d, z5.d, z0.d
; CHECK-NEXT: incd z5.d, all, mul #8
; CHECK-NEXT: incd z2.d
; CHECK-NEXT: incd z4.d, all, mul #2
; CHECK-NEXT: incd z6.d, all, mul #4
; CHECK-NEXT: cmphi p1.d, p0/z, z3.d, z25.d
; CHECK-NEXT: uqadd z1.d, z1.d, z0.d
; CHECK-NEXT: mov z5.d, z2.d
; CHECK-NEXT: uqadd z26.d, z2.d, z0.d
; CHECK-NEXT: incd z1.d, all, mul #2
; CHECK-NEXT: incd z4.d, all, mul #4
; CHECK-NEXT: uqadd z5.d, z5.d, z0.d
; CHECK-NEXT: cmphi p3.d, p0/z, z3.d, z25.d
; CHECK-NEXT: mov z6.d, z2.d
; CHECK-NEXT: mov z7.d, z2.d
; CHECK-NEXT: mov z24.d, z4.d
; CHECK-NEXT: uqadd z27.d, z4.d, z0.d
; CHECK-NEXT: uqadd z28.d, z6.d, z0.d
; CHECK-NEXT: mov z24.d, z1.d
; CHECK-NEXT: uqadd z26.d, z2.d, z0.d
; CHECK-NEXT: uqadd z27.d, z1.d, z0.d
; CHECK-NEXT: uqadd z28.d, z4.d, z0.d
; CHECK-NEXT: incd z2.d, all, mul #8
; CHECK-NEXT: incd z1.d, all, mul #8
; CHECK-NEXT: incd z4.d, all, mul #8
; CHECK-NEXT: incd z6.d, all, mul #8
; CHECK-NEXT: incd z5.d, all, mul #2
; CHECK-NEXT: incd z6.d, all, mul #2
; CHECK-NEXT: incd z7.d, all, mul #4
; CHECK-NEXT: cmphi p2.d, p0/z, z3.d, z26.d
; CHECK-NEXT: incd z24.d, all, mul #4
; CHECK-NEXT: cmphi p3.d, p0/z, z3.d, z27.d
; CHECK-NEXT: cmphi p5.d, p0/z, z3.d, z28.d
; CHECK-NEXT: cmphi p4.d, p0/z, z3.d, z26.d
; CHECK-NEXT: cmphi p2.d, p0/z, z3.d, z27.d
; CHECK-NEXT: cmphi p1.d, p0/z, z3.d, z28.d
; CHECK-NEXT: mov z31.d, z6.d
; CHECK-NEXT: uqadd z29.d, z6.d, z0.d
; CHECK-NEXT: uqadd z30.d, z7.d, z0.d
; CHECK-NEXT: uqadd z8.d, z24.d, z0.d
; CHECK-NEXT: incd z6.d, all, mul #8
; CHECK-NEXT: incd z7.d, all, mul #8
; CHECK-NEXT: incd z24.d, all, mul #8
; CHECK-NEXT: uqadd z2.d, z2.d, z0.d
; CHECK-NEXT: uqadd z1.d, z1.d, z0.d
; CHECK-NEXT: incd z31.d, all, mul #4
; CHECK-NEXT: uqadd z4.d, z4.d, z0.d
; CHECK-NEXT: uzp1 p3.s, p3.s, p4.s
; CHECK-NEXT: cmphi p5.d, p0/z, z3.d, z29.d
; CHECK-NEXT: cmphi p7.d, p0/z, z3.d, z30.d
; CHECK-NEXT: uqadd z6.d, z6.d, z0.d
; CHECK-NEXT: mov z26.d, z5.d
; CHECK-NEXT: uqadd z25.d, z5.d, z0.d
; CHECK-NEXT: uqadd z27.d, z7.d, z0.d
; CHECK-NEXT: incd z5.d, all, mul #8
; CHECK-NEXT: incd z7.d, all, mul #8
; CHECK-NEXT: uzp1 p1.s, p1.s, p2.s
; CHECK-NEXT: incd z26.d, all, mul #4
; CHECK-NEXT: cmphi p8.d, p0/z, z3.d, z2.d
; CHECK-NEXT: cmphi p4.d, p0/z, z3.d, z25.d
; CHECK-NEXT: uqadd z25.d, z24.d, z0.d
; CHECK-NEXT: incd z24.d, all, mul #8
; CHECK-NEXT: uqadd z5.d, z5.d, z0.d
; CHECK-NEXT: cmphi p6.d, p0/z, z3.d, z8.d
; CHECK-NEXT: ldr z8, [sp, #1, mul vl] // 16-byte Folded Reload
; CHECK-NEXT: uqadd z7.d, z7.d, z0.d
; CHECK-NEXT: cmphi p6.d, p0/z, z3.d, z27.d
; CHECK-NEXT: uqadd z28.d, z26.d, z0.d
; CHECK-NEXT: incd z26.d, all, mul #8
; CHECK-NEXT: uzp1 p3.s, p3.s, p4.s
; CHECK-NEXT: uqadd z25.d, z31.d, z0.d
; CHECK-NEXT: incd z31.d, all, mul #8
; CHECK-NEXT: uqadd z24.d, z24.d, z0.d
; CHECK-NEXT: cmphi p7.d, p0/z, z3.d, z25.d
; CHECK-NEXT: cmphi p4.d, p0/z, z3.d, z1.d
; CHECK-NEXT: uzp1 p5.s, p5.s, p6.s
; CHECK-NEXT: cmphi p6.d, p0/z, z3.d, z4.d
; CHECK-NEXT: cmphi p9.d, p0/z, z3.d, z5.d
; CHECK-NEXT: cmphi p10.d, p0/z, z3.d, z7.d
; CHECK-NEXT: uqadd z0.d, z26.d, z0.d
; CHECK-NEXT: cmphi p2.d, p0/z, z3.d, z28.d
; CHECK-NEXT: uzp1 p4.s, p4.s, p8.s
; CHECK-NEXT: cmphi p8.d, p0/z, z3.d, z24.d
; CHECK-NEXT: uzp1 p6.s, p6.s, p9.s
; CHECK-NEXT: cmphi p4.d, p0/z, z3.d, z5.d
; CHECK-NEXT: uzp1 p2.s, p2.s, p5.s
; CHECK-NEXT: cmphi p5.d, p0/z, z3.d, z2.d
; CHECK-NEXT: cmphi p9.d, p0/z, z3.d, z6.d
; CHECK-NEXT: uqadd z0.d, z31.d, z0.d
; CHECK-NEXT: uzp1 p1.s, p1.s, p7.s
; CHECK-NEXT: cmphi p7.d, p0/z, z3.d, z1.d
; CHECK-NEXT: cmphi p8.d, p0/z, z3.d, z25.d
; CHECK-NEXT: uzp1 p2.h, p3.h, p2.h
; CHECK-NEXT: cmphi p3.d, p0/z, z3.d, z7.d
; CHECK-NEXT: uzp1 p4.s, p4.s, p5.s
; CHECK-NEXT: uzp1 p5.s, p7.s, p9.s
; CHECK-NEXT: ldr p9, [sp, #2, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: uzp1 p1.h, p1.h, p3.h
; CHECK-NEXT: uzp1 p2.s, p7.s, p2.s
; CHECK-NEXT: cmphi p7.d, p0/z, z3.d, z6.d
; CHECK-NEXT: uzp1 p6.s, p6.s, p8.s
; CHECK-NEXT: cmphi p8.d, p0/z, z3.d, z4.d
; CHECK-NEXT: ldr p7, [sp, #4, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: uzp1 p4.h, p4.h, p5.h
; CHECK-NEXT: ldr p5, [sp, #6, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: uzp1 p1.h, p1.h, p6.h
; CHECK-NEXT: cmphi p6.d, p0/z, z3.d, z24.d
; CHECK-NEXT: cmphi p0.d, p0/z, z3.d, z0.d
; CHECK-NEXT: uzp1 p7.s, p7.s, p10.s
; CHECK-NEXT: ldr p10, [sp, #1, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: uzp1 p0.s, p8.s, p0.s
; CHECK-NEXT: uzp1 p3.s, p8.s, p3.s
; CHECK-NEXT: ldr p8, [sp, #3, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: uzp1 p3.h, p4.h, p6.h
; CHECK-NEXT: uzp1 p0.s, p6.s, p0.s
; CHECK-NEXT: ldr p6, [sp, #5, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: uzp1 p2.h, p5.h, p2.h
; CHECK-NEXT: ldr p5, [sp, #6, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: uzp1 p4.h, p7.h, p0.h
; CHECK-NEXT: ldr p7, [sp, #4, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: uzp1 p0.b, p1.b, p2.b
; CHECK-NEXT: uzp1 p1.b, p3.b, p4.b
; CHECK-NEXT: uzp1 p3.h, p3.h, p0.h
; CHECK-NEXT: uzp1 p0.b, p2.b, p1.b
; CHECK-NEXT: uzp1 p1.b, p4.b, p3.b
; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: addvl sp, sp, #1
; CHECK-NEXT: addvl sp, sp, #2
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
%active.lane.mask = call <vscale x 32 x i1> @llvm.get.active.lane.mask.nxv32i1.i64(i64 %index, i64 %TC)
Expand Down Expand Up @@ -459,12 +462,12 @@ define <4 x i1> @lane_mask_v4i1_i8(i8 %index, i8 %TC) {
; CHECK-NEXT: adrp x8, .LCPI26_0
; CHECK-NEXT: movi d2, #0xff00ff00ff00ff
; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI26_0]
; CHECK-NEXT: dup v3.4h, w1
; CHECK-NEXT: bic v0.4h, #255, lsl #8
; CHECK-NEXT: bic v3.4h, #255, lsl #8
; CHECK-NEXT: add v0.4h, v0.4h, v1.4h
; CHECK-NEXT: dup v1.4h, w1
; CHECK-NEXT: umin v0.4h, v0.4h, v2.4h
; CHECK-NEXT: bic v1.4h, #255, lsl #8
; CHECK-NEXT: cmhi v0.4h, v1.4h, v0.4h
; CHECK-NEXT: cmhi v0.4h, v3.4h, v0.4h
; CHECK-NEXT: ret
%active.lane.mask = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i8(i8 %index, i8 %TC)
ret <4 x i1> %active.lane.mask
Expand All @@ -480,9 +483,9 @@ define <2 x i1> @lane_mask_v2i1_i8(i8 %index, i8 %TC) {
; CHECK-NEXT: dup v3.2s, w1
; CHECK-NEXT: and v1.8b, v1.8b, v0.8b
; CHECK-NEXT: add v1.2s, v1.2s, v2.2s
; CHECK-NEXT: umin v1.2s, v1.2s, v0.2s
; CHECK-NEXT: and v0.8b, v3.8b, v0.8b
; CHECK-NEXT: cmhi v0.2s, v0.2s, v1.2s
; CHECK-NEXT: and v2.8b, v3.8b, v0.8b
; CHECK-NEXT: umin v0.2s, v1.2s, v0.2s
; CHECK-NEXT: cmhi v0.2s, v2.2s, v0.2s
; CHECK-NEXT: ret
%active.lane.mask = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i8(i8 %index, i8 %TC)
ret <2 x i1> %active.lane.mask
Expand Down
14 changes: 7 additions & 7 deletions llvm/test/CodeGen/AArch64/arm64-convert-v4f64.ll
Original file line number Diff line number Diff line change
Expand Up @@ -54,19 +54,19 @@ define <4 x bfloat> @uitofp_v4i64_to_v4bf16(ptr %ptr) {
; CHECK-LABEL: uitofp_v4i64_to_v4bf16:
; CHECK: // %bb.0:
; CHECK-NEXT: ldp q0, q1, [x0]
; CHECK-NEXT: movi v2.4s, #1
; CHECK-NEXT: movi v2.4s, #127, msl #8
; CHECK-NEXT: ucvtf v0.2d, v0.2d
; CHECK-NEXT: ucvtf v1.2d, v1.2d
; CHECK-NEXT: fcvtn v0.2s, v0.2d
; CHECK-NEXT: fcvtn2 v0.4s, v1.2d
; CHECK-NEXT: movi v1.4s, #127, msl #8
; CHECK-NEXT: movi v1.4s, #1
; CHECK-NEXT: ushr v3.4s, v0.4s, #16
; CHECK-NEXT: add v1.4s, v0.4s, v1.4s
; CHECK-NEXT: and v2.16b, v3.16b, v2.16b
; CHECK-NEXT: add v1.4s, v2.4s, v1.4s
; CHECK-NEXT: fcmeq v2.4s, v0.4s, v0.4s
; CHECK-NEXT: add v2.4s, v0.4s, v2.4s
; CHECK-NEXT: and v1.16b, v3.16b, v1.16b
; CHECK-NEXT: fcmeq v3.4s, v0.4s, v0.4s
; CHECK-NEXT: orr v0.4s, #64, lsl #16
; CHECK-NEXT: bit v0.16b, v1.16b, v2.16b
; CHECK-NEXT: add v1.4s, v1.4s, v2.4s
; CHECK-NEXT: bit v0.16b, v1.16b, v3.16b
; CHECK-NEXT: shrn v0.4h, v0.4s, #16
; CHECK-NEXT: ret
%tmp1 = load <4 x i64>, ptr %ptr
Expand Down
106 changes: 53 additions & 53 deletions llvm/test/CodeGen/AArch64/arm64-vabs.ll
Original file line number Diff line number Diff line change
Expand Up @@ -257,12 +257,12 @@ define i16 @uabd16b_rdx(ptr %a, ptr %b) {
; CHECK-GI-NEXT: movi.2d v0, #0000000000000000
; CHECK-GI-NEXT: usubl.8h v3, v1, v2
; CHECK-GI-NEXT: usubl2.8h v1, v1, v2
; CHECK-GI-NEXT: neg.8h v2, v3
; CHECK-GI-NEXT: neg.8h v4, v1
; CHECK-GI-NEXT: cmgt.8h v5, v0, v3
; CHECK-GI-NEXT: cmgt.8h v2, v0, v3
; CHECK-GI-NEXT: cmgt.8h v0, v0, v1
; CHECK-GI-NEXT: bif.16b v2, v3, v5
; CHECK-GI-NEXT: bsl.16b v0, v4, v1
; CHECK-GI-NEXT: neg.8h v4, v3
; CHECK-GI-NEXT: neg.8h v5, v1
; CHECK-GI-NEXT: bsl.16b v2, v4, v3
; CHECK-GI-NEXT: bsl.16b v0, v5, v1
; CHECK-GI-NEXT: add.8h v0, v2, v0
; CHECK-GI-NEXT: addv.8h h0, v0
; CHECK-GI-NEXT: fmov w0, s0
Expand Down Expand Up @@ -299,18 +299,18 @@ define i32 @uabd16b_rdx_i32(<16 x i8> %a, <16 x i8> %b) {
; CHECK-GI-NEXT: usubl2.4s v3, v3, v4
; CHECK-GI-NEXT: usubl.4s v4, v0, v1
; CHECK-GI-NEXT: usubl2.4s v0, v0, v1
; CHECK-GI-NEXT: neg.4s v6, v5
; CHECK-GI-NEXT: neg.4s v7, v3
; CHECK-GI-NEXT: cmgt.4s v1, v2, v5
; CHECK-GI-NEXT: neg.4s v16, v4
; CHECK-GI-NEXT: neg.4s v17, v0
; CHECK-GI-NEXT: cmgt.4s v18, v2, v3
; CHECK-GI-NEXT: cmgt.4s v19, v2, v4
; CHECK-GI-NEXT: cmgt.4s v6, v2, v3
; CHECK-GI-NEXT: neg.4s v16, v5
; CHECK-GI-NEXT: cmgt.4s v7, v2, v4
; CHECK-GI-NEXT: cmgt.4s v2, v2, v0
; CHECK-GI-NEXT: bsl.16b v1, v6, v5
; CHECK-GI-NEXT: bit.16b v3, v7, v18
; CHECK-GI-NEXT: bit.16b v4, v16, v19
; CHECK-GI-NEXT: bit.16b v0, v17, v2
; CHECK-GI-NEXT: neg.4s v17, v3
; CHECK-GI-NEXT: neg.4s v18, v4
; CHECK-GI-NEXT: neg.4s v19, v0
; CHECK-GI-NEXT: bsl.16b v1, v16, v5
; CHECK-GI-NEXT: bit.16b v3, v17, v6
; CHECK-GI-NEXT: bit.16b v4, v18, v7
; CHECK-GI-NEXT: bit.16b v0, v19, v2
; CHECK-GI-NEXT: add.4s v1, v1, v3
; CHECK-GI-NEXT: add.4s v0, v4, v0
; CHECK-GI-NEXT: add.4s v0, v1, v0
Expand Down Expand Up @@ -347,18 +347,18 @@ define i32 @sabd16b_rdx_i32(<16 x i8> %a, <16 x i8> %b) {
; CHECK-GI-NEXT: ssubl2.4s v3, v3, v4
; CHECK-GI-NEXT: ssubl.4s v4, v0, v1
; CHECK-GI-NEXT: ssubl2.4s v0, v0, v1
; CHECK-GI-NEXT: neg.4s v6, v5
; CHECK-GI-NEXT: neg.4s v7, v3
; CHECK-GI-NEXT: cmgt.4s v1, v2, v5
; CHECK-GI-NEXT: neg.4s v16, v4
; CHECK-GI-NEXT: neg.4s v17, v0
; CHECK-GI-NEXT: cmgt.4s v18, v2, v3
; CHECK-GI-NEXT: cmgt.4s v19, v2, v4
; CHECK-GI-NEXT: cmgt.4s v6, v2, v3
; CHECK-GI-NEXT: neg.4s v16, v5
; CHECK-GI-NEXT: cmgt.4s v7, v2, v4
; CHECK-GI-NEXT: cmgt.4s v2, v2, v0
; CHECK-GI-NEXT: bsl.16b v1, v6, v5
; CHECK-GI-NEXT: bit.16b v3, v7, v18
; CHECK-GI-NEXT: bit.16b v4, v16, v19
; CHECK-GI-NEXT: bit.16b v0, v17, v2
; CHECK-GI-NEXT: neg.4s v17, v3
; CHECK-GI-NEXT: neg.4s v18, v4
; CHECK-GI-NEXT: neg.4s v19, v0
; CHECK-GI-NEXT: bsl.16b v1, v16, v5
; CHECK-GI-NEXT: bit.16b v3, v17, v6
; CHECK-GI-NEXT: bit.16b v4, v18, v7
; CHECK-GI-NEXT: bit.16b v0, v19, v2
; CHECK-GI-NEXT: add.4s v1, v1, v3
; CHECK-GI-NEXT: add.4s v0, v4, v0
; CHECK-GI-NEXT: add.4s v0, v1, v0
Expand Down Expand Up @@ -396,12 +396,12 @@ define i32 @uabd8h_rdx(ptr %a, ptr %b) {
; CHECK-GI-NEXT: movi.2d v0, #0000000000000000
; CHECK-GI-NEXT: usubl.4s v3, v1, v2
; CHECK-GI-NEXT: usubl2.4s v1, v1, v2
; CHECK-GI-NEXT: neg.4s v2, v3
; CHECK-GI-NEXT: neg.4s v4, v1
; CHECK-GI-NEXT: cmgt.4s v5, v0, v3
; CHECK-GI-NEXT: cmgt.4s v2, v0, v3
; CHECK-GI-NEXT: cmgt.4s v0, v0, v1
; CHECK-GI-NEXT: bif.16b v2, v3, v5
; CHECK-GI-NEXT: bsl.16b v0, v4, v1
; CHECK-GI-NEXT: neg.4s v4, v3
; CHECK-GI-NEXT: neg.4s v5, v1
; CHECK-GI-NEXT: bsl.16b v2, v4, v3
; CHECK-GI-NEXT: bsl.16b v0, v5, v1
; CHECK-GI-NEXT: add.4s v0, v2, v0
; CHECK-GI-NEXT: addv.4s s0, v0
; CHECK-GI-NEXT: fmov w0, s0
Expand All @@ -428,15 +428,15 @@ define i32 @sabd8h_rdx(<8 x i16> %a, <8 x i16> %b) {
;
; CHECK-GI-LABEL: sabd8h_rdx:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: movi.2d v2, #0000000000000000
; CHECK-GI-NEXT: ssubl.4s v3, v0, v1
; CHECK-GI-NEXT: ssubl2.4s v0, v0, v1
; CHECK-GI-NEXT: movi.2d v2, #0000000000000000
; CHECK-GI-NEXT: neg.4s v1, v3
; CHECK-GI-NEXT: neg.4s v4, v0
; CHECK-GI-NEXT: cmgt.4s v5, v2, v3
; CHECK-GI-NEXT: neg.4s v4, v3
; CHECK-GI-NEXT: neg.4s v5, v0
; CHECK-GI-NEXT: cmgt.4s v1, v2, v3
; CHECK-GI-NEXT: cmgt.4s v2, v2, v0
; CHECK-GI-NEXT: bif.16b v1, v3, v5
; CHECK-GI-NEXT: bit.16b v0, v4, v2
; CHECK-GI-NEXT: bsl.16b v1, v4, v3
; CHECK-GI-NEXT: bit.16b v0, v5, v2
; CHECK-GI-NEXT: add.4s v0, v1, v0
; CHECK-GI-NEXT: addv.4s s0, v0
; CHECK-GI-NEXT: fmov w0, s0
Expand All @@ -461,10 +461,10 @@ define i32 @uabdl4s_rdx_i32(<4 x i16> %a, <4 x i16> %b) {
;
; CHECK-GI-LABEL: uabdl4s_rdx_i32:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: movi.2d v2, #0000000000000000
; CHECK-GI-NEXT: usubl.4s v0, v0, v1
; CHECK-GI-NEXT: movi.2d v1, #0000000000000000
; CHECK-GI-NEXT: cmgt.4s v1, v2, v0
; CHECK-GI-NEXT: neg.4s v2, v0
; CHECK-GI-NEXT: cmgt.4s v1, v1, v0
; CHECK-GI-NEXT: bit.16b v0, v2, v1
; CHECK-GI-NEXT: addv.4s s0, v0
; CHECK-GI-NEXT: fmov w0, s0
Expand Down Expand Up @@ -499,12 +499,12 @@ define i64 @uabd4s_rdx(ptr %a, ptr %b, i32 %h) {
; CHECK-GI-NEXT: movi.2d v0, #0000000000000000
; CHECK-GI-NEXT: usubl.2d v3, v1, v2
; CHECK-GI-NEXT: usubl2.2d v1, v1, v2
; CHECK-GI-NEXT: neg.2d v2, v3
; CHECK-GI-NEXT: neg.2d v4, v1
; CHECK-GI-NEXT: cmgt.2d v5, v0, v3
; CHECK-GI-NEXT: cmgt.2d v2, v0, v3
; CHECK-GI-NEXT: cmgt.2d v0, v0, v1
; CHECK-GI-NEXT: bif.16b v2, v3, v5
; CHECK-GI-NEXT: bsl.16b v0, v4, v1
; CHECK-GI-NEXT: neg.2d v4, v3
; CHECK-GI-NEXT: neg.2d v5, v1
; CHECK-GI-NEXT: bsl.16b v2, v4, v3
; CHECK-GI-NEXT: bsl.16b v0, v5, v1
; CHECK-GI-NEXT: add.2d v0, v2, v0
; CHECK-GI-NEXT: addp.2d d0, v0
; CHECK-GI-NEXT: fmov x0, d0
Expand All @@ -531,15 +531,15 @@ define i64 @sabd4s_rdx(<4 x i32> %a, <4 x i32> %b) {
;
; CHECK-GI-LABEL: sabd4s_rdx:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: movi.2d v2, #0000000000000000
; CHECK-GI-NEXT: ssubl.2d v3, v0, v1
; CHECK-GI-NEXT: ssubl2.2d v0, v0, v1
; CHECK-GI-NEXT: movi.2d v2, #0000000000000000
; CHECK-GI-NEXT: neg.2d v1, v3
; CHECK-GI-NEXT: neg.2d v4, v0
; CHECK-GI-NEXT: cmgt.2d v5, v2, v3
; CHECK-GI-NEXT: neg.2d v4, v3
; CHECK-GI-NEXT: neg.2d v5, v0
; CHECK-GI-NEXT: cmgt.2d v1, v2, v3
; CHECK-GI-NEXT: cmgt.2d v2, v2, v0
; CHECK-GI-NEXT: bif.16b v1, v3, v5
; CHECK-GI-NEXT: bit.16b v0, v4, v2
; CHECK-GI-NEXT: bsl.16b v1, v4, v3
; CHECK-GI-NEXT: bit.16b v0, v5, v2
; CHECK-GI-NEXT: add.2d v0, v1, v0
; CHECK-GI-NEXT: addp.2d d0, v0
; CHECK-GI-NEXT: fmov x0, d0
Expand All @@ -564,10 +564,10 @@ define i64 @uabdl2d_rdx_i64(<2 x i32> %a, <2 x i32> %b) {
;
; CHECK-GI-LABEL: uabdl2d_rdx_i64:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: movi.2d v2, #0000000000000000
; CHECK-GI-NEXT: usubl.2d v0, v0, v1
; CHECK-GI-NEXT: movi.2d v1, #0000000000000000
; CHECK-GI-NEXT: cmgt.2d v1, v2, v0
; CHECK-GI-NEXT: neg.2d v2, v0
; CHECK-GI-NEXT: cmgt.2d v1, v1, v0
; CHECK-GI-NEXT: bit.16b v0, v2, v1
; CHECK-GI-NEXT: addp.2d d0, v0
; CHECK-GI-NEXT: fmov x0, d0
Expand Down Expand Up @@ -1796,10 +1796,10 @@ define <2 x i64> @uabd_i32(<2 x i32> %a, <2 x i32> %b) {
;
; CHECK-GI-LABEL: uabd_i32:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: movi.2d v2, #0000000000000000
; CHECK-GI-NEXT: ssubl.2d v0, v0, v1
; CHECK-GI-NEXT: movi.2d v1, #0000000000000000
; CHECK-GI-NEXT: cmgt.2d v1, v2, v0
; CHECK-GI-NEXT: neg.2d v2, v0
; CHECK-GI-NEXT: cmgt.2d v1, v1, v0
; CHECK-GI-NEXT: bit.16b v0, v2, v1
; CHECK-GI-NEXT: ret
%aext = sext <2 x i32> %a to <2 x i64>
Expand Down
28 changes: 14 additions & 14 deletions llvm/test/CodeGen/AArch64/arm64-vcvt_f.ll
Original file line number Diff line number Diff line change
Expand Up @@ -205,15 +205,15 @@ define <2 x bfloat> @test_vcvt_bf16_f64(<2 x double> %v) nounwind readnone ssp {
; GENERIC-LABEL: test_vcvt_bf16_f64:
; GENERIC: // %bb.0:
; GENERIC-NEXT: fcvtxn v0.2s, v0.2d
; GENERIC-NEXT: movi.4s v1, #127, msl #8
; GENERIC-NEXT: movi.4s v2, #1
; GENERIC-NEXT: movi.4s v1, #1
; GENERIC-NEXT: movi.4s v2, #127, msl #8
; GENERIC-NEXT: ushr.4s v3, v0, #16
; GENERIC-NEXT: add.4s v1, v0, v1
; GENERIC-NEXT: and.16b v2, v3, v2
; GENERIC-NEXT: add.4s v1, v2, v1
; GENERIC-NEXT: fcmeq.4s v2, v0, v0
; GENERIC-NEXT: add.4s v2, v0, v2
; GENERIC-NEXT: and.16b v1, v3, v1
; GENERIC-NEXT: fcmeq.4s v3, v0, v0
; GENERIC-NEXT: orr.4s v0, #64, lsl #16
; GENERIC-NEXT: bit.16b v0, v1, v2
; GENERIC-NEXT: add.4s v1, v1, v2
; GENERIC-NEXT: bit.16b v0, v1, v3
; GENERIC-NEXT: shrn.4h v0, v0, #16
; GENERIC-NEXT: ret
;
Expand All @@ -238,15 +238,15 @@ define <2 x bfloat> @test_vcvt_bf16_f64(<2 x double> %v) nounwind readnone ssp {
; GISEL-LABEL: test_vcvt_bf16_f64:
; GISEL: // %bb.0:
; GISEL-NEXT: fcvtxn v0.2s, v0.2d
; GISEL-NEXT: movi.4s v1, #127, msl #8
; GISEL-NEXT: movi.4s v2, #1
; GISEL-NEXT: movi.4s v1, #1
; GISEL-NEXT: movi.4s v2, #127, msl #8
; GISEL-NEXT: ushr.4s v3, v0, #16
; GISEL-NEXT: add.4s v1, v0, v1
; GISEL-NEXT: and.16b v2, v3, v2
; GISEL-NEXT: add.4s v1, v2, v1
; GISEL-NEXT: fcmeq.4s v2, v0, v0
; GISEL-NEXT: add.4s v2, v0, v2
; GISEL-NEXT: and.16b v1, v3, v1
; GISEL-NEXT: fcmeq.4s v3, v0, v0
; GISEL-NEXT: orr.4s v0, #64, lsl #16
; GISEL-NEXT: bit.16b v0, v1, v2
; GISEL-NEXT: add.4s v1, v1, v2
; GISEL-NEXT: bit.16b v0, v1, v3
; GISEL-NEXT: shrn.4h v0, v0, #16
; GISEL-NEXT: ret
%vcvt1.i = fptrunc <2 x double> %v to <2 x bfloat>
Expand Down
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/AArch64/arm64-vhadd.ll
Original file line number Diff line number Diff line change
Expand Up @@ -903,10 +903,10 @@ define <2 x i16> @hadd8x2_sext_lsr(<2 x i8> %src1, <2 x i8> %src2) {
; CHECK: // %bb.0:
; CHECK-NEXT: shl.2s v0, v0, #24
; CHECK-NEXT: shl.2s v1, v1, #24
; CHECK-NEXT: movi d2, #0x00ffff0000ffff
; CHECK-NEXT: sshr.2s v0, v0, #24
; CHECK-NEXT: ssra.2s v0, v1, #24
; CHECK-NEXT: movi d1, #0x00ffff0000ffff
; CHECK-NEXT: and.8b v0, v0, v1
; CHECK-NEXT: and.8b v0, v0, v2
; CHECK-NEXT: ushr.2s v0, v0, #1
; CHECK-NEXT: ret
%zextsrc1 = sext <2 x i8> %src1 to <2 x i16>
Expand Down Expand Up @@ -968,10 +968,10 @@ define <4 x i16> @rhadd8_sext_lsr(<4 x i8> %src1, <4 x i8> %src2) {
; CHECK: // %bb.0:
; CHECK-NEXT: shl.4h v0, v0, #8
; CHECK-NEXT: shl.4h v1, v1, #8
; CHECK-NEXT: movi.4h v2, #1
; CHECK-NEXT: sshr.4h v0, v0, #8
; CHECK-NEXT: ssra.4h v0, v1, #8
; CHECK-NEXT: movi.4h v1, #1
; CHECK-NEXT: add.4h v0, v0, v1
; CHECK-NEXT: add.4h v0, v0, v2
; CHECK-NEXT: ushr.4h v0, v0, #1
; CHECK-NEXT: ret
%zextsrc1 = sext <4 x i8> %src1 to <4 x i16>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,21 +7,22 @@ target triple = "aarch64-unknown-linux-gnu"
define <vscale x 4 x double> @mull_add(<vscale x 4 x double> %a, <vscale x 4 x double> %b, <vscale x 4 x double> %c) {
; CHECK-LABEL: mull_add:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: uzp1 z6.d, z0.d, z1.d
; CHECK-NEXT: uzp2 z7.d, z2.d, z3.d
; CHECK-NEXT: uzp2 z0.d, z0.d, z1.d
; CHECK-NEXT: uzp2 z6.d, z0.d, z1.d
; CHECK-NEXT: uzp1 z0.d, z0.d, z1.d
; CHECK-NEXT: uzp2 z1.d, z2.d, z3.d
; CHECK-NEXT: uzp1 z2.d, z2.d, z3.d
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: uzp1 z1.d, z2.d, z3.d
; CHECK-NEXT: fmul z2.d, z6.d, z7.d
; CHECK-NEXT: fmul z3.d, z0.d, z7.d
; CHECK-NEXT: fmad z0.d, p0/m, z1.d, z2.d
; CHECK-NEXT: fnmsb z1.d, p0/m, z6.d, z3.d
; CHECK-NEXT: uzp2 z2.d, z4.d, z5.d
; CHECK-NEXT: uzp1 z3.d, z4.d, z5.d
; CHECK-NEXT: fadd z2.d, z0.d, z2.d
; CHECK-NEXT: fmul z7.d, z0.d, z1.d
; CHECK-NEXT: fmul z1.d, z6.d, z1.d
; CHECK-NEXT: movprfx z3, z7
; CHECK-NEXT: fmla z3.d, p0/m, z6.d, z2.d
; CHECK-NEXT: fnmsb z0.d, p0/m, z2.d, z1.d
; CHECK-NEXT: uzp2 z1.d, z4.d, z5.d
; CHECK-NEXT: uzp1 z2.d, z4.d, z5.d
; CHECK-NEXT: fadd z2.d, z2.d, z0.d
; CHECK-NEXT: fadd z1.d, z3.d, z1.d
; CHECK-NEXT: zip1 z0.d, z1.d, z2.d
; CHECK-NEXT: zip2 z1.d, z1.d, z2.d
; CHECK-NEXT: zip1 z0.d, z2.d, z1.d
; CHECK-NEXT: zip2 z1.d, z2.d, z1.d
; CHECK-NEXT: ret
entry:
%strided.vec = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.experimental.vector.deinterleave2.nxv4f64(<vscale x 4 x double> %a)
Expand Down Expand Up @@ -49,21 +50,21 @@ entry:
define <vscale x 4 x double> @mul_add_mull(<vscale x 4 x double> %a, <vscale x 4 x double> %b, <vscale x 4 x double> %c, <vscale x 4 x double> %d) {
; CHECK-LABEL: mul_add_mull:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov z24.d, #0 // =0x0
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov z25.d, z24.d
; CHECK-NEXT: mov z26.d, z24.d
; CHECK-NEXT: mov z27.d, z24.d
; CHECK-NEXT: fcmla z24.d, p0/m, z7.d, z5.d, #0
; CHECK-NEXT: fcmla z25.d, p0/m, z2.d, z0.d, #0
; CHECK-NEXT: fcmla z26.d, p0/m, z3.d, z1.d, #0
; CHECK-NEXT: fcmla z27.d, p0/m, z6.d, z4.d, #0
; CHECK-NEXT: fcmla z24.d, p0/m, z7.d, z5.d, #0
; CHECK-NEXT: fcmla z24.d, p0/m, z7.d, z5.d, #90
; CHECK-NEXT: fcmla z25.d, p0/m, z2.d, z0.d, #90
; CHECK-NEXT: fcmla z26.d, p0/m, z3.d, z1.d, #90
; CHECK-NEXT: fcmla z27.d, p0/m, z6.d, z4.d, #90
; CHECK-NEXT: fcmla z24.d, p0/m, z7.d, z5.d, #90
; CHECK-NEXT: fadd z0.d, z25.d, z27.d
; CHECK-NEXT: fadd z1.d, z26.d, z24.d
; CHECK-NEXT: fadd z0.d, z25.d, z27.d
; CHECK-NEXT: ret
entry:
%strided.vec = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.experimental.vector.deinterleave2.nxv4f64(<vscale x 4 x double> %a)
Expand Down Expand Up @@ -100,21 +101,21 @@ entry:
define <vscale x 4 x double> @mul_sub_mull(<vscale x 4 x double> %a, <vscale x 4 x double> %b, <vscale x 4 x double> %c, <vscale x 4 x double> %d) {
; CHECK-LABEL: mul_sub_mull:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov z24.d, #0 // =0x0
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov z25.d, z24.d
; CHECK-NEXT: mov z26.d, z24.d
; CHECK-NEXT: mov z27.d, z24.d
; CHECK-NEXT: fcmla z24.d, p0/m, z7.d, z5.d, #0
; CHECK-NEXT: fcmla z25.d, p0/m, z2.d, z0.d, #0
; CHECK-NEXT: fcmla z26.d, p0/m, z3.d, z1.d, #0
; CHECK-NEXT: fcmla z27.d, p0/m, z6.d, z4.d, #0
; CHECK-NEXT: fcmla z24.d, p0/m, z7.d, z5.d, #0
; CHECK-NEXT: fcmla z24.d, p0/m, z7.d, z5.d, #90
; CHECK-NEXT: fcmla z25.d, p0/m, z2.d, z0.d, #90
; CHECK-NEXT: fcmla z26.d, p0/m, z3.d, z1.d, #90
; CHECK-NEXT: fcmla z27.d, p0/m, z6.d, z4.d, #90
; CHECK-NEXT: fcmla z24.d, p0/m, z7.d, z5.d, #90
; CHECK-NEXT: fsub z0.d, z25.d, z27.d
; CHECK-NEXT: fsub z1.d, z26.d, z24.d
; CHECK-NEXT: fsub z0.d, z25.d, z27.d
; CHECK-NEXT: ret
entry:
%strided.vec = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.experimental.vector.deinterleave2.nxv4f64(<vscale x 4 x double> %a)
Expand Down Expand Up @@ -151,21 +152,21 @@ entry:
define <vscale x 4 x double> @mul_conj_mull(<vscale x 4 x double> %a, <vscale x 4 x double> %b, <vscale x 4 x double> %c, <vscale x 4 x double> %d) {
; CHECK-LABEL: mul_conj_mull:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov z24.d, #0 // =0x0
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov z25.d, z24.d
; CHECK-NEXT: mov z26.d, z24.d
; CHECK-NEXT: mov z27.d, z24.d
; CHECK-NEXT: fcmla z24.d, p0/m, z5.d, z7.d, #0
; CHECK-NEXT: fcmla z25.d, p0/m, z2.d, z0.d, #0
; CHECK-NEXT: fcmla z26.d, p0/m, z3.d, z1.d, #0
; CHECK-NEXT: fcmla z27.d, p0/m, z4.d, z6.d, #0
; CHECK-NEXT: fcmla z24.d, p0/m, z5.d, z7.d, #0
; CHECK-NEXT: fcmla z24.d, p0/m, z5.d, z7.d, #270
; CHECK-NEXT: fcmla z25.d, p0/m, z2.d, z0.d, #90
; CHECK-NEXT: fcmla z26.d, p0/m, z3.d, z1.d, #90
; CHECK-NEXT: fcmla z27.d, p0/m, z4.d, z6.d, #270
; CHECK-NEXT: fcmla z24.d, p0/m, z5.d, z7.d, #270
; CHECK-NEXT: fadd z0.d, z25.d, z27.d
; CHECK-NEXT: fadd z1.d, z26.d, z24.d
; CHECK-NEXT: fadd z0.d, z25.d, z27.d
; CHECK-NEXT: ret
entry:
%strided.vec = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.experimental.vector.deinterleave2.nxv4f64(<vscale x 4 x double> %a)
Expand Down Expand Up @@ -206,8 +207,8 @@ define <vscale x 4 x double> @mul_add_rot_mull(<vscale x 4 x double> %a, <vscale
; CHECK-NEXT: mov z25.d, #0 // =0x0
; CHECK-NEXT: uzp1 z4.d, z4.d, z5.d
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: and z25.d, z25.d, #0x7fffffffffffffff
; CHECK-NEXT: mov z26.d, z24.d
; CHECK-NEXT: and z25.d, z25.d, #0x7fffffffffffffff
; CHECK-NEXT: and z26.d, z26.d, #0x8000000000000000
; CHECK-NEXT: orr z5.d, z25.d, z26.d
; CHECK-NEXT: fadd z5.d, z4.d, z5.d
Expand All @@ -220,18 +221,19 @@ define <vscale x 4 x double> @mul_add_rot_mull(<vscale x 4 x double> %a, <vscale
; CHECK-NEXT: fsub z4.d, z4.d, z24.d
; CHECK-NEXT: uzp2 z24.d, z6.d, z7.d
; CHECK-NEXT: uzp1 z6.d, z6.d, z7.d
; CHECK-NEXT: fmul z3.d, z0.d, z1.d
; CHECK-NEXT: fmul z26.d, z0.d, z1.d
; CHECK-NEXT: fmul z1.d, z25.d, z1.d
; CHECK-NEXT: fmul z7.d, z4.d, z24.d
; CHECK-NEXT: fmul z3.d, z4.d, z24.d
; CHECK-NEXT: fmul z24.d, z5.d, z24.d
; CHECK-NEXT: fmla z3.d, p0/m, z25.d, z2.d
; CHECK-NEXT: movprfx z7, z26
; CHECK-NEXT: fmla z7.d, p0/m, z25.d, z2.d
; CHECK-NEXT: fnmsb z0.d, p0/m, z2.d, z1.d
; CHECK-NEXT: movprfx z1, z7
; CHECK-NEXT: movprfx z1, z3
; CHECK-NEXT: fmla z1.d, p0/m, z6.d, z5.d
; CHECK-NEXT: movprfx z2, z24
; CHECK-NEXT: fnmls z2.d, p0/m, z4.d, z6.d
; CHECK-NEXT: fadd z2.d, z0.d, z2.d
; CHECK-NEXT: fadd z1.d, z3.d, z1.d
; CHECK-NEXT: fadd z1.d, z7.d, z1.d
; CHECK-NEXT: zip1 z0.d, z2.d, z1.d
; CHECK-NEXT: zip2 z1.d, z2.d, z1.d
; CHECK-NEXT: ret
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,19 +41,19 @@ entry:
define <vscale x 4 x double> @mul_add_mull(<vscale x 4 x double> %a, <vscale x 4 x double> %b, <vscale x 4 x double> %c, <vscale x 4 x double> %d) {
; CHECK-LABEL: mul_add_mull:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov z24.d, #0 // =0x0
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov z25.d, z24.d
; CHECK-NEXT: fcmla z25.d, p0/m, z6.d, z4.d, #0
; CHECK-NEXT: fcmla z24.d, p0/m, z7.d, z5.d, #0
; CHECK-NEXT: fcmla z25.d, p0/m, z0.d, z2.d, #0
; CHECK-NEXT: fcmla z25.d, p0/m, z6.d, z4.d, #0
; CHECK-NEXT: fcmla z24.d, p0/m, z1.d, z3.d, #0
; CHECK-NEXT: fcmla z25.d, p0/m, z6.d, z4.d, #90
; CHECK-NEXT: fcmla z25.d, p0/m, z0.d, z2.d, #0
; CHECK-NEXT: fcmla z24.d, p0/m, z7.d, z5.d, #90
; CHECK-NEXT: fcmla z25.d, p0/m, z0.d, z2.d, #90
; CHECK-NEXT: fcmla z25.d, p0/m, z6.d, z4.d, #90
; CHECK-NEXT: fcmla z24.d, p0/m, z1.d, z3.d, #90
; CHECK-NEXT: mov z0.d, z25.d
; CHECK-NEXT: fcmla z25.d, p0/m, z0.d, z2.d, #90
; CHECK-NEXT: mov z1.d, z24.d
; CHECK-NEXT: mov z0.d, z25.d
; CHECK-NEXT: ret
entry:
%strided.vec = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.experimental.vector.deinterleave2.nxv4f64(<vscale x 4 x double> %a)
Expand Down Expand Up @@ -90,19 +90,19 @@ entry:
define <vscale x 4 x double> @mul_sub_mull(<vscale x 4 x double> %a, <vscale x 4 x double> %b, <vscale x 4 x double> %c, <vscale x 4 x double> %d) {
; CHECK-LABEL: mul_sub_mull:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov z24.d, #0 // =0x0
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov z25.d, z24.d
; CHECK-NEXT: fcmla z25.d, p0/m, z6.d, z4.d, #270
; CHECK-NEXT: fcmla z24.d, p0/m, z7.d, z5.d, #270
; CHECK-NEXT: fcmla z25.d, p0/m, z0.d, z2.d, #0
; CHECK-NEXT: fcmla z25.d, p0/m, z6.d, z4.d, #270
; CHECK-NEXT: fcmla z24.d, p0/m, z1.d, z3.d, #0
; CHECK-NEXT: fcmla z25.d, p0/m, z6.d, z4.d, #180
; CHECK-NEXT: fcmla z25.d, p0/m, z0.d, z2.d, #0
; CHECK-NEXT: fcmla z24.d, p0/m, z7.d, z5.d, #180
; CHECK-NEXT: fcmla z25.d, p0/m, z0.d, z2.d, #90
; CHECK-NEXT: fcmla z25.d, p0/m, z6.d, z4.d, #180
; CHECK-NEXT: fcmla z24.d, p0/m, z1.d, z3.d, #90
; CHECK-NEXT: mov z0.d, z25.d
; CHECK-NEXT: fcmla z25.d, p0/m, z0.d, z2.d, #90
; CHECK-NEXT: mov z1.d, z24.d
; CHECK-NEXT: mov z0.d, z25.d
; CHECK-NEXT: ret
entry:
%strided.vec = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.experimental.vector.deinterleave2.nxv4f64(<vscale x 4 x double> %a)
Expand Down Expand Up @@ -139,19 +139,19 @@ entry:
define <vscale x 4 x double> @mul_conj_mull(<vscale x 4 x double> %a, <vscale x 4 x double> %b, <vscale x 4 x double> %c, <vscale x 4 x double> %d) {
; CHECK-LABEL: mul_conj_mull:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov z24.d, #0 // =0x0
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov z25.d, z24.d
; CHECK-NEXT: fcmla z25.d, p0/m, z0.d, z2.d, #0
; CHECK-NEXT: fcmla z24.d, p0/m, z1.d, z3.d, #0
; CHECK-NEXT: fcmla z25.d, p0/m, z0.d, z2.d, #90
; CHECK-NEXT: fcmla z25.d, p0/m, z0.d, z2.d, #0
; CHECK-NEXT: fcmla z24.d, p0/m, z1.d, z3.d, #90
; CHECK-NEXT: fcmla z25.d, p0/m, z4.d, z6.d, #0
; CHECK-NEXT: fcmla z25.d, p0/m, z0.d, z2.d, #90
; CHECK-NEXT: fcmla z24.d, p0/m, z5.d, z7.d, #0
; CHECK-NEXT: fcmla z25.d, p0/m, z4.d, z6.d, #270
; CHECK-NEXT: fcmla z25.d, p0/m, z4.d, z6.d, #0
; CHECK-NEXT: fcmla z24.d, p0/m, z5.d, z7.d, #270
; CHECK-NEXT: mov z0.d, z25.d
; CHECK-NEXT: fcmla z25.d, p0/m, z4.d, z6.d, #270
; CHECK-NEXT: mov z1.d, z24.d
; CHECK-NEXT: mov z0.d, z25.d
; CHECK-NEXT: ret
entry:
%strided.vec = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.experimental.vector.deinterleave2.nxv4f64(<vscale x 4 x double> %a)
Expand Down Expand Up @@ -188,24 +188,25 @@ entry:
define <vscale x 4 x double> @mul_add_rot_mull(<vscale x 4 x double> %a, <vscale x 4 x double> %b, <vscale x 4 x double> %c, <vscale x 4 x double> %d) {
; CHECK-LABEL: mul_add_rot_mull:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: uzp1 z24.d, z2.d, z3.d
; CHECK-NEXT: uzp2 z24.d, z2.d, z3.d
; CHECK-NEXT: uzp2 z25.d, z0.d, z1.d
; CHECK-NEXT: uzp2 z2.d, z2.d, z3.d
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: uzp1 z2.d, z2.d, z3.d
; CHECK-NEXT: uzp1 z0.d, z0.d, z1.d
; CHECK-NEXT: uzp2 z1.d, z4.d, z5.d
; CHECK-NEXT: uzp1 z26.d, z6.d, z7.d
; CHECK-NEXT: fmul z1.d, z24.d, z25.d
; CHECK-NEXT: fmul z3.d, z2.d, z25.d
; CHECK-NEXT: uzp2 z25.d, z4.d, z5.d
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: uzp1 z4.d, z4.d, z5.d
; CHECK-NEXT: uzp2 z5.d, z6.d, z7.d
; CHECK-NEXT: fmla z3.d, p0/m, z26.d, z25.d
; CHECK-NEXT: fmla z1.d, p0/m, z2.d, z0.d
; CHECK-NEXT: movprfx z2, z3
; CHECK-NEXT: fmla z2.d, p0/m, z5.d, z4.d
; CHECK-NEXT: fmla z1.d, p0/m, z26.d, z4.d
; CHECK-NEXT: fnmls z2.d, p0/m, z24.d, z0.d
; CHECK-NEXT: fmls z1.d, p0/m, z5.d, z25.d
; CHECK-NEXT: fmul z3.d, z2.d, z25.d
; CHECK-NEXT: fmul z25.d, z24.d, z25.d
; CHECK-NEXT: fmla z3.d, p0/m, z24.d, z0.d
; CHECK-NEXT: movprfx z24, z25
; CHECK-NEXT: fmla z24.d, p0/m, z26.d, z1.d
; CHECK-NEXT: movprfx z6, z24
; CHECK-NEXT: fmla z6.d, p0/m, z5.d, z4.d
; CHECK-NEXT: fmla z3.d, p0/m, z26.d, z4.d
; CHECK-NEXT: fnmsb z2.d, p0/m, z0.d, z6.d
; CHECK-NEXT: fmsb z1.d, p0/m, z5.d, z3.d
; CHECK-NEXT: zip1 z0.d, z2.d, z1.d
; CHECK-NEXT: zip2 z1.d, z2.d, z1.d
; CHECK-NEXT: ret
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,10 @@ define <vscale x 4 x half> @complex_mul_v4f16(<vscale x 4 x half> %a, <vscale x
; CHECK-NEXT: uzp1 z0.d, z0.d, z2.d
; CHECK-NEXT: uzp2 z2.d, z1.d, z3.d
; CHECK-NEXT: uzp1 z1.d, z1.d, z3.d
; CHECK-NEXT: movprfx z3, z2
; CHECK-NEXT: fmul z3.h, p0/m, z3.h, z0.h
; CHECK-NEXT: movprfx z5, z2
; CHECK-NEXT: fmul z5.h, p0/m, z5.h, z0.h
; CHECK-NEXT: fmul z2.h, p0/m, z2.h, z4.h
; CHECK-NEXT: movprfx z3, z5
; CHECK-NEXT: fmla z3.h, p0/m, z1.h, z4.h
; CHECK-NEXT: fnmsb z0.h, p0/m, z1.h, z2.h
; CHECK-NEXT: zip2 z1.d, z0.d, z3.d
Expand Down Expand Up @@ -46,8 +47,8 @@ entry:
define <vscale x 8 x half> @complex_mul_v8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b) {
; CHECK-LABEL: complex_mul_v8f16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: mov z2.h, #0 // =0x0
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: fcmla z2.h, p0/m, z1.h, z0.h, #0
; CHECK-NEXT: fcmla z2.h, p0/m, z1.h, z0.h, #90
; CHECK-NEXT: mov z0.d, z2.d
Expand All @@ -72,15 +73,15 @@ entry:
define <vscale x 16 x half> @complex_mul_v16f16(<vscale x 16 x half> %a, <vscale x 16 x half> %b) {
; CHECK-LABEL: complex_mul_v16f16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: mov z4.h, #0 // =0x0
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: mov z5.d, z4.d
; CHECK-NEXT: fcmla z5.h, p0/m, z2.h, z0.h, #0
; CHECK-NEXT: fcmla z4.h, p0/m, z3.h, z1.h, #0
; CHECK-NEXT: fcmla z5.h, p0/m, z2.h, z0.h, #90
; CHECK-NEXT: fcmla z5.h, p0/m, z2.h, z0.h, #0
; CHECK-NEXT: fcmla z4.h, p0/m, z3.h, z1.h, #90
; CHECK-NEXT: mov z0.d, z5.d
; CHECK-NEXT: fcmla z5.h, p0/m, z2.h, z0.h, #90
; CHECK-NEXT: mov z1.d, z4.d
; CHECK-NEXT: mov z0.d, z5.d
; CHECK-NEXT: ret
entry:
%a.deinterleaved = tail call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.experimental.vector.deinterleave2.nxv16f16(<vscale x 16 x half> %a)
Expand All @@ -103,23 +104,23 @@ entry:
define <vscale x 32 x half> @complex_mul_v32f16(<vscale x 32 x half> %a, <vscale x 32 x half> %b) {
; CHECK-LABEL: complex_mul_v32f16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: mov z24.h, #0 // =0x0
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: mov z25.d, z24.d
; CHECK-NEXT: mov z26.d, z24.d
; CHECK-NEXT: mov z27.d, z24.d
; CHECK-NEXT: fcmla z24.h, p0/m, z7.h, z3.h, #0
; CHECK-NEXT: fcmla z25.h, p0/m, z4.h, z0.h, #0
; CHECK-NEXT: fcmla z26.h, p0/m, z5.h, z1.h, #0
; CHECK-NEXT: fcmla z27.h, p0/m, z6.h, z2.h, #0
; CHECK-NEXT: fcmla z24.h, p0/m, z7.h, z3.h, #0
; CHECK-NEXT: fcmla z24.h, p0/m, z7.h, z3.h, #90
; CHECK-NEXT: fcmla z25.h, p0/m, z4.h, z0.h, #90
; CHECK-NEXT: fcmla z26.h, p0/m, z5.h, z1.h, #90
; CHECK-NEXT: fcmla z27.h, p0/m, z6.h, z2.h, #90
; CHECK-NEXT: fcmla z24.h, p0/m, z7.h, z3.h, #90
; CHECK-NEXT: mov z3.d, z24.d
; CHECK-NEXT: mov z0.d, z25.d
; CHECK-NEXT: mov z1.d, z26.d
; CHECK-NEXT: mov z2.d, z27.d
; CHECK-NEXT: mov z3.d, z24.d
; CHECK-NEXT: ret
entry:
%a.deinterleaved = tail call { <vscale x 16 x half>, <vscale x 16 x half> } @llvm.experimental.vector.deinterleave2.nxv32f16(<vscale x 32 x half> %a)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@ target triple = "aarch64"
define <vscale x 4 x float> @complex_mul_v4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: complex_mul_v4f32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: mov z2.s, #0 // =0x0
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: fcmla z2.s, p0/m, z1.s, z0.s, #0
; CHECK-NEXT: fcmla z2.s, p0/m, z1.s, z0.s, #90
; CHECK-NEXT: mov z0.d, z2.d
Expand All @@ -34,15 +34,15 @@ entry:
define <vscale x 8 x float> @complex_mul_v8f32(<vscale x 8 x float> %a, <vscale x 8 x float> %b) {
; CHECK-LABEL: complex_mul_v8f32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: mov z4.s, #0 // =0x0
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: mov z5.d, z4.d
; CHECK-NEXT: fcmla z5.s, p0/m, z2.s, z0.s, #0
; CHECK-NEXT: fcmla z4.s, p0/m, z3.s, z1.s, #0
; CHECK-NEXT: fcmla z5.s, p0/m, z2.s, z0.s, #90
; CHECK-NEXT: fcmla z5.s, p0/m, z2.s, z0.s, #0
; CHECK-NEXT: fcmla z4.s, p0/m, z3.s, z1.s, #90
; CHECK-NEXT: mov z0.d, z5.d
; CHECK-NEXT: fcmla z5.s, p0/m, z2.s, z0.s, #90
; CHECK-NEXT: mov z1.d, z4.d
; CHECK-NEXT: mov z0.d, z5.d
; CHECK-NEXT: ret
entry:
%a.deinterleaved = tail call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.experimental.vector.deinterleave2.nxv8f32(<vscale x 8 x float> %a)
Expand All @@ -65,23 +65,23 @@ entry:
define <vscale x 16 x float> @complex_mul_v16f32(<vscale x 16 x float> %a, <vscale x 16 x float> %b) {
; CHECK-LABEL: complex_mul_v16f32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: mov z24.s, #0 // =0x0
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: mov z25.d, z24.d
; CHECK-NEXT: mov z26.d, z24.d
; CHECK-NEXT: mov z27.d, z24.d
; CHECK-NEXT: fcmla z24.s, p0/m, z7.s, z3.s, #0
; CHECK-NEXT: fcmla z25.s, p0/m, z4.s, z0.s, #0
; CHECK-NEXT: fcmla z26.s, p0/m, z5.s, z1.s, #0
; CHECK-NEXT: fcmla z27.s, p0/m, z6.s, z2.s, #0
; CHECK-NEXT: fcmla z24.s, p0/m, z7.s, z3.s, #0
; CHECK-NEXT: fcmla z24.s, p0/m, z7.s, z3.s, #90
; CHECK-NEXT: fcmla z25.s, p0/m, z4.s, z0.s, #90
; CHECK-NEXT: fcmla z26.s, p0/m, z5.s, z1.s, #90
; CHECK-NEXT: fcmla z27.s, p0/m, z6.s, z2.s, #90
; CHECK-NEXT: fcmla z24.s, p0/m, z7.s, z3.s, #90
; CHECK-NEXT: mov z3.d, z24.d
; CHECK-NEXT: mov z0.d, z25.d
; CHECK-NEXT: mov z1.d, z26.d
; CHECK-NEXT: mov z2.d, z27.d
; CHECK-NEXT: mov z3.d, z24.d
; CHECK-NEXT: ret
entry:
%a.deinterleaved = tail call { <vscale x 8 x float>, <vscale x 8 x float> } @llvm.experimental.vector.deinterleave2.nxv16f32(<vscale x 16 x float> %a)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@ target triple = "aarch64"
define <vscale x 2 x double> @complex_mul_v2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b) {
; CHECK-LABEL: complex_mul_v2f64:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov z2.d, #0 // =0x0
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: fcmla z2.d, p0/m, z1.d, z0.d, #0
; CHECK-NEXT: fcmla z2.d, p0/m, z1.d, z0.d, #90
; CHECK-NEXT: mov z0.d, z2.d
Expand All @@ -34,15 +34,15 @@ entry:
define <vscale x 4 x double> @complex_mul_v4f64(<vscale x 4 x double> %a, <vscale x 4 x double> %b) {
; CHECK-LABEL: complex_mul_v4f64:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov z4.d, #0 // =0x0
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov z5.d, z4.d
; CHECK-NEXT: fcmla z5.d, p0/m, z2.d, z0.d, #0
; CHECK-NEXT: fcmla z4.d, p0/m, z3.d, z1.d, #0
; CHECK-NEXT: fcmla z5.d, p0/m, z2.d, z0.d, #90
; CHECK-NEXT: fcmla z5.d, p0/m, z2.d, z0.d, #0
; CHECK-NEXT: fcmla z4.d, p0/m, z3.d, z1.d, #90
; CHECK-NEXT: mov z0.d, z5.d
; CHECK-NEXT: fcmla z5.d, p0/m, z2.d, z0.d, #90
; CHECK-NEXT: mov z1.d, z4.d
; CHECK-NEXT: mov z0.d, z5.d
; CHECK-NEXT: ret
entry:
%a.deinterleaved = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.experimental.vector.deinterleave2.nxv4f64(<vscale x 4 x double> %a)
Expand All @@ -65,23 +65,23 @@ entry:
define <vscale x 8 x double> @complex_mul_v8f64(<vscale x 8 x double> %a, <vscale x 8 x double> %b) {
; CHECK-LABEL: complex_mul_v8f64:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov z24.d, #0 // =0x0
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov z25.d, z24.d
; CHECK-NEXT: mov z26.d, z24.d
; CHECK-NEXT: mov z27.d, z24.d
; CHECK-NEXT: fcmla z24.d, p0/m, z7.d, z3.d, #0
; CHECK-NEXT: fcmla z25.d, p0/m, z4.d, z0.d, #0
; CHECK-NEXT: fcmla z26.d, p0/m, z5.d, z1.d, #0
; CHECK-NEXT: fcmla z27.d, p0/m, z6.d, z2.d, #0
; CHECK-NEXT: fcmla z24.d, p0/m, z7.d, z3.d, #0
; CHECK-NEXT: fcmla z24.d, p0/m, z7.d, z3.d, #90
; CHECK-NEXT: fcmla z25.d, p0/m, z4.d, z0.d, #90
; CHECK-NEXT: fcmla z26.d, p0/m, z5.d, z1.d, #90
; CHECK-NEXT: fcmla z27.d, p0/m, z6.d, z2.d, #90
; CHECK-NEXT: fcmla z24.d, p0/m, z7.d, z3.d, #90
; CHECK-NEXT: mov z3.d, z24.d
; CHECK-NEXT: mov z0.d, z25.d
; CHECK-NEXT: mov z1.d, z26.d
; CHECK-NEXT: mov z2.d, z27.d
; CHECK-NEXT: mov z3.d, z24.d
; CHECK-NEXT: ret
entry:
%a.deinterleaved = tail call { <vscale x 4 x double>, <vscale x 4 x double> } @llvm.experimental.vector.deinterleave2.nxv8f64(<vscale x 8 x double> %a)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,9 @@ define <vscale x 4 x i16> @complex_mul_v4i16(<vscale x 4 x i16> %a, <vscale x 4
; CHECK-NEXT: uzp2 z0.d, z0.d, z2.d
; CHECK-NEXT: uzp1 z2.d, z1.d, z3.d
; CHECK-NEXT: uzp2 z1.d, z1.d, z3.d
; CHECK-NEXT: mul z3.d, z2.d, z0.d
; CHECK-NEXT: mul z5.d, z2.d, z0.d
; CHECK-NEXT: mul z2.d, z2.d, z4.d
; CHECK-NEXT: movprfx z3, z5
; CHECK-NEXT: mla z3.d, p0/m, z1.d, z4.d
; CHECK-NEXT: msb z0.d, p0/m, z1.d, z2.d
; CHECK-NEXT: zip2 z1.d, z0.d, z3.d
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,37 +14,37 @@ target triple = "aarch64"
define %"class.std::complex" @complex_mul_v2f64(ptr %a, ptr %b) {
; CHECK-LABEL: complex_mul_v2f64:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov w9, #100 // =0x64
; CHECK-NEXT: mov z1.d, #0 // =0x0
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: whilelo p1.d, xzr, x9
; CHECK-NEXT: mov w9, #100 // =0x64
; CHECK-NEXT: cntd x10
; CHECK-NEXT: whilelo p1.d, xzr, x9
; CHECK-NEXT: mov x8, xzr
; CHECK-NEXT: rdvl x11, #2
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov x12, x10
; CHECK-NEXT: zip2 z0.d, z1.d, z1.d
; CHECK-NEXT: zip1 z1.d, z1.d, z1.d
; CHECK-NEXT: .LBB0_1: // %vector.body
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: zip2 p3.d, p1.d, p1.d
; CHECK-NEXT: zip2 p2.d, p1.d, p1.d
; CHECK-NEXT: add x13, x0, x8
; CHECK-NEXT: add x14, x1, x8
; CHECK-NEXT: zip1 p2.d, p1.d, p1.d
; CHECK-NEXT: zip1 p1.d, p1.d, p1.d
; CHECK-NEXT: mov z6.d, z1.d
; CHECK-NEXT: mov z7.d, z0.d
; CHECK-NEXT: whilelo p1.d, x12, x9
; CHECK-NEXT: ld1d { z2.d }, p2/z, [x13, #1, mul vl]
; CHECK-NEXT: ld1d { z4.d }, p2/z, [x14, #1, mul vl]
; CHECK-NEXT: add x8, x8, x11
; CHECK-NEXT: add x12, x12, x10
; CHECK-NEXT: ld1d { z2.d }, p3/z, [x13, #1, mul vl]
; CHECK-NEXT: ld1d { z4.d }, p3/z, [x14, #1, mul vl]
; CHECK-NEXT: ld1d { z3.d }, p2/z, [x13]
; CHECK-NEXT: ld1d { z5.d }, p2/z, [x14]
; CHECK-NEXT: ld1d { z3.d }, p1/z, [x13]
; CHECK-NEXT: ld1d { z5.d }, p1/z, [x14]
; CHECK-NEXT: fcmla z7.d, p0/m, z4.d, z2.d, #0
; CHECK-NEXT: fcmla z6.d, p0/m, z5.d, z3.d, #0
; CHECK-NEXT: fcmla z7.d, p0/m, z4.d, z2.d, #90
; CHECK-NEXT: fcmla z6.d, p0/m, z5.d, z3.d, #90
; CHECK-NEXT: mov z0.d, p3/m, z7.d
; CHECK-NEXT: mov z1.d, p2/m, z6.d
; CHECK-NEXT: mov z0.d, p2/m, z7.d
; CHECK-NEXT: mov z1.d, p1/m, z6.d
; CHECK-NEXT: whilelo p1.d, x12, x9
; CHECK-NEXT: add x12, x12, x10
; CHECK-NEXT: b.mi .LBB0_1
; CHECK-NEXT: // %bb.2: // %exit.block
; CHECK-NEXT: uzp1 z2.d, z1.d, z0.d
Expand Down Expand Up @@ -114,10 +114,10 @@ define %"class.std::complex" @complex_mul_predicated_v2f64(ptr %a, ptr %b, ptr %
; CHECK-LABEL: complex_mul_predicated_v2f64:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov z1.d, #0 // =0x0
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: cntd x10
; CHECK-NEXT: neg x11, x10
; CHECK-NEXT: mov w12, #100 // =0x64
; CHECK-NEXT: neg x11, x10
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov x8, xzr
; CHECK-NEXT: mov x9, xzr
; CHECK-NEXT: and x11, x11, x12
Expand All @@ -133,20 +133,20 @@ define %"class.std::complex" @complex_mul_predicated_v2f64(ptr %a, ptr %b, ptr %
; CHECK-NEXT: mov z7.d, z0.d
; CHECK-NEXT: add x9, x9, x10
; CHECK-NEXT: add x8, x8, x12
; CHECK-NEXT: cmpne p1.d, p0/z, z2.d, #0
; CHECK-NEXT: cmpne p2.d, p0/z, z2.d, #0
; CHECK-NEXT: cmp x11, x9
; CHECK-NEXT: zip2 p2.d, p1.d, p1.d
; CHECK-NEXT: zip1 p1.d, p1.d, p1.d
; CHECK-NEXT: ld1d { z2.d }, p2/z, [x13, #1, mul vl]
; CHECK-NEXT: ld1d { z4.d }, p2/z, [x14, #1, mul vl]
; CHECK-NEXT: ld1d { z3.d }, p1/z, [x13]
; CHECK-NEXT: ld1d { z5.d }, p1/z, [x14]
; CHECK-NEXT: zip2 p1.d, p2.d, p2.d
; CHECK-NEXT: zip1 p2.d, p2.d, p2.d
; CHECK-NEXT: ld1d { z2.d }, p1/z, [x13, #1, mul vl]
; CHECK-NEXT: ld1d { z4.d }, p1/z, [x14, #1, mul vl]
; CHECK-NEXT: ld1d { z3.d }, p2/z, [x13]
; CHECK-NEXT: ld1d { z5.d }, p2/z, [x14]
; CHECK-NEXT: fcmla z7.d, p0/m, z4.d, z2.d, #0
; CHECK-NEXT: fcmla z6.d, p0/m, z5.d, z3.d, #0
; CHECK-NEXT: fcmla z7.d, p0/m, z4.d, z2.d, #90
; CHECK-NEXT: fcmla z6.d, p0/m, z5.d, z3.d, #90
; CHECK-NEXT: mov z0.d, p2/m, z7.d
; CHECK-NEXT: mov z1.d, p1/m, z6.d
; CHECK-NEXT: mov z0.d, p1/m, z7.d
; CHECK-NEXT: mov z1.d, p2/m, z6.d
; CHECK-NEXT: b.ne .LBB1_1
; CHECK-NEXT: // %bb.2: // %exit.block
; CHECK-NEXT: uzp1 z2.d, z1.d, z0.d
Expand Down Expand Up @@ -217,8 +217,8 @@ exit.block: ; preds = %vector.body
define %"class.std::complex" @complex_mul_predicated_x2_v2f64(ptr %a, ptr %b, ptr %cond) {
; CHECK-LABEL: complex_mul_predicated_x2_v2f64:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov w10, #100 // =0x64
; CHECK-NEXT: mov z1.d, #0 // =0x0
; CHECK-NEXT: mov w10, #100 // =0x64
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: whilelo p1.d, xzr, x10
; CHECK-NEXT: mov x8, xzr
Expand All @@ -236,19 +236,19 @@ define %"class.std::complex" @complex_mul_predicated_x2_v2f64(ptr %a, ptr %b, pt
; CHECK-NEXT: mov z7.d, z0.d
; CHECK-NEXT: add x9, x9, x11
; CHECK-NEXT: add x8, x8, x12
; CHECK-NEXT: cmpne p1.d, p1/z, z2.d, #0
; CHECK-NEXT: zip2 p3.d, p1.d, p1.d
; CHECK-NEXT: zip1 p2.d, p1.d, p1.d
; CHECK-NEXT: whilelo p1.d, x9, x10
; CHECK-NEXT: ld1d { z2.d }, p3/z, [x13, #1, mul vl]
; CHECK-NEXT: ld1d { z4.d }, p3/z, [x14, #1, mul vl]
; CHECK-NEXT: cmpne p2.d, p1/z, z2.d, #0
; CHECK-NEXT: zip2 p1.d, p2.d, p2.d
; CHECK-NEXT: zip1 p2.d, p2.d, p2.d
; CHECK-NEXT: ld1d { z2.d }, p1/z, [x13, #1, mul vl]
; CHECK-NEXT: ld1d { z4.d }, p1/z, [x14, #1, mul vl]
; CHECK-NEXT: ld1d { z3.d }, p2/z, [x13]
; CHECK-NEXT: ld1d { z5.d }, p2/z, [x14]
; CHECK-NEXT: fcmla z7.d, p0/m, z4.d, z2.d, #0
; CHECK-NEXT: fcmla z6.d, p0/m, z5.d, z3.d, #0
; CHECK-NEXT: fcmla z7.d, p0/m, z4.d, z2.d, #90
; CHECK-NEXT: fcmla z6.d, p0/m, z5.d, z3.d, #90
; CHECK-NEXT: mov z0.d, p3/m, z7.d
; CHECK-NEXT: mov z0.d, p1/m, z7.d
; CHECK-NEXT: whilelo p1.d, x9, x10
; CHECK-NEXT: mov z1.d, p2/m, z6.d
; CHECK-NEXT: b.mi .LBB2_1
; CHECK-NEXT: // %bb.2: // %exit.block
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,11 @@ define %"class.std::complex" @complex_mul_v2f64(ptr %a, ptr %b) {
; CHECK-LABEL: complex_mul_v2f64:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov z1.d, #0 // =0x0
; CHECK-NEXT: ptrue p1.b
; CHECK-NEXT: cntd x9
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: ptrue p1.b
; CHECK-NEXT: neg x9, x9
; CHECK-NEXT: mov w10, #100 // =0x64
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov x8, xzr
; CHECK-NEXT: and x10, x9, x10
; CHECK-NEXT: rdvl x11, #2
Expand Down Expand Up @@ -101,18 +101,18 @@ exit.block: ; preds = %vector.body
define %"class.std::complex" @complex_mul_nonzero_init_v2f64(ptr %a, ptr %b) {
; CHECK-LABEL: complex_mul_nonzero_init_v2f64:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.d, vl1
; CHECK-NEXT: fmov d0, #1.00000000
; CHECK-NEXT: mov z1.d, #0 // =0x0
; CHECK-NEXT: fmov d2, #2.00000000
; CHECK-NEXT: cntd x9
; CHECK-NEXT: mov w10, #100 // =0x64
; CHECK-NEXT: ptrue p1.b
; CHECK-NEXT: fmov d2, #2.00000000
; CHECK-NEXT: ptrue p0.d, vl1
; CHECK-NEXT: neg x9, x9
; CHECK-NEXT: ptrue p1.b
; CHECK-NEXT: mov w10, #100 // =0x64
; CHECK-NEXT: mov x8, xzr
; CHECK-NEXT: sel z3.d, p0, z0.d, z1.d
; CHECK-NEXT: and x10, x9, x10
; CHECK-NEXT: rdvl x11, #2
; CHECK-NEXT: sel z3.d, p0, z0.d, z1.d
; CHECK-NEXT: mov z1.d, p0/m, z2.d
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: zip2 z0.d, z1.d, z3.d
Expand Down Expand Up @@ -190,12 +190,12 @@ define %"class.std::complex" @complex_mul_v2f64_unrolled(ptr %a, ptr %b) {
; CHECK-LABEL: complex_mul_v2f64_unrolled:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov z1.d, #0 // =0x0
; CHECK-NEXT: ptrue p1.b
; CHECK-NEXT: cntw x9
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: neg x9, x9
; CHECK-NEXT: mov w10, #1000 // =0x3e8
; CHECK-NEXT: neg x9, x9
; CHECK-NEXT: rdvl x12, #2
; CHECK-NEXT: ptrue p1.b
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov x8, xzr
; CHECK-NEXT: and x10, x9, x10
; CHECK-NEXT: zip2 z0.d, z1.d, z1.d
Expand Down Expand Up @@ -324,10 +324,10 @@ define dso_local %"class.std::complex" @reduction_mix(ptr %a, ptr %b, ptr noalia
; CHECK-LABEL: reduction_mix:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov z2.d, #0 // =0x0
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: cntd x9
; CHECK-NEXT: neg x10, x9
; CHECK-NEXT: mov w11, #100 // =0x64
; CHECK-NEXT: neg x10, x9
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov x8, xzr
; CHECK-NEXT: and x10, x10, x11
; CHECK-NEXT: rdvl x11, #2
Expand All @@ -349,8 +349,8 @@ define dso_local %"class.std::complex" @reduction_mix(ptr %a, ptr %b, ptr noalia
; CHECK-NEXT: uaddv d2, p0, z2.d
; CHECK-NEXT: uzp2 z3.d, z1.d, z0.d
; CHECK-NEXT: uzp1 z1.d, z1.d, z0.d
; CHECK-NEXT: fmov x8, d2
; CHECK-NEXT: faddv d0, p0, z3.d
; CHECK-NEXT: fmov x8, d2
; CHECK-NEXT: faddv d1, p0, z1.d
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: // kill: def $d1 killed $d1 killed $z1
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@ target triple = "aarch64"
define <vscale x 4 x double> @complex_mul_const(<vscale x 4 x double> %a, <vscale x 4 x double> %b) {
; CHECK-LABEL: complex_mul_const:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov z4.d, #0 // =0x0
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: fmov z7.d, #3.00000000
; CHECK-NEXT: fmov z24.d, #11.00000000
; CHECK-NEXT: mov z6.d, z4.d
Expand Down Expand Up @@ -55,25 +55,25 @@ entry:
define <vscale x 4 x double> @complex_mul_non_const(<vscale x 4 x double> %a, <vscale x 4 x double> %b, [2 x double] %c) {
; CHECK-LABEL: complex_mul_non_const:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov z6.d, #0 // =0x0
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: // kill: def $d5 killed $d5 def $z5
; CHECK-NEXT: // kill: def $d4 killed $d4 def $z4
; CHECK-NEXT: mov z5.d, d5
; CHECK-NEXT: mov z4.d, d4
; CHECK-NEXT: mov z24.d, z6.d
; CHECK-NEXT: mov z7.d, z6.d
; CHECK-NEXT: zip2 z25.d, z4.d, z5.d
; CHECK-NEXT: zip1 z4.d, z4.d, z5.d
; CHECK-NEXT: fcmla z24.d, p0/m, z1.d, z3.d, #0
; CHECK-NEXT: fcmla z7.d, p0/m, z0.d, z2.d, #0
; CHECK-NEXT: fcmla z24.d, p0/m, z1.d, z3.d, #90
; CHECK-NEXT: zip2 z1.d, z4.d, z5.d
; CHECK-NEXT: fcmla z7.d, p0/m, z0.d, z2.d, #90
; CHECK-NEXT: zip1 z2.d, z4.d, z5.d
; CHECK-NEXT: mov z0.d, z6.d
; CHECK-NEXT: fcmla z6.d, p0/m, z24.d, z25.d, #0
; CHECK-NEXT: fcmla z0.d, p0/m, z7.d, z4.d, #0
; CHECK-NEXT: fcmla z6.d, p0/m, z24.d, z25.d, #90
; CHECK-NEXT: fcmla z0.d, p0/m, z7.d, z4.d, #90
; CHECK-NEXT: fcmla z6.d, p0/m, z24.d, z1.d, #0
; CHECK-NEXT: fcmla z0.d, p0/m, z7.d, z2.d, #0
; CHECK-NEXT: fcmla z6.d, p0/m, z24.d, z1.d, #90
; CHECK-NEXT: fcmla z0.d, p0/m, z7.d, z2.d, #90
; CHECK-NEXT: mov z1.d, z6.d
; CHECK-NEXT: ret
entry:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -96,8 +96,8 @@ entry:
define void @test_concat_fptrunc_v4f64_to_v4f32(ptr %ptr) #1 {
; CHECK-LABEL: test_concat_fptrunc_v4f64_to_v4f32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: fmov z0.s, #1.00000000
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
entry:
Expand Down
12 changes: 6 additions & 6 deletions llvm/test/CodeGen/AArch64/dag-combine-concat-vectors.ll
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@ define fastcc i8 @allocno_reload_assign() {
; CHECK-LABEL: allocno_reload_assign:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z0.b, #0 // =0x0
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov z16.d, #0 // =0x0
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: ptrue p1.b
; CHECK-NEXT: uunpklo z1.h, z0.b
; CHECK-NEXT: uunpkhi z0.h, z0.b
Expand Down Expand Up @@ -48,12 +48,12 @@ define fastcc i8 @allocno_reload_assign() {
; CHECK-NEXT: punpklo p4.h, p3.b
; CHECK-NEXT: punpkhi p3.h, p3.b
; CHECK-NEXT: st1b { z2.d }, p4, [z16.d]
; CHECK-NEXT: punpklo p4.h, p2.b
; CHECK-NEXT: punpkhi p2.h, p2.b
; CHECK-NEXT: st1b { z3.d }, p3, [z16.d]
; CHECK-NEXT: punpklo p3.h, p4.b
; CHECK-NEXT: st1b { z4.d }, p3, [z16.d]
; CHECK-NEXT: punpkhi p3.h, p4.b
; CHECK-NEXT: punpklo p3.h, p2.b
; CHECK-NEXT: punpkhi p2.h, p2.b
; CHECK-NEXT: punpklo p4.h, p3.b
; CHECK-NEXT: punpkhi p3.h, p3.b
; CHECK-NEXT: st1b { z4.d }, p4, [z16.d]
; CHECK-NEXT: st1b { z5.d }, p3, [z16.d]
; CHECK-NEXT: punpklo p3.h, p2.b
; CHECK-NEXT: punpkhi p2.h, p2.b
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -228,13 +228,13 @@ define <2 x i64> @vector_i128_i64(<2 x i64> %x, <2 x i64> %y, ptr %divdst) nounw
; ALL-NEXT: sdiv x9, x9, x8
; ALL-NEXT: sdiv x11, x11, x10
; ALL-NEXT: mul x8, x9, x8
; ALL-NEXT: fmov d2, x9
; ALL-NEXT: fmov d1, x8
; ALL-NEXT: mul x10, x11, x10
; ALL-NEXT: mov v2.d[1], x11
; ALL-NEXT: str q2, [x0]
; ALL-NEXT: mov v1.d[1], x10
; ALL-NEXT: sub v0.2d, v0.2d, v1.2d
; ALL-NEXT: fmov d1, x9
; ALL-NEXT: mov v1.d[1], x11
; ALL-NEXT: str q1, [x0]
; ALL-NEXT: ret
%div = sdiv <2 x i64> %x, %y
store <2 x i64> %div, ptr %divdst, align 16
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -228,13 +228,13 @@ define <2 x i64> @vector_i128_i64(<2 x i64> %x, <2 x i64> %y, ptr %divdst) nounw
; ALL-NEXT: udiv x9, x9, x8
; ALL-NEXT: udiv x11, x11, x10
; ALL-NEXT: mul x8, x9, x8
; ALL-NEXT: fmov d2, x9
; ALL-NEXT: fmov d1, x8
; ALL-NEXT: mul x10, x11, x10
; ALL-NEXT: mov v2.d[1], x11
; ALL-NEXT: str q2, [x0]
; ALL-NEXT: mov v1.d[1], x10
; ALL-NEXT: sub v0.2d, v0.2d, v1.2d
; ALL-NEXT: fmov d1, x9
; ALL-NEXT: mov v1.d[1], x11
; ALL-NEXT: str q1, [x0]
; ALL-NEXT: ret
%div = udiv <2 x i64> %x, %y
store <2 x i64> %div, ptr %divdst, align 16
Expand Down
121 changes: 61 additions & 60 deletions llvm/test/CodeGen/AArch64/extbinopload.ll
Original file line number Diff line number Diff line change
Expand Up @@ -502,9 +502,9 @@ define <16 x i32> @double_bv_4xv4i8_i32(ptr %p, ptr %q, ptr %r, ptr %s, ptr %t,
; CHECK-NEXT: usubl v4.8h, v4.8b, v5.8b
; CHECK-NEXT: ld1 { v6.s }[1], [x7], #4
; CHECK-NEXT: ld1 { v7.s }[1], [x7]
; CHECK-NEXT: usubl v5.8h, v6.8b, v7.8b
; CHECK-NEXT: shll v0.4s, v4.4h, #16
; CHECK-NEXT: shll2 v4.4s, v4.8h, #16
; CHECK-NEXT: usubl v5.8h, v6.8b, v7.8b
; CHECK-NEXT: saddw v0.4s, v0.4s, v1.4h
; CHECK-NEXT: saddw2 v1.4s, v4.4s, v1.8h
; CHECK-NEXT: shll v6.4s, v5.4h, #16
Expand Down Expand Up @@ -647,10 +647,10 @@ define <16 x i32> @extrause_load(ptr %p, ptr %q, ptr %r, ptr %s, ptr %z) {
; CHECK: // %bb.0:
; CHECK-NEXT: ldr s1, [x0]
; CHECK-NEXT: add x8, x3, #8
; CHECK-NEXT: add x11, x3, #12
; CHECK-NEXT: add x11, x1, #12
; CHECK-NEXT: str s1, [x4]
; CHECK-NEXT: ushll v1.8h, v1.8b, #0
; CHECK-NEXT: ldp s0, s4, [x2]
; CHECK-NEXT: ldr s0, [x2]
; CHECK-NEXT: ushll v2.8h, v0.8b, #0
; CHECK-NEXT: umov w9, v2.h[0]
; CHECK-NEXT: umov w10, v2.h[1]
Expand All @@ -664,31 +664,32 @@ define <16 x i32> @extrause_load(ptr %p, ptr %q, ptr %r, ptr %s, ptr %z) {
; CHECK-NEXT: add x9, x1, #4
; CHECK-NEXT: mov v1.d[1], v2.d[0]
; CHECK-NEXT: mov v0.b[11], w10
; CHECK-NEXT: add x10, x1, #12
; CHECK-NEXT: add x10, x3, #12
; CHECK-NEXT: bic v1.8h, #255, lsl #8
; CHECK-NEXT: ld1 { v0.s }[3], [x3], #4
; CHECK-NEXT: ldr s3, [x0, #12]
; CHECK-NEXT: ldp s2, s7, [x0, #4]
; CHECK-NEXT: ld1 { v4.s }[1], [x3]
; CHECK-NEXT: ldp s5, s6, [x2, #8]
; CHECK-NEXT: ld1 { v3.s }[1], [x10]
; CHECK-NEXT: ld1 { v2.s }[1], [x9]
; CHECK-NEXT: ld1 { v5.s }[1], [x8]
; CHECK-NEXT: ld1 { v6.s }[1], [x11]
; CHECK-NEXT: ldr s5, [x0, #4]
; CHECK-NEXT: ldp s2, s3, [x2, #4]
; CHECK-NEXT: ldr s7, [x2, #12]
; CHECK-NEXT: ldp s6, s4, [x0, #8]
; CHECK-NEXT: ld1 { v5.s }[1], [x9]
; CHECK-NEXT: ld1 { v7.s }[1], [x10]
; CHECK-NEXT: ld1 { v3.s }[1], [x8]
; CHECK-NEXT: ld1 { v2.s }[1], [x3]
; CHECK-NEXT: add x8, x1, #8
; CHECK-NEXT: ld1 { v7.s }[1], [x8]
; CHECK-NEXT: uaddl v2.8h, v2.8b, v3.8b
; CHECK-NEXT: ushll v3.8h, v5.8b, #0
; CHECK-NEXT: uaddl v4.8h, v4.8b, v6.8b
; CHECK-NEXT: uaddw v1.8h, v1.8h, v7.8b
; CHECK-NEXT: ld1 { v4.s }[1], [x11]
; CHECK-NEXT: ld1 { v6.s }[1], [x8]
; CHECK-NEXT: ushll v3.8h, v3.8b, #0
; CHECK-NEXT: uaddl v2.8h, v2.8b, v7.8b
; CHECK-NEXT: uaddl v4.8h, v5.8b, v4.8b
; CHECK-NEXT: uaddw v1.8h, v1.8h, v6.8b
; CHECK-NEXT: uaddw2 v5.8h, v3.8h, v0.16b
; CHECK-NEXT: ushll v0.4s, v2.4h, #3
; CHECK-NEXT: ushll v6.4s, v2.4h, #3
; CHECK-NEXT: ushll2 v2.4s, v2.8h, #3
; CHECK-NEXT: ushll v6.4s, v4.4h, #3
; CHECK-NEXT: ushll v0.4s, v4.4h, #3
; CHECK-NEXT: ushll2 v3.4s, v4.8h, #3
; CHECK-NEXT: uaddw v0.4s, v0.4s, v1.4h
; CHECK-NEXT: uaddw2 v1.4s, v2.4s, v1.8h
; CHECK-NEXT: uaddw2 v3.4s, v3.4s, v5.8h
; CHECK-NEXT: uaddw2 v1.4s, v3.4s, v1.8h
; CHECK-NEXT: uaddw2 v3.4s, v2.4s, v5.8h
; CHECK-NEXT: uaddw v2.4s, v6.4s, v5.4h
; CHECK-NEXT: ret
%lp1 = load <4 x i8>, ptr %p
Expand Down Expand Up @@ -762,35 +763,35 @@ define <16 x i32> @extrause_shuffle(ptr %p, ptr %q, ptr %r, ptr %s, ptr %z) {
; CHECK-NEXT: add x8, x1, #8
; CHECK-NEXT: ldr s6, [x1, #12]
; CHECK-NEXT: ldp s17, s18, [x2, #8]
; CHECK-NEXT: ldp s2, s3, [x2]
; CHECK-NEXT: ldp s3, s5, [x2]
; CHECK-NEXT: add x9, x3, #8
; CHECK-NEXT: mov v4.16b, v1.16b
; CHECK-NEXT: ldp s7, s16, [x0]
; CHECK-NEXT: ldr s5, [x3, #12]
; CHECK-NEXT: ldr s2, [x3, #12]
; CHECK-NEXT: mov v1.s[1], v6.s[0]
; CHECK-NEXT: ld1 { v2.s }[1], [x3], #4
; CHECK-NEXT: ld1 { v3.s }[1], [x3], #4
; CHECK-NEXT: mov v4.s[1], v6.s[0]
; CHECK-NEXT: ld1 { v7.s }[1], [x1], #4
; CHECK-NEXT: ld1 { v16.s }[1], [x1]
; CHECK-NEXT: ld1 { v3.s }[1], [x3]
; CHECK-NEXT: ld1 { v0.s }[1], [x8]
; CHECK-NEXT: ld1 { v5.s }[1], [x3]
; CHECK-NEXT: ld1 { v17.s }[1], [x9]
; CHECK-NEXT: ld1 { v0.s }[1], [x8]
; CHECK-NEXT: mov v4.s[2], v18.s[0]
; CHECK-NEXT: mov v18.s[1], v5.s[0]
; CHECK-NEXT: mov v18.s[1], v2.s[0]
; CHECK-NEXT: uaddl v1.8h, v16.8b, v1.8b
; CHECK-NEXT: uaddl v6.8h, v7.8b, v0.8b
; CHECK-NEXT: uaddl v2.8h, v2.8b, v17.8b
; CHECK-NEXT: uaddl v3.8h, v3.8b, v18.8b
; CHECK-NEXT: uaddl v7.8h, v3.8b, v17.8b
; CHECK-NEXT: ushll v0.4s, v1.4h, #3
; CHECK-NEXT: ushll2 v1.4s, v1.8h, #3
; CHECK-NEXT: mov v4.s[3], v5.s[0]
; CHECK-NEXT: uaddl v5.8h, v5.8b, v18.8b
; CHECK-NEXT: mov v4.s[3], v2.s[0]
; CHECK-NEXT: uaddw v0.4s, v0.4s, v6.4h
; CHECK-NEXT: uaddw2 v1.4s, v1.4s, v6.8h
; CHECK-NEXT: ushll v7.4s, v3.4h, #3
; CHECK-NEXT: ushll2 v3.4s, v3.8h, #3
; CHECK-NEXT: ushll v16.4s, v5.4h, #3
; CHECK-NEXT: ushll2 v3.4s, v5.8h, #3
; CHECK-NEXT: str q4, [x4]
; CHECK-NEXT: uaddw2 v3.4s, v3.4s, v2.8h
; CHECK-NEXT: uaddw v2.4s, v7.4s, v2.4h
; CHECK-NEXT: uaddw2 v3.4s, v3.4s, v7.8h
; CHECK-NEXT: uaddw v2.4s, v16.4s, v7.4h
; CHECK-NEXT: ret
%lp1 = load <4 x i8>, ptr %p
%p2 = getelementptr i8, ptr %p, i32 4
Expand Down Expand Up @@ -873,8 +874,8 @@ define <16 x i32> @extrause_ext(ptr %p, ptr %q, ptr %r, ptr %s, ptr %z) {
; CHECK-NEXT: ld1 { v4.s }[1], [x11]
; CHECK-NEXT: ld1 { v2.s }[1], [x3]
; CHECK-NEXT: ld1 { v0.s }[1], [x10]
; CHECK-NEXT: ld1 { v7.s }[1], [x9]
; CHECK-NEXT: ld1 { v6.s }[1], [x8]
; CHECK-NEXT: ld1 { v7.s }[1], [x9]
; CHECK-NEXT: uaddl v5.8h, v5.8b, v4.8b
; CHECK-NEXT: uaddl v2.8h, v2.8b, v0.8b
; CHECK-NEXT: ushll v16.8h, v0.8b, #0
Expand Down Expand Up @@ -972,8 +973,8 @@ define <16 x i32> @extrause_add(ptr %p, ptr %q, ptr %r, ptr %s, ptr %z) {
; CHECK-NEXT: ld1 { v5.s }[1], [x11]
; CHECK-NEXT: ld1 { v3.s }[1], [x3]
; CHECK-NEXT: ld1 { v7.s }[1], [x10]
; CHECK-NEXT: ld1 { v4.s }[1], [x9]
; CHECK-NEXT: ld1 { v6.s }[1], [x8]
; CHECK-NEXT: ld1 { v4.s }[1], [x9]
; CHECK-NEXT: uaddl v5.8h, v1.8b, v5.8b
; CHECK-NEXT: uaddl v7.8h, v3.8b, v7.8b
; CHECK-NEXT: uaddl v1.8h, v0.8b, v4.8b
Expand Down Expand Up @@ -1072,23 +1073,23 @@ define <16 x i32> @extrause_ext2(ptr %p, ptr %q, ptr %r, ptr %s, ptr %z) {
; CHECK-NEXT: ld1 { v6.s }[1], [x9]
; CHECK-NEXT: ld1 { v4.s }[1], [x8]
; CHECK-NEXT: uaddl v7.8h, v3.8b, v7.8b
; CHECK-NEXT: uaddl v3.8h, v1.8b, v5.8b
; CHECK-NEXT: uaddl v1.8h, v1.8b, v5.8b
; CHECK-NEXT: uaddl v2.8h, v2.8b, v6.8b
; CHECK-NEXT: uaddl v4.8h, v0.8b, v4.8b
; CHECK-NEXT: ushll v0.4s, v7.4h, #3
; CHECK-NEXT: ushll2 v1.4s, v7.8h, #3
; CHECK-NEXT: ushll v5.4s, v3.4h, #3
; CHECK-NEXT: ushll2 v6.4s, v3.8h, #3
; CHECK-NEXT: ushll2 v16.4s, v3.8h, #0
; CHECK-NEXT: ushll v17.4s, v3.4h, #0
; CHECK-NEXT: uaddw2 v1.4s, v1.4s, v2.8h
; CHECK-NEXT: ushll2 v3.4s, v7.8h, #3
; CHECK-NEXT: ushll v6.4s, v1.4h, #3
; CHECK-NEXT: ushll2 v16.4s, v1.8h, #3
; CHECK-NEXT: ushll2 v5.4s, v1.8h, #0
; CHECK-NEXT: ushll v17.4s, v1.4h, #0
; CHECK-NEXT: ushll2 v18.4s, v7.8h, #0
; CHECK-NEXT: uaddw2 v1.4s, v3.4s, v2.8h
; CHECK-NEXT: uaddw v0.4s, v0.4s, v2.4h
; CHECK-NEXT: uaddw v2.4s, v5.4s, v4.4h
; CHECK-NEXT: uaddw2 v3.4s, v6.4s, v4.8h
; CHECK-NEXT: ushll2 v4.4s, v7.8h, #0
; CHECK-NEXT: ushll v5.4s, v7.4h, #0
; CHECK-NEXT: stp q17, q16, [x4, #32]
; CHECK-NEXT: stp q5, q4, [x4]
; CHECK-NEXT: uaddw v2.4s, v6.4s, v4.4h
; CHECK-NEXT: uaddw2 v3.4s, v16.4s, v4.8h
; CHECK-NEXT: ushll v4.4s, v7.4h, #0
; CHECK-NEXT: stp q17, q5, [x4, #32]
; CHECK-NEXT: stp q4, q18, [x4]
; CHECK-NEXT: ret
%lp1 = load <4 x i8>, ptr %p
%p2 = getelementptr i8, ptr %p, i32 4
Expand Down Expand Up @@ -1157,32 +1158,32 @@ define <16 x i32> @extrause_ext2(ptr %p, ptr %q, ptr %r, ptr %s, ptr %z) {
define <16 x i32> @extrause_shl(ptr %p, ptr %q, ptr %r, ptr %s, ptr %z) {
; CHECK-LABEL: extrause_shl:
; CHECK: // %bb.0:
; CHECK-NEXT: ldp s0, s1, [x0]
; CHECK-NEXT: ldp s1, s2, [x0]
; CHECK-NEXT: add x10, x3, #12
; CHECK-NEXT: ldp s2, s3, [x2]
; CHECK-NEXT: ldp s0, s3, [x2]
; CHECK-NEXT: add x11, x1, #12
; CHECK-NEXT: ldp s4, s5, [x0, #8]
; CHECK-NEXT: add x8, x3, #8
; CHECK-NEXT: ldp s6, s7, [x2, #8]
; CHECK-NEXT: add x9, x1, #8
; CHECK-NEXT: ld1 { v2.s }[1], [x3], #4
; CHECK-NEXT: ld1 { v0.s }[1], [x1], #4
; CHECK-NEXT: ld1 { v1.s }[1], [x1]
; CHECK-NEXT: ld1 { v0.s }[1], [x3], #4
; CHECK-NEXT: ld1 { v1.s }[1], [x1], #4
; CHECK-NEXT: ld1 { v2.s }[1], [x1]
; CHECK-NEXT: ld1 { v5.s }[1], [x11]
; CHECK-NEXT: ld1 { v3.s }[1], [x3]
; CHECK-NEXT: ld1 { v7.s }[1], [x10]
; CHECK-NEXT: ld1 { v4.s }[1], [x9]
; CHECK-NEXT: ld1 { v6.s }[1], [x8]
; CHECK-NEXT: uaddl v1.8h, v1.8b, v5.8b
; CHECK-NEXT: uaddl v2.8h, v2.8b, v5.8b
; CHECK-NEXT: uaddl v3.8h, v3.8b, v7.8b
; CHECK-NEXT: uaddl v4.8h, v0.8b, v4.8b
; CHECK-NEXT: uaddl v2.8h, v2.8b, v6.8b
; CHECK-NEXT: ushll v5.4s, v1.4h, #3
; CHECK-NEXT: uaddl v4.8h, v1.8b, v4.8b
; CHECK-NEXT: ushll v5.4s, v2.4h, #3
; CHECK-NEXT: ushll2 v7.4s, v2.8h, #3
; CHECK-NEXT: uaddl v2.8h, v0.8b, v6.8b
; CHECK-NEXT: ushll v6.4s, v3.4h, #3
; CHECK-NEXT: ushll2 v7.4s, v1.8h, #3
; CHECK-NEXT: ushll2 v16.4s, v3.8h, #3
; CHECK-NEXT: uaddw v0.4s, v5.4s, v4.4h
; CHECK-NEXT: uaddw2 v1.4s, v7.4s, v4.8h
; CHECK-NEXT: uaddw v0.4s, v5.4s, v4.4h
; CHECK-NEXT: stp q5, q7, [x4]
; CHECK-NEXT: uaddw2 v3.4s, v16.4s, v2.8h
; CHECK-NEXT: uaddw v2.4s, v6.4s, v2.4h
Expand Down
108 changes: 54 additions & 54 deletions llvm/test/CodeGen/AArch64/fcmp.ll
Original file line number Diff line number Diff line change
Expand Up @@ -280,10 +280,10 @@ define <3 x i32> @v3f64_i32(<3 x double> %a, <3 x double> %b, <3 x i32> %d, <3 x
; CHECK-GI-NEXT: uzp1 v0.4s, v0.4s, v2.4s
; CHECK-GI-NEXT: fmov s2, w8
; CHECK-GI-NEXT: mov v2.s[1], w8
; CHECK-GI-NEXT: neg v3.4s, v1.4s
; CHECK-GI-NEXT: ushl v0.4s, v0.4s, v1.4s
; CHECK-GI-NEXT: neg v1.4s, v1.4s
; CHECK-GI-NEXT: mov v2.s[2], w8
; CHECK-GI-NEXT: sshl v0.4s, v0.4s, v3.4s
; CHECK-GI-NEXT: sshl v0.4s, v0.4s, v1.4s
; CHECK-GI-NEXT: eor v1.16b, v0.16b, v2.16b
; CHECK-GI-NEXT: and v0.16b, v6.16b, v0.16b
; CHECK-GI-NEXT: and v1.16b, v7.16b, v1.16b
Expand Down Expand Up @@ -348,10 +348,10 @@ define <3 x float> @v3f32_float(<3 x float> %a, <3 x float> %b, <3 x float> %d,
; CHECK-GI-NEXT: mov w8, #-1 // =0xffffffff
; CHECK-GI-NEXT: fmov s1, w8
; CHECK-GI-NEXT: mov v1.s[1], w8
; CHECK-GI-NEXT: neg v5.4s, v4.4s
; CHECK-GI-NEXT: ushl v0.4s, v0.4s, v4.4s
; CHECK-GI-NEXT: neg v4.4s, v4.4s
; CHECK-GI-NEXT: sshl v0.4s, v0.4s, v4.4s
; CHECK-GI-NEXT: mov v1.s[2], w8
; CHECK-GI-NEXT: sshl v0.4s, v0.4s, v5.4s
; CHECK-GI-NEXT: eor v1.16b, v0.16b, v1.16b
; CHECK-GI-NEXT: and v0.16b, v2.16b, v0.16b
; CHECK-GI-NEXT: and v1.16b, v3.16b, v1.16b
Expand Down Expand Up @@ -426,10 +426,10 @@ define <3 x i32> @v3f32_i32(<3 x float> %a, <3 x float> %b, <3 x i32> %d, <3 x i
; CHECK-GI-NEXT: mov w8, #-1 // =0xffffffff
; CHECK-GI-NEXT: fmov s1, w8
; CHECK-GI-NEXT: mov v1.s[1], w8
; CHECK-GI-NEXT: neg v5.4s, v4.4s
; CHECK-GI-NEXT: ushl v0.4s, v0.4s, v4.4s
; CHECK-GI-NEXT: neg v4.4s, v4.4s
; CHECK-GI-NEXT: sshl v0.4s, v0.4s, v4.4s
; CHECK-GI-NEXT: mov v1.s[2], w8
; CHECK-GI-NEXT: sshl v0.4s, v0.4s, v5.4s
; CHECK-GI-NEXT: eor v1.16b, v0.16b, v1.16b
; CHECK-GI-NEXT: and v0.16b, v2.16b, v0.16b
; CHECK-GI-NEXT: and v1.16b, v3.16b, v1.16b
Expand Down Expand Up @@ -545,8 +545,8 @@ define <7 x half> @v7f16_half(<7 x half> %a, <7 x half> %b, <7 x half> %d, <7 x
; CHECK-GI-NOFP16-LABEL: v7f16_half:
; CHECK-GI-NOFP16: // %bb.0: // %entry
; CHECK-GI-NOFP16-NEXT: mov w8, #15 // =0xf
; CHECK-GI-NOFP16-NEXT: mov h6, v0.h[4]
; CHECK-GI-NOFP16-NEXT: mov h7, v0.h[5]
; CHECK-GI-NOFP16-NEXT: mov h5, v0.h[4]
; CHECK-GI-NOFP16-NEXT: mov h6, v0.h[5]
; CHECK-GI-NOFP16-NEXT: fmov s4, w8
; CHECK-GI-NOFP16-NEXT: mov h16, v1.h[4]
; CHECK-GI-NOFP16-NEXT: mov w8, #65535 // =0xffff
Expand All @@ -555,32 +555,32 @@ define <7 x half> @v7f16_half(<7 x half> %a, <7 x half> %b, <7 x half> %d, <7 x
; CHECK-GI-NOFP16-NEXT: mov h19, v1.h[6]
; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h
; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v1.4h
; CHECK-GI-NOFP16-NEXT: mov v5.16b, v4.16b
; CHECK-GI-NOFP16-NEXT: mov v6.h[1], v7.h[0]
; CHECK-GI-NOFP16-NEXT: fmov s7, w8
; CHECK-GI-NOFP16-NEXT: mov v7.16b, v4.16b
; CHECK-GI-NOFP16-NEXT: mov v5.h[1], v6.h[0]
; CHECK-GI-NOFP16-NEXT: fmov s6, w8
; CHECK-GI-NOFP16-NEXT: mov v16.h[1], v17.h[0]
; CHECK-GI-NOFP16-NEXT: mov v5.h[1], v4.h[0]
; CHECK-GI-NOFP16-NEXT: mov v17.16b, v7.16b
; CHECK-GI-NOFP16-NEXT: mov v7.h[1], v4.h[0]
; CHECK-GI-NOFP16-NEXT: mov v17.16b, v6.16b
; CHECK-GI-NOFP16-NEXT: fcmgt v0.4s, v1.4s, v0.4s
; CHECK-GI-NOFP16-NEXT: mov v6.h[2], v18.h[0]
; CHECK-GI-NOFP16-NEXT: mov v17.h[1], v7.h[0]
; CHECK-GI-NOFP16-NEXT: mov v5.h[2], v18.h[0]
; CHECK-GI-NOFP16-NEXT: mov v17.h[1], v6.h[0]
; CHECK-GI-NOFP16-NEXT: mov v16.h[2], v19.h[0]
; CHECK-GI-NOFP16-NEXT: mov v5.h[2], v4.h[0]
; CHECK-GI-NOFP16-NEXT: fcvtl v6.4s, v6.4h
; CHECK-GI-NOFP16-NEXT: mov v17.h[2], v7.h[0]
; CHECK-GI-NOFP16-NEXT: mov v7.h[2], v4.h[0]
; CHECK-GI-NOFP16-NEXT: fcvtl v5.4s, v5.4h
; CHECK-GI-NOFP16-NEXT: mov v17.h[2], v6.h[0]
; CHECK-GI-NOFP16-NEXT: fcvtl v16.4s, v16.4h
; CHECK-GI-NOFP16-NEXT: mov v5.h[3], v4.h[0]
; CHECK-GI-NOFP16-NEXT: mov v17.h[3], v7.h[0]
; CHECK-GI-NOFP16-NEXT: fcmgt v1.4s, v16.4s, v6.4s
; CHECK-GI-NOFP16-NEXT: mov v5.h[4], v4.h[0]
; CHECK-GI-NOFP16-NEXT: mov v17.h[4], v7.h[0]
; CHECK-GI-NOFP16-NEXT: mov v7.h[3], v4.h[0]
; CHECK-GI-NOFP16-NEXT: mov v17.h[3], v6.h[0]
; CHECK-GI-NOFP16-NEXT: fcmgt v1.4s, v16.4s, v5.4s
; CHECK-GI-NOFP16-NEXT: mov v7.h[4], v4.h[0]
; CHECK-GI-NOFP16-NEXT: mov v17.h[4], v6.h[0]
; CHECK-GI-NOFP16-NEXT: uzp1 v0.8h, v0.8h, v1.8h
; CHECK-GI-NOFP16-NEXT: mov v5.h[5], v4.h[0]
; CHECK-GI-NOFP16-NEXT: mov v17.h[5], v7.h[0]
; CHECK-GI-NOFP16-NEXT: mov v5.h[6], v4.h[0]
; CHECK-GI-NOFP16-NEXT: mov v17.h[6], v7.h[0]
; CHECK-GI-NOFP16-NEXT: neg v1.8h, v5.8h
; CHECK-GI-NOFP16-NEXT: ushl v0.8h, v0.8h, v5.8h
; CHECK-GI-NOFP16-NEXT: mov v7.h[5], v4.h[0]
; CHECK-GI-NOFP16-NEXT: mov v17.h[5], v6.h[0]
; CHECK-GI-NOFP16-NEXT: mov v7.h[6], v4.h[0]
; CHECK-GI-NOFP16-NEXT: mov v17.h[6], v6.h[0]
; CHECK-GI-NOFP16-NEXT: ushl v0.8h, v0.8h, v7.8h
; CHECK-GI-NOFP16-NEXT: neg v1.8h, v7.8h
; CHECK-GI-NOFP16-NEXT: sshl v0.8h, v0.8h, v1.8h
; CHECK-GI-NOFP16-NEXT: eor v1.16b, v0.16b, v17.16b
; CHECK-GI-NOFP16-NEXT: and v0.16b, v2.16b, v0.16b
Expand Down Expand Up @@ -609,8 +609,8 @@ define <7 x half> @v7f16_half(<7 x half> %a, <7 x half> %b, <7 x half> %d, <7 x
; CHECK-GI-FP16-NEXT: mov v7.h[5], v6.h[0]
; CHECK-GI-FP16-NEXT: mov v5.h[6], v4.h[0]
; CHECK-GI-FP16-NEXT: mov v7.h[6], v6.h[0]
; CHECK-GI-FP16-NEXT: neg v1.8h, v5.8h
; CHECK-GI-FP16-NEXT: ushl v0.8h, v0.8h, v5.8h
; CHECK-GI-FP16-NEXT: neg v1.8h, v5.8h
; CHECK-GI-FP16-NEXT: sshl v0.8h, v0.8h, v1.8h
; CHECK-GI-FP16-NEXT: eor v1.16b, v0.16b, v7.16b
; CHECK-GI-FP16-NEXT: and v0.16b, v2.16b, v0.16b
Expand Down Expand Up @@ -1047,13 +1047,15 @@ define <7 x i32> @v7f16_i32(<7 x half> %a, <7 x half> %b, <7 x i32> %d, <7 x i32
; CHECK-GI-NOFP16-NEXT: mov h6, v0.h[6]
; CHECK-GI-NOFP16-NEXT: mov h7, v1.h[6]
; CHECK-GI-NOFP16-NEXT: fmov s16, w0
; CHECK-GI-NOFP16-NEXT: fmov s18, w4
; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h
; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v1.4h
; CHECK-GI-NOFP16-NEXT: mov v2.h[1], v3.h[0]
; CHECK-GI-NOFP16-NEXT: fmov s3, w8
; CHECK-GI-NOFP16-NEXT: mov v4.h[1], v5.h[0]
; CHECK-GI-NOFP16-NEXT: ldr s5, [sp]
; CHECK-GI-NOFP16-NEXT: mov v16.s[1], w1
; CHECK-GI-NOFP16-NEXT: mov v18.s[1], w5
; CHECK-GI-NOFP16-NEXT: mov v3.s[1], w8
; CHECK-GI-NOFP16-NEXT: fmov w9, s5
; CHECK-GI-NOFP16-NEXT: fmov s5, w7
Expand All @@ -1069,27 +1071,25 @@ define <7 x i32> @v7f16_i32(<7 x half> %a, <7 x half> %b, <7 x i32> %d, <7 x i32
; CHECK-GI-NOFP16-NEXT: mov v3.s[2], w8
; CHECK-GI-NOFP16-NEXT: mov w8, #-1 // =0xffffffff
; CHECK-GI-NOFP16-NEXT: mov v7.s[1], v17.s[0]
; CHECK-GI-NOFP16-NEXT: fcvtl v2.4s, v2.4h
; CHECK-GI-NOFP16-NEXT: ldr s17, [sp, #40]
; CHECK-GI-NOFP16-NEXT: fcvtl v2.4s, v2.4h
; CHECK-GI-NOFP16-NEXT: mov v18.s[2], w6
; CHECK-GI-NOFP16-NEXT: fcvtl v4.4s, v4.4h
; CHECK-GI-NOFP16-NEXT: mov v16.s[3], w3
; CHECK-GI-NOFP16-NEXT: mov v5.s[2], w9
; CHECK-GI-NOFP16-NEXT: neg v18.4s, v3.4s
; CHECK-GI-NOFP16-NEXT: mov v7.s[2], v17.s[0]
; CHECK-GI-NOFP16-NEXT: fcmgt v2.4s, v4.4s, v2.4s
; CHECK-GI-NOFP16-NEXT: fmov s4, w8
; CHECK-GI-NOFP16-NEXT: mov v4.s[1], w8
; CHECK-GI-NOFP16-NEXT: ushl v2.4s, v2.4s, v3.4s
; CHECK-GI-NOFP16-NEXT: fmov s3, w4
; CHECK-GI-NOFP16-NEXT: mov v3.s[1], w5
; CHECK-GI-NOFP16-NEXT: neg v3.4s, v3.4s
; CHECK-GI-NOFP16-NEXT: mov v4.s[2], w8
; CHECK-GI-NOFP16-NEXT: sshl v2.4s, v2.4s, v18.4s
; CHECK-GI-NOFP16-NEXT: sshl v2.4s, v2.4s, v3.4s
; CHECK-GI-NOFP16-NEXT: fmov w8, s6
; CHECK-GI-NOFP16-NEXT: mov v3.s[2], w6
; CHECK-GI-NOFP16-NEXT: eor v1.16b, v2.16b, v4.16b
; CHECK-GI-NOFP16-NEXT: mov v5.s[3], w8
; CHECK-GI-NOFP16-NEXT: eor v1.16b, v2.16b, v4.16b
; CHECK-GI-NOFP16-NEXT: and v2.16b, v18.16b, v2.16b
; CHECK-GI-NOFP16-NEXT: and v1.16b, v7.16b, v1.16b
; CHECK-GI-NOFP16-NEXT: and v2.16b, v3.16b, v2.16b
; CHECK-GI-NOFP16-NEXT: bsl v0.16b, v16.16b, v5.16b
; CHECK-GI-NOFP16-NEXT: orr v1.16b, v2.16b, v1.16b
; CHECK-GI-NOFP16-NEXT: mov s2, v0.s[1]
Expand All @@ -1111,57 +1111,57 @@ define <7 x i32> @v7f16_i32(<7 x half> %a, <7 x half> %b, <7 x i32> %d, <7 x i32
; CHECK-GI-FP16-NEXT: fcmgt v0.8h, v1.8h, v0.8h
; CHECK-GI-FP16-NEXT: mov w10, #31 // =0x1f
; CHECK-GI-FP16-NEXT: ldr s3, [sp]
; CHECK-GI-FP16-NEXT: fmov s1, w10
; CHECK-GI-FP16-NEXT: fmov s2, w10
; CHECK-GI-FP16-NEXT: fmov s6, w0
; CHECK-GI-FP16-NEXT: ldr s4, [sp, #8]
; CHECK-GI-FP16-NEXT: fmov s17, w4
; CHECK-GI-FP16-NEXT: ldr s7, [sp, #24]
; CHECK-GI-FP16-NEXT: ldr s16, [sp, #32]
; CHECK-GI-FP16-NEXT: umov w8, v0.h[4]
; CHECK-GI-FP16-NEXT: umov w9, v0.h[5]
; CHECK-GI-FP16-NEXT: mov v1.s[1], w10
; CHECK-GI-FP16-NEXT: mov v2.s[1], w10
; CHECK-GI-FP16-NEXT: mov v6.s[1], w1
; CHECK-GI-FP16-NEXT: mov v17.s[1], w5
; CHECK-GI-FP16-NEXT: mov v7.s[1], v16.s[0]
; CHECK-GI-FP16-NEXT: ldr s16, [sp, #40]
; CHECK-GI-FP16-NEXT: fmov s2, w8
; CHECK-GI-FP16-NEXT: fmov s1, w8
; CHECK-GI-FP16-NEXT: umov w8, v0.h[6]
; CHECK-GI-FP16-NEXT: mov v1.s[2], w10
; CHECK-GI-FP16-NEXT: mov v2.s[2], w10
; CHECK-GI-FP16-NEXT: ushll v0.4s, v0.4h, #0
; CHECK-GI-FP16-NEXT: mov v6.s[2], w2
; CHECK-GI-FP16-NEXT: mov v17.s[2], w6
; CHECK-GI-FP16-NEXT: mov v7.s[2], v16.s[0]
; CHECK-GI-FP16-NEXT: mov v2.s[1], w9
; CHECK-GI-FP16-NEXT: mov v1.s[1], w9
; CHECK-GI-FP16-NEXT: mov w9, #-1 // =0xffffffff
; CHECK-GI-FP16-NEXT: fmov s5, w9
; CHECK-GI-FP16-NEXT: neg v17.4s, v1.4s
; CHECK-GI-FP16-NEXT: shl v0.4s, v0.4s, #31
; CHECK-GI-FP16-NEXT: mov v6.s[3], w3
; CHECK-GI-FP16-NEXT: mov v2.s[2], w8
; CHECK-GI-FP16-NEXT: mov v1.s[2], w8
; CHECK-GI-FP16-NEXT: fmov w8, s3
; CHECK-GI-FP16-NEXT: fmov s3, w7
; CHECK-GI-FP16-NEXT: mov v5.s[1], w9
; CHECK-GI-FP16-NEXT: sshr v0.4s, v0.4s, #31
; CHECK-GI-FP16-NEXT: mov v3.s[1], w8
; CHECK-GI-FP16-NEXT: fmov w8, s4
; CHECK-GI-FP16-NEXT: ldr s4, [sp, #16]
; CHECK-GI-FP16-NEXT: ushl v1.4s, v2.4s, v1.4s
; CHECK-GI-FP16-NEXT: fmov s2, w4
; CHECK-GI-FP16-NEXT: ushl v1.4s, v1.4s, v2.4s
; CHECK-GI-FP16-NEXT: neg v2.4s, v2.4s
; CHECK-GI-FP16-NEXT: mov v5.s[2], w9
; CHECK-GI-FP16-NEXT: mov v2.s[1], w5
; CHECK-GI-FP16-NEXT: mov v3.s[2], w8
; CHECK-GI-FP16-NEXT: sshl v1.4s, v1.4s, v17.4s
; CHECK-GI-FP16-NEXT: sshl v1.4s, v1.4s, v2.4s
; CHECK-GI-FP16-NEXT: fmov w8, s4
; CHECK-GI-FP16-NEXT: eor v4.16b, v1.16b, v5.16b
; CHECK-GI-FP16-NEXT: mov v2.s[2], w6
; CHECK-GI-FP16-NEXT: eor v2.16b, v1.16b, v5.16b
; CHECK-GI-FP16-NEXT: and v1.16b, v17.16b, v1.16b
; CHECK-GI-FP16-NEXT: mov v3.s[3], w8
; CHECK-GI-FP16-NEXT: and v1.16b, v2.16b, v1.16b
; CHECK-GI-FP16-NEXT: and v2.16b, v7.16b, v4.16b
; CHECK-GI-FP16-NEXT: and v2.16b, v7.16b, v2.16b
; CHECK-GI-FP16-NEXT: bsl v0.16b, v6.16b, v3.16b
; CHECK-GI-FP16-NEXT: orr v1.16b, v1.16b, v2.16b
; CHECK-GI-FP16-NEXT: mov s2, v0.s[1]
; CHECK-GI-FP16-NEXT: mov s3, v0.s[2]
; CHECK-GI-FP16-NEXT: mov s4, v0.s[3]
; CHECK-GI-FP16-NEXT: fmov w0, s0
; CHECK-GI-FP16-NEXT: mov s5, v1.s[1]
; CHECK-GI-FP16-NEXT: mov s6, v1.s[2]
; CHECK-GI-FP16-NEXT: fmov w0, s0
; CHECK-GI-FP16-NEXT: fmov w4, s1
; CHECK-GI-FP16-NEXT: fmov w1, s2
; CHECK-GI-FP16-NEXT: fmov w2, s3
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AArch64/fdiv-combine.ll
Original file line number Diff line number Diff line change
Expand Up @@ -171,8 +171,8 @@ entry:
define <vscale x 2 x double> @splat_fdiv_nxv2f64(double %D, <vscale x 2 x double> %a) #1 {
; CHECK-LABEL: splat_fdiv_nxv2f64:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov z0.d, d0
; CHECK-NEXT: fdivr z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: ret
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -604,8 +604,8 @@ define fastcc i1 @quantum_hadamard(i32 %0) {
define <vscale x 4 x float> @fdiv_pow2_nx4xfloat(<vscale x 4 x i32> %i) "target-features"="+sve" {
; CHECK-LABEL: fdiv_pow2_nx4xfloat:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: mov z1.s, #1 // =0x1
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: lslr z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: fmov z1.s, #9.00000000
; CHECK-NEXT: ucvtf z0.s, p0/m, z0.s
Expand Down
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/AArch64/fp-veclib-expansion.ll
Original file line number Diff line number Diff line change
Expand Up @@ -62,9 +62,9 @@ define <vscale x 4 x float> @frem_nxv4f32(<vscale x 4 x float> %unused, <vscale
; ARMPL-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
; ARMPL-NEXT: .cfi_def_cfa_offset 16
; ARMPL-NEXT: .cfi_offset w30, -16
; ARMPL-NEXT: ptrue p0.s
; ARMPL-NEXT: mov z0.d, z1.d
; ARMPL-NEXT: mov z1.d, z2.d
; ARMPL-NEXT: ptrue p0.s
; ARMPL-NEXT: bl armpl_svfmod_f32_x
; ARMPL-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; ARMPL-NEXT: ret
Expand All @@ -74,9 +74,9 @@ define <vscale x 4 x float> @frem_nxv4f32(<vscale x 4 x float> %unused, <vscale
; SLEEF-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
; SLEEF-NEXT: .cfi_def_cfa_offset 16
; SLEEF-NEXT: .cfi_offset w30, -16
; SLEEF-NEXT: ptrue p0.s
; SLEEF-NEXT: mov z0.d, z1.d
; SLEEF-NEXT: mov z1.d, z2.d
; SLEEF-NEXT: ptrue p0.s
; SLEEF-NEXT: bl _ZGVsMxvv_fmodf
; SLEEF-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; SLEEF-NEXT: ret
Expand All @@ -90,9 +90,9 @@ define <vscale x 2 x double> @frem_strict_nxv2f64(<vscale x 2 x double> %unused,
; ARMPL-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
; ARMPL-NEXT: .cfi_def_cfa_offset 16
; ARMPL-NEXT: .cfi_offset w30, -16
; ARMPL-NEXT: ptrue p0.d
; ARMPL-NEXT: mov z0.d, z1.d
; ARMPL-NEXT: mov z1.d, z2.d
; ARMPL-NEXT: ptrue p0.d
; ARMPL-NEXT: bl armpl_svfmod_f64_x
; ARMPL-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; ARMPL-NEXT: ret
Expand All @@ -102,9 +102,9 @@ define <vscale x 2 x double> @frem_strict_nxv2f64(<vscale x 2 x double> %unused,
; SLEEF-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
; SLEEF-NEXT: .cfi_def_cfa_offset 16
; SLEEF-NEXT: .cfi_offset w30, -16
; SLEEF-NEXT: ptrue p0.d
; SLEEF-NEXT: mov z0.d, z1.d
; SLEEF-NEXT: mov z1.d, z2.d
; SLEEF-NEXT: ptrue p0.d
; SLEEF-NEXT: bl _ZGVsMxvv_fmod
; SLEEF-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; SLEEF-NEXT: ret
Expand Down
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll
Original file line number Diff line number Diff line change
Expand Up @@ -194,10 +194,10 @@ define <2 x i16> @ustest_f64i16(<2 x double> %x) {
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: fcvtzs v0.2d, v0.2d
; CHECK-NEXT: movi d1, #0x00ffff0000ffff
; CHECK-NEXT: movi v2.2d, #0000000000000000
; CHECK-NEXT: xtn v0.2s, v0.2d
; CHECK-NEXT: smin v0.2s, v0.2s, v1.2s
; CHECK-NEXT: movi v1.2d, #0000000000000000
; CHECK-NEXT: smax v0.2s, v0.2s, v1.2s
; CHECK-NEXT: smax v0.2s, v0.2s, v2.2s
; CHECK-NEXT: ret
entry:
%conv = fptosi <2 x double> %x to <2 x i32>
Expand Down Expand Up @@ -833,10 +833,10 @@ define <2 x i16> @ustest_f64i16_mm(<2 x double> %x) {
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: fcvtzs v0.2d, v0.2d
; CHECK-NEXT: movi d1, #0x00ffff0000ffff
; CHECK-NEXT: movi v2.2d, #0000000000000000
; CHECK-NEXT: xtn v0.2s, v0.2d
; CHECK-NEXT: smin v0.2s, v0.2s, v1.2s
; CHECK-NEXT: movi v1.2d, #0000000000000000
; CHECK-NEXT: smax v0.2s, v0.2s, v1.2s
; CHECK-NEXT: smax v0.2s, v0.2s, v2.2s
; CHECK-NEXT: ret
entry:
%conv = fptosi <2 x double> %x to <2 x i32>
Expand Down
20 changes: 10 additions & 10 deletions llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll
Original file line number Diff line number Diff line change
Expand Up @@ -697,9 +697,9 @@ define <2 x i1> @test_signed_v2f32_v2i1(<2 x float> %f) {
; CHECK: // %bb.0:
; CHECK-NEXT: movi v1.2d, #0000000000000000
; CHECK-NEXT: fcvtzs v0.2s, v0.2s
; CHECK-NEXT: movi v2.2d, #0xffffffffffffffff
; CHECK-NEXT: smin v0.2s, v0.2s, v1.2s
; CHECK-NEXT: movi v1.2d, #0xffffffffffffffff
; CHECK-NEXT: smax v0.2s, v0.2s, v1.2s
; CHECK-NEXT: smax v0.2s, v0.2s, v2.2s
; CHECK-NEXT: ret
%x = call <2 x i1> @llvm.fptosi.sat.v2f32.v2i1(<2 x float> %f)
ret <2 x i1> %x
Expand Down Expand Up @@ -1620,9 +1620,9 @@ define <4 x i1> @test_signed_v4f16_v4i1(<4 x half> %f) {
; CHECK-FP16: // %bb.0:
; CHECK-FP16-NEXT: movi v1.2d, #0000000000000000
; CHECK-FP16-NEXT: fcvtzs v0.4h, v0.4h
; CHECK-FP16-NEXT: movi v2.2d, #0xffffffffffffffff
; CHECK-FP16-NEXT: smin v0.4h, v0.4h, v1.4h
; CHECK-FP16-NEXT: movi v1.2d, #0xffffffffffffffff
; CHECK-FP16-NEXT: smax v0.4h, v0.4h, v1.4h
; CHECK-FP16-NEXT: smax v0.4h, v0.4h, v2.4h
; CHECK-FP16-NEXT: ret
%x = call <4 x i1> @llvm.fptosi.sat.v4f16.v4i1(<4 x half> %f)
ret <4 x i1> %x
Expand Down Expand Up @@ -1668,9 +1668,9 @@ define <4 x i13> @test_signed_v4f16_v4i13(<4 x half> %f) {
; CHECK-FP16: // %bb.0:
; CHECK-FP16-NEXT: fcvtzs v0.4h, v0.4h
; CHECK-FP16-NEXT: mvni v1.4h, #240, lsl #8
; CHECK-FP16-NEXT: movi v2.4h, #240, lsl #8
; CHECK-FP16-NEXT: smin v0.4h, v0.4h, v1.4h
; CHECK-FP16-NEXT: movi v1.4h, #240, lsl #8
; CHECK-FP16-NEXT: smax v0.4h, v0.4h, v1.4h
; CHECK-FP16-NEXT: smax v0.4h, v0.4h, v2.4h
; CHECK-FP16-NEXT: ret
%x = call <4 x i13> @llvm.fptosi.sat.v4f16.v4i13(<4 x half> %f)
ret <4 x i13> %x
Expand Down Expand Up @@ -2103,9 +2103,9 @@ define <8 x i1> @test_signed_v8f16_v8i1(<8 x half> %f) {
; CHECK-FP16: // %bb.0:
; CHECK-FP16-NEXT: movi v1.2d, #0000000000000000
; CHECK-FP16-NEXT: fcvtzs v0.8h, v0.8h
; CHECK-FP16-NEXT: movi v2.2d, #0xffffffffffffffff
; CHECK-FP16-NEXT: smin v0.8h, v0.8h, v1.8h
; CHECK-FP16-NEXT: movi v1.2d, #0xffffffffffffffff
; CHECK-FP16-NEXT: smax v0.8h, v0.8h, v1.8h
; CHECK-FP16-NEXT: smax v0.8h, v0.8h, v2.8h
; CHECK-FP16-NEXT: xtn v0.8b, v0.8h
; CHECK-FP16-NEXT: ret
%x = call <8 x i1> @llvm.fptosi.sat.v8f16.v8i1(<8 x half> %f)
Expand Down Expand Up @@ -2254,9 +2254,9 @@ define <8 x i13> @test_signed_v8f16_v8i13(<8 x half> %f) {
; CHECK-FP16: // %bb.0:
; CHECK-FP16-NEXT: fcvtzs v0.8h, v0.8h
; CHECK-FP16-NEXT: mvni v1.8h, #240, lsl #8
; CHECK-FP16-NEXT: movi v2.8h, #240, lsl #8
; CHECK-FP16-NEXT: smin v0.8h, v0.8h, v1.8h
; CHECK-FP16-NEXT: movi v1.8h, #240, lsl #8
; CHECK-FP16-NEXT: smax v0.8h, v0.8h, v1.8h
; CHECK-FP16-NEXT: smax v0.8h, v0.8h, v2.8h
; CHECK-FP16-NEXT: ret
%x = call <8 x i13> @llvm.fptosi.sat.v8f16.v8i13(<8 x half> %f)
ret <8 x i13> %x
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AArch64/funnel-shift-rot.ll
Original file line number Diff line number Diff line change
Expand Up @@ -78,9 +78,9 @@ define <4 x i32> @rotl_v4i32(<4 x i32> %x, <4 x i32> %z) {
; CHECK: // %bb.0:
; CHECK-NEXT: movi v2.4s, #31
; CHECK-NEXT: neg v3.4s, v1.4s
; CHECK-NEXT: and v3.16b, v3.16b, v2.16b
; CHECK-NEXT: and v1.16b, v1.16b, v2.16b
; CHECK-NEXT: and v2.16b, v3.16b, v2.16b
; CHECK-NEXT: neg v2.4s, v2.4s
; CHECK-NEXT: neg v2.4s, v3.4s
; CHECK-NEXT: ushl v1.4s, v0.4s, v1.4s
; CHECK-NEXT: ushl v0.4s, v0.4s, v2.4s
; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -202,8 +202,8 @@ define <4 x i1> @vec_4xi32_splat_eq(<4 x i32> %x, <4 x i32> %y) nounwind {
define <4 x i1> @vec_4xi32_nonsplat_eq(<4 x i32> %x, <4 x i32> %y) nounwind {
; CHECK-LABEL: vec_4xi32_nonsplat_eq:
; CHECK: // %bb.0:
; CHECK-NEXT: neg v1.4s, v1.4s
; CHECK-NEXT: adrp x8, .LCPI13_0
; CHECK-NEXT: neg v1.4s, v1.4s
; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI13_0]
; CHECK-NEXT: ushl v1.4s, v2.4s, v1.4s
; CHECK-NEXT: and v0.16b, v1.16b, v0.16b
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AArch64/icmp.ll
Original file line number Diff line number Diff line change
Expand Up @@ -179,10 +179,10 @@ define <3 x i32> @v3i32_i32(<3 x i32> %a, <3 x i32> %b, <3 x i32> %d, <3 x i32>
; CHECK-GI-NEXT: mov w8, #-1 // =0xffffffff
; CHECK-GI-NEXT: fmov s1, w8
; CHECK-GI-NEXT: mov v1.s[1], w8
; CHECK-GI-NEXT: neg v5.4s, v4.4s
; CHECK-GI-NEXT: ushl v0.4s, v0.4s, v4.4s
; CHECK-GI-NEXT: neg v4.4s, v4.4s
; CHECK-GI-NEXT: sshl v0.4s, v0.4s, v4.4s
; CHECK-GI-NEXT: mov v1.s[2], w8
; CHECK-GI-NEXT: sshl v0.4s, v0.4s, v5.4s
; CHECK-GI-NEXT: eor v1.16b, v0.16b, v1.16b
; CHECK-GI-NEXT: and v0.16b, v2.16b, v0.16b
; CHECK-GI-NEXT: and v1.16b, v3.16b, v1.16b
Expand Down
54 changes: 27 additions & 27 deletions llvm/test/CodeGen/AArch64/insert-extend.ll
Original file line number Diff line number Diff line change
Expand Up @@ -64,104 +64,104 @@ define i32 @large(ptr nocapture noundef readonly %p1, i32 noundef %st1, ptr noca
; CHECK-NEXT: ldr d3, [x11]
; CHECK-NEXT: ldr d4, [x10, x8]
; CHECK-NEXT: ldr d5, [x11, x9]
; CHECK-NEXT: shll2 v6.4s, v0.8h, #16
; CHECK-NEXT: usubl v2.8h, v2.8b, v3.8b
; CHECK-NEXT: usubl v3.8h, v4.8b, v5.8b
; CHECK-NEXT: shll2 v4.4s, v0.8h, #16
; CHECK-NEXT: shll2 v5.4s, v1.8h, #16
; CHECK-NEXT: saddw v0.4s, v4.4s, v0.4h
; CHECK-NEXT: shll2 v4.4s, v1.8h, #16
; CHECK-NEXT: saddw v0.4s, v6.4s, v0.4h
; CHECK-NEXT: shll2 v6.4s, v2.8h, #16
; CHECK-NEXT: saddw v1.4s, v5.4s, v1.4h
; CHECK-NEXT: shll2 v4.4s, v3.8h, #16
; CHECK-NEXT: saddw v2.4s, v6.4s, v2.4h
; CHECK-NEXT: saddw v3.4s, v4.4s, v3.4h
; CHECK-NEXT: shll2 v5.4s, v3.8h, #16
; CHECK-NEXT: saddw v1.4s, v4.4s, v1.4h
; CHECK-NEXT: rev64 v4.4s, v0.4s
; CHECK-NEXT: saddw v2.4s, v6.4s, v2.4h
; CHECK-NEXT: saddw v3.4s, v5.4s, v3.4h
; CHECK-NEXT: rev64 v5.4s, v1.4s
; CHECK-NEXT: rev64 v6.4s, v2.4s
; CHECK-NEXT: rev64 v7.4s, v3.4s
; CHECK-NEXT: sub v4.4s, v0.4s, v4.4s
; CHECK-NEXT: addp v0.4s, v1.4s, v0.4s
; CHECK-NEXT: rev64 v7.4s, v3.4s
; CHECK-NEXT: sub v5.4s, v1.4s, v5.4s
; CHECK-NEXT: sub v6.4s, v2.4s, v6.4s
; CHECK-NEXT: addp v2.4s, v3.4s, v2.4s
; CHECK-NEXT: sub v7.4s, v3.4s, v7.4s
; CHECK-NEXT: zip1 v16.4s, v5.4s, v4.4s
; CHECK-NEXT: ext v1.16b, v2.16b, v2.16b, #8
; CHECK-NEXT: sub v7.4s, v3.4s, v7.4s
; CHECK-NEXT: zip2 v3.4s, v6.4s, v7.4s
; CHECK-NEXT: mov v6.s[1], v7.s[0]
; CHECK-NEXT: ext v7.16b, v5.16b, v16.16b, #8
; CHECK-NEXT: mov v5.s[3], v4.s[2]
; CHECK-NEXT: uzp1 v4.4s, v1.4s, v0.4s
; CHECK-NEXT: uzp2 v1.4s, v1.4s, v0.4s
; CHECK-NEXT: ext v4.16b, v2.16b, v2.16b, #8
; CHECK-NEXT: mov v6.d[1], v7.d[1]
; CHECK-NEXT: mov v3.d[1], v5.d[1]
; CHECK-NEXT: uzp1 v1.4s, v4.4s, v0.4s
; CHECK-NEXT: uzp2 v4.4s, v4.4s, v0.4s
; CHECK-NEXT: uzp2 v5.4s, v2.4s, v0.4s
; CHECK-NEXT: uzp1 v0.4s, v2.4s, v0.4s
; CHECK-NEXT: sub v1.4s, v4.4s, v1.4s
; CHECK-NEXT: add v2.4s, v3.4s, v6.4s
; CHECK-NEXT: sub v3.4s, v6.4s, v3.4s
; CHECK-NEXT: sub v1.4s, v1.4s, v4.4s
; CHECK-NEXT: add v0.4s, v5.4s, v0.4s
; CHECK-NEXT: rev64 v6.4s, v1.4s
; CHECK-NEXT: rev64 v4.4s, v2.4s
; CHECK-NEXT: rev64 v5.4s, v3.4s
; CHECK-NEXT: addp v16.4s, v1.4s, v3.4s
; CHECK-NEXT: rev64 v6.4s, v1.4s
; CHECK-NEXT: rev64 v7.4s, v0.4s
; CHECK-NEXT: addp v16.4s, v1.4s, v3.4s
; CHECK-NEXT: addp v17.4s, v0.4s, v2.4s
; CHECK-NEXT: sub v1.4s, v1.4s, v6.4s
; CHECK-NEXT: sub v3.4s, v3.4s, v5.4s
; CHECK-NEXT: sub v2.4s, v2.4s, v4.4s
; CHECK-NEXT: sub v1.4s, v1.4s, v6.4s
; CHECK-NEXT: sub v0.4s, v0.4s, v7.4s
; CHECK-NEXT: zip1 v18.4s, v17.4s, v17.4s
; CHECK-NEXT: ext v6.16b, v1.16b, v16.16b, #8
; CHECK-NEXT: ext v4.16b, v17.16b, v2.16b, #4
; CHECK-NEXT: ext v5.16b, v16.16b, v3.16b, #4
; CHECK-NEXT: mov v20.16b, v3.16b
; CHECK-NEXT: ext v6.16b, v1.16b, v16.16b, #8
; CHECK-NEXT: ext v7.16b, v0.16b, v17.16b, #4
; CHECK-NEXT: mov v21.16b, v2.16b
; CHECK-NEXT: trn2 v0.4s, v18.4s, v0.4s
; CHECK-NEXT: ext v19.16b, v6.16b, v1.16b, #4
; CHECK-NEXT: mov v1.s[2], v16.s[1]
; CHECK-NEXT: mov v20.s[2], v16.s[3]
; CHECK-NEXT: zip2 v4.4s, v4.4s, v17.4s
; CHECK-NEXT: zip2 v5.4s, v5.4s, v16.4s
; CHECK-NEXT: mov v21.s[2], v17.s[3]
; CHECK-NEXT: ext v19.16b, v6.16b, v1.16b, #4
; CHECK-NEXT: ext v7.16b, v7.16b, v7.16b, #4
; CHECK-NEXT: mov v18.16b, v1.16b
; CHECK-NEXT: mov v1.s[2], v16.s[1]
; CHECK-NEXT: ext v2.16b, v2.16b, v4.16b, #12
; CHECK-NEXT: ext v3.16b, v3.16b, v5.16b, #12
; CHECK-NEXT: uzp2 v4.4s, v6.4s, v19.4s
; CHECK-NEXT: mov v5.16b, v7.16b
; CHECK-NEXT: mov v6.16b, v20.16b
; CHECK-NEXT: mov v18.16b, v1.16b
; CHECK-NEXT: mov v19.16b, v21.16b
; CHECK-NEXT: mov v18.s[1], v16.s[0]
; CHECK-NEXT: sub v7.4s, v0.4s, v7.4s
; CHECK-NEXT: mov v6.s[1], v16.s[2]
; CHECK-NEXT: mov v5.s[0], v17.s[1]
; CHECK-NEXT: mov v18.s[1], v16.s[0]
; CHECK-NEXT: mov v19.s[1], v17.s[2]
; CHECK-NEXT: sub v1.4s, v1.4s, v4.4s
; CHECK-NEXT: sub v16.4s, v20.4s, v3.4s
; CHECK-NEXT: sub v17.4s, v21.4s, v2.4s
; CHECK-NEXT: add v4.4s, v18.4s, v4.4s
; CHECK-NEXT: add v3.4s, v6.4s, v3.4s
; CHECK-NEXT: add v0.4s, v0.4s, v5.4s
; CHECK-NEXT: add v4.4s, v18.4s, v4.4s
; CHECK-NEXT: add v2.4s, v19.4s, v2.4s
; CHECK-NEXT: mov v4.d[1], v1.d[1]
; CHECK-NEXT: mov v3.d[1], v16.d[1]
; CHECK-NEXT: mov v0.d[1], v7.d[1]
; CHECK-NEXT: mov v4.d[1], v1.d[1]
; CHECK-NEXT: mov v2.d[1], v17.d[1]
; CHECK-NEXT: cmlt v6.8h, v4.8h, #0
; CHECK-NEXT: cmlt v1.8h, v3.8h, #0
; CHECK-NEXT: cmlt v5.8h, v0.8h, #0
; CHECK-NEXT: cmlt v6.8h, v4.8h, #0
; CHECK-NEXT: cmlt v7.8h, v2.8h, #0
; CHECK-NEXT: add v4.4s, v6.4s, v4.4s
; CHECK-NEXT: add v3.4s, v1.4s, v3.4s
; CHECK-NEXT: add v0.4s, v5.4s, v0.4s
; CHECK-NEXT: add v4.4s, v6.4s, v4.4s
; CHECK-NEXT: add v2.4s, v7.4s, v2.4s
; CHECK-NEXT: eor v1.16b, v3.16b, v1.16b
; CHECK-NEXT: eor v0.16b, v0.16b, v5.16b
; CHECK-NEXT: eor v2.16b, v2.16b, v7.16b
; CHECK-NEXT: eor v3.16b, v4.16b, v6.16b
; CHECK-NEXT: eor v0.16b, v0.16b, v5.16b
; CHECK-NEXT: add v1.4s, v2.4s, v1.4s
; CHECK-NEXT: add v0.4s, v0.4s, v3.4s
; CHECK-NEXT: add v1.4s, v2.4s, v1.4s
; CHECK-NEXT: add v0.4s, v0.4s, v1.4s
; CHECK-NEXT: addv s0, v0.4s
; CHECK-NEXT: fmov w8, s0
Expand Down
70 changes: 35 additions & 35 deletions llvm/test/CodeGen/AArch64/insert-subvector-res-legalization.ll
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,9 @@ define <vscale x 8 x i8> @vec_scalable_subvec_scalable_idx_zero_i8(ptr %a, ptr %
; CHECK-LABEL: vec_scalable_subvec_scalable_idx_zero_i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: ld1b { z0.h }, p0/z, [x0]
; CHECK-NEXT: ld1b { z1.s }, p1/z, [x1]
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: ld1b { z1.s }, p0/z, [x1]
; CHECK-NEXT: uunpkhi z0.s, z0.h
; CHECK-NEXT: uzp1 z0.h, z1.h, z0.h
; CHECK-NEXT: ret
Expand All @@ -25,9 +25,9 @@ define <vscale x 8 x i8> @vec_scalable_subvec_scalable_idx_nonzero_i8(ptr %a, pt
; CHECK-LABEL: vec_scalable_subvec_scalable_idx_nonzero_i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: ld1b { z0.h }, p0/z, [x0]
; CHECK-NEXT: ld1b { z1.s }, p1/z, [x1]
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: ld1b { z1.s }, p0/z, [x1]
; CHECK-NEXT: uunpklo z0.s, z0.h
; CHECK-NEXT: uzp1 z0.h, z0.h, z1.h
; CHECK-NEXT: ret
Expand All @@ -41,9 +41,9 @@ define <vscale x 4 x i16> @vec_scalable_subvec_scalable_idx_zero_i16(ptr %a, ptr
; CHECK-LABEL: vec_scalable_subvec_scalable_idx_zero_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: ptrue p1.d
; CHECK-NEXT: ld1h { z0.s }, p0/z, [x0]
; CHECK-NEXT: ld1h { z1.d }, p1/z, [x1]
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: ld1h { z1.d }, p0/z, [x1]
; CHECK-NEXT: uunpkhi z0.d, z0.s
; CHECK-NEXT: uzp1 z0.s, z1.s, z0.s
; CHECK-NEXT: ret
Expand All @@ -57,9 +57,9 @@ define <vscale x 4 x i16> @vec_scalable_subvec_scalable_idx_nonzero_i16(ptr %a,
; CHECK-LABEL: vec_scalable_subvec_scalable_idx_nonzero_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: ptrue p1.d
; CHECK-NEXT: ld1h { z0.s }, p0/z, [x0]
; CHECK-NEXT: ld1h { z1.d }, p1/z, [x1]
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: ld1h { z1.d }, p0/z, [x1]
; CHECK-NEXT: uunpklo z0.d, z0.s
; CHECK-NEXT: uzp1 z0.s, z0.s, z1.s
; CHECK-NEXT: ret
Expand All @@ -76,10 +76,10 @@ define <vscale x 8 x i8> @vec_scalable_subvec_fixed_idx_zero_i8(ptr %a, ptr %b)
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: ldr d0, [x1]
; CHECK-NEXT: ptrue p1.h, vl8
; CHECK-NEXT: ushll v0.8h, v0.8b, #0
; CHECK-NEXT: ld1b { z1.h }, p0/z, [x0]
; CHECK-NEXT: sel z0.h, p1, z0.h, z1.h
; CHECK-NEXT: ushll v0.8h, v0.8b, #0
; CHECK-NEXT: ptrue p0.h, vl8
; CHECK-NEXT: sel z0.h, p0, z0.h, z1.h
; CHECK-NEXT: ret
%vec = load <vscale x 8 x i8>, ptr %a
%subvec = load <8 x i8>, ptr %b
Expand All @@ -92,19 +92,19 @@ define <vscale x 8 x i8> @vec_scalable_subvec_fixed_idx_nonzero_i8(ptr %a, ptr %
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-1
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: cnth x8
; CHECK-NEXT: ldr d0, [x1]
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: ldr d1, [x1]
; CHECK-NEXT: sub x8, x8, #8
; CHECK-NEXT: mov w9, #8 // =0x8
; CHECK-NEXT: cmp x8, #8
; CHECK-NEXT: ushll v0.8h, v0.8b, #0
; CHECK-NEXT: ld1b { z0.h }, p0/z, [x0]
; CHECK-NEXT: ushll v1.8h, v1.8b, #0
; CHECK-NEXT: csel x8, x8, x9, lo
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: lsl x8, x8, #1
; CHECK-NEXT: ld1b { z1.h }, p0/z, [x0]
; CHECK-NEXT: st1h { z1.h }, p0, [sp]
; CHECK-NEXT: str q0, [x9, x8]
; CHECK-NEXT: st1h { z0.h }, p0, [sp]
; CHECK-NEXT: str q1, [x9, x8]
; CHECK-NEXT: ld1h { z0.h }, p0/z, [sp]
; CHECK-NEXT: addvl sp, sp, #1
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
Expand All @@ -120,10 +120,10 @@ define <vscale x 4 x i16> @vec_scalable_subvec_fixed_idx_zero_i16(ptr %a, ptr %b
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: ldr d0, [x1]
; CHECK-NEXT: ptrue p1.s, vl4
; CHECK-NEXT: ushll v0.4s, v0.4h, #0
; CHECK-NEXT: ld1h { z1.s }, p0/z, [x0]
; CHECK-NEXT: sel z0.s, p1, z0.s, z1.s
; CHECK-NEXT: ushll v0.4s, v0.4h, #0
; CHECK-NEXT: ptrue p0.s, vl4
; CHECK-NEXT: sel z0.s, p0, z0.s, z1.s
; CHECK-NEXT: ret
%vec = load <vscale x 4 x i16>, ptr %a
%subvec = load <4 x i16>, ptr %b
Expand All @@ -136,19 +136,19 @@ define <vscale x 4 x i16> @vec_scalable_subvec_fixed_idx_nonzero_i16(ptr %a, ptr
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-1
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: cntw x8
; CHECK-NEXT: ldr d0, [x1]
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: ldr d1, [x1]
; CHECK-NEXT: sub x8, x8, #4
; CHECK-NEXT: mov w9, #4 // =0x4
; CHECK-NEXT: cmp x8, #4
; CHECK-NEXT: ushll v0.4s, v0.4h, #0
; CHECK-NEXT: ld1h { z0.s }, p0/z, [x0]
; CHECK-NEXT: ushll v1.4s, v1.4h, #0
; CHECK-NEXT: csel x8, x8, x9, lo
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: lsl x8, x8, #2
; CHECK-NEXT: ld1h { z1.s }, p0/z, [x0]
; CHECK-NEXT: st1w { z1.s }, p0, [sp]
; CHECK-NEXT: str q0, [x9, x8]
; CHECK-NEXT: st1w { z0.s }, p0, [sp]
; CHECK-NEXT: str q1, [x9, x8]
; CHECK-NEXT: ld1w { z0.s }, p0/z, [sp]
; CHECK-NEXT: addvl sp, sp, #1
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
Expand All @@ -164,10 +164,10 @@ define <vscale x 2 x i32> @vec_scalable_subvec_fixed_idx_zero_i32(ptr %a, ptr %b
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: ldr d0, [x1]
; CHECK-NEXT: ptrue p1.d, vl2
; CHECK-NEXT: ushll v0.2d, v0.2s, #0
; CHECK-NEXT: ld1w { z1.d }, p0/z, [x0]
; CHECK-NEXT: sel z0.d, p1, z0.d, z1.d
; CHECK-NEXT: ushll v0.2d, v0.2s, #0
; CHECK-NEXT: ptrue p0.d, vl2
; CHECK-NEXT: sel z0.d, p0, z0.d, z1.d
; CHECK-NEXT: ret
%vec = load <vscale x 2 x i32>, ptr %a
%subvec = load <2 x i32>, ptr %b
Expand All @@ -180,19 +180,19 @@ define <vscale x 2 x i32> @vec_scalable_subvec_fixed_idx_nonzero_i32(ptr %a, ptr
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-1
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: cntd x8
; CHECK-NEXT: ldr d0, [x1]
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: ldr d1, [x1]
; CHECK-NEXT: sub x8, x8, #2
; CHECK-NEXT: mov w9, #2 // =0x2
; CHECK-NEXT: cmp x8, #2
; CHECK-NEXT: ushll v0.2d, v0.2s, #0
; CHECK-NEXT: ld1w { z0.d }, p0/z, [x0]
; CHECK-NEXT: ushll v1.2d, v1.2s, #0
; CHECK-NEXT: csel x8, x8, x9, lo
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: lsl x8, x8, #3
; CHECK-NEXT: ld1w { z1.d }, p0/z, [x0]
; CHECK-NEXT: st1d { z1.d }, p0, [sp]
; CHECK-NEXT: str q0, [x9, x8]
; CHECK-NEXT: st1d { z0.d }, p0, [sp]
; CHECK-NEXT: str q1, [x9, x8]
; CHECK-NEXT: ld1d { z0.d }, p0/z, [sp]
; CHECK-NEXT: addvl sp, sp, #1
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
Expand Down
Loading