24 changes: 12 additions & 12 deletions llvm/test/CodeGen/AArch64/sve-fixed-length-fp-arith.ll
@@ -57,8 +57,8 @@ define void @fadd_v32f16(ptr %a, ptr %b) #0 {
; VBITS_GE_256-NEXT: fadd z0.h, p0/m, z0.h, z1.h
; VBITS_GE_256-NEXT: movprfx z1, z2
; VBITS_GE_256-NEXT: fadd z1.h, p0/m, z1.h, z3.h
; VBITS_GE_256-NEXT: st1h { z1.h }, p0, [x0]
; VBITS_GE_256-NEXT: st1h { z0.h }, p0, [x0, x8, lsl #1]
; VBITS_GE_256-NEXT: st1h { z1.h }, p0, [x0]
; VBITS_GE_256-NEXT: ret
;
; VBITS_GE_512-LABEL: fadd_v32f16:
@@ -156,8 +156,8 @@ define void @fadd_v16f32(ptr %a, ptr %b) #0 {
; VBITS_GE_256-NEXT: fadd z0.s, p0/m, z0.s, z1.s
; VBITS_GE_256-NEXT: movprfx z1, z2
; VBITS_GE_256-NEXT: fadd z1.s, p0/m, z1.s, z3.s
; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x0]
; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x0, x8, lsl #2]
; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x0]
; VBITS_GE_256-NEXT: ret
;
; VBITS_GE_512-LABEL: fadd_v16f32:
@@ -255,8 +255,8 @@ define void @fadd_v8f64(ptr %a, ptr %b) #0 {
; VBITS_GE_256-NEXT: fadd z0.d, p0/m, z0.d, z1.d
; VBITS_GE_256-NEXT: movprfx z1, z2
; VBITS_GE_256-NEXT: fadd z1.d, p0/m, z1.d, z3.d
; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x0]
; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x0, x8, lsl #3]
; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x0]
; VBITS_GE_256-NEXT: ret
;
; VBITS_GE_512-LABEL: fadd_v8f64:
@@ -662,8 +662,8 @@ define void @fma_v32f16(ptr %a, ptr %b, ptr %c) #0 {
; VBITS_GE_256-NEXT: fmad z0.h, p0/m, z1.h, z2.h
; VBITS_GE_256-NEXT: movprfx z1, z5
; VBITS_GE_256-NEXT: fmla z1.h, p0/m, z3.h, z4.h
; VBITS_GE_256-NEXT: st1h { z1.h }, p0, [x0]
; VBITS_GE_256-NEXT: st1h { z0.h }, p0, [x0, x8, lsl #1]
; VBITS_GE_256-NEXT: st1h { z1.h }, p0, [x0]
; VBITS_GE_256-NEXT: ret
;
; VBITS_GE_512-LABEL: fma_v32f16:
@@ -773,8 +773,8 @@ define void @fma_v16f32(ptr %a, ptr %b, ptr %c) #0 {
; VBITS_GE_256-NEXT: fmad z0.s, p0/m, z1.s, z2.s
; VBITS_GE_256-NEXT: movprfx z1, z5
; VBITS_GE_256-NEXT: fmla z1.s, p0/m, z3.s, z4.s
; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x0]
; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x0, x8, lsl #2]
; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x0]
; VBITS_GE_256-NEXT: ret
;
; VBITS_GE_512-LABEL: fma_v16f32:
@@ -883,8 +883,8 @@ define void @fma_v8f64(ptr %a, ptr %b, ptr %c) #0 {
; VBITS_GE_256-NEXT: fmad z0.d, p0/m, z1.d, z2.d
; VBITS_GE_256-NEXT: movprfx z1, z5
; VBITS_GE_256-NEXT: fmla z1.d, p0/m, z3.d, z4.d
; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x0]
; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x0, x8, lsl #3]
; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x0]
; VBITS_GE_256-NEXT: ret
;
; VBITS_GE_512-LABEL: fma_v8f64:
@@ -992,8 +992,8 @@ define void @fmul_v32f16(ptr %a, ptr %b) #0 {
; VBITS_GE_256-NEXT: fmul z0.h, p0/m, z0.h, z1.h
; VBITS_GE_256-NEXT: movprfx z1, z2
; VBITS_GE_256-NEXT: fmul z1.h, p0/m, z1.h, z3.h
; VBITS_GE_256-NEXT: st1h { z1.h }, p0, [x0]
; VBITS_GE_256-NEXT: st1h { z0.h }, p0, [x0, x8, lsl #1]
; VBITS_GE_256-NEXT: st1h { z1.h }, p0, [x0]
; VBITS_GE_256-NEXT: ret
;
; VBITS_GE_512-LABEL: fmul_v32f16:
@@ -1091,8 +1091,8 @@ define void @fmul_v16f32(ptr %a, ptr %b) #0 {
; VBITS_GE_256-NEXT: fmul z0.s, p0/m, z0.s, z1.s
; VBITS_GE_256-NEXT: movprfx z1, z2
; VBITS_GE_256-NEXT: fmul z1.s, p0/m, z1.s, z3.s
; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x0]
; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x0, x8, lsl #2]
; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x0]
; VBITS_GE_256-NEXT: ret
;
; VBITS_GE_512-LABEL: fmul_v16f32:
@@ -1190,8 +1190,8 @@ define void @fmul_v8f64(ptr %a, ptr %b) #0 {
; VBITS_GE_256-NEXT: fmul z0.d, p0/m, z0.d, z1.d
; VBITS_GE_256-NEXT: movprfx z1, z2
; VBITS_GE_256-NEXT: fmul z1.d, p0/m, z1.d, z3.d
; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x0]
; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x0, x8, lsl #3]
; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x0]
; VBITS_GE_256-NEXT: ret
;
; VBITS_GE_512-LABEL: fmul_v8f64:
@@ -1829,8 +1829,8 @@ define void @fsub_v32f16(ptr %a, ptr %b) #0 {
; VBITS_GE_256-NEXT: fsub z0.h, p0/m, z0.h, z1.h
; VBITS_GE_256-NEXT: movprfx z1, z2
; VBITS_GE_256-NEXT: fsub z1.h, p0/m, z1.h, z3.h
; VBITS_GE_256-NEXT: st1h { z1.h }, p0, [x0]
; VBITS_GE_256-NEXT: st1h { z0.h }, p0, [x0, x8, lsl #1]
; VBITS_GE_256-NEXT: st1h { z1.h }, p0, [x0]
; VBITS_GE_256-NEXT: ret
;
; VBITS_GE_512-LABEL: fsub_v32f16:
@@ -1928,8 +1928,8 @@ define void @fsub_v16f32(ptr %a, ptr %b) #0 {
; VBITS_GE_256-NEXT: fsub z0.s, p0/m, z0.s, z1.s
; VBITS_GE_256-NEXT: movprfx z1, z2
; VBITS_GE_256-NEXT: fsub z1.s, p0/m, z1.s, z3.s
; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x0]
; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x0, x8, lsl #2]
; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x0]
; VBITS_GE_256-NEXT: ret
;
; VBITS_GE_512-LABEL: fsub_v16f32:
@@ -2027,8 +2027,8 @@ define void @fsub_v8f64(ptr %a, ptr %b) #0 {
; VBITS_GE_256-NEXT: fsub z0.d, p0/m, z0.d, z1.d
; VBITS_GE_256-NEXT: movprfx z1, z2
; VBITS_GE_256-NEXT: fsub z1.d, p0/m, z1.d, z3.d
; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x0]
; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x0, x8, lsl #3]
; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x0]
; VBITS_GE_256-NEXT: ret
;
; VBITS_GE_512-LABEL: fsub_v8f64:
6 changes: 3 additions & 3 deletions llvm/test/CodeGen/AArch64/sve-fixed-length-fp-fma.ll
@@ -66,8 +66,8 @@ define void @fma_v32f16(ptr %a, ptr %b, ptr %c) #0 {
; VBITS_GE_256-NEXT: fmad z0.h, p0/m, z1.h, z2.h
; VBITS_GE_256-NEXT: movprfx z1, z5
; VBITS_GE_256-NEXT: fmla z1.h, p0/m, z3.h, z4.h
; VBITS_GE_256-NEXT: st1h { z1.h }, p0, [x0]
; VBITS_GE_256-NEXT: st1h { z0.h }, p0, [x0, x8, lsl #1]
; VBITS_GE_256-NEXT: st1h { z1.h }, p0, [x0]
; VBITS_GE_256-NEXT: ret
;
; VBITS_GE_512-LABEL: fma_v32f16:
@@ -183,8 +183,8 @@ define void @fma_v16f32(ptr %a, ptr %b, ptr %c) #0 {
; VBITS_GE_256-NEXT: fmad z0.s, p0/m, z1.s, z2.s
; VBITS_GE_256-NEXT: movprfx z1, z5
; VBITS_GE_256-NEXT: fmla z1.s, p0/m, z3.s, z4.s
; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x0]
; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x0, x8, lsl #2]
; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x0]
; VBITS_GE_256-NEXT: ret
;
; VBITS_GE_512-LABEL: fma_v16f32:
@@ -299,8 +299,8 @@ define void @fma_v8f64(ptr %a, ptr %b, ptr %c) #0 {
; VBITS_GE_256-NEXT: fmad z0.d, p0/m, z1.d, z2.d
; VBITS_GE_256-NEXT: movprfx z1, z5
; VBITS_GE_256-NEXT: fmla z1.d, p0/m, z3.d, z4.d
; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x0]
; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x0, x8, lsl #3]
; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x0]
; VBITS_GE_256-NEXT: ret
;
; VBITS_GE_512-LABEL: fma_v8f64:
24 changes: 12 additions & 12 deletions llvm/test/CodeGen/AArch64/sve-fixed-length-fp-minmax.ll
@@ -57,8 +57,8 @@ define void @fmaxnm_v32f16(ptr %a, ptr %b) #0 {
; VBITS_EQ_256-NEXT: fmaxnm z0.h, p0/m, z0.h, z1.h
; VBITS_EQ_256-NEXT: movprfx z1, z2
; VBITS_EQ_256-NEXT: fmaxnm z1.h, p0/m, z1.h, z3.h
; VBITS_EQ_256-NEXT: st1h { z1.h }, p0, [x0]
; VBITS_EQ_256-NEXT: st1h { z0.h }, p0, [x0, x8, lsl #1]
; VBITS_EQ_256-NEXT: st1h { z1.h }, p0, [x0]
; VBITS_EQ_256-NEXT: ret
;
; VBITS_GE_512-LABEL: fmaxnm_v32f16:
@@ -156,8 +156,8 @@ define void @fmaxnm_v16f32(ptr %a, ptr %b) #0 {
; VBITS_EQ_256-NEXT: fmaxnm z0.s, p0/m, z0.s, z1.s
; VBITS_EQ_256-NEXT: movprfx z1, z2
; VBITS_EQ_256-NEXT: fmaxnm z1.s, p0/m, z1.s, z3.s
; VBITS_EQ_256-NEXT: st1w { z1.s }, p0, [x0]
; VBITS_EQ_256-NEXT: st1w { z0.s }, p0, [x0, x8, lsl #2]
; VBITS_EQ_256-NEXT: st1w { z1.s }, p0, [x0]
; VBITS_EQ_256-NEXT: ret
;
; VBITS_GE_512-LABEL: fmaxnm_v16f32:
@@ -255,8 +255,8 @@ define void @fmaxnm_v8f64(ptr %a, ptr %b) #0 {
; VBITS_EQ_256-NEXT: fmaxnm z0.d, p0/m, z0.d, z1.d
; VBITS_EQ_256-NEXT: movprfx z1, z2
; VBITS_EQ_256-NEXT: fmaxnm z1.d, p0/m, z1.d, z3.d
; VBITS_EQ_256-NEXT: st1d { z1.d }, p0, [x0]
; VBITS_EQ_256-NEXT: st1d { z0.d }, p0, [x0, x8, lsl #3]
; VBITS_EQ_256-NEXT: st1d { z1.d }, p0, [x0]
; VBITS_EQ_256-NEXT: ret
;
; VBITS_GE_512-LABEL: fmaxnm_v8f64:
@@ -358,8 +358,8 @@ define void @fminnm_v32f16(ptr %a, ptr %b) #0 {
; VBITS_EQ_256-NEXT: fminnm z0.h, p0/m, z0.h, z1.h
; VBITS_EQ_256-NEXT: movprfx z1, z2
; VBITS_EQ_256-NEXT: fminnm z1.h, p0/m, z1.h, z3.h
; VBITS_EQ_256-NEXT: st1h { z1.h }, p0, [x0]
; VBITS_EQ_256-NEXT: st1h { z0.h }, p0, [x0, x8, lsl #1]
; VBITS_EQ_256-NEXT: st1h { z1.h }, p0, [x0]
; VBITS_EQ_256-NEXT: ret
;
; VBITS_GE_512-LABEL: fminnm_v32f16:
@@ -457,8 +457,8 @@ define void @fminnm_v16f32(ptr %a, ptr %b) #0 {
; VBITS_EQ_256-NEXT: fminnm z0.s, p0/m, z0.s, z1.s
; VBITS_EQ_256-NEXT: movprfx z1, z2
; VBITS_EQ_256-NEXT: fminnm z1.s, p0/m, z1.s, z3.s
; VBITS_EQ_256-NEXT: st1w { z1.s }, p0, [x0]
; VBITS_EQ_256-NEXT: st1w { z0.s }, p0, [x0, x8, lsl #2]
; VBITS_EQ_256-NEXT: st1w { z1.s }, p0, [x0]
; VBITS_EQ_256-NEXT: ret
;
; VBITS_GE_512-LABEL: fminnm_v16f32:
@@ -556,8 +556,8 @@ define void @fminnm_v8f64(ptr %a, ptr %b) #0 {
; VBITS_EQ_256-NEXT: fminnm z0.d, p0/m, z0.d, z1.d
; VBITS_EQ_256-NEXT: movprfx z1, z2
; VBITS_EQ_256-NEXT: fminnm z1.d, p0/m, z1.d, z3.d
; VBITS_EQ_256-NEXT: st1d { z1.d }, p0, [x0]
; VBITS_EQ_256-NEXT: st1d { z0.d }, p0, [x0, x8, lsl #3]
; VBITS_EQ_256-NEXT: st1d { z1.d }, p0, [x0]
; VBITS_EQ_256-NEXT: ret
;
; VBITS_GE_512-LABEL: fminnm_v8f64:
@@ -659,8 +659,8 @@ define void @fmax_v32f16(ptr %a, ptr %b) #0 {
; VBITS_EQ_256-NEXT: fmax z0.h, p0/m, z0.h, z1.h
; VBITS_EQ_256-NEXT: movprfx z1, z2
; VBITS_EQ_256-NEXT: fmax z1.h, p0/m, z1.h, z3.h
; VBITS_EQ_256-NEXT: st1h { z1.h }, p0, [x0]
; VBITS_EQ_256-NEXT: st1h { z0.h }, p0, [x0, x8, lsl #1]
; VBITS_EQ_256-NEXT: st1h { z1.h }, p0, [x0]
; VBITS_EQ_256-NEXT: ret
;
; VBITS_GE_512-LABEL: fmax_v32f16:
@@ -758,8 +758,8 @@ define void @fmax_v16f32(ptr %a, ptr %b) #0 {
; VBITS_EQ_256-NEXT: fmax z0.s, p0/m, z0.s, z1.s
; VBITS_EQ_256-NEXT: movprfx z1, z2
; VBITS_EQ_256-NEXT: fmax z1.s, p0/m, z1.s, z3.s
; VBITS_EQ_256-NEXT: st1w { z1.s }, p0, [x0]
; VBITS_EQ_256-NEXT: st1w { z0.s }, p0, [x0, x8, lsl #2]
; VBITS_EQ_256-NEXT: st1w { z1.s }, p0, [x0]
; VBITS_EQ_256-NEXT: ret
;
; VBITS_GE_512-LABEL: fmax_v16f32:
@@ -857,8 +857,8 @@ define void @fmax_v8f64(ptr %a, ptr %b) #0 {
; VBITS_EQ_256-NEXT: fmax z0.d, p0/m, z0.d, z1.d
; VBITS_EQ_256-NEXT: movprfx z1, z2
; VBITS_EQ_256-NEXT: fmax z1.d, p0/m, z1.d, z3.d
; VBITS_EQ_256-NEXT: st1d { z1.d }, p0, [x0]
; VBITS_EQ_256-NEXT: st1d { z0.d }, p0, [x0, x8, lsl #3]
; VBITS_EQ_256-NEXT: st1d { z1.d }, p0, [x0]
; VBITS_EQ_256-NEXT: ret
;
; VBITS_GE_512-LABEL: fmax_v8f64:
@@ -960,8 +960,8 @@ define void @fmin_v32f16(ptr %a, ptr %b) #0 {
; VBITS_EQ_256-NEXT: fmin z0.h, p0/m, z0.h, z1.h
; VBITS_EQ_256-NEXT: movprfx z1, z2
; VBITS_EQ_256-NEXT: fmin z1.h, p0/m, z1.h, z3.h
; VBITS_EQ_256-NEXT: st1h { z1.h }, p0, [x0]
; VBITS_EQ_256-NEXT: st1h { z0.h }, p0, [x0, x8, lsl #1]
; VBITS_EQ_256-NEXT: st1h { z1.h }, p0, [x0]
; VBITS_EQ_256-NEXT: ret
;
; VBITS_GE_512-LABEL: fmin_v32f16:
@@ -1059,8 +1059,8 @@ define void @fmin_v16f32(ptr %a, ptr %b) #0 {
; VBITS_EQ_256-NEXT: fmin z0.s, p0/m, z0.s, z1.s
; VBITS_EQ_256-NEXT: movprfx z1, z2
; VBITS_EQ_256-NEXT: fmin z1.s, p0/m, z1.s, z3.s
; VBITS_EQ_256-NEXT: st1w { z1.s }, p0, [x0]
; VBITS_EQ_256-NEXT: st1w { z0.s }, p0, [x0, x8, lsl #2]
; VBITS_EQ_256-NEXT: st1w { z1.s }, p0, [x0]
; VBITS_EQ_256-NEXT: ret
;
; VBITS_GE_512-LABEL: fmin_v16f32:
@@ -1158,8 +1158,8 @@ define void @fmin_v8f64(ptr %a, ptr %b) #0 {
; VBITS_EQ_256-NEXT: fmin z0.d, p0/m, z0.d, z1.d
; VBITS_EQ_256-NEXT: movprfx z1, z2
; VBITS_EQ_256-NEXT: fmin z1.d, p0/m, z1.d, z3.d
; VBITS_EQ_256-NEXT: st1d { z1.d }, p0, [x0]
; VBITS_EQ_256-NEXT: st1d { z0.d }, p0, [x0, x8, lsl #3]
; VBITS_EQ_256-NEXT: st1d { z1.d }, p0, [x0]
; VBITS_EQ_256-NEXT: ret
;
; VBITS_GE_512-LABEL: fmin_v8f64:
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/AArch64/sve-fixed-length-int-arith.ll
@@ -458,8 +458,8 @@ define void @mul_v64i8(ptr %a, ptr %b) #0 {
; VBITS_GE_256-NEXT: mul z0.b, p0/m, z0.b, z1.b
; VBITS_GE_256-NEXT: movprfx z1, z2
; VBITS_GE_256-NEXT: mul z1.b, p0/m, z1.b, z3.b
; VBITS_GE_256-NEXT: st1b { z1.b }, p0, [x0]
; VBITS_GE_256-NEXT: st1b { z0.b }, p0, [x0, x8]
; VBITS_GE_256-NEXT: st1b { z1.b }, p0, [x0]
; VBITS_GE_256-NEXT: ret
;
; VBITS_GE_512-LABEL: mul_v64i8:
@@ -557,8 +557,8 @@ define void @mul_v32i16(ptr %a, ptr %b) #0 {
; VBITS_GE_256-NEXT: mul z0.h, p0/m, z0.h, z1.h
; VBITS_GE_256-NEXT: movprfx z1, z2
; VBITS_GE_256-NEXT: mul z1.h, p0/m, z1.h, z3.h
; VBITS_GE_256-NEXT: st1h { z1.h }, p0, [x0]
; VBITS_GE_256-NEXT: st1h { z0.h }, p0, [x0, x8, lsl #1]
; VBITS_GE_256-NEXT: st1h { z1.h }, p0, [x0]
; VBITS_GE_256-NEXT: ret
;
; VBITS_GE_512-LABEL: mul_v32i16:
@@ -656,8 +656,8 @@ define void @mul_v16i32(ptr %a, ptr %b) #0 {
; VBITS_GE_256-NEXT: mul z0.s, p0/m, z0.s, z1.s
; VBITS_GE_256-NEXT: movprfx z1, z2
; VBITS_GE_256-NEXT: mul z1.s, p0/m, z1.s, z3.s
; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x0]
; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x0, x8, lsl #2]
; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x0]
; VBITS_GE_256-NEXT: ret
;
; VBITS_GE_512-LABEL: mul_v16i32:
@@ -761,8 +761,8 @@ define void @mul_v8i64(ptr %a, ptr %b) #0 {
; VBITS_GE_256-NEXT: mul z0.d, p0/m, z0.d, z1.d
; VBITS_GE_256-NEXT: movprfx z1, z2
; VBITS_GE_256-NEXT: mul z1.d, p0/m, z1.d, z3.d
; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x0]
; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x0, x8, lsl #3]
; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x0]
; VBITS_GE_256-NEXT: ret
;
; VBITS_GE_512-LABEL: mul_v8i64:
32 changes: 16 additions & 16 deletions llvm/test/CodeGen/AArch64/sve-fixed-length-int-minmax.ll
@@ -57,8 +57,8 @@ define void @smax_v64i8(ptr %a, ptr %b) #0 {
; VBITS_GE_256-NEXT: smax z0.b, p0/m, z0.b, z1.b
; VBITS_GE_256-NEXT: movprfx z1, z2
; VBITS_GE_256-NEXT: smax z1.b, p0/m, z1.b, z3.b
; VBITS_GE_256-NEXT: st1b { z1.b }, p0, [x0]
; VBITS_GE_256-NEXT: st1b { z0.b }, p0, [x0, x8]
; VBITS_GE_256-NEXT: st1b { z1.b }, p0, [x0]
; VBITS_GE_256-NEXT: ret
;
; VBITS_GE_512-LABEL: smax_v64i8:
@@ -156,8 +156,8 @@ define void @smax_v32i16(ptr %a, ptr %b) #0 {
; VBITS_GE_256-NEXT: smax z0.h, p0/m, z0.h, z1.h
; VBITS_GE_256-NEXT: movprfx z1, z2
; VBITS_GE_256-NEXT: smax z1.h, p0/m, z1.h, z3.h
; VBITS_GE_256-NEXT: st1h { z1.h }, p0, [x0]
; VBITS_GE_256-NEXT: st1h { z0.h }, p0, [x0, x8, lsl #1]
; VBITS_GE_256-NEXT: st1h { z1.h }, p0, [x0]
; VBITS_GE_256-NEXT: ret
;
; VBITS_GE_512-LABEL: smax_v32i16:
@@ -255,8 +255,8 @@ define void @smax_v16i32(ptr %a, ptr %b) #0 {
; VBITS_GE_256-NEXT: smax z0.s, p0/m, z0.s, z1.s
; VBITS_GE_256-NEXT: movprfx z1, z2
; VBITS_GE_256-NEXT: smax z1.s, p0/m, z1.s, z3.s
; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x0]
; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x0, x8, lsl #2]
; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x0]
; VBITS_GE_256-NEXT: ret
;
; VBITS_GE_512-LABEL: smax_v16i32:
@@ -362,8 +362,8 @@ define void @smax_v8i64(ptr %a, ptr %b) #0 {
; VBITS_GE_256-NEXT: smax z0.d, p0/m, z0.d, z1.d
; VBITS_GE_256-NEXT: movprfx z1, z2
; VBITS_GE_256-NEXT: smax z1.d, p0/m, z1.d, z3.d
; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x0]
; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x0, x8, lsl #3]
; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x0]
; VBITS_GE_256-NEXT: ret
;
; VBITS_GE_512-LABEL: smax_v8i64:
@@ -465,8 +465,8 @@ define void @smin_v64i8(ptr %a, ptr %b) #0 {
; VBITS_GE_256-NEXT: smin z0.b, p0/m, z0.b, z1.b
; VBITS_GE_256-NEXT: movprfx z1, z2
; VBITS_GE_256-NEXT: smin z1.b, p0/m, z1.b, z3.b
; VBITS_GE_256-NEXT: st1b { z1.b }, p0, [x0]
; VBITS_GE_256-NEXT: st1b { z0.b }, p0, [x0, x8]
; VBITS_GE_256-NEXT: st1b { z1.b }, p0, [x0]
; VBITS_GE_256-NEXT: ret
;
; VBITS_GE_512-LABEL: smin_v64i8:
@@ -564,8 +564,8 @@ define void @smin_v32i16(ptr %a, ptr %b) #0 {
; VBITS_GE_256-NEXT: smin z0.h, p0/m, z0.h, z1.h
; VBITS_GE_256-NEXT: movprfx z1, z2
; VBITS_GE_256-NEXT: smin z1.h, p0/m, z1.h, z3.h
; VBITS_GE_256-NEXT: st1h { z1.h }, p0, [x0]
; VBITS_GE_256-NEXT: st1h { z0.h }, p0, [x0, x8, lsl #1]
; VBITS_GE_256-NEXT: st1h { z1.h }, p0, [x0]
; VBITS_GE_256-NEXT: ret
;
; VBITS_GE_512-LABEL: smin_v32i16:
@@ -663,8 +663,8 @@ define void @smin_v16i32(ptr %a, ptr %b) #0 {
; VBITS_GE_256-NEXT: smin z0.s, p0/m, z0.s, z1.s
; VBITS_GE_256-NEXT: movprfx z1, z2
; VBITS_GE_256-NEXT: smin z1.s, p0/m, z1.s, z3.s
; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x0]
; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x0, x8, lsl #2]
; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x0]
; VBITS_GE_256-NEXT: ret
;
; VBITS_GE_512-LABEL: smin_v16i32:
@@ -770,8 +770,8 @@ define void @smin_v8i64(ptr %a, ptr %b) #0 {
; VBITS_GE_256-NEXT: smin z0.d, p0/m, z0.d, z1.d
; VBITS_GE_256-NEXT: movprfx z1, z2
; VBITS_GE_256-NEXT: smin z1.d, p0/m, z1.d, z3.d
; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x0]
; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x0, x8, lsl #3]
; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x0]
; VBITS_GE_256-NEXT: ret
;
; VBITS_GE_512-LABEL: smin_v8i64:
@@ -873,8 +873,8 @@ define void @umax_v64i8(ptr %a, ptr %b) #0 {
; VBITS_GE_256-NEXT: umax z0.b, p0/m, z0.b, z1.b
; VBITS_GE_256-NEXT: movprfx z1, z2
; VBITS_GE_256-NEXT: umax z1.b, p0/m, z1.b, z3.b
; VBITS_GE_256-NEXT: st1b { z1.b }, p0, [x0]
; VBITS_GE_256-NEXT: st1b { z0.b }, p0, [x0, x8]
; VBITS_GE_256-NEXT: st1b { z1.b }, p0, [x0]
; VBITS_GE_256-NEXT: ret
;
; VBITS_GE_512-LABEL: umax_v64i8:
@@ -972,8 +972,8 @@ define void @umax_v32i16(ptr %a, ptr %b) #0 {
; VBITS_GE_256-NEXT: umax z0.h, p0/m, z0.h, z1.h
; VBITS_GE_256-NEXT: movprfx z1, z2
; VBITS_GE_256-NEXT: umax z1.h, p0/m, z1.h, z3.h
; VBITS_GE_256-NEXT: st1h { z1.h }, p0, [x0]
; VBITS_GE_256-NEXT: st1h { z0.h }, p0, [x0, x8, lsl #1]
; VBITS_GE_256-NEXT: st1h { z1.h }, p0, [x0]
; VBITS_GE_256-NEXT: ret
;
; VBITS_GE_512-LABEL: umax_v32i16:
@@ -1071,8 +1071,8 @@ define void @umax_v16i32(ptr %a, ptr %b) #0 {
; VBITS_GE_256-NEXT: umax z0.s, p0/m, z0.s, z1.s
; VBITS_GE_256-NEXT: movprfx z1, z2
; VBITS_GE_256-NEXT: umax z1.s, p0/m, z1.s, z3.s
; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x0]
; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x0, x8, lsl #2]
; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x0]
; VBITS_GE_256-NEXT: ret
;
; VBITS_GE_512-LABEL: umax_v16i32:
@@ -1178,8 +1178,8 @@ define void @umax_v8i64(ptr %a, ptr %b) #0 {
; VBITS_GE_256-NEXT: umax z0.d, p0/m, z0.d, z1.d
; VBITS_GE_256-NEXT: movprfx z1, z2
; VBITS_GE_256-NEXT: umax z1.d, p0/m, z1.d, z3.d
; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x0]
; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x0, x8, lsl #3]
; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x0]
; VBITS_GE_256-NEXT: ret
;
; VBITS_GE_512-LABEL: umax_v8i64:
@@ -1281,8 +1281,8 @@ define void @umin_v64i8(ptr %a, ptr %b) #0 {
; VBITS_GE_256-NEXT: umin z0.b, p0/m, z0.b, z1.b
; VBITS_GE_256-NEXT: movprfx z1, z2
; VBITS_GE_256-NEXT: umin z1.b, p0/m, z1.b, z3.b
; VBITS_GE_256-NEXT: st1b { z1.b }, p0, [x0]
; VBITS_GE_256-NEXT: st1b { z0.b }, p0, [x0, x8]
; VBITS_GE_256-NEXT: st1b { z1.b }, p0, [x0]
; VBITS_GE_256-NEXT: ret
;
; VBITS_GE_512-LABEL: umin_v64i8:
@@ -1380,8 +1380,8 @@ define void @umin_v32i16(ptr %a, ptr %b) #0 {
; VBITS_GE_256-NEXT: umin z0.h, p0/m, z0.h, z1.h
; VBITS_GE_256-NEXT: movprfx z1, z2
; VBITS_GE_256-NEXT: umin z1.h, p0/m, z1.h, z3.h
; VBITS_GE_256-NEXT: st1h { z1.h }, p0, [x0]
; VBITS_GE_256-NEXT: st1h { z0.h }, p0, [x0, x8, lsl #1]
; VBITS_GE_256-NEXT: st1h { z1.h }, p0, [x0]
; VBITS_GE_256-NEXT: ret
;
; VBITS_GE_512-LABEL: umin_v32i16:
@@ -1479,8 +1479,8 @@ define void @umin_v16i32(ptr %a, ptr %b) #0 {
; VBITS_GE_256-NEXT: umin z0.s, p0/m, z0.s, z1.s
; VBITS_GE_256-NEXT: movprfx z1, z2
; VBITS_GE_256-NEXT: umin z1.s, p0/m, z1.s, z3.s
; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x0]
; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x0, x8, lsl #2]
; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x0]
; VBITS_GE_256-NEXT: ret
;
; VBITS_GE_512-LABEL: umin_v16i32:
@@ -1586,8 +1586,8 @@ define void @umin_v8i64(ptr %a, ptr %b) #0 {
; VBITS_GE_256-NEXT: umin z0.d, p0/m, z0.d, z1.d
; VBITS_GE_256-NEXT: movprfx z1, z2
; VBITS_GE_256-NEXT: umin z1.d, p0/m, z1.d, z3.d
; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x0]
; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x0, x8, lsl #3]
; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x0]
; VBITS_GE_256-NEXT: ret
;
; VBITS_GE_512-LABEL: umin_v8i64:
16 changes: 8 additions & 8 deletions llvm/test/CodeGen/AArch64/sve-fixed-length-int-mulh.ll
@@ -82,8 +82,8 @@ define void @smulh_v64i8(ptr %a, ptr %b) #0 {
; VBITS_GE_256-NEXT: smulh z0.b, p0/m, z0.b, z1.b
; VBITS_GE_256-NEXT: movprfx z1, z2
; VBITS_GE_256-NEXT: smulh z1.b, p0/m, z1.b, z3.b
; VBITS_GE_256-NEXT: st1b { z1.b }, p0, [x0]
; VBITS_GE_256-NEXT: st1b { z0.b }, p0, [x0, x8]
; VBITS_GE_256-NEXT: st1b { z1.b }, p0, [x0]
; VBITS_GE_256-NEXT: ret
;
; VBITS_GE_512-LABEL: smulh_v64i8:
@@ -215,8 +215,8 @@ define void @smulh_v32i16(ptr %a, ptr %b) #0 {
; VBITS_GE_256-NEXT: smulh z0.h, p0/m, z0.h, z1.h
; VBITS_GE_256-NEXT: movprfx z1, z2
; VBITS_GE_256-NEXT: smulh z1.h, p0/m, z1.h, z3.h
; VBITS_GE_256-NEXT: st1h { z1.h }, p0, [x0]
; VBITS_GE_256-NEXT: st1h { z0.h }, p0, [x0, x8, lsl #1]
; VBITS_GE_256-NEXT: st1h { z1.h }, p0, [x0]
; VBITS_GE_256-NEXT: ret
;
; VBITS_GE_512-LABEL: smulh_v32i16:
@@ -346,8 +346,8 @@ define void @smulh_v16i32(ptr %a, ptr %b) #0 {
; VBITS_GE_256-NEXT: smulh z0.s, p0/m, z0.s, z1.s
; VBITS_GE_256-NEXT: movprfx z1, z2
; VBITS_GE_256-NEXT: smulh z1.s, p0/m, z1.s, z3.s
; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x0]
; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x0, x8, lsl #2]
; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x0]
; VBITS_GE_256-NEXT: ret
;
; VBITS_GE_512-LABEL: smulh_v16i32:
@@ -479,8 +479,8 @@ define void @smulh_v8i64(ptr %a, ptr %b) #0 {
; VBITS_GE_256-NEXT: smulh z0.d, p0/m, z0.d, z1.d
; VBITS_GE_256-NEXT: movprfx z1, z2
; VBITS_GE_256-NEXT: smulh z1.d, p0/m, z1.d, z3.d
; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x0]
; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x0, x8, lsl #3]
; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x0]
; VBITS_GE_256-NEXT: ret
;
; VBITS_GE_512-LABEL: smulh_v8i64:
@@ -615,8 +615,8 @@ define void @umulh_v64i8(ptr %a, ptr %b) #0 {
; VBITS_GE_256-NEXT: umulh z0.b, p0/m, z0.b, z1.b
; VBITS_GE_256-NEXT: movprfx z1, z2
; VBITS_GE_256-NEXT: umulh z1.b, p0/m, z1.b, z3.b
; VBITS_GE_256-NEXT: st1b { z1.b }, p0, [x0]
; VBITS_GE_256-NEXT: st1b { z0.b }, p0, [x0, x8]
; VBITS_GE_256-NEXT: st1b { z1.b }, p0, [x0]
; VBITS_GE_256-NEXT: ret
;
; VBITS_GE_512-LABEL: umulh_v64i8:
@@ -749,8 +749,8 @@ define void @umulh_v32i16(ptr %a, ptr %b) #0 {
; VBITS_GE_256-NEXT: umulh z0.h, p0/m, z0.h, z1.h
; VBITS_GE_256-NEXT: movprfx z1, z2
; VBITS_GE_256-NEXT: umulh z1.h, p0/m, z1.h, z3.h
; VBITS_GE_256-NEXT: st1h { z1.h }, p0, [x0]
; VBITS_GE_256-NEXT: st1h { z0.h }, p0, [x0, x8, lsl #1]
; VBITS_GE_256-NEXT: st1h { z1.h }, p0, [x0]
; VBITS_GE_256-NEXT: ret
;
; VBITS_GE_512-LABEL: umulh_v32i16:
@@ -882,8 +882,8 @@ define void @umulh_v16i32(ptr %a, ptr %b) #0 {
; VBITS_GE_256-NEXT: umulh z0.s, p0/m, z0.s, z1.s
; VBITS_GE_256-NEXT: movprfx z1, z2
; VBITS_GE_256-NEXT: umulh z1.s, p0/m, z1.s, z3.s
; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x0]
; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x0, x8, lsl #2]
; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x0]
; VBITS_GE_256-NEXT: ret
;
; VBITS_GE_512-LABEL: umulh_v16i32:
@@ -1013,8 +1013,8 @@ define void @umulh_v8i64(ptr %a, ptr %b) #0 {
; VBITS_GE_256-NEXT: umulh z0.d, p0/m, z0.d, z1.d
; VBITS_GE_256-NEXT: movprfx z1, z2
; VBITS_GE_256-NEXT: umulh z1.d, p0/m, z1.d, z3.d
; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x0]
; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x0, x8, lsl #3]
; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x0]
; VBITS_GE_256-NEXT: ret
;
; VBITS_GE_512-LABEL: umulh_v8i64:
96 changes: 48 additions & 48 deletions llvm/test/CodeGen/AArch64/sve-fixed-length-int-rem.ll
@@ -533,9 +533,9 @@ define void @srem_v128i16(ptr %a, ptr %b) vscale_range(16,0) #0 {
define <2 x i32> @srem_v2i32(<2 x i32> %op1, <2 x i32> %op2) vscale_range(1,0) #0 {
; CHECK-LABEL: srem_v2i32:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
; CHECK-NEXT: ptrue p0.s, vl2
; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: movprfx z2, z0
; CHECK-NEXT: sdiv z2.s, p0/m, z2.s, z1.s
; CHECK-NEXT: mls v0.2s, v2.2s, v1.2s
@@ -549,9 +549,9 @@ define <2 x i32> @srem_v2i32(<2 x i32> %op1, <2 x i32> %op2) vscale_range(1,0) #
define <4 x i32> @srem_v4i32(<4 x i32> %op1, <4 x i32> %op2) vscale_range(1,0) #0 {
; CHECK-LABEL: srem_v4i32:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
; CHECK-NEXT: ptrue p0.s, vl4
; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: movprfx z2, z0
; CHECK-NEXT: sdiv z2.s, p0/m, z2.s, z1.s
; CHECK-NEXT: mls v0.4s, v2.4s, v1.4s
@@ -582,25 +582,25 @@ define void @srem_v16i32(ptr %a, ptr %b) #0 {
define void @srem_v16i32(ptr %a, ptr %b) #0 {
; VBITS_GE_128-LABEL: srem_v16i32:
; VBITS_GE_128: // %bb.0:
; VBITS_GE_128-NEXT: ldp q0, q3, [x1]
; VBITS_GE_128-NEXT: ptrue p0.s, vl4
; VBITS_GE_128-NEXT: ldp q0, q3, [x1]
; VBITS_GE_128-NEXT: ldp q1, q2, [x0]
; VBITS_GE_128-NEXT: movprfx z4, z1
; VBITS_GE_128-NEXT: sdiv z4.s, p0/m, z4.s, z0.s
; VBITS_GE_128-NEXT: ldp q16, q5, [x0, #32]
; VBITS_GE_128-NEXT: movprfx z19, z2
; VBITS_GE_128-NEXT: sdiv z19.s, p0/m, z19.s, z3.s
; VBITS_GE_128-NEXT: ldp q17, q6, [x1, #32]
; VBITS_GE_128-NEXT: movprfx z4, z1
; VBITS_GE_128-NEXT: sdiv z4.s, p0/m, z4.s, z0.s
; VBITS_GE_128-NEXT: movprfx z7, z5
; VBITS_GE_128-NEXT: sdiv z7.s, p0/m, z7.s, z6.s
; VBITS_GE_128-NEXT: movprfx z18, z16
; VBITS_GE_128-NEXT: sdiv z18.s, p0/m, z18.s, z17.s
; VBITS_GE_128-NEXT: mls v1.4s, v4.4s, v0.4s
; VBITS_GE_128-NEXT: mls v2.4s, v19.4s, v3.4s
; VBITS_GE_128-NEXT: movprfx z19, z2
; VBITS_GE_128-NEXT: sdiv z19.s, p0/m, z19.s, z3.s
; VBITS_GE_128-NEXT: mls v16.4s, v18.4s, v17.4s
; VBITS_GE_128-NEXT: mls v5.4s, v7.4s, v6.4s
; VBITS_GE_128-NEXT: stp q1, q2, [x0]
; VBITS_GE_128-NEXT: mls v1.4s, v4.4s, v0.4s
; VBITS_GE_128-NEXT: mls v2.4s, v19.4s, v3.4s
; VBITS_GE_128-NEXT: stp q16, q5, [x0, #32]
; VBITS_GE_128-NEXT: stp q1, q2, [x0]
; VBITS_GE_128-NEXT: ret
;
; VBITS_GE_256-LABEL: srem_v16i32:
@@ -609,17 +609,17 @@ define void @srem_v16i32(ptr %a, ptr %b) #0 {
; VBITS_GE_256-NEXT: mov x8, #8 // =0x8
; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x1, x8, lsl #2]
; VBITS_GE_256-NEXT: movprfx z2, z0
; VBITS_GE_256-NEXT: sdiv z2.s, p0/m, z2.s, z1.s
; VBITS_GE_256-NEXT: ld1w { z3.s }, p0/z, [x0]
; VBITS_GE_256-NEXT: ld1w { z4.s }, p0/z, [x1]
; VBITS_GE_256-NEXT: movprfx z2, z0
; VBITS_GE_256-NEXT: sdiv z2.s, p0/m, z2.s, z1.s
; VBITS_GE_256-NEXT: movprfx z5, z3
; VBITS_GE_256-NEXT: sdiv z5.s, p0/m, z5.s, z4.s
; VBITS_GE_256-NEXT: mls z0.s, p0/m, z2.s, z1.s
; VBITS_GE_256-NEXT: movprfx z1, z3
; VBITS_GE_256-NEXT: mls z1.s, p0/m, z5.s, z4.s
; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x0]
; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x0, x8, lsl #2]
; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x0]
; VBITS_GE_256-NEXT: ret
;
; VBITS_GE_512-LABEL: srem_v16i32:
@@ -681,8 +681,8 @@ define <1 x i64> @srem_v1i64(<1 x i64> %op1, <1 x i64> %op2) vscale_range(1,0) #
; CHECK-LABEL: srem_v1i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl1
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: movprfx z2, z0
; CHECK-NEXT: sdiv z2.d, p0/m, z2.d, z1.d
; CHECK-NEXT: mls z0.d, p0/m, z2.d, z1.d
@@ -698,8 +698,8 @@ define <2 x i64> @srem_v2i64(<2 x i64> %op1, <2 x i64> %op2) vscale_range(1,0) #
; CHECK-LABEL: srem_v2i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl2
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: movprfx z2, z0
; CHECK-NEXT: sdiv z2.d, p0/m, z2.d, z1.d
; CHECK-NEXT: mls z0.d, p0/m, z2.d, z1.d
@@ -733,16 +733,16 @@ define void @srem_v8i64(ptr %a, ptr %b) #0 {
; VBITS_GE_128-NEXT: ptrue p0.d, vl2
; VBITS_GE_128-NEXT: ldp q0, q3, [x1]
; VBITS_GE_128-NEXT: ldp q1, q2, [x0]
; VBITS_GE_128-NEXT: movprfx z4, z1
; VBITS_GE_128-NEXT: sdiv z4.d, p0/m, z4.d, z0.d
; VBITS_GE_128-NEXT: ldp q16, q5, [x0, #32]
; VBITS_GE_128-NEXT: movprfx z19, z2
; VBITS_GE_128-NEXT: sdiv z19.d, p0/m, z19.d, z3.d
; VBITS_GE_128-NEXT: ldp q17, q6, [x1, #32]
; VBITS_GE_128-NEXT: movprfx z4, z1
; VBITS_GE_128-NEXT: sdiv z4.d, p0/m, z4.d, z0.d
; VBITS_GE_128-NEXT: movprfx z7, z5
; VBITS_GE_128-NEXT: sdiv z7.d, p0/m, z7.d, z6.d
; VBITS_GE_128-NEXT: movprfx z18, z16
; VBITS_GE_128-NEXT: sdiv z18.d, p0/m, z18.d, z17.d
; VBITS_GE_128-NEXT: movprfx z19, z2
; VBITS_GE_128-NEXT: sdiv z19.d, p0/m, z19.d, z3.d
; VBITS_GE_128-NEXT: mls z16.d, p0/m, z18.d, z17.d
; VBITS_GE_128-NEXT: mls z5.d, p0/m, z7.d, z6.d
; VBITS_GE_128-NEXT: msb z0.d, p0/m, z4.d, z1.d
@@ -758,17 +758,17 @@ define void @srem_v8i64(ptr %a, ptr %b) #0 {
; VBITS_GE_256-NEXT: mov x8, #4 // =0x4
; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x1, x8, lsl #3]
; VBITS_GE_256-NEXT: movprfx z2, z0
; VBITS_GE_256-NEXT: sdiv z2.d, p0/m, z2.d, z1.d
; VBITS_GE_256-NEXT: ld1d { z3.d }, p0/z, [x0]
; VBITS_GE_256-NEXT: ld1d { z4.d }, p0/z, [x1]
; VBITS_GE_256-NEXT: movprfx z2, z0
; VBITS_GE_256-NEXT: sdiv z2.d, p0/m, z2.d, z1.d
; VBITS_GE_256-NEXT: movprfx z5, z3
; VBITS_GE_256-NEXT: sdiv z5.d, p0/m, z5.d, z4.d
; VBITS_GE_256-NEXT: mls z0.d, p0/m, z2.d, z1.d
; VBITS_GE_256-NEXT: movprfx z1, z3
; VBITS_GE_256-NEXT: mls z1.d, p0/m, z5.d, z4.d
; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x0]
; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x0, x8, lsl #3]
; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x0]
; VBITS_GE_256-NEXT: ret
;
; VBITS_GE_512-LABEL: srem_v8i64:
@@ -1351,9 +1351,9 @@ define void @urem_v128i16(ptr %a, ptr %b) vscale_range(16,0) #0 {
define <2 x i32> @urem_v2i32(<2 x i32> %op1, <2 x i32> %op2) vscale_range(1,0) #0 {
; CHECK-LABEL: urem_v2i32:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
; CHECK-NEXT: ptrue p0.s, vl2
; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: movprfx z2, z0
; CHECK-NEXT: udiv z2.s, p0/m, z2.s, z1.s
; CHECK-NEXT: mls v0.2s, v2.2s, v1.2s
@@ -1367,9 +1367,9 @@ define <2 x i32> @urem_v2i32(<2 x i32> %op1, <2 x i32> %op2) vscale_range(1,0) #
define <4 x i32> @urem_v4i32(<4 x i32> %op1, <4 x i32> %op2) vscale_range(1,0) #0 {
; CHECK-LABEL: urem_v4i32:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
; CHECK-NEXT: ptrue p0.s, vl4
; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: movprfx z2, z0
; CHECK-NEXT: udiv z2.s, p0/m, z2.s, z1.s
; CHECK-NEXT: mls v0.4s, v2.4s, v1.4s
@@ -1400,25 +1400,25 @@ define void @urem_v16i32(ptr %a, ptr %b) #0 {
define void @urem_v16i32(ptr %a, ptr %b) #0 {
; VBITS_GE_128-LABEL: urem_v16i32:
; VBITS_GE_128: // %bb.0:
; VBITS_GE_128-NEXT: ldp q0, q3, [x1]
; VBITS_GE_128-NEXT: ptrue p0.s, vl4
; VBITS_GE_128-NEXT: ldp q0, q3, [x1]
; VBITS_GE_128-NEXT: ldp q1, q2, [x0]
; VBITS_GE_128-NEXT: movprfx z4, z1
; VBITS_GE_128-NEXT: udiv z4.s, p0/m, z4.s, z0.s
; VBITS_GE_128-NEXT: ldp q16, q5, [x0, #32]
; VBITS_GE_128-NEXT: movprfx z19, z2
; VBITS_GE_128-NEXT: udiv z19.s, p0/m, z19.s, z3.s
; VBITS_GE_128-NEXT: ldp q17, q6, [x1, #32]
; VBITS_GE_128-NEXT: movprfx z4, z1
; VBITS_GE_128-NEXT: udiv z4.s, p0/m, z4.s, z0.s
; VBITS_GE_128-NEXT: movprfx z7, z5
; VBITS_GE_128-NEXT: udiv z7.s, p0/m, z7.s, z6.s
; VBITS_GE_128-NEXT: movprfx z18, z16
; VBITS_GE_128-NEXT: udiv z18.s, p0/m, z18.s, z17.s
; VBITS_GE_128-NEXT: mls v1.4s, v4.4s, v0.4s
; VBITS_GE_128-NEXT: mls v2.4s, v19.4s, v3.4s
; VBITS_GE_128-NEXT: movprfx z19, z2
; VBITS_GE_128-NEXT: udiv z19.s, p0/m, z19.s, z3.s
; VBITS_GE_128-NEXT: mls v16.4s, v18.4s, v17.4s
; VBITS_GE_128-NEXT: mls v5.4s, v7.4s, v6.4s
; VBITS_GE_128-NEXT: stp q1, q2, [x0]
; VBITS_GE_128-NEXT: mls v1.4s, v4.4s, v0.4s
; VBITS_GE_128-NEXT: mls v2.4s, v19.4s, v3.4s
; VBITS_GE_128-NEXT: stp q16, q5, [x0, #32]
; VBITS_GE_128-NEXT: stp q1, q2, [x0]
; VBITS_GE_128-NEXT: ret
;
; VBITS_GE_256-LABEL: urem_v16i32:
Expand All @@ -1427,17 +1427,17 @@ define void @urem_v16i32(ptr %a, ptr %b) #0 {
; VBITS_GE_256-NEXT: mov x8, #8 // =0x8
; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x1, x8, lsl #2]
; VBITS_GE_256-NEXT: movprfx z2, z0
; VBITS_GE_256-NEXT: udiv z2.s, p0/m, z2.s, z1.s
; VBITS_GE_256-NEXT: ld1w { z3.s }, p0/z, [x0]
; VBITS_GE_256-NEXT: ld1w { z4.s }, p0/z, [x1]
; VBITS_GE_256-NEXT: movprfx z2, z0
; VBITS_GE_256-NEXT: udiv z2.s, p0/m, z2.s, z1.s
; VBITS_GE_256-NEXT: movprfx z5, z3
; VBITS_GE_256-NEXT: udiv z5.s, p0/m, z5.s, z4.s
; VBITS_GE_256-NEXT: mls z0.s, p0/m, z2.s, z1.s
; VBITS_GE_256-NEXT: movprfx z1, z3
; VBITS_GE_256-NEXT: mls z1.s, p0/m, z5.s, z4.s
; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x0]
; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x0, x8, lsl #2]
; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x0]
; VBITS_GE_256-NEXT: ret
;
; VBITS_GE_512-LABEL: urem_v16i32:
@@ -1499,8 +1499,8 @@ define <1 x i64> @urem_v1i64(<1 x i64> %op1, <1 x i64> %op2) vscale_range(1,0) #
; CHECK-LABEL: urem_v1i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl1
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: movprfx z2, z0
; CHECK-NEXT: udiv z2.d, p0/m, z2.d, z1.d
; CHECK-NEXT: mls z0.d, p0/m, z2.d, z1.d
@@ -1516,8 +1516,8 @@ define <2 x i64> @urem_v2i64(<2 x i64> %op1, <2 x i64> %op2) vscale_range(1,0) #
; CHECK-LABEL: urem_v2i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl2
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: movprfx z2, z0
; CHECK-NEXT: udiv z2.d, p0/m, z2.d, z1.d
; CHECK-NEXT: mls z0.d, p0/m, z2.d, z1.d
@@ -1551,16 +1551,16 @@ define void @urem_v8i64(ptr %a, ptr %b) #0 {
; VBITS_GE_128-NEXT: ptrue p0.d, vl2
; VBITS_GE_128-NEXT: ldp q0, q3, [x1]
; VBITS_GE_128-NEXT: ldp q1, q2, [x0]
; VBITS_GE_128-NEXT: movprfx z4, z1
; VBITS_GE_128-NEXT: udiv z4.d, p0/m, z4.d, z0.d
; VBITS_GE_128-NEXT: ldp q16, q5, [x0, #32]
; VBITS_GE_128-NEXT: movprfx z19, z2
; VBITS_GE_128-NEXT: udiv z19.d, p0/m, z19.d, z3.d
; VBITS_GE_128-NEXT: ldp q17, q6, [x1, #32]
; VBITS_GE_128-NEXT: movprfx z4, z1
; VBITS_GE_128-NEXT: udiv z4.d, p0/m, z4.d, z0.d
; VBITS_GE_128-NEXT: movprfx z7, z5
; VBITS_GE_128-NEXT: udiv z7.d, p0/m, z7.d, z6.d
; VBITS_GE_128-NEXT: movprfx z18, z16
; VBITS_GE_128-NEXT: udiv z18.d, p0/m, z18.d, z17.d
; VBITS_GE_128-NEXT: movprfx z19, z2
; VBITS_GE_128-NEXT: udiv z19.d, p0/m, z19.d, z3.d
; VBITS_GE_128-NEXT: mls z16.d, p0/m, z18.d, z17.d
; VBITS_GE_128-NEXT: mls z5.d, p0/m, z7.d, z6.d
; VBITS_GE_128-NEXT: msb z0.d, p0/m, z4.d, z1.d
@@ -1576,17 +1576,17 @@ define void @urem_v8i64(ptr %a, ptr %b) #0 {
; VBITS_GE_256-NEXT: mov x8, #4 // =0x4
; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x1, x8, lsl #3]
; VBITS_GE_256-NEXT: movprfx z2, z0
; VBITS_GE_256-NEXT: udiv z2.d, p0/m, z2.d, z1.d
; VBITS_GE_256-NEXT: ld1d { z3.d }, p0/z, [x0]
; VBITS_GE_256-NEXT: ld1d { z4.d }, p0/z, [x1]
; VBITS_GE_256-NEXT: movprfx z2, z0
; VBITS_GE_256-NEXT: udiv z2.d, p0/m, z2.d, z1.d
; VBITS_GE_256-NEXT: movprfx z5, z3
; VBITS_GE_256-NEXT: udiv z5.d, p0/m, z5.d, z4.d
; VBITS_GE_256-NEXT: mls z0.d, p0/m, z2.d, z1.d
; VBITS_GE_256-NEXT: movprfx z1, z3
; VBITS_GE_256-NEXT: mls z1.d, p0/m, z5.d, z4.d
; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x0]
; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x0, x8, lsl #3]
; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x0]
; VBITS_GE_256-NEXT: ret
;
; VBITS_GE_512-LABEL: urem_v8i64:
24 changes: 12 additions & 12 deletions llvm/test/CodeGen/AArch64/sve-fixed-length-int-shifts.ll
@@ -59,8 +59,8 @@ define void @ashr_v64i8(ptr %a, ptr %b) #0 {
; VBITS_GE_256-NEXT: asr z0.b, p0/m, z0.b, z1.b
; VBITS_GE_256-NEXT: movprfx z1, z2
; VBITS_GE_256-NEXT: asr z1.b, p0/m, z1.b, z3.b
; VBITS_GE_256-NEXT: st1b { z1.b }, p0, [x0]
; VBITS_GE_256-NEXT: st1b { z0.b }, p0, [x0, x8]
; VBITS_GE_256-NEXT: st1b { z1.b }, p0, [x0]
; VBITS_GE_256-NEXT: ret
;
; VBITS_GE_512-LABEL: ashr_v64i8:
@@ -160,8 +160,8 @@ define void @ashr_v32i16(ptr %a, ptr %b) #0 {
; VBITS_GE_256-NEXT: asr z0.h, p0/m, z0.h, z1.h
; VBITS_GE_256-NEXT: movprfx z1, z2
; VBITS_GE_256-NEXT: asr z1.h, p0/m, z1.h, z3.h
; VBITS_GE_256-NEXT: st1h { z1.h }, p0, [x0]
; VBITS_GE_256-NEXT: st1h { z0.h }, p0, [x0, x8, lsl #1]
; VBITS_GE_256-NEXT: st1h { z1.h }, p0, [x0]
; VBITS_GE_256-NEXT: ret
;
; VBITS_GE_512-LABEL: ashr_v32i16:
@@ -261,8 +261,8 @@ define void @ashr_v16i32(ptr %a, ptr %b) #0 {
; VBITS_GE_256-NEXT: asr z0.s, p0/m, z0.s, z1.s
; VBITS_GE_256-NEXT: movprfx z1, z2
; VBITS_GE_256-NEXT: asr z1.s, p0/m, z1.s, z3.s
; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x0]
; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x0, x8, lsl #2]
; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x0]
; VBITS_GE_256-NEXT: ret
;
; VBITS_GE_512-LABEL: ashr_v16i32:
@@ -362,8 +362,8 @@ define void @ashr_v8i64(ptr %a, ptr %b) #0 {
; VBITS_GE_256-NEXT: asr z0.d, p0/m, z0.d, z1.d
; VBITS_GE_256-NEXT: movprfx z1, z2
; VBITS_GE_256-NEXT: asr z1.d, p0/m, z1.d, z3.d
; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x0]
; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x0, x8, lsl #3]
; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x0]
; VBITS_GE_256-NEXT: ret
;
; VBITS_GE_512-LABEL: ashr_v8i64:
@@ -467,8 +467,8 @@ define void @lshr_v64i8(ptr %a, ptr %b) #0 {
; VBITS_GE_256-NEXT: lsr z0.b, p0/m, z0.b, z1.b
; VBITS_GE_256-NEXT: movprfx z1, z2
; VBITS_GE_256-NEXT: lsr z1.b, p0/m, z1.b, z3.b
; VBITS_GE_256-NEXT: st1b { z1.b }, p0, [x0]
; VBITS_GE_256-NEXT: st1b { z0.b }, p0, [x0, x8]
; VBITS_GE_256-NEXT: st1b { z1.b }, p0, [x0]
; VBITS_GE_256-NEXT: ret
;
; VBITS_GE_512-LABEL: lshr_v64i8:
@@ -568,8 +568,8 @@ define void @lshr_v32i16(ptr %a, ptr %b) #0 {
; VBITS_GE_256-NEXT: lsr z0.h, p0/m, z0.h, z1.h
; VBITS_GE_256-NEXT: movprfx z1, z2
; VBITS_GE_256-NEXT: lsr z1.h, p0/m, z1.h, z3.h
; VBITS_GE_256-NEXT: st1h { z1.h }, p0, [x0]
; VBITS_GE_256-NEXT: st1h { z0.h }, p0, [x0, x8, lsl #1]
; VBITS_GE_256-NEXT: st1h { z1.h }, p0, [x0]
; VBITS_GE_256-NEXT: ret
;
; VBITS_GE_512-LABEL: lshr_v32i16:
@@ -669,8 +669,8 @@ define void @lshr_v16i32(ptr %a, ptr %b) #0 {
; VBITS_GE_256-NEXT: lsr z0.s, p0/m, z0.s, z1.s
; VBITS_GE_256-NEXT: movprfx z1, z2
; VBITS_GE_256-NEXT: lsr z1.s, p0/m, z1.s, z3.s
; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x0]
; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x0, x8, lsl #2]
; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x0]
; VBITS_GE_256-NEXT: ret
;
; VBITS_GE_512-LABEL: lshr_v16i32:
@@ -770,8 +770,8 @@ define void @lshr_v8i64(ptr %a, ptr %b) #0 {
; VBITS_GE_256-NEXT: lsr z0.d, p0/m, z0.d, z1.d
; VBITS_GE_256-NEXT: movprfx z1, z2
; VBITS_GE_256-NEXT: lsr z1.d, p0/m, z1.d, z3.d
; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x0]
; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x0, x8, lsl #3]
; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x0]
; VBITS_GE_256-NEXT: ret
;
; VBITS_GE_512-LABEL: lshr_v8i64:
@@ -873,8 +873,8 @@ define void @shl_v64i8(ptr %a, ptr %b) #0 {
; VBITS_GE_256-NEXT: lsl z0.b, p0/m, z0.b, z1.b
; VBITS_GE_256-NEXT: movprfx z1, z2
; VBITS_GE_256-NEXT: lsl z1.b, p0/m, z1.b, z3.b
; VBITS_GE_256-NEXT: st1b { z1.b }, p0, [x0]
; VBITS_GE_256-NEXT: st1b { z0.b }, p0, [x0, x8]
; VBITS_GE_256-NEXT: st1b { z1.b }, p0, [x0]
; VBITS_GE_256-NEXT: ret
;
; VBITS_GE_512-LABEL: shl_v64i8:
@@ -972,8 +972,8 @@ define void @shl_v32i16(ptr %a, ptr %b) #0 {
; VBITS_GE_256-NEXT: lsl z0.h, p0/m, z0.h, z1.h
; VBITS_GE_256-NEXT: movprfx z1, z2
; VBITS_GE_256-NEXT: lsl z1.h, p0/m, z1.h, z3.h
; VBITS_GE_256-NEXT: st1h { z1.h }, p0, [x0]
; VBITS_GE_256-NEXT: st1h { z0.h }, p0, [x0, x8, lsl #1]
; VBITS_GE_256-NEXT: st1h { z1.h }, p0, [x0]
; VBITS_GE_256-NEXT: ret
;
; VBITS_GE_512-LABEL: shl_v32i16:
@@ -1071,8 +1071,8 @@ define void @shl_v16i32(ptr %a, ptr %b) #0 {
; VBITS_GE_256-NEXT: lsl z0.s, p0/m, z0.s, z1.s
; VBITS_GE_256-NEXT: movprfx z1, z2
; VBITS_GE_256-NEXT: lsl z1.s, p0/m, z1.s, z3.s
; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x0]
; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x0, x8, lsl #2]
; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x0]
; VBITS_GE_256-NEXT: ret
;
; VBITS_GE_512-LABEL: shl_v16i32:
@@ -1170,8 +1170,8 @@ define void @shl_v8i64(ptr %a, ptr %b) #0 {
; VBITS_GE_256-NEXT: lsl z0.d, p0/m, z0.d, z1.d
; VBITS_GE_256-NEXT: movprfx z1, z2
; VBITS_GE_256-NEXT: lsl z1.d, p0/m, z1.d, z3.d
; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x0]
; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x0, x8, lsl #3]
; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x0]
; VBITS_GE_256-NEXT: ret
;
; VBITS_GE_512-LABEL: shl_v8i64:
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AArch64/sve-fpext-load.ll
@@ -46,13 +46,13 @@ define <vscale x 8 x double> @ext8_f16_f64(ptr %ptr, i64 %index) {
; CHECK-NEXT: uunpkhi z1.d, z1.s
; CHECK-NEXT: uunpklo z3.d, z0.s
; CHECK-NEXT: uunpkhi z4.d, z0.s
; CHECK-NEXT: fcvt z1.d, p0/m, z1.h
; CHECK-NEXT: movprfx z0, z2
; CHECK-NEXT: fcvt z0.d, p0/m, z2.h
; CHECK-NEXT: movprfx z2, z3
; CHECK-NEXT: fcvt z2.d, p0/m, z3.h
; CHECK-NEXT: movprfx z3, z4
; CHECK-NEXT: fcvt z3.d, p0/m, z4.h
; CHECK-NEXT: fcvt z1.d, p0/m, z1.h
; CHECK-NEXT: ret
%load = load <vscale x 8 x half>, ptr %ptr, align 4
%load.ext = fpext <vscale x 8 x half> %load to <vscale x 8 x double>
70 changes: 35 additions & 35 deletions llvm/test/CodeGen/AArch64/sve-fptosi-sat.ll
@@ -74,12 +74,12 @@ define <vscale x 8 x i32> @test_signed_v8f32_v8i32(<vscale x 8 x float> %f) {
; CHECK-NEXT: mov w8, #1325400063 // =0x4effffff
; CHECK-NEXT: mov z6.s, #0x7fffffff
; CHECK-NEXT: mov z4.s, w8
; CHECK-NEXT: movprfx z5, z1
; CHECK-NEXT: fcvtzs z5.s, p0/m, z1.s
; CHECK-NEXT: fcmge p1.s, p0/z, z0.s, z2.s
; CHECK-NEXT: fcmge p2.s, p0/z, z1.s, z2.s
; CHECK-NEXT: movprfx z2, z0
; CHECK-NEXT: fcvtzs z2.s, p0/m, z0.s
; CHECK-NEXT: movprfx z5, z1
; CHECK-NEXT: fcvtzs z5.s, p0/m, z1.s
; CHECK-NEXT: fcmgt p3.s, p0/z, z0.s, z4.s
; CHECK-NEXT: fcmgt p4.s, p0/z, z1.s, z4.s
; CHECK-NEXT: not p1.b, p0/z, p1.b
@@ -134,17 +134,17 @@ define <vscale x 8 x i16> @test_signed_v8f32_v8i16(<vscale x 8 x float> %f) {
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: mov w8, #-956301312 // =0xc7000000
; CHECK-NEXT: movprfx z4, z0
; CHECK-NEXT: fcvtzs z4.s, p0/m, z0.s
; CHECK-NEXT: mov z5.s, #32767 // =0x7fff
; CHECK-NEXT: mov z2.s, w8
; CHECK-NEXT: mov w8, #65024 // =0xfe00
; CHECK-NEXT: mov z5.s, #32767 // =0x7fff
; CHECK-NEXT: movk w8, #18175, lsl #16
; CHECK-NEXT: mov z3.s, w8
; CHECK-NEXT: fcmge p1.s, p0/z, z1.s, z2.s
; CHECK-NEXT: fcmge p2.s, p0/z, z0.s, z2.s
; CHECK-NEXT: movprfx z2, z1
; CHECK-NEXT: fcvtzs z2.s, p0/m, z1.s
; CHECK-NEXT: movprfx z4, z0
; CHECK-NEXT: fcvtzs z4.s, p0/m, z0.s
; CHECK-NEXT: fcmgt p3.s, p0/z, z1.s, z3.s
; CHECK-NEXT: fcmgt p4.s, p0/z, z0.s, z3.s
; CHECK-NEXT: not p1.b, p0/z, p1.b
@@ -206,13 +206,13 @@ define <vscale x 4 x i64> @test_signed_v4f32_v4i64(<vscale x 4 x float> %f) {
; CHECK-NEXT: mov w8, #1593835519 // =0x5effffff
; CHECK-NEXT: mov z3.d, #0x8000000000000000
; CHECK-NEXT: mov z4.s, w8
; CHECK-NEXT: movprfx z5, z0
; CHECK-NEXT: fcvtzs z5.d, p0/m, z0.s
; CHECK-NEXT: mov z6.d, #0x7fffffffffffffff
; CHECK-NEXT: fcmge p1.s, p0/z, z1.s, z2.s
; CHECK-NEXT: fcmge p2.s, p0/z, z0.s, z2.s
; CHECK-NEXT: movprfx z2, z1
; CHECK-NEXT: fcvtzs z2.d, p0/m, z1.s
; CHECK-NEXT: movprfx z5, z0
; CHECK-NEXT: fcvtzs z5.d, p0/m, z0.s
; CHECK-NEXT: fcmgt p3.s, p0/z, z1.s, z4.s
; CHECK-NEXT: fcmgt p4.s, p0/z, z0.s, z4.s
; CHECK-NEXT: not p1.b, p0/z, p1.b
@@ -283,13 +283,13 @@ define <vscale x 4 x i32> @test_signed_v4f64_v4i32(<vscale x 4 x double> %f) {
; CHECK-NEXT: mov x8, #281474972516352 // =0xffffffc00000
; CHECK-NEXT: mov z6.d, #0x7fffffff
; CHECK-NEXT: movk x8, #16863, lsl #48
; CHECK-NEXT: movprfx z5, z0
; CHECK-NEXT: fcvtzs z5.d, p0/m, z0.d
; CHECK-NEXT: mov z4.d, x8
; CHECK-NEXT: fcmge p1.d, p0/z, z1.d, z2.d
; CHECK-NEXT: fcmge p2.d, p0/z, z0.d, z2.d
; CHECK-NEXT: movprfx z2, z1
; CHECK-NEXT: fcvtzs z2.d, p0/m, z1.d
; CHECK-NEXT: movprfx z5, z0
; CHECK-NEXT: fcvtzs z5.d, p0/m, z0.d
; CHECK-NEXT: fcmgt p3.d, p0/z, z1.d, z4.d
; CHECK-NEXT: fcmgt p4.d, p0/z, z0.d, z4.d
; CHECK-NEXT: not p1.b, p0/z, p1.b
@@ -324,24 +324,24 @@ define <vscale x 8 x i32> @test_signed_v8f64_v8i32(<vscale x 8 x double> %f) {
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov x8, #-4476578029606273024 // =0xc1e0000000000000
; CHECK-NEXT: movprfx z6, z1
; CHECK-NEXT: fcvtzs z6.d, p0/m, z1.d
; CHECK-NEXT: mov z26.d, #0x7fffffff
; CHECK-NEXT: mov z4.d, x8
; CHECK-NEXT: mov x8, #281474972516352 // =0xffffffc00000
; CHECK-NEXT: movprfx z7, z0
; CHECK-NEXT: fcvtzs z7.d, p0/m, z0.d
; CHECK-NEXT: movk x8, #16863, lsl #48
; CHECK-NEXT: mov z26.d, #0x7fffffff
; CHECK-NEXT: movprfx z24, z3
; CHECK-NEXT: fcvtzs z24.d, p0/m, z3.d
; CHECK-NEXT: mov z5.d, x8
; CHECK-NEXT: movprfx z25, z2
; CHECK-NEXT: fcvtzs z25.d, p0/m, z2.d
; CHECK-NEXT: fcmge p1.d, p0/z, z1.d, z4.d
; CHECK-NEXT: fcmge p2.d, p0/z, z0.d, z4.d
; CHECK-NEXT: fcmge p3.d, p0/z, z3.d, z4.d
; CHECK-NEXT: fcmge p4.d, p0/z, z2.d, z4.d
; CHECK-NEXT: mov z4.d, #0xffffffff80000000
; CHECK-NEXT: movprfx z6, z1
; CHECK-NEXT: fcvtzs z6.d, p0/m, z1.d
; CHECK-NEXT: movprfx z7, z0
; CHECK-NEXT: fcvtzs z7.d, p0/m, z0.d
; CHECK-NEXT: movprfx z24, z3
; CHECK-NEXT: fcvtzs z24.d, p0/m, z3.d
; CHECK-NEXT: movprfx z25, z2
; CHECK-NEXT: fcvtzs z25.d, p0/m, z2.d
; CHECK-NEXT: fcmgt p5.d, p0/z, z1.d, z5.d
; CHECK-NEXT: fcmgt p6.d, p0/z, z0.d, z5.d
; CHECK-NEXT: fcmgt p7.d, p0/z, z3.d, z5.d
@@ -389,17 +389,17 @@ define <vscale x 4 x i16> @test_signed_v4f64_v4i16(<vscale x 4 x double> %f) {
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov x8, #-4548635623644200960 // =0xc0e0000000000000
; CHECK-NEXT: movprfx z4, z0
; CHECK-NEXT: fcvtzs z4.d, p0/m, z0.d
; CHECK-NEXT: mov z5.d, #32767 // =0x7fff
; CHECK-NEXT: mov z2.d, x8
; CHECK-NEXT: mov x8, #281200098803712 // =0xffc000000000
; CHECK-NEXT: mov z5.d, #32767 // =0x7fff
; CHECK-NEXT: movk x8, #16607, lsl #48
; CHECK-NEXT: mov z3.d, x8
; CHECK-NEXT: fcmge p1.d, p0/z, z1.d, z2.d
; CHECK-NEXT: fcmge p2.d, p0/z, z0.d, z2.d
; CHECK-NEXT: movprfx z2, z1
; CHECK-NEXT: fcvtzs z2.d, p0/m, z1.d
; CHECK-NEXT: movprfx z4, z0
; CHECK-NEXT: fcvtzs z4.d, p0/m, z0.d
; CHECK-NEXT: fcmgt p3.d, p0/z, z1.d, z3.d
; CHECK-NEXT: fcmgt p4.d, p0/z, z0.d, z3.d
; CHECK-NEXT: not p1.b, p0/z, p1.b
@@ -434,23 +434,23 @@ define <vscale x 8 x i16> @test_signed_v8f64_v8i16(<vscale x 8 x double> %f) {
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov x8, #-4548635623644200960 // =0xc0e0000000000000
; CHECK-NEXT: mov z25.d, #32767 // =0x7fff
; CHECK-NEXT: mov z4.d, x8
; CHECK-NEXT: mov x8, #281200098803712 // =0xffc000000000
; CHECK-NEXT: movk x8, #16607, lsl #48
; CHECK-NEXT: fcmge p1.d, p0/z, z3.d, z4.d
; CHECK-NEXT: fcmge p2.d, p0/z, z2.d, z4.d
; CHECK-NEXT: fcmge p3.d, p0/z, z1.d, z4.d
; CHECK-NEXT: fcmge p4.d, p0/z, z0.d, z4.d
; CHECK-NEXT: movprfx z5, z3
; CHECK-NEXT: fcvtzs z5.d, p0/m, z3.d
; CHECK-NEXT: mov z4.d, x8
; CHECK-NEXT: mov x8, #281200098803712 // =0xffc000000000
; CHECK-NEXT: movprfx z6, z2
; CHECK-NEXT: fcvtzs z6.d, p0/m, z2.d
; CHECK-NEXT: movk x8, #16607, lsl #48
; CHECK-NEXT: movprfx z7, z1
; CHECK-NEXT: fcvtzs z7.d, p0/m, z1.d
; CHECK-NEXT: movprfx z24, z0
; CHECK-NEXT: fcvtzs z24.d, p0/m, z0.d
; CHECK-NEXT: mov z25.d, #32767 // =0x7fff
; CHECK-NEXT: fcmge p1.d, p0/z, z3.d, z4.d
; CHECK-NEXT: fcmge p2.d, p0/z, z2.d, z4.d
; CHECK-NEXT: fcmge p3.d, p0/z, z1.d, z4.d
; CHECK-NEXT: fcmge p4.d, p0/z, z0.d, z4.d
; CHECK-NEXT: mov z4.d, x8
; CHECK-NEXT: not p1.b, p0/z, p1.b
; CHECK-NEXT: fcmgt p5.d, p0/z, z3.d, z4.d
; CHECK-NEXT: fcmgt p6.d, p0/z, z2.d, z4.d
@@ -528,12 +528,12 @@ define <vscale x 4 x i64> @test_signed_v4f64_v4i64(<vscale x 4 x double> %f) {
; CHECK-NEXT: mov x8, #4890909195324358655 // =0x43dfffffffffffff
; CHECK-NEXT: mov z6.d, #0x7fffffffffffffff
; CHECK-NEXT: mov z4.d, x8
; CHECK-NEXT: movprfx z5, z1
; CHECK-NEXT: fcvtzs z5.d, p0/m, z1.d
; CHECK-NEXT: fcmge p1.d, p0/z, z0.d, z2.d
; CHECK-NEXT: fcmge p2.d, p0/z, z1.d, z2.d
; CHECK-NEXT: movprfx z2, z0
; CHECK-NEXT: fcvtzs z2.d, p0/m, z0.d
; CHECK-NEXT: movprfx z5, z1
; CHECK-NEXT: fcvtzs z5.d, p0/m, z1.d
; CHECK-NEXT: fcmgt p3.d, p0/z, z0.d, z4.d
; CHECK-NEXT: fcmgt p4.d, p0/z, z1.d, z4.d
; CHECK-NEXT: not p1.b, p0/z, p1.b
@@ -629,13 +629,13 @@ define <vscale x 8 x i32> @test_signed_v8f16_v8i32(<vscale x 8 x half> %f) {
; CHECK-NEXT: mov w8, #31743 // =0x7bff
; CHECK-NEXT: mov z3.s, #0x80000000
; CHECK-NEXT: mov z4.h, w8
; CHECK-NEXT: movprfx z5, z0
; CHECK-NEXT: fcvtzs z5.s, p0/m, z0.h
; CHECK-NEXT: mov z6.s, #0x7fffffff
; CHECK-NEXT: fcmge p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: fcmge p2.h, p0/z, z0.h, z2.h
; CHECK-NEXT: movprfx z2, z1
; CHECK-NEXT: fcvtzs z2.s, p0/m, z1.h
; CHECK-NEXT: movprfx z5, z0
; CHECK-NEXT: fcvtzs z5.s, p0/m, z0.h
; CHECK-NEXT: fcmgt p3.h, p0/z, z1.h, z4.h
; CHECK-NEXT: fcmgt p4.h, p0/z, z0.h, z4.h
; CHECK-NEXT: not p1.b, p0/z, p1.b
@@ -742,13 +742,13 @@ define <vscale x 4 x i64> @test_signed_v4f16_v4i64(<vscale x 4 x half> %f) {
; CHECK-NEXT: mov w8, #31743 // =0x7bff
; CHECK-NEXT: mov z3.d, #0x8000000000000000
; CHECK-NEXT: mov z4.h, w8
; CHECK-NEXT: movprfx z5, z0
; CHECK-NEXT: fcvtzs z5.d, p0/m, z0.h
; CHECK-NEXT: mov z6.d, #0x7fffffffffffffff
; CHECK-NEXT: fcmge p1.h, p0/z, z1.h, z2.h
; CHECK-NEXT: fcmge p2.h, p0/z, z0.h, z2.h
; CHECK-NEXT: movprfx z2, z1
; CHECK-NEXT: fcvtzs z2.d, p0/m, z1.h
; CHECK-NEXT: movprfx z5, z0
; CHECK-NEXT: fcvtzs z5.d, p0/m, z0.h
; CHECK-NEXT: fcmgt p3.h, p0/z, z1.h, z4.h
; CHECK-NEXT: fcmgt p4.h, p0/z, z0.h, z4.h
; CHECK-NEXT: not p1.b, p0/z, p1.b
122 changes: 61 additions & 61 deletions llvm/test/CodeGen/AArch64/sve-fptoui-sat.ll
@@ -16,10 +16,10 @@ define <vscale x 2 x i32> @test_signed_v2f32_v2i32(<vscale x 2 x float> %f) {
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov w8, #1333788671 // =0x4f7fffff
; CHECK-NEXT: movprfx z2, z0
; CHECK-NEXT: fcvtzu z2.d, p0/m, z0.s
; CHECK-NEXT: mov z1.s, w8
; CHECK-NEXT: fcmge p1.s, p0/z, z0.s, #0.0
; CHECK-NEXT: movprfx z2, z0
; CHECK-NEXT: fcvtzu z2.d, p0/m, z0.s
; CHECK-NEXT: not p1.b, p0/z, p1.b
; CHECK-NEXT: fcmgt p0.s, p0/z, z0.s, z1.s
; CHECK-NEXT: mov z0.d, #0xffffffff
@@ -35,10 +35,10 @@
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: mov w8, #1333788671 // =0x4f7fffff
; CHECK-NEXT: movprfx z1, z0
; CHECK-NEXT: fcvtzu z1.s, p0/m, z0.s
; CHECK-NEXT: mov z2.s, w8
; CHECK-NEXT: fcmge p1.s, p0/z, z0.s, #0.0
; CHECK-NEXT: movprfx z1, z0
; CHECK-NEXT: fcvtzu z1.s, p0/m, z0.s
; CHECK-NEXT: not p1.b, p0/z, p1.b
; CHECK-NEXT: fcmgt p0.s, p0/z, z0.s, z2.s
; CHECK-NEXT: mov z1.s, p1/m, #0 // =0x0
@@ -54,13 +54,13 @@
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: mov w8, #1333788671 // =0x4f7fffff
; CHECK-NEXT: mov z4.s, w8
; CHECK-NEXT: fcmge p1.s, p0/z, z0.s, #0.0
; CHECK-NEXT: fcmge p2.s, p0/z, z1.s, #0.0
; CHECK-NEXT: movprfx z2, z0
; CHECK-NEXT: fcvtzu z2.s, p0/m, z0.s
; CHECK-NEXT: mov z4.s, w8
; CHECK-NEXT: movprfx z3, z1
; CHECK-NEXT: fcvtzu z3.s, p0/m, z1.s
; CHECK-NEXT: fcmge p1.s, p0/z, z0.s, #0.0
; CHECK-NEXT: fcmge p2.s, p0/z, z1.s, #0.0
; CHECK-NEXT: fcmgt p3.s, p0/z, z0.s, z4.s
; CHECK-NEXT: not p1.b, p0/z, p1.b
; CHECK-NEXT: not p2.b, p0/z, p2.b
@@ -81,11 +81,11 @@
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: mov w8, #65280 // =0xff00
; CHECK-NEXT: movprfx z2, z0
; CHECK-NEXT: fcvtzu z2.s, p0/m, z0.s
; CHECK-NEXT: movk w8, #18303, lsl #16
; CHECK-NEXT: mov z1.s, w8
; CHECK-NEXT: fcmge p1.s, p0/z, z0.s, #0.0
; CHECK-NEXT: movprfx z2, z0
; CHECK-NEXT: fcvtzu z2.s, p0/m, z0.s
; CHECK-NEXT: not p1.b, p0/z, p1.b
; CHECK-NEXT: fcmgt p0.s, p0/z, z0.s, z1.s
; CHECK-NEXT: mov z0.s, #65535 // =0xffff
@@ -101,14 +101,14 @@
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: mov w8, #65280 // =0xff00
; CHECK-NEXT: movprfx z3, z1
; CHECK-NEXT: fcvtzu z3.s, p0/m, z1.s
; CHECK-NEXT: movk w8, #18303, lsl #16
; CHECK-NEXT: movprfx z4, z0
; CHECK-NEXT: fcvtzu z4.s, p0/m, z0.s
; CHECK-NEXT: mov z2.s, w8
; CHECK-NEXT: fcmge p1.s, p0/z, z1.s, #0.0
; CHECK-NEXT: fcmge p2.s, p0/z, z0.s, #0.0
; CHECK-NEXT: movprfx z3, z1
; CHECK-NEXT: fcvtzu z3.s, p0/m, z1.s
; CHECK-NEXT: movprfx z4, z0
; CHECK-NEXT: fcvtzu z4.s, p0/m, z0.s
; CHECK-NEXT: fcmgt p3.s, p0/z, z1.s, z2.s
; CHECK-NEXT: not p1.b, p0/z, p1.b
; CHECK-NEXT: not p2.b, p0/z, p2.b
@@ -129,10 +129,10 @@
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov w8, #1602224127 // =0x5f7fffff
; CHECK-NEXT: movprfx z1, z0
; CHECK-NEXT: fcvtzu z1.d, p0/m, z0.s
; CHECK-NEXT: mov z2.s, w8
; CHECK-NEXT: fcmge p1.s, p0/z, z0.s, #0.0
; CHECK-NEXT: movprfx z1, z0
; CHECK-NEXT: fcvtzu z1.d, p0/m, z0.s
; CHECK-NEXT: not p1.b, p0/z, p1.b
; CHECK-NEXT: fcmgt p0.s, p0/z, z0.s, z2.s
; CHECK-NEXT: mov z1.d, p1/m, #0 // =0x0
@@ -150,13 +150,13 @@
; CHECK-NEXT: uunpklo z2.d, z0.s
; CHECK-NEXT: uunpkhi z3.d, z0.s
; CHECK-NEXT: mov w8, #1602224127 // =0x5f7fffff
; CHECK-NEXT: mov z4.s, w8
; CHECK-NEXT: fcmge p1.s, p0/z, z2.s, #0.0
; CHECK-NEXT: fcmge p2.s, p0/z, z3.s, #0.0
; CHECK-NEXT: movprfx z0, z2
; CHECK-NEXT: fcvtzu z0.d, p0/m, z2.s
; CHECK-NEXT: movprfx z1, z3
; CHECK-NEXT: fcvtzu z1.d, p0/m, z3.s
; CHECK-NEXT: mov z4.s, w8
; CHECK-NEXT: fcmge p1.s, p0/z, z2.s, #0.0
; CHECK-NEXT: fcmge p2.s, p0/z, z3.s, #0.0
; CHECK-NEXT: fcmgt p3.s, p0/z, z2.s, z4.s
; CHECK-NEXT: not p1.b, p0/z, p1.b
; CHECK-NEXT: not p2.b, p0/z, p2.b
@@ -185,11 +185,11 @@ define <vscale x 2 x i32> @test_signed_v2f64_v2i32(<vscale x 2 x double> %f) {
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov x8, #281474974613504 // =0xffffffe00000
; CHECK-NEXT: movprfx z2, z0
; CHECK-NEXT: fcvtzu z2.d, p0/m, z0.d
; CHECK-NEXT: movk x8, #16879, lsl #48
; CHECK-NEXT: mov z1.d, x8
; CHECK-NEXT: fcmge p1.d, p0/z, z0.d, #0.0
; CHECK-NEXT: movprfx z2, z0
; CHECK-NEXT: fcvtzu z2.d, p0/m, z0.d
; CHECK-NEXT: not p1.b, p0/z, p1.b
; CHECK-NEXT: fcmgt p0.d, p0/z, z0.d, z1.d
; CHECK-NEXT: mov z0.d, #0xffffffff
@@ -205,14 +205,14 @@ define <vscale x 4 x i32> @test_signed_v4f64_v4i32(<vscale x 4 x double> %f) {
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov x8, #281474974613504 // =0xffffffe00000
; CHECK-NEXT: movprfx z3, z1
; CHECK-NEXT: fcvtzu z3.d, p0/m, z1.d
; CHECK-NEXT: movk x8, #16879, lsl #48
; CHECK-NEXT: movprfx z4, z0
; CHECK-NEXT: fcvtzu z4.d, p0/m, z0.d
; CHECK-NEXT: mov z2.d, x8
; CHECK-NEXT: fcmge p1.d, p0/z, z1.d, #0.0
; CHECK-NEXT: fcmge p2.d, p0/z, z0.d, #0.0
; CHECK-NEXT: movprfx z3, z1
; CHECK-NEXT: fcvtzu z3.d, p0/m, z1.d
; CHECK-NEXT: movprfx z4, z0
; CHECK-NEXT: fcvtzu z4.d, p0/m, z0.d
; CHECK-NEXT: fcmgt p3.d, p0/z, z1.d, z2.d
; CHECK-NEXT: not p1.b, p0/z, p1.b
; CHECK-NEXT: not p2.b, p0/z, p2.b
@@ -240,20 +240,20 @@ define <vscale x 8 x i32> @test_signed_v8f64_v8i32(<vscale x 8 x double> %f) {
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov x8, #281474974613504 // =0xffffffe00000
; CHECK-NEXT: movk x8, #16879, lsl #48
; CHECK-NEXT: mov z4.d, x8
; CHECK-NEXT: fcmge p1.d, p0/z, z1.d, #0.0
; CHECK-NEXT: fcmge p2.d, p0/z, z0.d, #0.0
; CHECK-NEXT: fcmge p3.d, p0/z, z3.d, #0.0
; CHECK-NEXT: fcmge p4.d, p0/z, z2.d, #0.0
; CHECK-NEXT: movprfx z5, z1
; CHECK-NEXT: fcvtzu z5.d, p0/m, z1.d
; CHECK-NEXT: movk x8, #16879, lsl #48
; CHECK-NEXT: movprfx z6, z0
; CHECK-NEXT: fcvtzu z6.d, p0/m, z0.d
; CHECK-NEXT: movprfx z7, z3
; CHECK-NEXT: fcvtzu z7.d, p0/m, z3.d
; CHECK-NEXT: mov z4.d, x8
; CHECK-NEXT: movprfx z24, z2
; CHECK-NEXT: fcvtzu z24.d, p0/m, z2.d
; CHECK-NEXT: fcmge p1.d, p0/z, z1.d, #0.0
; CHECK-NEXT: fcmge p2.d, p0/z, z0.d, #0.0
; CHECK-NEXT: fcmge p3.d, p0/z, z3.d, #0.0
; CHECK-NEXT: fcmge p4.d, p0/z, z2.d, #0.0
; CHECK-NEXT: fcmgt p5.d, p0/z, z1.d, z4.d
; CHECK-NEXT: fcmgt p6.d, p0/z, z0.d, z4.d
; CHECK-NEXT: mov z0.d, #0xffffffff
@@ -288,14 +288,14 @@ define <vscale x 4 x i16> @test_signed_v4f64_v4i16(<vscale x 4 x double> %f) {
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov x8, #281337537757184 // =0xffe000000000
; CHECK-NEXT: movprfx z3, z1
; CHECK-NEXT: fcvtzu z3.d, p0/m, z1.d
; CHECK-NEXT: movk x8, #16623, lsl #48
; CHECK-NEXT: movprfx z4, z0
; CHECK-NEXT: fcvtzu z4.d, p0/m, z0.d
; CHECK-NEXT: mov z2.d, x8
; CHECK-NEXT: fcmge p1.d, p0/z, z1.d, #0.0
; CHECK-NEXT: fcmge p2.d, p0/z, z0.d, #0.0
; CHECK-NEXT: movprfx z3, z1
; CHECK-NEXT: fcvtzu z3.d, p0/m, z1.d
; CHECK-NEXT: movprfx z4, z0
; CHECK-NEXT: fcvtzu z4.d, p0/m, z0.d
; CHECK-NEXT: fcmgt p3.d, p0/z, z1.d, z2.d
; CHECK-NEXT: not p1.b, p0/z, p1.b
; CHECK-NEXT: not p2.b, p0/z, p2.b
@@ -323,20 +323,20 @@ define <vscale x 8 x i16> @test_signed_v8f64_v8i16(<vscale x 8 x double> %f) {
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov x8, #281337537757184 // =0xffe000000000
; CHECK-NEXT: movk x8, #16623, lsl #48
; CHECK-NEXT: mov z4.d, x8
; CHECK-NEXT: fcmge p1.d, p0/z, z3.d, #0.0
; CHECK-NEXT: fcmge p2.d, p0/z, z2.d, #0.0
; CHECK-NEXT: fcmge p3.d, p0/z, z1.d, #0.0
; CHECK-NEXT: fcmge p4.d, p0/z, z0.d, #0.0
; CHECK-NEXT: movprfx z5, z3
; CHECK-NEXT: fcvtzu z5.d, p0/m, z3.d
; CHECK-NEXT: movk x8, #16623, lsl #48
; CHECK-NEXT: movprfx z6, z2
; CHECK-NEXT: fcvtzu z6.d, p0/m, z2.d
; CHECK-NEXT: movprfx z7, z1
; CHECK-NEXT: fcvtzu z7.d, p0/m, z1.d
; CHECK-NEXT: mov z4.d, x8
; CHECK-NEXT: movprfx z24, z0
; CHECK-NEXT: fcvtzu z24.d, p0/m, z0.d
; CHECK-NEXT: fcmge p1.d, p0/z, z3.d, #0.0
; CHECK-NEXT: fcmge p2.d, p0/z, z2.d, #0.0
; CHECK-NEXT: fcmge p3.d, p0/z, z1.d, #0.0
; CHECK-NEXT: fcmge p4.d, p0/z, z0.d, #0.0
; CHECK-NEXT: fcmgt p5.d, p0/z, z3.d, z4.d
; CHECK-NEXT: fcmgt p6.d, p0/z, z2.d, z4.d
; CHECK-NEXT: mov z2.d, #65535 // =0xffff
@@ -372,10 +372,10 @@ define <vscale x 2 x i64> @test_signed_v2f64_v2i64(<vscale x 2 x double> %f) {
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov x8, #4895412794951729151 // =0x43efffffffffffff
; CHECK-NEXT: movprfx z1, z0
; CHECK-NEXT: fcvtzu z1.d, p0/m, z0.d
; CHECK-NEXT: mov z2.d, x8
; CHECK-NEXT: fcmge p1.d, p0/z, z0.d, #0.0
; CHECK-NEXT: movprfx z1, z0
; CHECK-NEXT: fcvtzu z1.d, p0/m, z0.d
; CHECK-NEXT: not p1.b, p0/z, p1.b
; CHECK-NEXT: fcmgt p0.d, p0/z, z0.d, z2.d
; CHECK-NEXT: mov z1.d, p1/m, #0 // =0x0
@@ -391,13 +391,13 @@ define <vscale x 4 x i64> @test_signed_v4f64_v4i64(<vscale x 4 x double> %f) {
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov x8, #4895412794951729151 // =0x43efffffffffffff
; CHECK-NEXT: mov z4.d, x8
; CHECK-NEXT: fcmge p1.d, p0/z, z0.d, #0.0
; CHECK-NEXT: fcmge p2.d, p0/z, z1.d, #0.0
; CHECK-NEXT: movprfx z2, z0
; CHECK-NEXT: fcvtzu z2.d, p0/m, z0.d
; CHECK-NEXT: mov z4.d, x8
; CHECK-NEXT: movprfx z3, z1
; CHECK-NEXT: fcvtzu z3.d, p0/m, z1.d
; CHECK-NEXT: fcmge p1.d, p0/z, z0.d, #0.0
; CHECK-NEXT: fcmge p2.d, p0/z, z1.d, #0.0
; CHECK-NEXT: fcmgt p3.d, p0/z, z0.d, z4.d
; CHECK-NEXT: not p1.b, p0/z, p1.b
; CHECK-NEXT: not p2.b, p0/z, p2.b
@@ -429,10 +429,10 @@ define <vscale x 2 x i32> @test_signed_v2f16_v2i32(<vscale x 2 x half> %f) {
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov w8, #31743 // =0x7bff
; CHECK-NEXT: movprfx z2, z0
; CHECK-NEXT: fcvtzu z2.d, p0/m, z0.h
; CHECK-NEXT: mov z1.h, w8
; CHECK-NEXT: fcmge p1.h, p0/z, z0.h, #0.0
; CHECK-NEXT: movprfx z2, z0
; CHECK-NEXT: fcvtzu z2.d, p0/m, z0.h
; CHECK-NEXT: not p1.b, p0/z, p1.b
; CHECK-NEXT: fcmgt p0.h, p0/z, z0.h, z1.h
; CHECK-NEXT: mov z0.d, #0xffffffff
@@ -448,10 +448,10 @@ define <vscale x 4 x i32> @test_signed_v4f16_v4i32(<vscale x 4 x half> %f) {
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: mov w8, #31743 // =0x7bff
; CHECK-NEXT: movprfx z1, z0
; CHECK-NEXT: fcvtzu z1.s, p0/m, z0.h
; CHECK-NEXT: mov z2.h, w8
; CHECK-NEXT: fcmge p1.h, p0/z, z0.h, #0.0
; CHECK-NEXT: movprfx z1, z0
; CHECK-NEXT: fcvtzu z1.s, p0/m, z0.h
; CHECK-NEXT: not p1.b, p0/z, p1.b
; CHECK-NEXT: fcmgt p0.h, p0/z, z0.h, z2.h
; CHECK-NEXT: mov z1.s, p1/m, #0 // =0x0
@@ -469,13 +469,13 @@ define <vscale x 8 x i32> @test_signed_v8f16_v8i32(<vscale x 8 x half> %f) {
; CHECK-NEXT: uunpklo z2.s, z0.h
; CHECK-NEXT: uunpkhi z3.s, z0.h
; CHECK-NEXT: mov w8, #31743 // =0x7bff
; CHECK-NEXT: mov z4.h, w8
; CHECK-NEXT: fcmge p1.h, p0/z, z2.h, #0.0
; CHECK-NEXT: fcmge p2.h, p0/z, z3.h, #0.0
; CHECK-NEXT: movprfx z0, z2
; CHECK-NEXT: fcvtzu z0.s, p0/m, z2.h
; CHECK-NEXT: movprfx z1, z3
; CHECK-NEXT: fcvtzu z1.s, p0/m, z3.h
; CHECK-NEXT: mov z4.h, w8
; CHECK-NEXT: fcmge p1.h, p0/z, z2.h, #0.0
; CHECK-NEXT: fcmge p2.h, p0/z, z3.h, #0.0
; CHECK-NEXT: fcmgt p3.h, p0/z, z2.h, z4.h
; CHECK-NEXT: not p1.b, p0/z, p1.b
; CHECK-NEXT: not p2.b, p0/z, p2.b
@@ -494,10 +494,10 @@ define <vscale x 4 x i16> @test_signed_v4f16_v4i16(<vscale x 4 x half> %f) {
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: mov w8, #31743 // =0x7bff
; CHECK-NEXT: movprfx z2, z0
; CHECK-NEXT: fcvtzu z2.s, p0/m, z0.h
; CHECK-NEXT: mov z1.h, w8
; CHECK-NEXT: fcmge p1.h, p0/z, z0.h, #0.0
; CHECK-NEXT: movprfx z2, z0
; CHECK-NEXT: fcvtzu z2.s, p0/m, z0.h
; CHECK-NEXT: not p1.b, p0/z, p1.b
; CHECK-NEXT: fcmgt p0.h, p0/z, z0.h, z1.h
; CHECK-NEXT: mov z0.s, #65535 // =0xffff
@@ -513,10 +513,10 @@ define <vscale x 8 x i16> @test_signed_v8f16_v8i16(<vscale x 8 x half> %f) {
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: mov w8, #31743 // =0x7bff
; CHECK-NEXT: movprfx z1, z0
; CHECK-NEXT: fcvtzu z1.h, p0/m, z0.h
; CHECK-NEXT: mov z2.h, w8
; CHECK-NEXT: fcmge p1.h, p0/z, z0.h, #0.0
; CHECK-NEXT: movprfx z1, z0
; CHECK-NEXT: fcvtzu z1.h, p0/m, z0.h
; CHECK-NEXT: not p1.b, p0/z, p1.b
; CHECK-NEXT: fcmgt p0.h, p0/z, z0.h, z2.h
; CHECK-NEXT: mov z1.h, p1/m, #0 // =0x0
@@ -532,10 +532,10 @@ define <vscale x 2 x i64> @test_signed_v2f16_v2i64(<vscale x 2 x half> %f) {
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov w8, #31743 // =0x7bff
; CHECK-NEXT: movprfx z1, z0
; CHECK-NEXT: fcvtzu z1.d, p0/m, z0.h
; CHECK-NEXT: mov z2.h, w8
; CHECK-NEXT: fcmge p1.h, p0/z, z0.h, #0.0
; CHECK-NEXT: movprfx z1, z0
; CHECK-NEXT: fcvtzu z1.d, p0/m, z0.h
; CHECK-NEXT: not p1.b, p0/z, p1.b
; CHECK-NEXT: fcmgt p0.h, p0/z, z0.h, z2.h
; CHECK-NEXT: mov z1.d, p1/m, #0 // =0x0
@@ -553,13 +553,13 @@ define <vscale x 4 x i64> @test_signed_v4f16_v4i64(<vscale x 4 x half> %f) {
; CHECK-NEXT: uunpklo z2.d, z0.s
; CHECK-NEXT: uunpkhi z3.d, z0.s
; CHECK-NEXT: mov w8, #31743 // =0x7bff
; CHECK-NEXT: mov z4.h, w8
; CHECK-NEXT: fcmge p1.h, p0/z, z2.h, #0.0
; CHECK-NEXT: fcmge p2.h, p0/z, z3.h, #0.0
; CHECK-NEXT: movprfx z0, z2
; CHECK-NEXT: fcvtzu z0.d, p0/m, z2.h
; CHECK-NEXT: movprfx z1, z3
; CHECK-NEXT: fcvtzu z1.d, p0/m, z3.h
; CHECK-NEXT: mov z4.h, w8
; CHECK-NEXT: fcmge p1.h, p0/z, z2.h, #0.0
; CHECK-NEXT: fcmge p2.h, p0/z, z3.h, #0.0
; CHECK-NEXT: fcmgt p3.h, p0/z, z2.h, z4.h
; CHECK-NEXT: not p1.b, p0/z, p1.b
; CHECK-NEXT: not p2.b, p0/z, p2.b
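A note on the pattern these reordered checks pin down: fcvtzu produces the raw conversion, the inverted fcmge-with-#0.0 predicate zeroes negative and NaN lanes, and fcmgt against the largest in-range constant (0x477fff00, i.e. 65535.0f, in the v4f32-to-v4i16 case above) clamps the top. A scalar C sketch of the same lane semantics, with my own naming:

#include <stdint.h>

/* One lane of the saturating float->unsigned conversion tested above,
   assuming the 16-bit result width of test_signed_v4f32_v4i16. */
static uint16_t fptoui16_sat(float f) {
  if (!(f >= 0.0f))   /* negative or NaN lane -> 0 (inverted fcmge) */
    return 0;
  if (f > 65535.0f)   /* above the clamp constant (fcmgt) -> max    */
    return 65535;
  return (uint16_t)f; /* in range: plain truncating convert (fcvtzu) */
}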
10 changes: 5 additions & 5 deletions llvm/test/CodeGen/AArch64/sve-gather-scatter-addr-opts.ll
@@ -71,18 +71,18 @@ define <vscale x 4 x i8> @gather_i8_index_offset_8(ptr %base, i64 %offset, <vsca
define void @scatter_f16_index_offset_var(ptr %base, i64 %offset, i64 %scale, <vscale x 4 x i1> %pg, <vscale x 4 x half> %data) #0 {
; CHECK-LABEL: scatter_f16_index_offset_var:
; CHECK: // %bb.0:
; CHECK-NEXT: index z1.d, #0, #1
; CHECK-NEXT: ptrue p1.d
; CHECK-NEXT: index z1.d, #0, #1
; CHECK-NEXT: mov z2.d, x1
; CHECK-NEXT: movprfx z4, z2
; CHECK-NEXT: mla z4.d, p1/m, z1.d, z2.d
; CHECK-NEXT: punpklo p2.h, p0.b
; CHECK-NEXT: uunpklo z3.d, z0.s
; CHECK-NEXT: punpkhi p0.h, p0.b
; CHECK-NEXT: uunpkhi z0.d, z0.s
; CHECK-NEXT: punpkhi p0.h, p0.b
; CHECK-NEXT: movprfx z4, z2
; CHECK-NEXT: mla z4.d, p1/m, z1.d, z2.d
; CHECK-NEXT: incd z1.d
; CHECK-NEXT: st1h { z3.d }, p2, [x0, z4.d, lsl #1]
; CHECK-NEXT: mad z1.d, p1/m, z2.d, z2.d
; CHECK-NEXT: st1h { z3.d }, p2, [x0, z4.d, lsl #1]
; CHECK-NEXT: st1h { z0.d }, p0, [x0, z1.d, lsl #1]
; CHECK-NEXT: ret
%t0 = insertelement <vscale x 4 x i64> undef, i64 %offset, i32 0
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AArch64/sve-masked-gather-legalize.ll
@@ -145,10 +145,10 @@ define <vscale x 4 x double> @masked_gather_nxv4f64(ptr %base, <vscale x 4 x i16
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: movprfx z1, z0
; CHECK-NEXT: sxth z1.s, p1/m, z0.s
; CHECK-NEXT: sunpklo z0.d, z1.s
; CHECK-NEXT: punpklo p1.h, p0.b
; CHECK-NEXT: sunpkhi z1.d, z1.s
; CHECK-NEXT: punpkhi p0.h, p0.b
; CHECK-NEXT: sunpklo z0.d, z1.s
; CHECK-NEXT: sunpkhi z1.d, z1.s
; CHECK-NEXT: ld1d { z0.d }, p1/z, [x0, z0.d, lsl #3]
; CHECK-NEXT: ld1d { z1.d }, p0/z, [x0, z1.d, lsl #3]
; CHECK-NEXT: ret
24 changes: 12 additions & 12 deletions llvm/test/CodeGen/AArch64/sve-smulo-sdnode.ll
@@ -129,9 +129,9 @@ define <vscale x 64 x i8> @smulo_nxv64i8(<vscale x 64 x i8> %x, <vscale x 64 x i
; CHECK-NEXT: mul z27.b, p0/m, z27.b, z5.b
; CHECK-NEXT: smulh z3.b, p0/m, z3.b, z7.b
; CHECK-NEXT: smulh z0.b, p0/m, z0.b, z4.b
; CHECK-NEXT: asr z4.b, z25.b, #7
; CHECK-NEXT: smulh z2.b, p0/m, z2.b, z6.b
; CHECK-NEXT: smulh z1.b, p0/m, z1.b, z5.b
; CHECK-NEXT: asr z4.b, z25.b, #7
; CHECK-NEXT: asr z5.b, z24.b, #7
; CHECK-NEXT: asr z6.b, z26.b, #7
; CHECK-NEXT: asr z7.b, z27.b, #7
@@ -140,13 +140,13 @@ define <vscale x 64 x i8> @smulo_nxv64i8(<vscale x 64 x i8> %x, <vscale x 64 x i
; CHECK-NEXT: cmpne p3.b, p0/z, z2.b, z6.b
; CHECK-NEXT: cmpne p0.b, p0/z, z1.b, z7.b
; CHECK-NEXT: mov z25.b, p1/m, #0 // =0x0
; CHECK-NEXT: mov z26.b, p3/m, #0 // =0x0
; CHECK-NEXT: mov z24.b, p2/m, #0 // =0x0
; CHECK-NEXT: mov z27.b, p0/m, #0 // =0x0
; CHECK-NEXT: mov z26.b, p3/m, #0 // =0x0
; CHECK-NEXT: mov z0.d, z25.d
; CHECK-NEXT: mov z2.d, z26.d
; CHECK-NEXT: mov z3.d, z24.d
; CHECK-NEXT: mov z1.d, z27.d
; CHECK-NEXT: mov z2.d, z26.d
; CHECK-NEXT: ret
%a = call { <vscale x 64 x i8>, <vscale x 64 x i1> } @llvm.smul.with.overflow.nxv64i8(<vscale x 64 x i8> %x, <vscale x 64 x i8> %y)
%b = extractvalue { <vscale x 64 x i8>, <vscale x 64 x i1> } %a, 0
@@ -262,9 +262,9 @@ define <vscale x 32 x i16> @smulo_nxv32i16(<vscale x 32 x i16> %x, <vscale x 32
; CHECK-NEXT: mul z27.h, p0/m, z27.h, z5.h
; CHECK-NEXT: smulh z3.h, p0/m, z3.h, z7.h
; CHECK-NEXT: smulh z0.h, p0/m, z0.h, z4.h
; CHECK-NEXT: asr z4.h, z25.h, #15
; CHECK-NEXT: smulh z2.h, p0/m, z2.h, z6.h
; CHECK-NEXT: smulh z1.h, p0/m, z1.h, z5.h
; CHECK-NEXT: asr z4.h, z25.h, #15
; CHECK-NEXT: asr z5.h, z24.h, #15
; CHECK-NEXT: asr z6.h, z26.h, #15
; CHECK-NEXT: asr z7.h, z27.h, #15
@@ -273,13 +273,13 @@ define <vscale x 32 x i16> @smulo_nxv32i16(<vscale x 32 x i16> %x, <vscale x 32
; CHECK-NEXT: cmpne p3.h, p0/z, z2.h, z6.h
; CHECK-NEXT: cmpne p0.h, p0/z, z1.h, z7.h
; CHECK-NEXT: mov z25.h, p1/m, #0 // =0x0
; CHECK-NEXT: mov z26.h, p3/m, #0 // =0x0
; CHECK-NEXT: mov z24.h, p2/m, #0 // =0x0
; CHECK-NEXT: mov z27.h, p0/m, #0 // =0x0
; CHECK-NEXT: mov z26.h, p3/m, #0 // =0x0
; CHECK-NEXT: mov z0.d, z25.d
; CHECK-NEXT: mov z2.d, z26.d
; CHECK-NEXT: mov z3.d, z24.d
; CHECK-NEXT: mov z1.d, z27.d
; CHECK-NEXT: mov z2.d, z26.d
; CHECK-NEXT: ret
%a = call { <vscale x 32 x i16>, <vscale x 32 x i1> } @llvm.smul.with.overflow.nxv32i16(<vscale x 32 x i16> %x, <vscale x 32 x i16> %y)
%b = extractvalue { <vscale x 32 x i16>, <vscale x 32 x i1> } %a, 0
@@ -374,9 +374,9 @@ define <vscale x 16 x i32> @smulo_nxv16i32(<vscale x 16 x i32> %x, <vscale x 16
; CHECK-NEXT: mul z27.s, p0/m, z27.s, z5.s
; CHECK-NEXT: smulh z3.s, p0/m, z3.s, z7.s
; CHECK-NEXT: smulh z0.s, p0/m, z0.s, z4.s
; CHECK-NEXT: asr z4.s, z25.s, #31
; CHECK-NEXT: smulh z2.s, p0/m, z2.s, z6.s
; CHECK-NEXT: smulh z1.s, p0/m, z1.s, z5.s
; CHECK-NEXT: asr z4.s, z25.s, #31
; CHECK-NEXT: asr z5.s, z24.s, #31
; CHECK-NEXT: asr z6.s, z26.s, #31
; CHECK-NEXT: asr z7.s, z27.s, #31
@@ -385,13 +385,13 @@ define <vscale x 16 x i32> @smulo_nxv16i32(<vscale x 16 x i32> %x, <vscale x 16
; CHECK-NEXT: cmpne p3.s, p0/z, z2.s, z6.s
; CHECK-NEXT: cmpne p0.s, p0/z, z1.s, z7.s
; CHECK-NEXT: mov z25.s, p1/m, #0 // =0x0
; CHECK-NEXT: mov z26.s, p3/m, #0 // =0x0
; CHECK-NEXT: mov z24.s, p2/m, #0 // =0x0
; CHECK-NEXT: mov z27.s, p0/m, #0 // =0x0
; CHECK-NEXT: mov z26.s, p3/m, #0 // =0x0
; CHECK-NEXT: mov z0.d, z25.d
; CHECK-NEXT: mov z2.d, z26.d
; CHECK-NEXT: mov z3.d, z24.d
; CHECK-NEXT: mov z1.d, z27.d
; CHECK-NEXT: mov z2.d, z26.d
; CHECK-NEXT: ret
%a = call { <vscale x 16 x i32>, <vscale x 16 x i1> } @llvm.smul.with.overflow.nxv16i32(<vscale x 16 x i32> %x, <vscale x 16 x i32> %y)
%b = extractvalue { <vscale x 16 x i32>, <vscale x 16 x i1> } %a, 0
@@ -465,9 +465,9 @@ define <vscale x 8 x i64> @smulo_nxv8i64(<vscale x 8 x i64> %x, <vscale x 8 x i6
; CHECK-NEXT: mul z27.d, p0/m, z27.d, z5.d
; CHECK-NEXT: smulh z3.d, p0/m, z3.d, z7.d
; CHECK-NEXT: smulh z0.d, p0/m, z0.d, z4.d
; CHECK-NEXT: asr z4.d, z25.d, #63
; CHECK-NEXT: smulh z2.d, p0/m, z2.d, z6.d
; CHECK-NEXT: smulh z1.d, p0/m, z1.d, z5.d
; CHECK-NEXT: asr z4.d, z25.d, #63
; CHECK-NEXT: asr z5.d, z24.d, #63
; CHECK-NEXT: asr z6.d, z26.d, #63
; CHECK-NEXT: asr z7.d, z27.d, #63
@@ -476,13 +476,13 @@ define <vscale x 8 x i64> @smulo_nxv8i64(<vscale x 8 x i64> %x, <vscale x 8 x i6
; CHECK-NEXT: cmpne p3.d, p0/z, z2.d, z6.d
; CHECK-NEXT: cmpne p0.d, p0/z, z1.d, z7.d
; CHECK-NEXT: mov z25.d, p1/m, #0 // =0x0
; CHECK-NEXT: mov z26.d, p3/m, #0 // =0x0
; CHECK-NEXT: mov z24.d, p2/m, #0 // =0x0
; CHECK-NEXT: mov z27.d, p0/m, #0 // =0x0
; CHECK-NEXT: mov z26.d, p3/m, #0 // =0x0
; CHECK-NEXT: mov z0.d, z25.d
; CHECK-NEXT: mov z2.d, z26.d
; CHECK-NEXT: mov z3.d, z24.d
; CHECK-NEXT: mov z1.d, z27.d
; CHECK-NEXT: mov z2.d, z26.d
; CHECK-NEXT: ret
%a = call { <vscale x 8 x i64>, <vscale x 8 x i1> } @llvm.smul.with.overflow.nxv8i64(<vscale x 8 x i64> %x, <vscale x 8 x i64> %y)
%b = extractvalue { <vscale x 8 x i64>, <vscale x 8 x i1> } %a, 0
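For context on the mul/smulh/asr/cmpne choreography above: @llvm.smul.with.overflow keeps the low half of the product with mul, recomputes the high half with smulh, and a lane overflows exactly when the high half disagrees with the sign-extension of the low half (the asr by bits-1); overflowing lanes are then zeroed under the cmpne predicate. A scalar i8 sketch (names are mine):

#include <stdbool.h>
#include <stdint.h>

/* One i8 lane of smulo: mul keeps the low byte, smulh the high byte,
   and overflow is "high byte != low byte asr #7". */
static bool smulo_i8(int8_t x, int8_t y, int8_t *lo) {
  int16_t full = (int16_t)x * (int16_t)y;
  *lo = (int8_t)full;              /* mul   z, z, z        */
  int8_t hi = (int8_t)(full >> 8); /* smulh z, z, z        */
  return hi != (int8_t)(*lo >> 7); /* cmpne against asr #7 */
}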
34 changes: 17 additions & 17 deletions llvm/test/CodeGen/AArch64/sve-split-fcvt.ll
@@ -6,8 +6,8 @@
define <vscale x 8 x float> @fcvts_nxv8f16(<vscale x 8 x half> %a) {
; CHECK-LABEL: fcvts_nxv8f16:
; CHECK: // %bb.0:
; CHECK-NEXT: uunpklo z1.s, z0.h
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: uunpklo z1.s, z0.h
; CHECK-NEXT: uunpkhi z2.s, z0.h
; CHECK-NEXT: movprfx z0, z1
; CHECK-NEXT: fcvt z0.s, p0/m, z1.h
@@ -21,8 +21,8 @@ define <vscale x 8 x float> @fcvts_nxv8f16(<vscale x 8 x half> %a) {
define <vscale x 4 x double> @fcvtd_nxv4f16(<vscale x 4 x half> %a) {
; CHECK-LABEL: fcvtd_nxv4f16:
; CHECK: // %bb.0:
; CHECK-NEXT: uunpklo z1.d, z0.s
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: uunpklo z1.d, z0.s
; CHECK-NEXT: uunpkhi z2.d, z0.s
; CHECK-NEXT: movprfx z0, z1
; CHECK-NEXT: fcvt z0.d, p0/m, z1.h
@@ -43,13 +43,13 @@ define <vscale x 8 x double> @fcvtd_nxv8f16(<vscale x 8 x half> %a) {
; CHECK-NEXT: uunpkhi z1.d, z1.s
; CHECK-NEXT: uunpklo z3.d, z0.s
; CHECK-NEXT: uunpkhi z4.d, z0.s
; CHECK-NEXT: fcvt z1.d, p0/m, z1.h
; CHECK-NEXT: movprfx z0, z2
; CHECK-NEXT: fcvt z0.d, p0/m, z2.h
; CHECK-NEXT: movprfx z2, z3
; CHECK-NEXT: fcvt z2.d, p0/m, z3.h
; CHECK-NEXT: movprfx z3, z4
; CHECK-NEXT: fcvt z3.d, p0/m, z4.h
; CHECK-NEXT: fcvt z1.d, p0/m, z1.h
; CHECK-NEXT: ret
%res = fpext <vscale x 8 x half> %a to <vscale x 8 x double>
ret <vscale x 8 x double> %res
@@ -58,8 +58,8 @@ define <vscale x 8 x double> @fcvtd_nxv8f16(<vscale x 8 x half> %a) {
define <vscale x 4 x double> @fcvtd_nxv4f32(<vscale x 4 x float> %a) {
; CHECK-LABEL: fcvtd_nxv4f32:
; CHECK: // %bb.0:
; CHECK-NEXT: uunpklo z1.d, z0.s
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: uunpklo z1.d, z0.s
; CHECK-NEXT: uunpkhi z2.d, z0.s
; CHECK-NEXT: movprfx z0, z1
; CHECK-NEXT: fcvt z0.d, p0/m, z1.s
@@ -73,17 +73,17 @@ define <vscale x 4 x double> @fcvtd_nxv4f32(<vscale x 4 x float> %a) {
define <vscale x 8 x double> @fcvtd_nxv8f32(<vscale x 8 x float> %a) {
; CHECK-LABEL: fcvtd_nxv8f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: uunpklo z2.d, z0.s
; CHECK-NEXT: uunpkhi z3.d, z0.s
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: uunpklo z4.d, z1.s
; CHECK-NEXT: uunpkhi z5.d, z1.s
; CHECK-NEXT: movprfx z0, z2
; CHECK-NEXT: fcvt z0.d, p0/m, z2.s
; CHECK-NEXT: movprfx z2, z4
; CHECK-NEXT: fcvt z2.d, p0/m, z4.s
; CHECK-NEXT: uunpkhi z5.d, z1.s
; CHECK-NEXT: movprfx z1, z3
; CHECK-NEXT: fcvt z1.d, p0/m, z3.s
; CHECK-NEXT: movprfx z2, z4
; CHECK-NEXT: fcvt z2.d, p0/m, z4.s
; CHECK-NEXT: movprfx z3, z5
; CHECK-NEXT: fcvt z3.d, p0/m, z5.s
; CHECK-NEXT: ret
@@ -195,8 +195,8 @@ define <vscale x 8 x i16> @fcvtzs_h_nxv8f64(<vscale x 8 x double> %a) {
define <vscale x 4 x i64> @fcvtzs_d_nxv4f32(<vscale x 4 x float> %a) {
; CHECK-LABEL: fcvtzs_d_nxv4f32:
; CHECK: // %bb.0:
; CHECK-NEXT: uunpklo z1.d, z0.s
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: uunpklo z1.d, z0.s
; CHECK-NEXT: uunpkhi z2.d, z0.s
; CHECK-NEXT: movprfx z0, z1
; CHECK-NEXT: fcvtzs z0.d, p0/m, z1.s
Expand All @@ -210,17 +210,17 @@ define <vscale x 4 x i64> @fcvtzs_d_nxv4f32(<vscale x 4 x float> %a) {
define <vscale x 16 x i32> @fcvtzs_s_nxv16f16(<vscale x 16 x half> %a) {
; CHECK-LABEL: fcvtzs_s_nxv16f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: uunpklo z2.s, z0.h
; CHECK-NEXT: uunpkhi z3.s, z0.h
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: uunpklo z4.s, z1.h
; CHECK-NEXT: uunpkhi z5.s, z1.h
; CHECK-NEXT: movprfx z0, z2
; CHECK-NEXT: fcvtzs z0.s, p0/m, z2.h
; CHECK-NEXT: movprfx z2, z4
; CHECK-NEXT: fcvtzs z2.s, p0/m, z4.h
; CHECK-NEXT: uunpkhi z5.s, z1.h
; CHECK-NEXT: movprfx z1, z3
; CHECK-NEXT: fcvtzs z1.s, p0/m, z3.h
; CHECK-NEXT: movprfx z2, z4
; CHECK-NEXT: fcvtzs z2.s, p0/m, z4.h
; CHECK-NEXT: movprfx z3, z5
; CHECK-NEXT: fcvtzs z3.s, p0/m, z5.h
; CHECK-NEXT: ret
@@ -247,8 +247,8 @@ define <vscale x 4 x i32> @fcvtzu_s_nxv4f64(<vscale x 4 x double> %a) {
define <vscale x 4 x i64> @fcvtzu_d_nxv4f32(<vscale x 4 x float> %a) {
; CHECK-LABEL: fcvtzu_d_nxv4f32:
; CHECK: // %bb.0:
; CHECK-NEXT: uunpklo z1.d, z0.s
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: uunpklo z1.d, z0.s
; CHECK-NEXT: uunpkhi z2.d, z0.s
; CHECK-NEXT: movprfx z0, z1
; CHECK-NEXT: fcvtzu z0.d, p0/m, z1.s
@@ -301,13 +301,13 @@ define <vscale x 16 x float> @scvtf_s_nxv16i8(<vscale x 16 x i8> %a) {
; CHECK-NEXT: sunpkhi z1.s, z1.h
; CHECK-NEXT: sunpklo z3.s, z0.h
; CHECK-NEXT: sunpkhi z4.s, z0.h
; CHECK-NEXT: scvtf z1.s, p0/m, z1.s
; CHECK-NEXT: movprfx z0, z2
; CHECK-NEXT: scvtf z0.s, p0/m, z2.s
; CHECK-NEXT: movprfx z2, z3
; CHECK-NEXT: scvtf z2.s, p0/m, z3.s
; CHECK-NEXT: movprfx z3, z4
; CHECK-NEXT: scvtf z3.s, p0/m, z4.s
; CHECK-NEXT: scvtf z1.s, p0/m, z1.s
; CHECK-NEXT: ret
%res = sitofp <vscale x 16 x i8> %a to <vscale x 16 x float>
ret <vscale x 16 x float> %res
@@ -316,8 +316,8 @@ define <vscale x 16 x float> @scvtf_s_nxv16i8(<vscale x 16 x i8> %a) {
define <vscale x 4 x double> @scvtf_d_nxv4i32(<vscale x 4 x i32> %a) {
; CHECK-LABEL: scvtf_d_nxv4i32:
; CHECK: // %bb.0:
; CHECK-NEXT: sunpklo z1.d, z0.s
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: sunpklo z1.d, z0.s
; CHECK-NEXT: sunpkhi z2.d, z0.s
; CHECK-NEXT: movprfx z0, z1
; CHECK-NEXT: scvtf z0.d, p0/m, z1.d
@@ -378,8 +378,8 @@ define <vscale x 8 x half> @ucvtf_h_nxv8i64(<vscale x 8 x i64> %a) {
define <vscale x 4 x double> @ucvtf_d_nxv4i32(<vscale x 4 x i32> %a) {
; CHECK-LABEL: ucvtf_d_nxv4i32:
; CHECK: // %bb.0:
; CHECK-NEXT: uunpklo z1.d, z0.s
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: uunpklo z1.d, z0.s
; CHECK-NEXT: uunpkhi z2.d, z0.s
; CHECK-NEXT: movprfx z0, z1
; CHECK-NEXT: ucvtf z0.d, p0/m, z1.d
@@ -239,10 +239,10 @@ define void @fcvt_v16f16_v16f64(ptr %a, ptr %b) {
; CHECK-NEXT: mov x8, #4 // =0x4
; CHECK-NEXT: fcvt z0.d, p0/m, z0.h
; CHECK-NEXT: ld1h { z4.d }, p0/z, [x0, x8, lsl #1]
; CHECK-NEXT: fcvt z1.d, p0/m, z1.h
; CHECK-NEXT: mov x8, #6 // =0x6
; CHECK-NEXT: fcvt z2.d, p0/m, z2.h
; CHECK-NEXT: fcvt z1.d, p0/m, z1.h
; CHECK-NEXT: ld1h { z5.d }, p0/z, [x0, x8, lsl #1]
; CHECK-NEXT: fcvt z2.d, p0/m, z2.h
; CHECK-NEXT: mov x8, #2 // =0x2
; CHECK-NEXT: fcvt z3.d, p0/m, z3.h
; CHECK-NEXT: ld1h { z7.d }, p0/z, [x0, x8, lsl #1]
@@ -255,8 +255,8 @@ define void @fcvt_v16f16_v16f64(ptr %a, ptr %b) {
; CHECK-NEXT: stp q2, q3, [x1, #64]
; CHECK-NEXT: movprfx z2, z7
; CHECK-NEXT: fcvt z2.d, p0/m, z7.h
; CHECK-NEXT: stp q1, q2, [x1]
; CHECK-NEXT: stp q4, q0, [x1, #32]
; CHECK-NEXT: stp q1, q2, [x1]
; CHECK-NEXT: ret
%op1 = load <16 x half>, ptr %a
%res = fpext <16 x half> %op1 to <16 x double>
@@ -750,14 +750,14 @@ define void @udiv_constantsplat_v8i32(ptr %a) {
; SVE-NEXT: mov z0.s, w8
; SVE-NEXT: movprfx z3, z1
; SVE-NEXT: umulh z3.s, p0/m, z3.s, z0.s
; SVE-NEXT: sub z1.s, z1.s, z3.s
; SVE-NEXT: umulh z0.s, p0/m, z0.s, z2.s
; SVE-NEXT: lsr z1.s, z1.s, #1
; SVE-NEXT: sub z1.s, z1.s, z3.s
; SVE-NEXT: sub z2.s, z2.s, z0.s
; SVE-NEXT: add z1.s, z1.s, z3.s
; SVE-NEXT: lsr z1.s, z1.s, #1
; SVE-NEXT: lsr z2.s, z2.s, #1
; SVE-NEXT: lsr z1.s, z1.s, #6
; SVE-NEXT: add z1.s, z1.s, z3.s
; SVE-NEXT: add z0.s, z2.s, z0.s
; SVE-NEXT: lsr z1.s, z1.s, #6
; SVE-NEXT: lsr z0.s, z0.s, #6
; SVE-NEXT: stp q1, q0, [x0]
; SVE-NEXT: ret
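The umulh/sub/lsr/add/lsr chain above is the classic unsigned divide-by-constant expansion for the case where the magic multiplier needs 33 bits (Granlund-Montgomery, as in Hacker's Delight). The actual divisor lives in the elided mov into w8, so it cannot be read off this hunk; the constants below are the well-known pair for dividing by 7, used purely to make the instruction shape concrete:

#include <stdint.h>

/* x / 7 with the same shape as the vector code above: umulh, sub,
   lsr #1, add, final shift. The magic 0x24924925 and the final shift
   by 2 are specific to the assumed divisor 7, not to this test. */
static uint32_t udiv7(uint32_t x) {
  uint32_t hi = (uint32_t)(((uint64_t)x * 0x24924925u) >> 32); /* umulh       */
  uint32_t t  = (x - hi) >> 1;                                 /* sub, lsr #1 */
  return (t + hi) >> 2;                                        /* add, lsr    */
}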
@@ -139,7 +139,6 @@ define void @srem_v32i8(ptr %a, ptr %b) {
; CHECK-NEXT: sdiv z5.s, p0/m, z5.s, z4.s
; CHECK-NEXT: ldr q3, [x0]
; CHECK-NEXT: ldr q4, [x1]
; CHECK-NEXT: uzp1 z5.h, z5.h, z5.h
; CHECK-NEXT: mov z18.d, z3.d
; CHECK-NEXT: mov z17.d, z4.d
; CHECK-NEXT: uzp1 z6.h, z6.h, z6.h
@@ -154,6 +153,7 @@ define void @srem_v32i8(ptr %a, ptr %b) {
; CHECK-NEXT: sdivr z7.s, p0/m, z7.s, z16.s
; CHECK-NEXT: sunpklo z18.s, z18.h
; CHECK-NEXT: uzp1 z2.h, z2.h, z2.h
; CHECK-NEXT: uzp1 z5.h, z5.h, z5.h
; CHECK-NEXT: sunpklo z17.s, z17.h
; CHECK-NEXT: sdivr z19.s, p0/m, z19.s, z20.s
; CHECK-NEXT: sunpklo z20.h, z3.b
@@ -172,21 +172,21 @@ define void @srem_v32i8(ptr %a, ptr %b) {
; CHECK-NEXT: sdivr z18.s, p0/m, z18.s, z20.s
; CHECK-NEXT: ptrue p0.h, vl4
; CHECK-NEXT: uzp1 z19.h, z21.h, z21.h
; CHECK-NEXT: splice z16.h, p0, z16.h, z17.h
; CHECK-NEXT: splice z2.h, p0, z2.h, z5.h
; CHECK-NEXT: splice z6.h, p0, z6.h, z7.h
; CHECK-NEXT: splice z16.h, p0, z16.h, z17.h
; CHECK-NEXT: uzp1 z5.b, z16.b, z16.b
; CHECK-NEXT: uzp1 z2.b, z2.b, z2.b
; CHECK-NEXT: uzp1 z6.b, z6.b, z6.b
; CHECK-NEXT: uzp1 z5.b, z16.b, z16.b
; CHECK-NEXT: uzp1 z18.h, z18.h, z18.h
; CHECK-NEXT: splice z19.h, p0, z19.h, z18.h
; CHECK-NEXT: ptrue p0.b, vl8
; CHECK-NEXT: uzp1 z7.b, z19.b, z19.b
; CHECK-NEXT: splice z6.b, p0, z6.b, z2.b
; CHECK-NEXT: splice z7.b, p0, z7.b, z5.b
; CHECK-NEXT: mls z0.b, p1/m, z6.b, z1.b
; CHECK-NEXT: movprfx z2, z3
; CHECK-NEXT: mls z2.b, p1/m, z7.b, z4.b
; CHECK-NEXT: mls z0.b, p1/m, z6.b, z1.b
; CHECK-NEXT: stp q2, q0, [x0]
; CHECK-NEXT: ret
%op1 = load <32 x i8>, ptr %a
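One lane of the srem expansion being reshuffled here, in scalar form: SVE has no 8-bit integer divide, so lanes are sign-extended to 32 bits (sunpklo/sunpkhi), divided, narrowed back (uzp1/splice), and the remainder falls out of a fused multiply-subtract (mls). A sketch, assuming a nonzero divisor:

#include <stdint.h>

/* r = a - (a / b) * b, the sdiv+mls shape of srem_v32i8 (b != 0). */
static int8_t srem_i8(int8_t a, int8_t b) {
  int32_t q = (int32_t)a / (int32_t)b; /* sdiv on widened lanes    */
  return (int8_t)(a - q * b);          /* mls: subtract q*b from a */
}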
@@ -293,8 +293,8 @@ define <2 x i32> @srem_v2i32(<2 x i32> %op1, <2 x i32> %op2) {
; CHECK-LABEL: srem_v2i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl2
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: movprfx z2, z0
; CHECK-NEXT: sdiv z2.s, p0/m, z2.s, z1.s
; CHECK-NEXT: mls z0.s, p0/m, z2.s, z1.s
@@ -308,8 +308,8 @@ define <4 x i32> @srem_v4i32(<4 x i32> %op1, <4 x i32> %op2) {
; CHECK-LABEL: srem_v4i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl4
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: movprfx z2, z0
; CHECK-NEXT: sdiv z2.s, p0/m, z2.s, z1.s
; CHECK-NEXT: mls z0.s, p0/m, z2.s, z1.s
@@ -345,8 +345,8 @@ define <1 x i64> @srem_v1i64(<1 x i64> %op1, <1 x i64> %op2) {
; CHECK-LABEL: srem_v1i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl1
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: movprfx z2, z0
; CHECK-NEXT: sdiv z2.d, p0/m, z2.d, z1.d
; CHECK-NEXT: mls z0.d, p0/m, z2.d, z1.d
@@ -360,8 +360,8 @@ define <2 x i64> @srem_v2i64(<2 x i64> %op1, <2 x i64> %op2) {
; CHECK-LABEL: srem_v2i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl2
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: movprfx z2, z0
; CHECK-NEXT: sdiv z2.d, p0/m, z2.d, z1.d
; CHECK-NEXT: mls z0.d, p0/m, z2.d, z1.d
@@ -528,7 +528,6 @@ define void @urem_v32i8(ptr %a, ptr %b) {
; CHECK-NEXT: udiv z5.s, p0/m, z5.s, z4.s
; CHECK-NEXT: ldr q3, [x0]
; CHECK-NEXT: ldr q4, [x1]
; CHECK-NEXT: uzp1 z5.h, z5.h, z5.h
; CHECK-NEXT: mov z18.d, z3.d
; CHECK-NEXT: mov z17.d, z4.d
; CHECK-NEXT: uzp1 z6.h, z6.h, z6.h
@@ -543,6 +542,7 @@ define void @urem_v32i8(ptr %a, ptr %b) {
; CHECK-NEXT: udivr z7.s, p0/m, z7.s, z16.s
; CHECK-NEXT: uunpklo z18.s, z18.h
; CHECK-NEXT: uzp1 z2.h, z2.h, z2.h
; CHECK-NEXT: uzp1 z5.h, z5.h, z5.h
; CHECK-NEXT: uunpklo z17.s, z17.h
; CHECK-NEXT: udivr z19.s, p0/m, z19.s, z20.s
; CHECK-NEXT: uunpklo z20.h, z3.b
@@ -561,21 +561,21 @@ define void @urem_v32i8(ptr %a, ptr %b) {
; CHECK-NEXT: udivr z18.s, p0/m, z18.s, z20.s
; CHECK-NEXT: ptrue p0.h, vl4
; CHECK-NEXT: uzp1 z19.h, z21.h, z21.h
; CHECK-NEXT: splice z16.h, p0, z16.h, z17.h
; CHECK-NEXT: splice z2.h, p0, z2.h, z5.h
; CHECK-NEXT: splice z6.h, p0, z6.h, z7.h
; CHECK-NEXT: splice z16.h, p0, z16.h, z17.h
; CHECK-NEXT: uzp1 z5.b, z16.b, z16.b
; CHECK-NEXT: uzp1 z2.b, z2.b, z2.b
; CHECK-NEXT: uzp1 z6.b, z6.b, z6.b
; CHECK-NEXT: uzp1 z5.b, z16.b, z16.b
; CHECK-NEXT: uzp1 z18.h, z18.h, z18.h
; CHECK-NEXT: splice z19.h, p0, z19.h, z18.h
; CHECK-NEXT: ptrue p0.b, vl8
; CHECK-NEXT: uzp1 z7.b, z19.b, z19.b
; CHECK-NEXT: splice z6.b, p0, z6.b, z2.b
; CHECK-NEXT: splice z7.b, p0, z7.b, z5.b
; CHECK-NEXT: mls z0.b, p1/m, z6.b, z1.b
; CHECK-NEXT: movprfx z2, z3
; CHECK-NEXT: mls z2.b, p1/m, z7.b, z4.b
; CHECK-NEXT: mls z0.b, p1/m, z6.b, z1.b
; CHECK-NEXT: stp q2, q0, [x0]
; CHECK-NEXT: ret
%op1 = load <32 x i8>, ptr %a
@@ -682,8 +682,8 @@ define <2 x i32> @urem_v2i32(<2 x i32> %op1, <2 x i32> %op2) {
; CHECK-LABEL: urem_v2i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl2
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: movprfx z2, z0
; CHECK-NEXT: udiv z2.s, p0/m, z2.s, z1.s
; CHECK-NEXT: mls z0.s, p0/m, z2.s, z1.s
@@ -697,8 +697,8 @@ define <4 x i32> @urem_v4i32(<4 x i32> %op1, <4 x i32> %op2) {
; CHECK-LABEL: urem_v4i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl4
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: movprfx z2, z0
; CHECK-NEXT: udiv z2.s, p0/m, z2.s, z1.s
; CHECK-NEXT: mls z0.s, p0/m, z2.s, z1.s
@@ -734,8 +734,8 @@ define <1 x i64> @urem_v1i64(<1 x i64> %op1, <1 x i64> %op2) {
; CHECK-LABEL: urem_v1i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl1
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: movprfx z2, z0
; CHECK-NEXT: udiv z2.d, p0/m, z2.d, z1.d
; CHECK-NEXT: mls z0.d, p0/m, z2.d, z1.d
@@ -749,8 +749,8 @@ define <2 x i64> @urem_v2i64(<2 x i64> %op1, <2 x i64> %op2) {
; CHECK-LABEL: urem_v2i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl2
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: movprfx z2, z0
; CHECK-NEXT: udiv z2.d, p0/m, z2.d, z1.d
; CHECK-NEXT: mls z0.d, p0/m, z2.d, z1.d
@@ -221,21 +221,21 @@ define void @ucvtf_v16i16_v16f64(ptr %a, ptr %b) {
; CHECK-NEXT: ext z6.b, z6.b, z1.b, #8
; CHECK-NEXT: uunpklo z1.d, z1.s
; CHECK-NEXT: ucvtf z2.d, p0/m, z2.d
; CHECK-NEXT: ucvtf z3.d, p0/m, z3.d
; CHECK-NEXT: uunpklo z7.d, z7.s
; CHECK-NEXT: ucvtf z3.d, p0/m, z3.d
; CHECK-NEXT: uunpklo z5.d, z5.s
; CHECK-NEXT: uunpklo z6.d, z6.s
; CHECK-NEXT: ucvtf z4.d, p0/m, z4.d
; CHECK-NEXT: ucvtf z0.d, p0/m, z0.d
; CHECK-NEXT: uunpklo z6.d, z6.s
; CHECK-NEXT: ucvtf z1.d, p0/m, z1.d
; CHECK-NEXT: ucvtf z5.d, p0/m, z5.d
; CHECK-NEXT: stp q2, q4, [x1, #64]
; CHECK-NEXT: movprfx z2, z6
; CHECK-NEXT: ucvtf z2.d, p0/m, z6.d
; CHECK-NEXT: stp q1, q2, [x1, #32]
; CHECK-NEXT: stp q0, q5, [x1, #96]
; CHECK-NEXT: movprfx z0, z7
; CHECK-NEXT: ucvtf z0.d, p0/m, z7.d
; CHECK-NEXT: stp q1, q2, [x1, #32]
; CHECK-NEXT: stp q3, q0, [x1]
; CHECK-NEXT: ret
%op1 = load <16 x i16>, ptr %a
@@ -481,10 +481,10 @@ define <8 x half> @ucvtf_v8i64_v8f16(ptr %a) {
; CHECK-NEXT: uzp1 z3.s, z3.s, z3.s
; CHECK-NEXT: uzp1 z2.s, z2.s, z2.s
; CHECK-NEXT: splice z1.s, p0, z1.s, z0.s
; CHECK-NEXT: movprfx z0, z1
; CHECK-NEXT: fcvt z0.h, p1/m, z1.s
; CHECK-NEXT: splice z2.s, p0, z2.s, z3.s
; CHECK-NEXT: ptrue p0.h, vl4
; CHECK-NEXT: movprfx z0, z1
; CHECK-NEXT: fcvt z0.h, p1/m, z1.s
; CHECK-NEXT: movprfx z1, z2
; CHECK-NEXT: fcvt z1.h, p1/m, z2.s
; CHECK-NEXT: uzp1 z2.h, z0.h, z0.h
@@ -794,21 +794,21 @@ define void @scvtf_v16i16_v16f64(ptr %a, ptr %b) {
; CHECK-NEXT: ext z6.b, z6.b, z1.b, #8
; CHECK-NEXT: sunpklo z1.d, z1.s
; CHECK-NEXT: scvtf z2.d, p0/m, z2.d
; CHECK-NEXT: scvtf z3.d, p0/m, z3.d
; CHECK-NEXT: sunpklo z7.d, z7.s
; CHECK-NEXT: scvtf z3.d, p0/m, z3.d
; CHECK-NEXT: sunpklo z5.d, z5.s
; CHECK-NEXT: sunpklo z6.d, z6.s
; CHECK-NEXT: scvtf z4.d, p0/m, z4.d
; CHECK-NEXT: scvtf z0.d, p0/m, z0.d
; CHECK-NEXT: sunpklo z6.d, z6.s
; CHECK-NEXT: scvtf z1.d, p0/m, z1.d
; CHECK-NEXT: scvtf z5.d, p0/m, z5.d
; CHECK-NEXT: stp q2, q4, [x1, #64]
; CHECK-NEXT: movprfx z2, z6
; CHECK-NEXT: scvtf z2.d, p0/m, z6.d
; CHECK-NEXT: stp q1, q2, [x1, #32]
; CHECK-NEXT: stp q0, q5, [x1, #96]
; CHECK-NEXT: movprfx z0, z7
; CHECK-NEXT: scvtf z0.d, p0/m, z7.d
; CHECK-NEXT: stp q1, q2, [x1, #32]
; CHECK-NEXT: stp q3, q0, [x1]
; CHECK-NEXT: ret
%op1 = load <16 x i16>, ptr %a
@@ -982,16 +982,16 @@ define void @scvtf_v16i32_v16f64(ptr %a, ptr %b) {
; CHECK-NEXT: sunpklo z0.d, z0.s
; CHECK-NEXT: sunpklo z1.d, z1.s
; CHECK-NEXT: ext z6.b, z6.b, z4.b, #8
; CHECK-NEXT: sunpklo z4.d, z4.s
; CHECK-NEXT: ext z7.b, z7.b, z5.b, #8
; CHECK-NEXT: sunpklo z4.d, z4.s
; CHECK-NEXT: sunpklo z5.d, z5.s
; CHECK-NEXT: sunpklo z2.d, z2.s
; CHECK-NEXT: sunpklo z3.d, z3.s
; CHECK-NEXT: scvtf z0.d, p0/m, z0.d
; CHECK-NEXT: sunpklo z6.d, z6.s
; CHECK-NEXT: sunpklo z7.d, z7.s
; CHECK-NEXT: scvtf z1.d, p0/m, z1.d
; CHECK-NEXT: scvtf z4.d, p0/m, z4.d
; CHECK-NEXT: sunpklo z7.d, z7.s
; CHECK-NEXT: scvtf z2.d, p0/m, z2.d
; CHECK-NEXT: scvtf z3.d, p0/m, z3.d
; CHECK-NEXT: stp q1, q3, [x1, #64]
16 changes: 8 additions & 8 deletions llvm/test/CodeGen/AArch64/sve-umulo-sdnode.ll
@@ -120,16 +120,16 @@ define <vscale x 64 x i8> @umulo_nxv64i8(<vscale x 64 x i8> %x, <vscale x 64 x i
; CHECK-NEXT: umulh z27.b, p0/m, z27.b, z5.b
; CHECK-NEXT: mul z3.b, p0/m, z3.b, z7.b
; CHECK-NEXT: mul z0.b, p0/m, z0.b, z4.b
; CHECK-NEXT: cmpne p1.b, p0/z, z25.b, #0
; CHECK-NEXT: mul z2.b, p0/m, z2.b, z6.b
; CHECK-NEXT: mul z1.b, p0/m, z1.b, z5.b
; CHECK-NEXT: cmpne p1.b, p0/z, z25.b, #0
; CHECK-NEXT: cmpne p2.b, p0/z, z24.b, #0
; CHECK-NEXT: cmpne p3.b, p0/z, z26.b, #0
; CHECK-NEXT: cmpne p0.b, p0/z, z27.b, #0
; CHECK-NEXT: mov z0.b, p1/m, #0 // =0x0
; CHECK-NEXT: mov z2.b, p3/m, #0 // =0x0
; CHECK-NEXT: mov z3.b, p2/m, #0 // =0x0
; CHECK-NEXT: mov z1.b, p0/m, #0 // =0x0
; CHECK-NEXT: mov z2.b, p3/m, #0 // =0x0
; CHECK-NEXT: ret
%a = call { <vscale x 64 x i8>, <vscale x 64 x i1> } @llvm.umul.with.overflow.nxv64i8(<vscale x 64 x i8> %x, <vscale x 64 x i8> %y)
%b = extractvalue { <vscale x 64 x i8>, <vscale x 64 x i1> } %a, 0
@@ -237,16 +237,16 @@ define <vscale x 32 x i16> @umulo_nxv32i16(<vscale x 32 x i16> %x, <vscale x 32
; CHECK-NEXT: umulh z27.h, p0/m, z27.h, z5.h
; CHECK-NEXT: mul z3.h, p0/m, z3.h, z7.h
; CHECK-NEXT: mul z0.h, p0/m, z0.h, z4.h
; CHECK-NEXT: cmpne p1.h, p0/z, z25.h, #0
; CHECK-NEXT: mul z2.h, p0/m, z2.h, z6.h
; CHECK-NEXT: mul z1.h, p0/m, z1.h, z5.h
; CHECK-NEXT: cmpne p1.h, p0/z, z25.h, #0
; CHECK-NEXT: cmpne p2.h, p0/z, z24.h, #0
; CHECK-NEXT: cmpne p3.h, p0/z, z26.h, #0
; CHECK-NEXT: cmpne p0.h, p0/z, z27.h, #0
; CHECK-NEXT: mov z0.h, p1/m, #0 // =0x0
; CHECK-NEXT: mov z2.h, p3/m, #0 // =0x0
; CHECK-NEXT: mov z3.h, p2/m, #0 // =0x0
; CHECK-NEXT: mov z1.h, p0/m, #0 // =0x0
; CHECK-NEXT: mov z2.h, p3/m, #0 // =0x0
; CHECK-NEXT: ret
%a = call { <vscale x 32 x i16>, <vscale x 32 x i1> } @llvm.umul.with.overflow.nxv32i16(<vscale x 32 x i16> %x, <vscale x 32 x i16> %y)
%b = extractvalue { <vscale x 32 x i16>, <vscale x 32 x i1> } %a, 0
@@ -334,16 +334,16 @@ define <vscale x 16 x i32> @umulo_nxv16i32(<vscale x 16 x i32> %x, <vscale x 16
; CHECK-NEXT: umulh z27.s, p0/m, z27.s, z5.s
; CHECK-NEXT: mul z3.s, p0/m, z3.s, z7.s
; CHECK-NEXT: mul z0.s, p0/m, z0.s, z4.s
; CHECK-NEXT: cmpne p1.s, p0/z, z25.s, #0
; CHECK-NEXT: mul z2.s, p0/m, z2.s, z6.s
; CHECK-NEXT: mul z1.s, p0/m, z1.s, z5.s
; CHECK-NEXT: cmpne p1.s, p0/z, z25.s, #0
; CHECK-NEXT: cmpne p2.s, p0/z, z24.s, #0
; CHECK-NEXT: cmpne p3.s, p0/z, z26.s, #0
; CHECK-NEXT: cmpne p0.s, p0/z, z27.s, #0
; CHECK-NEXT: mov z0.s, p1/m, #0 // =0x0
; CHECK-NEXT: mov z2.s, p3/m, #0 // =0x0
; CHECK-NEXT: mov z3.s, p2/m, #0 // =0x0
; CHECK-NEXT: mov z1.s, p0/m, #0 // =0x0
; CHECK-NEXT: mov z2.s, p3/m, #0 // =0x0
; CHECK-NEXT: ret
%a = call { <vscale x 16 x i32>, <vscale x 16 x i1> } @llvm.umul.with.overflow.nxv16i32(<vscale x 16 x i32> %x, <vscale x 16 x i32> %y)
%b = extractvalue { <vscale x 16 x i32>, <vscale x 16 x i1> } %a, 0
@@ -411,16 +411,16 @@ define <vscale x 8 x i64> @umulo_nxv8i64(<vscale x 8 x i64> %x, <vscale x 8 x i6
; CHECK-NEXT: umulh z27.d, p0/m, z27.d, z5.d
; CHECK-NEXT: mul z3.d, p0/m, z3.d, z7.d
; CHECK-NEXT: mul z0.d, p0/m, z0.d, z4.d
; CHECK-NEXT: cmpne p1.d, p0/z, z25.d, #0
; CHECK-NEXT: mul z2.d, p0/m, z2.d, z6.d
; CHECK-NEXT: mul z1.d, p0/m, z1.d, z5.d
; CHECK-NEXT: cmpne p1.d, p0/z, z25.d, #0
; CHECK-NEXT: cmpne p2.d, p0/z, z24.d, #0
; CHECK-NEXT: cmpne p3.d, p0/z, z26.d, #0
; CHECK-NEXT: cmpne p0.d, p0/z, z27.d, #0
; CHECK-NEXT: mov z0.d, p1/m, #0 // =0x0
; CHECK-NEXT: mov z2.d, p3/m, #0 // =0x0
; CHECK-NEXT: mov z3.d, p2/m, #0 // =0x0
; CHECK-NEXT: mov z1.d, p0/m, #0 // =0x0
; CHECK-NEXT: mov z2.d, p3/m, #0 // =0x0
; CHECK-NEXT: ret
%a = call { <vscale x 8 x i64>, <vscale x 8 x i1> } @llvm.umul.with.overflow.nxv8i64(<vscale x 8 x i64> %x, <vscale x 8 x i64> %y)
%b = extractvalue { <vscale x 8 x i64>, <vscale x 8 x i1> } %a, 0
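Same shape as the signed file earlier, minus the sign-extension compare: for @llvm.umul.with.overflow a lane overflows exactly when its umulh half is nonzero, hence the cmpne against #0. A scalar u8 model (names are mine):

#include <stdbool.h>
#include <stdint.h>

/* One u8 lane of umulo: overflow iff the high half of the product is
   nonzero. */
static bool umulo_u8(uint8_t x, uint8_t y, uint8_t *lo) {
  uint16_t full = (uint16_t)x * (uint16_t)y;
  *lo = (uint8_t)full;              /* mul   z, z, z     */
  return (uint8_t)(full >> 8) != 0; /* umulh ; cmpne #0  */
}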
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AArch64/sve-vecreduce-dot.ll
@@ -31,8 +31,8 @@ define i32 @test(<vscale x 32 x i8> %bin.rdx, <vscale x 32 x i8> %bin.rdx2) {
; CHECK-NEXT: sunpkhi z24.s, z3.h
; CHECK-NEXT: mul z0.s, p0/m, z0.s, z2.s
; CHECK-NEXT: sunpkhi z2.s, z7.h
; CHECK-NEXT: sunpklo z3.s, z3.h
; CHECK-NEXT: sunpklo z7.s, z7.h
; CHECK-NEXT: sunpklo z3.s, z3.h
; CHECK-NEXT: mla z0.s, p0/m, z25.s, z24.s
; CHECK-NEXT: mad z2.s, p0/m, z6.s, z4.s
; CHECK-NEXT: mad z1.s, p0/m, z3.s, z26.s
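The change here only moves one of the sign-extending unpacks; the computation is still a widening dot product: i8 lanes are sign-extended to 32 bits (sunpklo/sunpkhi) and multiply-accumulated (mul/mla/mad) before the final reduction. A scalar reference, with my naming:

#include <stdint.h>

/* Widening signed dot product, the per-element semantics behind the
   sunpk + mla sequence above. */
static int32_t sdot_ref(const int8_t *a, const int8_t *b, int n) {
  int32_t acc = 0;
  for (int i = 0; i < n; i++)
    acc += (int32_t)a[i] * (int32_t)b[i]; /* widen, then multiply-accumulate */
  return acc;
}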
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AArch64/sve2-xar.ll
@@ -155,8 +155,8 @@ define <vscale x 2 x i64> @xar_nxv2i64_l_neg1(<vscale x 2 x i64> %x, <vscale x 2
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: subr z2.d, z2.d, #0 // =0x0
; CHECK-NEXT: eor z0.d, z0.d, z1.d
; CHECK-NEXT: and z2.d, z2.d, #0x3f
; CHECK-NEXT: and z3.d, z3.d, #0x3f
; CHECK-NEXT: and z2.d, z2.d, #0x3f
; CHECK-NEXT: movprfx z1, z0
; CHECK-NEXT: lsl z1.d, p0/m, z1.d, z3.d
; CHECK-NEXT: lsr z0.d, p0/m, z0.d, z2.d
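Context for the swapped and-lines: this test exercises the fallback when the rotate amount is not a legal XAR immediate, so "eor then rotate" is open-coded as two shifts whose amounts are the value and its negation, each masked to the lane width (and #0x3f for 64-bit lanes). A scalar sketch of that shape (function name is mine, and I am reading the rotate as a left rotate per the test name):

#include <stdint.h>

/* eor + variable rotate-left, the open-coded XAR fallback. Masking the
   negated amount to 63 keeps both shifts in range (and well defined in
   C) even when amt is 0 or a multiple of 64. */
static uint64_t eor_rotl(uint64_t x, uint64_t y, uint64_t amt) {
  uint64_t t = x ^ y;          /* eor                 */
  uint64_t l = amt & 63;       /* and ..., #0x3f      */
  uint64_t r = (0 - amt) & 63; /* subr ; and ..., #0x3f */
  return (t << l) | (t >> r);  /* lsl, lsr, orr       */
}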
12 changes: 12 additions & 0 deletions llvm/test/CodeGen/NVPTX/bf16-instructions.ll
@@ -330,3 +330,15 @@ define bfloat @test_uitofp_i64(i64 %a) {
%r = uitofp i64 %a to bfloat
ret bfloat %r
}

; CHECK-LABEL: test_roundeven(
; CHECK: ld.param.b16 [[A:%rs[0-9]+]], [test_roundeven_param_0];
; SM80: cvt.rni.f32.f32 [[F:%f[0-9]+]]
; SM80: cvt.rn.bf16.f32 [[R:%rs[0-9]+]], [[F]];
; SM90: cvt.rni.bf16.bf16 [[R:%rs[0-9]+]], [[A]];
; CHECK: st.param.b16 [func_retval0+0], [[R]];
; CHECK: ret;
define bfloat @test_roundeven(bfloat %a) {
%r = call bfloat @llvm.roundeven.bf16(bfloat %a)
ret bfloat %r
}
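A quick gloss on the new test: llvm.roundeven rounds to the nearest integral value with ties to even, which is what PTX cvt.rni implements; on SM80 the bf16 input takes a round trip through f32 (cvt.rni.f32.f32 then cvt.rn.bf16.f32), while SM90 rounds natively in bf16. The C reference semantics, for comparison (roundevenf is C23; function name below is mine):

#include <math.h>

/* Reference for llvm.roundeven: round half to even,
   e.g. 0.5 -> 0.0, 1.5 -> 2.0, 2.5 -> 2.0. */
static float roundeven_ref(float x) {
  return roundevenf(x);
}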
2 changes: 2 additions & 0 deletions llvm/test/Transforms/CodeGenPrepare/LoongArch/lit.local.cfg
@@ -0,0 +1,2 @@
if not "LoongArch" in config.root.targets:
config.unsupported = True
34 changes: 34 additions & 0 deletions llvm/test/Transforms/CodeGenPrepare/LoongArch/splitgep.ll
@@ -0,0 +1,34 @@
; RUN: opt --mtriple=loongarch64 -S --passes='require<profile-summary>,function(codegenprepare)' %s | FileCheck %s

; Check that we have deterministic output
define void @test(ptr %sp, ptr %t, i32 %n) {
; CHECK-LABEL: @test(
; CHECK-NEXT: entry:
; CHECK-NEXT: %s = load ptr, ptr %sp, align 8
; CHECK-NEXT: br label %while_cond

entry:
%s = load ptr, ptr %sp
br label %while_cond

while_cond:
%phi = phi i32 [ 0, %entry ], [ %i, %while_body ]
%gep0 = getelementptr [65536 x i32], ptr %s, i64 0, i64 20000
%gep1 = getelementptr [65536 x i32], ptr %s, i64 0, i64 20001
%gep2 = getelementptr [65536 x i32], ptr %t, i64 0, i64 20000
%gep3 = getelementptr [65536 x i32], ptr %t, i64 0, i64 20001
%cmp = icmp slt i32 %phi, %n
br i1 %cmp, label %while_body, label %while_end

while_body:
%i = add i32 %phi, 1
store i32 %i, ptr %gep0
store i32 %phi, ptr %gep1
store i32 %i, ptr %gep2
store i32 %phi, ptr %gep3
br label %while_cond

while_end:
ret void
}
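As I read the new test, it pins down deterministic output when CodeGenPrepare splits the large constant GEP offsets: the expensive base (s plus 20000 4-byte elements) should be computed once, leaving the neighbouring stores with small offsets that fit LoongArch's 12-bit immediate addressing. A rough C picture of the intended effect (a hypothetical sketch, not the pass's literal output):

/* Before splitting, each store recomputes s + 80000 bytes; after, one
   base add feeds stores at offsets 0 and 4. */
static void store_pair(int *s, int i, int phi) {
  int *base = s + 20000; /* the large offset, materialized once */
  base[0] = i;           /* store at base + 0                   */
  base[1] = phi;         /* store at base + 4, a small immediate */
}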