384 changes: 192 additions & 192 deletions llvm/test/CodeGen/AArch64/sve-fixed-length-int-shifts.ll

Large diffs are not rendered by default.

428 changes: 214 additions & 214 deletions llvm/test/CodeGen/AArch64/sve-fixed-length-int-to-fp.ll

Large diffs are not rendered by default.
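
Every file touched by this change receives the same mechanical rewrite: typed
pointers (<4 x i32>*, float*, and so on) become LLVM's opaque ptr type, and the
pointee type moves onto the operation that needs it. A minimal before/after sketch
of the transformation (the name @example is illustrative, not taken from these
tests):

; Before: the pointee type is spelled inside the pointer type.
define void @example(<4 x i32>* %a) {
  %v = load <4 x i32>, <4 x i32>* %a
  store <4 x i32> %v, <4 x i32>* %a
  ret void
}

; After: pointers are opaque; load and store carry the value type themselves.
define void @example(ptr %a) {
  %v = load <4 x i32>, ptr %a
  store <4 x i32> %v, ptr %a
  ret void
}

The CHECK lines in the hunks below are untouched, so the generated AArch64 code is
expected to be identical; only the IR spelling of pointers changes.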

128 changes: 64 additions & 64 deletions llvm/test/CodeGen/AArch64/sve-fixed-length-int-vselect.ll
@@ -29,7 +29,7 @@ define <16 x i8> @select_v16i8(<16 x i8> %op1, <16 x i8> %op2, <16 x i1> %mask)
ret <16 x i8> %sel
}

define void @select_v32i8(<32 x i8>* %a, <32 x i8>* %b) vscale_range(2,0) #0 {
define void @select_v32i8(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: select_v32i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl32
@@ -39,15 +39,15 @@ define void @select_v32i8(<32 x i8>* %a, <32 x i8>* %b) vscale_range(2,0) #0 {
; CHECK-NEXT: sel z0.b, p1, z0.b, z1.b
; CHECK-NEXT: st1b { z0.b }, p0, [x0]
; CHECK-NEXT: ret
%op1 = load <32 x i8>, <32 x i8>* %a
%op2 = load <32 x i8>, <32 x i8>* %b
%op1 = load <32 x i8>, ptr %a
%op2 = load <32 x i8>, ptr %b
%mask = icmp eq <32 x i8> %op1, %op2
%sel = select <32 x i1> %mask, <32 x i8> %op1, <32 x i8> %op2
store <32 x i8> %sel, <32 x i8>* %a
store <32 x i8> %sel, ptr %a
ret void
}

define void @select_v64i8(<64 x i8>* %a, <64 x i8>* %b) #0 {
define void @select_v64i8(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: select_v64i8:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov w8, #32
@@ -73,15 +73,15 @@ define void @select_v64i8(<64 x i8>* %a, <64 x i8>* %b) #0 {
; VBITS_GE_512-NEXT: sel z0.b, p1, z0.b, z1.b
; VBITS_GE_512-NEXT: st1b { z0.b }, p0, [x0]
; VBITS_GE_512-NEXT: ret
%op1 = load <64 x i8>, <64 x i8>* %a
%op2 = load <64 x i8>, <64 x i8>* %b
%op1 = load <64 x i8>, ptr %a
%op2 = load <64 x i8>, ptr %b
%mask = icmp eq <64 x i8> %op1, %op2
%sel = select <64 x i1> %mask, <64 x i8> %op1, <64 x i8> %op2
store <64 x i8> %sel, <64 x i8>* %a
store <64 x i8> %sel, ptr %a
ret void
}

define void @select_v128i8(<128 x i8>* %a, <128 x i8>* %b) vscale_range(8,0) #0 {
define void @select_v128i8(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: select_v128i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl128
@@ -91,15 +91,15 @@ define void @select_v128i8(<128 x i8>* %a, <128 x i8>* %b) vscale_range(8,0) #0
; CHECK-NEXT: sel z0.b, p1, z0.b, z1.b
; CHECK-NEXT: st1b { z0.b }, p0, [x0]
; CHECK-NEXT: ret
%op1 = load <128 x i8>, <128 x i8>* %a
%op2 = load <128 x i8>, <128 x i8>* %b
%op1 = load <128 x i8>, ptr %a
%op2 = load <128 x i8>, ptr %b
%mask = icmp eq <128 x i8> %op1, %op2
%sel = select <128 x i1> %mask, <128 x i8> %op1, <128 x i8> %op2
store <128 x i8> %sel, <128 x i8>* %a
store <128 x i8> %sel, ptr %a
ret void
}

define void @select_v256i8(<256 x i8>* %a, <256 x i8>* %b) vscale_range(16,0) #0 {
define void @select_v256i8(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: select_v256i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl256
@@ -109,11 +109,11 @@ define void @select_v256i8(<256 x i8>* %a, <256 x i8>* %b) vscale_range(16,0) #0
; CHECK-NEXT: sel z0.b, p1, z0.b, z1.b
; CHECK-NEXT: st1b { z0.b }, p0, [x0]
; CHECK-NEXT: ret
%op1 = load <256 x i8>, <256 x i8>* %a
%op2 = load <256 x i8>, <256 x i8>* %b
%op1 = load <256 x i8>, ptr %a
%op2 = load <256 x i8>, ptr %b
%mask = icmp eq <256 x i8> %op1, %op2
%sel = select <256 x i1> %mask, <256 x i8> %op1, <256 x i8> %op2
store <256 x i8> %sel, <256 x i8>* %a
store <256 x i8> %sel, ptr %a
ret void
}

@@ -142,7 +142,7 @@ define <8 x i16> @select_v8i16(<8 x i16> %op1, <8 x i16> %op2, <8 x i1> %mask) v
ret <8 x i16> %sel
}

define void @select_v16i16(<16 x i16>* %a, <16 x i16>* %b) vscale_range(2,0) #0 {
define void @select_v16i16(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: select_v16i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl16
@@ -152,15 +152,15 @@ define void @select_v16i16(<16 x i16>* %a, <16 x i16>* %b) vscale_range(2,0) #0
; CHECK-NEXT: sel z0.h, p1, z0.h, z1.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
%op1 = load <16 x i16>, <16 x i16>* %a
%op2 = load <16 x i16>, <16 x i16>* %b
%op1 = load <16 x i16>, ptr %a
%op2 = load <16 x i16>, ptr %b
%mask = icmp eq <16 x i16> %op1, %op2
%sel = select <16 x i1> %mask, <16 x i16> %op1, <16 x i16> %op2
store <16 x i16> %sel, <16 x i16>* %a
store <16 x i16> %sel, ptr %a
ret void
}

define void @select_v32i16(<32 x i16>* %a, <32 x i16>* %b) #0 {
define void @select_v32i16(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: select_v32i16:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #16
@@ -186,15 +186,15 @@ define void @select_v32i16(<32 x i16>* %a, <32 x i16>* %b) #0 {
; VBITS_GE_512-NEXT: sel z0.h, p1, z0.h, z1.h
; VBITS_GE_512-NEXT: st1h { z0.h }, p0, [x0]
; VBITS_GE_512-NEXT: ret
%op1 = load <32 x i16>, <32 x i16>* %a
%op2 = load <32 x i16>, <32 x i16>* %b
%op1 = load <32 x i16>, ptr %a
%op2 = load <32 x i16>, ptr %b
%mask = icmp eq <32 x i16> %op1, %op2
%sel = select <32 x i1> %mask, <32 x i16> %op1, <32 x i16> %op2
store <32 x i16> %sel, <32 x i16>* %a
store <32 x i16> %sel, ptr %a
ret void
}

define void @select_v64i16(<64 x i16>* %a, <64 x i16>* %b) vscale_range(8,0) #0 {
define void @select_v64i16(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: select_v64i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl64
@@ -204,15 +204,15 @@ define void @select_v64i16(<64 x i16>* %a, <64 x i16>* %b) vscale_range(8,0) #0
; CHECK-NEXT: sel z0.h, p1, z0.h, z1.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
%op1 = load <64 x i16>, <64 x i16>* %a
%op2 = load <64 x i16>, <64 x i16>* %b
%op1 = load <64 x i16>, ptr %a
%op2 = load <64 x i16>, ptr %b
%mask = icmp eq <64 x i16> %op1, %op2
%sel = select <64 x i1> %mask, <64 x i16> %op1, <64 x i16> %op2
store <64 x i16> %sel, <64 x i16>* %a
store <64 x i16> %sel, ptr %a
ret void
}

define void @select_v128i16(<128 x i16>* %a, <128 x i16>* %b) vscale_range(16,0) #0 {
define void @select_v128i16(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: select_v128i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl128
@@ -222,11 +222,11 @@ define void @select_v128i16(<128 x i16>* %a, <128 x i16>* %b) vscale_range(16,0)
; CHECK-NEXT: sel z0.h, p1, z0.h, z1.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
%op1 = load <128 x i16>, <128 x i16>* %a
%op2 = load <128 x i16>, <128 x i16>* %b
%op1 = load <128 x i16>, ptr %a
%op2 = load <128 x i16>, ptr %b
%mask = icmp eq <128 x i16> %op1, %op2
%sel = select <128 x i1> %mask, <128 x i16> %op1, <128 x i16> %op2
store <128 x i16> %sel, <128 x i16>* %a
store <128 x i16> %sel, ptr %a
ret void
}

@@ -255,7 +255,7 @@ define <4 x i32> @select_v4i32(<4 x i32> %op1, <4 x i32> %op2, <4 x i1> %mask) v
ret <4 x i32> %sel
}

define void @select_v8i32(<8 x i32>* %a, <8 x i32>* %b) vscale_range(2,0) #0 {
define void @select_v8i32(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: select_v8i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl8
@@ -265,15 +265,15 @@ define void @select_v8i32(<8 x i32>* %a, <8 x i32>* %b) vscale_range(2,0) #0 {
; CHECK-NEXT: sel z0.s, p1, z0.s, z1.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
%op1 = load <8 x i32>, <8 x i32>* %a
%op2 = load <8 x i32>, <8 x i32>* %b
%op1 = load <8 x i32>, ptr %a
%op2 = load <8 x i32>, ptr %b
%mask = icmp eq <8 x i32> %op1, %op2
%sel = select <8 x i1> %mask, <8 x i32> %op1, <8 x i32> %op2
store <8 x i32> %sel, <8 x i32>* %a
store <8 x i32> %sel, ptr %a
ret void
}

define void @select_v16i32(<16 x i32>* %a, <16 x i32>* %b) #0 {
define void @select_v16i32(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: select_v16i32:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #8
@@ -299,15 +299,15 @@ define void @select_v16i32(<16 x i32>* %a, <16 x i32>* %b) #0 {
; VBITS_GE_512-NEXT: sel z0.s, p1, z0.s, z1.s
; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x0]
; VBITS_GE_512-NEXT: ret
%op1 = load <16 x i32>, <16 x i32>* %a
%op2 = load <16 x i32>, <16 x i32>* %b
%op1 = load <16 x i32>, ptr %a
%op2 = load <16 x i32>, ptr %b
%mask = icmp eq <16 x i32> %op1, %op2
%sel = select <16 x i1> %mask, <16 x i32> %op1, <16 x i32> %op2
store <16 x i32> %sel, <16 x i32>* %a
store <16 x i32> %sel, ptr %a
ret void
}

define void @select_v32i32(<32 x i32>* %a, <32 x i32>* %b) vscale_range(8,0) #0 {
define void @select_v32i32(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: select_v32i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl32
@@ -317,15 +317,15 @@ define void @select_v32i32(<32 x i32>* %a, <32 x i32>* %b) vscale_range(8,0) #0
; CHECK-NEXT: sel z0.s, p1, z0.s, z1.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
%op1 = load <32 x i32>, <32 x i32>* %a
%op2 = load <32 x i32>, <32 x i32>* %b
%op1 = load <32 x i32>, ptr %a
%op2 = load <32 x i32>, ptr %b
%mask = icmp eq <32 x i32> %op1, %op2
%sel = select <32 x i1> %mask, <32 x i32> %op1, <32 x i32> %op2
store <32 x i32> %sel, <32 x i32>* %a
store <32 x i32> %sel, ptr %a
ret void
}

define void @select_v64i32(<64 x i32>* %a, <64 x i32>* %b) vscale_range(16,0) #0 {
define void @select_v64i32(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: select_v64i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl64
@@ -335,11 +335,11 @@ define void @select_v64i32(<64 x i32>* %a, <64 x i32>* %b) vscale_range(16,0) #0
; CHECK-NEXT: sel z0.s, p1, z0.s, z1.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
%op1 = load <64 x i32>, <64 x i32>* %a
%op2 = load <64 x i32>, <64 x i32>* %b
%op1 = load <64 x i32>, ptr %a
%op2 = load <64 x i32>, ptr %b
%mask = icmp eq <64 x i32> %op1, %op2
%sel = select <64 x i1> %mask, <64 x i32> %op1, <64 x i32> %op2
store <64 x i32> %sel, <64 x i32>* %a
store <64 x i32> %sel, ptr %a
ret void
}

@@ -369,7 +369,7 @@ define <2 x i64> @select_v2i64(<2 x i64> %op1, <2 x i64> %op2, <2 x i1> %mask) v
ret <2 x i64> %sel
}

define void @select_v4i64(<4 x i64>* %a, <4 x i64>* %b) vscale_range(2,0) #0 {
define void @select_v4i64(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: select_v4i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl4
@@ -379,15 +379,15 @@ define void @select_v4i64(<4 x i64>* %a, <4 x i64>* %b) vscale_range(2,0) #0 {
; CHECK-NEXT: sel z0.d, p1, z0.d, z1.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
%op1 = load <4 x i64>, <4 x i64>* %a
%op2 = load <4 x i64>, <4 x i64>* %b
%op1 = load <4 x i64>, ptr %a
%op2 = load <4 x i64>, ptr %b
%mask = icmp eq <4 x i64> %op1, %op2
%sel = select <4 x i1> %mask, <4 x i64> %op1, <4 x i64> %op2
store <4 x i64> %sel, <4 x i64>* %a
store <4 x i64> %sel, ptr %a
ret void
}

define void @select_v8i64(<8 x i64>* %a, <8 x i64>* %b) #0 {
define void @select_v8i64(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: select_v8i64:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #4
@@ -413,15 +413,15 @@ define void @select_v8i64(<8 x i64>* %a, <8 x i64>* %b) #0 {
; VBITS_GE_512-NEXT: sel z0.d, p1, z0.d, z1.d
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x0]
; VBITS_GE_512-NEXT: ret
%op1 = load <8 x i64>, <8 x i64>* %a
%op2 = load <8 x i64>, <8 x i64>* %b
%op1 = load <8 x i64>, ptr %a
%op2 = load <8 x i64>, ptr %b
%mask = icmp eq <8 x i64> %op1, %op2
%sel = select <8 x i1> %mask, <8 x i64> %op1, <8 x i64> %op2
store <8 x i64> %sel, <8 x i64>* %a
store <8 x i64> %sel, ptr %a
ret void
}

define void @select_v16i64(<16 x i64>* %a, <16 x i64>* %b) vscale_range(8,0) #0 {
define void @select_v16i64(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: select_v16i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl16
@@ -431,15 +431,15 @@ define void @select_v16i64(<16 x i64>* %a, <16 x i64>* %b) vscale_range(8,0) #0
; CHECK-NEXT: sel z0.d, p1, z0.d, z1.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
%op1 = load <16 x i64>, <16 x i64>* %a
%op2 = load <16 x i64>, <16 x i64>* %b
%op1 = load <16 x i64>, ptr %a
%op2 = load <16 x i64>, ptr %b
%mask = icmp eq <16 x i64> %op1, %op2
%sel = select <16 x i1> %mask, <16 x i64> %op1, <16 x i64> %op2
store <16 x i64> %sel, <16 x i64>* %a
store <16 x i64> %sel, ptr %a
ret void
}

define void @select_v32i64(<32 x i64>* %a, <32 x i64>* %b) vscale_range(16,0) #0 {
define void @select_v32i64(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: select_v32i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl32
@@ -449,11 +449,11 @@ define void @select_v32i64(<32 x i64>* %a, <32 x i64>* %b) vscale_range(16,0) #0
; CHECK-NEXT: sel z0.d, p1, z0.d, z1.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
%op1 = load <32 x i64>, <32 x i64>* %a
%op2 = load <32 x i64>, <32 x i64>* %b
%op1 = load <32 x i64>, ptr %a
%op2 = load <32 x i64>, ptr %b
%mask = icmp eq <32 x i64> %op1, %op2
%sel = select <32 x i1> %mask, <32 x i64> %op1, <32 x i64> %op2
store <32 x i64> %sel, <32 x i64>* %a
store <32 x i64> %sel, ptr %a
ret void
}

16 changes: 8 additions & 8 deletions llvm/test/CodeGen/AArch64/sve-fixed-length-limit-duplane.ll
@@ -3,7 +3,7 @@
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
target triple = "aarch64-unknown-linux-gnu"

define <4 x i32> @test(<16 x i32>* %arg1, <16 x i32>* %arg2) {
define <4 x i32> @test(ptr %arg1, ptr %arg2) {
; CHECK-LABEL: test:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov x8, #8
@@ -19,15 +19,15 @@ define <4 x i32> @test(<16 x i32>* %arg1, <16 x i32>* %arg2) {
; CHECK-NEXT: st1w { z2.s }, p0, [x0]
; CHECK-NEXT: ret
entry:
%0 = load <16 x i32>, <16 x i32>* %arg1, align 256
%1 = load <16 x i32>, <16 x i32>* %arg2, align 256
%0 = load <16 x i32>, ptr %arg1, align 256
%1 = load <16 x i32>, ptr %arg2, align 256
%shvec = shufflevector <16 x i32> %0, <16 x i32> %1, <4 x i32> <i32 14, i32 14, i32 14, i32 14>
%2 = add <16 x i32> %0, %0
store <16 x i32> %2, <16 x i32>* %arg1, align 256
store <16 x i32> %2, ptr %arg1, align 256
ret <4 x i32> %shvec
}

define <2 x i32> @test2(<16 x i32>* %arg1, <16 x i32>* %arg2) {
define <2 x i32> @test2(ptr %arg1, ptr %arg2) {
; CHECK-LABEL: test2:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov x8, #8
@@ -43,10 +43,10 @@ define <2 x i32> @test2(<16 x i32>* %arg1, <16 x i32>* %arg2) {
; CHECK-NEXT: st1w { z2.s }, p0, [x0]
; CHECK-NEXT: ret
entry:
%0 = load <16 x i32>, <16 x i32>* %arg1, align 256
%1 = load <16 x i32>, <16 x i32>* %arg2, align 256
%0 = load <16 x i32>, ptr %arg1, align 256
%1 = load <16 x i32>, ptr %arg2, align 256
%shvec = shufflevector <16 x i32> %0, <16 x i32> %1, <2 x i32> <i32 14, i32 14>
%2 = add <16 x i32> %0, %0
store <16 x i32> %2, <16 x i32>* %arg1, align 256
store <16 x i32> %2, ptr %arg1, align 256
ret <2 x i32> %shvec
}
24 changes: 12 additions & 12 deletions llvm/test/CodeGen/AArch64/sve-fixed-length-loads.ll
@@ -19,37 +19,37 @@
target triple = "aarch64-unknown-linux-gnu"

; Don't use SVE for 64-bit vectors.
define <2 x float> @load_v2f32(<2 x float>* %a) #0 {
define <2 x float> @load_v2f32(ptr %a) #0 {
; CHECK-LABEL: load_v2f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ret
%load = load <2 x float>, <2 x float>* %a
%load = load <2 x float>, ptr %a
ret <2 x float> %load
}

; Don't use SVE for 128-bit vectors.
define <4 x float> @load_v4f32(<4 x float>* %a) #0 {
define <4 x float> @load_v4f32(ptr %a) #0 {
; CHECK-LABEL: load_v4f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ret
%load = load <4 x float>, <4 x float>* %a
%load = load <4 x float>, ptr %a
ret <4 x float> %load
}

define <8 x float> @load_v8f32(<8 x float>* %a) #0 {
define <8 x float> @load_v8f32(ptr %a) #0 {
; CHECK-LABEL: load_v8f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl8
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT: st1w { z0.s }, p0, [x8]
; CHECK-NEXT: ret
%load = load <8 x float>, <8 x float>* %a
%load = load <8 x float>, ptr %a
ret <8 x float> %load
}

define <16 x float> @load_v16f32(<16 x float>* %a) #0 {
define <16 x float> @load_v16f32(ptr %a) #0 {
; VBITS_GE_256-LABEL: load_v16f32:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x9, #8
@@ -80,11 +80,11 @@ define <16 x float> @load_v16f32(<16 x float>* %a) #0 {
; VBITS_GE_2048-NEXT: ld1w { z0.s }, p0/z, [x0]
; VBITS_GE_2048-NEXT: st1w { z0.s }, p0, [x8]
; VBITS_GE_2048-NEXT: ret
%load = load <16 x float>, <16 x float>* %a
%load = load <16 x float>, ptr %a
ret <16 x float> %load
}

define <32 x float> @load_v32f32(<32 x float>* %a) #0 {
define <32 x float> @load_v32f32(ptr %a) #0 {
; VBITS_GE_256-LABEL: load_v32f32:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x9, #16
@@ -124,11 +124,11 @@ define <32 x float> @load_v32f32(<32 x float>* %a) #0 {
; VBITS_GE_2048-NEXT: ld1w { z0.s }, p0/z, [x0]
; VBITS_GE_2048-NEXT: st1w { z0.s }, p0, [x8]
; VBITS_GE_2048-NEXT: ret
%load = load <32 x float>, <32 x float>* %a
%load = load <32 x float>, ptr %a
ret <32 x float> %load
}

define <64 x float> @load_v64f32(<64 x float>* %a) #0 {
define <64 x float> @load_v64f32(ptr %a) #0 {
; VBITS_GE_256-LABEL: load_v64f32:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x9, #8
@@ -189,7 +189,7 @@ define <64 x float> @load_v64f32(<64 x float>* %a) #0 {
; VBITS_GE_2048-NEXT: ld1w { z0.s }, p0/z, [x0]
; VBITS_GE_2048-NEXT: st1w { z0.s }, p0, [x8]
; VBITS_GE_2048-NEXT: ret
%load = load <64 x float>, <64 x float>* %a
%load = load <64 x float>, ptr %a
ret <64 x float> %load
}

192 changes: 96 additions & 96 deletions llvm/test/CodeGen/AArch64/sve-fixed-length-log-reduce.ll

Large diffs are not rendered by default.

200 changes: 100 additions & 100 deletions llvm/test/CodeGen/AArch64/sve-fixed-length-mask-opt.ll

Large diffs are not rendered by default.

608 changes: 304 additions & 304 deletions llvm/test/CodeGen/AArch64/sve-fixed-length-masked-gather.ll

Large diffs are not rendered by default.

374 changes: 187 additions & 187 deletions llvm/test/CodeGen/AArch64/sve-fixed-length-masked-loads.ll

Large diffs are not rendered by default.

500 changes: 250 additions & 250 deletions llvm/test/CodeGen/AArch64/sve-fixed-length-masked-scatter.ll

Large diffs are not rendered by default.

130 changes: 65 additions & 65 deletions llvm/test/CodeGen/AArch64/sve-fixed-length-masked-stores.ll
@@ -9,7 +9,7 @@ target triple = "aarch64-unknown-linux-gnu"
; Masked Stores
;

define void @masked_store_v2f16(<2 x half>* %ap, <2 x half>* %bp) vscale_range(2,0) #0 {
define void @masked_store_v2f16(ptr %ap, ptr %bp) vscale_range(2,0) #0 {
; CHECK-LABEL: masked_store_v2f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr s1, [x0]
@@ -27,14 +27,14 @@ define void @masked_store_v2f16(<2 x half>* %ap, <2 x half>* %bp) vscale_range(2
; CHECK-NEXT: cmpne p0.h, p0/z, z0.h, #0
; CHECK-NEXT: st1h { z1.h }, p0, [x1]
; CHECK-NEXT: ret
%a = load <2 x half>, <2 x half>* %ap
%b = load <2 x half>, <2 x half>* %bp
%a = load <2 x half>, ptr %ap
%b = load <2 x half>, ptr %bp
%mask = fcmp oeq <2 x half> %a, %b
call void @llvm.masked.store.v2f16(<2 x half> %a, <2 x half>* %bp, i32 8, <2 x i1> %mask)
call void @llvm.masked.store.v2f16(<2 x half> %a, ptr %bp, i32 8, <2 x i1> %mask)
ret void
}

define void @masked_store_v2f32(<2 x float>* %ap, <2 x float>* %bp) vscale_range(2,0) #0 {
define void @masked_store_v2f32(ptr %ap, ptr %bp) vscale_range(2,0) #0 {
; CHECK-LABEL: masked_store_v2f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
@@ -44,14 +44,14 @@ define void @masked_store_v2f32(<2 x float>* %ap, <2 x float>* %bp) vscale_range
; CHECK-NEXT: cmpne p0.s, p0/z, z1.s, #0
; CHECK-NEXT: st1w { z0.s }, p0, [x1]
; CHECK-NEXT: ret
%a = load <2 x float>, <2 x float>* %ap
%b = load <2 x float>, <2 x float>* %bp
%a = load <2 x float>, ptr %ap
%b = load <2 x float>, ptr %bp
%mask = fcmp oeq <2 x float> %a, %b
call void @llvm.masked.store.v2f32(<2 x float> %a, <2 x float>* %bp, i32 8, <2 x i1> %mask)
call void @llvm.masked.store.v2f32(<2 x float> %a, ptr %bp, i32 8, <2 x i1> %mask)
ret void
}

define void @masked_store_v4f32(<4 x float>* %ap, <4 x float>* %bp) vscale_range(2,0) #0 {
define void @masked_store_v4f32(ptr %ap, ptr %bp) vscale_range(2,0) #0 {
; CHECK-LABEL: masked_store_v4f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
@@ -61,14 +61,14 @@ define void @masked_store_v4f32(<4 x float>* %ap, <4 x float>* %bp) vscale_range
; CHECK-NEXT: cmpne p0.s, p0/z, z1.s, #0
; CHECK-NEXT: st1w { z0.s }, p0, [x1]
; CHECK-NEXT: ret
%a = load <4 x float>, <4 x float>* %ap
%b = load <4 x float>, <4 x float>* %bp
%a = load <4 x float>, ptr %ap
%b = load <4 x float>, ptr %bp
%mask = fcmp oeq <4 x float> %a, %b
call void @llvm.masked.store.v4f32(<4 x float> %a, <4 x float>* %bp, i32 8, <4 x i1> %mask)
call void @llvm.masked.store.v4f32(<4 x float> %a, ptr %bp, i32 8, <4 x i1> %mask)
ret void
}

define void @masked_store_v8f32(<8 x float>* %ap, <8 x float>* %bp) vscale_range(2,0) #0 {
define void @masked_store_v8f32(ptr %ap, ptr %bp) vscale_range(2,0) #0 {
; CHECK-LABEL: masked_store_v8f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl8
@@ -77,14 +77,14 @@ define void @masked_store_v8f32(<8 x float>* %ap, <8 x float>* %bp) vscale_range
; CHECK-NEXT: fcmeq p0.s, p0/z, z0.s, z1.s
; CHECK-NEXT: st1w { z0.s }, p0, [x1]
; CHECK-NEXT: ret
%a = load <8 x float>, <8 x float>* %ap
%b = load <8 x float>, <8 x float>* %bp
%a = load <8 x float>, ptr %ap
%b = load <8 x float>, ptr %bp
%mask = fcmp oeq <8 x float> %a, %b
call void @llvm.masked.store.v8f32(<8 x float> %a, <8 x float>* %bp, i32 8, <8 x i1> %mask)
call void @llvm.masked.store.v8f32(<8 x float> %a, ptr %bp, i32 8, <8 x i1> %mask)
ret void
}

define void @masked_store_v16f32(<16 x float>* %ap, <16 x float>* %bp) #0 {
define void @masked_store_v16f32(ptr %ap, ptr %bp) #0 {
; VBITS_GE_256-LABEL: masked_store_v16f32:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #8
@@ -107,14 +107,14 @@ define void @masked_store_v16f32(<16 x float>* %ap, <16 x float>* %bp) #0 {
; VBITS_GE_512-NEXT: fcmeq p0.s, p0/z, z0.s, z1.s
; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x0]
; VBITS_GE_512-NEXT: ret
%a = load <16 x float>, <16 x float>* %ap
%b = load <16 x float>, <16 x float>* %bp
%a = load <16 x float>, ptr %ap
%b = load <16 x float>, ptr %bp
%mask = fcmp oeq <16 x float> %a, %b
call void @llvm.masked.store.v16f32(<16 x float> %a, <16 x float>* %ap, i32 8, <16 x i1> %mask)
call void @llvm.masked.store.v16f32(<16 x float> %a, ptr %ap, i32 8, <16 x i1> %mask)
ret void
}

define void @masked_store_v32f32(<32 x float>* %ap, <32 x float>* %bp) vscale_range(8,0) #0 {
define void @masked_store_v32f32(ptr %ap, ptr %bp) vscale_range(8,0) #0 {
; CHECK-LABEL: masked_store_v32f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl32
@@ -123,14 +123,14 @@ define void @masked_store_v32f32(<32 x float>* %ap, <32 x float>* %bp) vscale_ra
; CHECK-NEXT: fcmeq p0.s, p0/z, z0.s, z1.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
%a = load <32 x float>, <32 x float>* %ap
%b = load <32 x float>, <32 x float>* %bp
%a = load <32 x float>, ptr %ap
%b = load <32 x float>, ptr %bp
%mask = fcmp oeq <32 x float> %a, %b
call void @llvm.masked.store.v32f32(<32 x float> %a, <32 x float>* %ap, i32 8, <32 x i1> %mask)
call void @llvm.masked.store.v32f32(<32 x float> %a, ptr %ap, i32 8, <32 x i1> %mask)
ret void
}

define void @masked_store_v64f32(<64 x float>* %ap, <64 x float>* %bp) vscale_range(16,0) #0 {
define void @masked_store_v64f32(ptr %ap, ptr %bp) vscale_range(16,0) #0 {
; CHECK-LABEL: masked_store_v64f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl64
@@ -139,14 +139,14 @@ define void @masked_store_v64f32(<64 x float>* %ap, <64 x float>* %bp) vscale_ra
; CHECK-NEXT: fcmeq p0.s, p0/z, z0.s, z1.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
%a = load <64 x float>, <64 x float>* %ap
%b = load <64 x float>, <64 x float>* %bp
%a = load <64 x float>, ptr %ap
%b = load <64 x float>, ptr %bp
%mask = fcmp oeq <64 x float> %a, %b
call void @llvm.masked.store.v64f32(<64 x float> %a, <64 x float>* %ap, i32 8, <64 x i1> %mask)
call void @llvm.masked.store.v64f32(<64 x float> %a, ptr %ap, i32 8, <64 x i1> %mask)
ret void
}

define void @masked_store_trunc_v8i64i8(<8 x i64>* %ap, <8 x i64>* %bp, <8 x i8>* %dest) #0 {
define void @masked_store_trunc_v8i64i8(ptr %ap, ptr %bp, ptr %dest) #0 {
; VBITS_GE_256-LABEL: masked_store_trunc_v8i64i8:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #4
@@ -179,15 +179,15 @@ define void @masked_store_trunc_v8i64i8(<8 x i64>* %ap, <8 x i64>* %bp, <8 x i8>
; VBITS_GE_512-NEXT: cmpeq p0.d, p0/z, z0.d, z1.d
; VBITS_GE_512-NEXT: st1b { z0.d }, p0, [x2]
; VBITS_GE_512-NEXT: ret
%a = load <8 x i64>, <8 x i64>* %ap
%b = load <8 x i64>, <8 x i64>* %bp
%a = load <8 x i64>, ptr %ap
%b = load <8 x i64>, ptr %bp
%mask = icmp eq <8 x i64> %a, %b
%val = trunc <8 x i64> %a to <8 x i8>
call void @llvm.masked.store.v8i8(<8 x i8> %val, <8 x i8>* %dest, i32 8, <8 x i1> %mask)
call void @llvm.masked.store.v8i8(<8 x i8> %val, ptr %dest, i32 8, <8 x i1> %mask)
ret void
}

define void @masked_store_trunc_v8i64i16(<8 x i64>* %ap, <8 x i64>* %bp, <8 x i16>* %dest) #0 {
define void @masked_store_trunc_v8i64i16(ptr %ap, ptr %bp, ptr %dest) #0 {
; VBITS_GE_256-LABEL: masked_store_trunc_v8i64i16:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #4
@@ -223,15 +223,15 @@ define void @masked_store_trunc_v8i64i16(<8 x i64>* %ap, <8 x i64>* %bp, <8 x i1
; VBITS_GE_512-NEXT: cmpeq p0.d, p0/z, z0.d, z1.d
; VBITS_GE_512-NEXT: st1h { z0.d }, p0, [x2]
; VBITS_GE_512-NEXT: ret
%a = load <8 x i64>, <8 x i64>* %ap
%b = load <8 x i64>, <8 x i64>* %bp
%a = load <8 x i64>, ptr %ap
%b = load <8 x i64>, ptr %bp
%mask = icmp eq <8 x i64> %a, %b
%val = trunc <8 x i64> %a to <8 x i16>
call void @llvm.masked.store.v8i16(<8 x i16> %val, <8 x i16>* %dest, i32 8, <8 x i1> %mask)
call void @llvm.masked.store.v8i16(<8 x i16> %val, ptr %dest, i32 8, <8 x i1> %mask)
ret void
}

define void @masked_store_trunc_v8i64i32(<8 x i64>* %ap, <8 x i64>* %bp, <8 x i32>* %dest) #0 {
define void @masked_store_trunc_v8i64i32(ptr %ap, ptr %bp, ptr %dest) #0 {
; VBITS_GE_256-LABEL: masked_store_trunc_v8i64i32:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #4
@@ -264,15 +264,15 @@ define void @masked_store_trunc_v8i64i32(<8 x i64>* %ap, <8 x i64>* %bp, <8 x i3
; VBITS_GE_512-NEXT: cmpeq p0.d, p0/z, z0.d, z1.d
; VBITS_GE_512-NEXT: st1w { z0.d }, p0, [x2]
; VBITS_GE_512-NEXT: ret
%a = load <8 x i64>, <8 x i64>* %ap
%b = load <8 x i64>, <8 x i64>* %bp
%a = load <8 x i64>, ptr %ap
%b = load <8 x i64>, ptr %bp
%mask = icmp eq <8 x i64> %a, %b
%val = trunc <8 x i64> %a to <8 x i32>
call void @llvm.masked.store.v8i32(<8 x i32> %val, <8 x i32>* %dest, i32 8, <8 x i1> %mask)
call void @llvm.masked.store.v8i32(<8 x i32> %val, ptr %dest, i32 8, <8 x i1> %mask)
ret void
}

define void @masked_store_trunc_v16i32i8(<16 x i32>* %ap, <16 x i32>* %bp, <16 x i8>* %dest) #0 {
define void @masked_store_trunc_v16i32i8(ptr %ap, ptr %bp, ptr %dest) #0 {
; VBITS_GE_256-LABEL: masked_store_trunc_v16i32i8:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #8
@@ -308,15 +308,15 @@ define void @masked_store_trunc_v16i32i8(<16 x i32>* %ap, <16 x i32>* %bp, <16 x
; VBITS_GE_512-NEXT: cmpeq p0.s, p0/z, z0.s, z1.s
; VBITS_GE_512-NEXT: st1b { z0.s }, p0, [x2]
; VBITS_GE_512-NEXT: ret
%a = load <16 x i32>, <16 x i32>* %ap
%b = load <16 x i32>, <16 x i32>* %bp
%a = load <16 x i32>, ptr %ap
%b = load <16 x i32>, ptr %bp
%mask = icmp eq <16 x i32> %a, %b
%val = trunc <16 x i32> %a to <16 x i8>
call void @llvm.masked.store.v16i8(<16 x i8> %val, <16 x i8>* %dest, i32 8, <16 x i1> %mask)
call void @llvm.masked.store.v16i8(<16 x i8> %val, ptr %dest, i32 8, <16 x i1> %mask)
ret void
}

define void @masked_store_trunc_v16i32i16(<16 x i32>* %ap, <16 x i32>* %bp, <16 x i16>* %dest) #0 {
define void @masked_store_trunc_v16i32i16(ptr %ap, ptr %bp, ptr %dest) #0 {
; VBITS_GE_256-LABEL: masked_store_trunc_v16i32i16:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #8
@@ -352,15 +352,15 @@ define void @masked_store_trunc_v16i32i16(<16 x i32>* %ap, <16 x i32>* %bp, <16
; VBITS_GE_512-NEXT: cmpeq p0.s, p0/z, z0.s, z1.s
; VBITS_GE_512-NEXT: st1h { z0.s }, p0, [x2]
; VBITS_GE_512-NEXT: ret
%a = load <16 x i32>, <16 x i32>* %ap
%b = load <16 x i32>, <16 x i32>* %bp
%a = load <16 x i32>, ptr %ap
%b = load <16 x i32>, ptr %bp
%mask = icmp eq <16 x i32> %a, %b
%val = trunc <16 x i32> %a to <16 x i16>
call void @llvm.masked.store.v16i16(<16 x i16> %val, <16 x i16>* %dest, i32 8, <16 x i1> %mask)
call void @llvm.masked.store.v16i16(<16 x i16> %val, ptr %dest, i32 8, <16 x i1> %mask)
ret void
}

define void @masked_store_trunc_v32i16i8(<32 x i16>* %ap, <32 x i16>* %bp, <32 x i8>* %dest) #0 {
define void @masked_store_trunc_v32i16i8(ptr %ap, ptr %bp, ptr %dest) #0 {
; VBITS_GE_256-LABEL: masked_store_trunc_v32i16i8:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #16
@@ -393,27 +393,27 @@ define void @masked_store_trunc_v32i16i8(<32 x i16>* %ap, <32 x i16>* %bp, <32 x
; VBITS_GE_512-NEXT: cmpeq p0.h, p0/z, z0.h, z1.h
; VBITS_GE_512-NEXT: st1b { z0.h }, p0, [x2]
; VBITS_GE_512-NEXT: ret
%a = load <32 x i16>, <32 x i16>* %ap
%b = load <32 x i16>, <32 x i16>* %bp
%a = load <32 x i16>, ptr %ap
%b = load <32 x i16>, ptr %bp
%mask = icmp eq <32 x i16> %a, %b
%val = trunc <32 x i16> %a to <32 x i8>
call void @llvm.masked.store.v32i8(<32 x i8> %val, <32 x i8>* %dest, i32 8, <32 x i1> %mask)
call void @llvm.masked.store.v32i8(<32 x i8> %val, ptr %dest, i32 8, <32 x i1> %mask)
ret void
}

declare void @llvm.masked.store.v2f16(<2 x half>, <2 x half>*, i32, <2 x i1>)
declare void @llvm.masked.store.v2f32(<2 x float>, <2 x float>*, i32, <2 x i1>)
declare void @llvm.masked.store.v4f32(<4 x float>, <4 x float>*, i32, <4 x i1>)
declare void @llvm.masked.store.v8f32(<8 x float>, <8 x float>*, i32, <8 x i1>)
declare void @llvm.masked.store.v16f32(<16 x float>, <16 x float>*, i32, <16 x i1>)
declare void @llvm.masked.store.v32f32(<32 x float>, <32 x float>*, i32, <32 x i1>)
declare void @llvm.masked.store.v64f32(<64 x float>, <64 x float>*, i32, <64 x i1>)
declare void @llvm.masked.store.v2f16(<2 x half>, ptr, i32, <2 x i1>)
declare void @llvm.masked.store.v2f32(<2 x float>, ptr, i32, <2 x i1>)
declare void @llvm.masked.store.v4f32(<4 x float>, ptr, i32, <4 x i1>)
declare void @llvm.masked.store.v8f32(<8 x float>, ptr, i32, <8 x i1>)
declare void @llvm.masked.store.v16f32(<16 x float>, ptr, i32, <16 x i1>)
declare void @llvm.masked.store.v32f32(<32 x float>, ptr, i32, <32 x i1>)
declare void @llvm.masked.store.v64f32(<64 x float>, ptr, i32, <64 x i1>)

declare void @llvm.masked.store.v8i8(<8 x i8>, <8 x i8>*, i32, <8 x i1>)
declare void @llvm.masked.store.v8i16(<8 x i16>, <8 x i16>*, i32, <8 x i1>)
declare void @llvm.masked.store.v8i32(<8 x i32>, <8 x i32>*, i32, <8 x i1>)
declare void @llvm.masked.store.v16i8(<16 x i8>, <16 x i8>*, i32, <16 x i1>)
declare void @llvm.masked.store.v16i16(<16 x i16>, <16 x i16>*, i32, <16 x i1>)
declare void @llvm.masked.store.v32i8(<32 x i8>, <32 x i8>*, i32, <32 x i1>)
declare void @llvm.masked.store.v8i8(<8 x i8>, ptr, i32, <8 x i1>)
declare void @llvm.masked.store.v8i16(<8 x i16>, ptr, i32, <8 x i1>)
declare void @llvm.masked.store.v8i32(<8 x i32>, ptr, i32, <8 x i1>)
declare void @llvm.masked.store.v16i8(<16 x i8>, ptr, i32, <16 x i1>)
declare void @llvm.masked.store.v16i16(<16 x i16>, ptr, i32, <16 x i1>)
declare void @llvm.masked.store.v32i8(<32 x i8>, ptr, i32, <32 x i1>)

attributes #0 = { "target-features"="+sve" }
52 changes: 26 additions & 26 deletions llvm/test/CodeGen/AArch64/sve-fixed-length-optimize-ptrue.ll
@@ -3,7 +3,7 @@

target triple = "aarch64-unknown-linux-gnu"

define void @add_v64i8(<64 x i8>* %a, <64 x i8>* %b) #0 {
define void @add_v64i8(ptr %a, ptr %b) #0 {
; CHECK-LABEL: add_v64i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b
@@ -12,14 +12,14 @@ define void @add_v64i8(<64 x i8>* %a, <64 x i8>* %b) #0 {
; CHECK-NEXT: add z0.b, z0.b, z1.b
; CHECK-NEXT: st1b { z0.b }, p0, [x0]
; CHECK-NEXT: ret
%op1 = load <64 x i8>, <64 x i8>* %a
%op2 = load <64 x i8>, <64 x i8>* %b
%op1 = load <64 x i8>, ptr %a
%op2 = load <64 x i8>, ptr %b
%res = add <64 x i8> %op1, %op2
store <64 x i8> %res, <64 x i8>* %a
store <64 x i8> %res, ptr %a
ret void
}

define void @add_v32i16(<32 x i16>* %a, <32 x i16>* %b, <32 x i16>* %c) #0 {
define void @add_v32i16(ptr %a, ptr %b, ptr %c) #0 {
; CHECK-LABEL: add_v32i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h
@@ -28,42 +28,42 @@ define void @add_v32i16(<32 x i16>* %a, <32 x i16>* %b, <32 x i16>* %c) #0 {
; CHECK-NEXT: add z0.h, z0.h, z1.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
%op1 = load <32 x i16>, <32 x i16>* %a
%op2 = load <32 x i16>, <32 x i16>* %b
%op1 = load <32 x i16>, ptr %a
%op2 = load <32 x i16>, ptr %b
%res = add <32 x i16> %op1, %op2
store <32 x i16> %res, <32 x i16>* %a
store <32 x i16> %res, ptr %a
ret void
}

define void @abs_v16i32(<16 x i32>* %a) #0 {
define void @abs_v16i32(ptr %a) #0 {
; CHECK-LABEL: abs_v16i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT: abs z0.s, p0/m, z0.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
%op1 = load <16 x i32>, <16 x i32>* %a
%op1 = load <16 x i32>, ptr %a
%res = call <16 x i32> @llvm.abs.v16i32(<16 x i32> %op1, i1 false)
store <16 x i32> %res, <16 x i32>* %a
store <16 x i32> %res, ptr %a
ret void
}

define void @abs_v8i64(<8 x i64>* %a) #0 {
define void @abs_v8i64(ptr %a) #0 {
; CHECK-LABEL: abs_v8i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT: abs z0.d, p0/m, z0.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
%op1 = load <8 x i64>, <8 x i64>* %a
%op1 = load <8 x i64>, ptr %a
%res = call <8 x i64> @llvm.abs.v8i64(<8 x i64> %op1, i1 false)
store <8 x i64> %res, <8 x i64>* %a
store <8 x i64> %res, ptr %a
ret void
}

define void @fadd_v32f16(<32 x half>* %a, <32 x half>* %b) #0 {
define void @fadd_v32f16(ptr %a, ptr %b) #0 {
; CHECK-LABEL: fadd_v32f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h
@@ -72,14 +72,14 @@ define void @fadd_v32f16(<32 x half>* %a, <32 x half>* %b) #0 {
; CHECK-NEXT: fadd z0.h, z0.h, z1.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
%op1 = load <32 x half>, <32 x half>* %a
%op2 = load <32 x half>, <32 x half>* %b
%op1 = load <32 x half>, ptr %a
%op2 = load <32 x half>, ptr %b
%res = fadd <32 x half> %op1, %op2
store <32 x half> %res, <32 x half>* %a
store <32 x half> %res, ptr %a
ret void
}

define void @fadd_v16f32(<16 x float>* %a, <16 x float>* %b) #0 {
define void @fadd_v16f32(ptr %a, ptr %b) #0 {
; CHECK-LABEL: fadd_v16f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
@@ -88,14 +88,14 @@ define void @fadd_v16f32(<16 x float>* %a, <16 x float>* %b) #0 {
; CHECK-NEXT: fadd z0.s, z0.s, z1.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
%op1 = load <16 x float>, <16 x float>* %a
%op2 = load <16 x float>, <16 x float>* %b
%op1 = load <16 x float>, ptr %a
%op2 = load <16 x float>, ptr %b
%res = fadd <16 x float> %op1, %op2
store <16 x float> %res, <16 x float>* %a
store <16 x float> %res, ptr %a
ret void
}

define void @fadd_v8f64(<8 x double>* %a, <8 x double>* %b) #0 {
define void @fadd_v8f64(ptr %a, ptr %b) #0 {
; CHECK-LABEL: fadd_v8f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
@@ -104,10 +104,10 @@ define void @fadd_v8f64(<8 x double>* %a, <8 x double>* %b) #0 {
; CHECK-NEXT: fadd z0.d, z0.d, z1.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
%op1 = load <8 x double>, <8 x double>* %a
%op2 = load <8 x double>, <8 x double>* %b
%op1 = load <8 x double>, ptr %a
%op2 = load <8 x double>, ptr %b
%res = fadd <8 x double> %op1, %op2
store <8 x double> %res, <8 x double>* %a
store <8 x double> %res, ptr %a
ret void
}

124 changes: 62 additions & 62 deletions llvm/test/CodeGen/AArch64/sve-fixed-length-permute-rev.ll

Large diffs are not rendered by default.

200 changes: 100 additions & 100 deletions llvm/test/CodeGen/AArch64/sve-fixed-length-permute-zip-uzp-trn.ll

Large diffs are not rendered by default.

39 changes: 15 additions & 24 deletions llvm/test/CodeGen/AArch64/sve-fixed-length-ptest.ll
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64 -mattr=+sve < %s | FileCheck %s

define i1 @ptest_v16i1_256bit_min_sve(float* %a, float * %b) vscale_range(2, 0) {
define i1 @ptest_v16i1_256bit_min_sve(ptr %a, ptr %b) vscale_range(2, 0) {
; CHECK-LABEL: ptest_v16i1_256bit_min_sve:
; CHECK: // %bb.0:
; CHECK-NEXT: mov x8, #8
@@ -22,14 +22,13 @@ define i1 @ptest_v16i1_256bit_min_sve(float* %a, float * %b) vscale_range(2, 0)
; CHECK-NEXT: fmov w8, s0
; CHECK-NEXT: and w0, w8, #0x1
; CHECK-NEXT: ret
%v0 = bitcast float* %a to <16 x float>*
%v1 = load <16 x float>, <16 x float>* %v0, align 4
%v1 = load <16 x float>, ptr %a, align 4
%v2 = fcmp une <16 x float> %v1, zeroinitializer
%v3 = call i1 @llvm.vector.reduce.or.i1.v16i1 (<16 x i1> %v2)
ret i1 %v3
}
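
This file is the one in the change that nets out smaller (15 additions against 24
deletions): the typed-pointer version had to bitcast the float argument to a vector
pointer before loading from it, and with opaque pointers that cast carries no
information, so it is deleted rather than rewritten. A sketch of the pattern,
reusing the operands of the function above:

; With typed pointers, a cast was needed before the wide load:
;   %v0 = bitcast float* %a to <16 x float>*
;   %v1 = load <16 x float>, <16 x float>* %v0, align 4
; With opaque pointers, the load reads straight from the argument:
%v1 = load <16 x float>, ptr %a, align 4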

define i1 @ptest_v16i1_512bit_min_sve(float* %a, float * %b) vscale_range(4, 0) {
define i1 @ptest_v16i1_512bit_min_sve(ptr %a, ptr %b) vscale_range(4, 0) {
; CHECK-LABEL: ptest_v16i1_512bit_min_sve:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl16
@@ -43,14 +42,13 @@ define i1 @ptest_v16i1_512bit_min_sve(float* %a, float * %b) vscale_range(4, 0)
; CHECK-NEXT: fmov w8, s0
; CHECK-NEXT: and w0, w8, #0x1
; CHECK-NEXT: ret
%v0 = bitcast float* %a to <16 x float>*
%v1 = load <16 x float>, <16 x float>* %v0, align 4
%v1 = load <16 x float>, ptr %a, align 4
%v2 = fcmp une <16 x float> %v1, zeroinitializer
%v3 = call i1 @llvm.vector.reduce.or.i1.v16i1 (<16 x i1> %v2)
ret i1 %v3
}

define i1 @ptest_v16i1_512bit_sve(float* %a, float * %b) vscale_range(4, 4) {
define i1 @ptest_v16i1_512bit_sve(ptr %a, ptr %b) vscale_range(4, 4) {
; CHECK-LABEL: ptest_v16i1_512bit_sve:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
@@ -64,14 +62,13 @@ define i1 @ptest_v16i1_512bit_sve(float* %a, float * %b) vscale_range(4, 4) {
; CHECK-NEXT: fmov w8, s0
; CHECK-NEXT: and w0, w8, #0x1
; CHECK-NEXT: ret
%v0 = bitcast float* %a to <16 x float>*
%v1 = load <16 x float>, <16 x float>* %v0, align 4
%v1 = load <16 x float>, ptr %a, align 4
%v2 = fcmp une <16 x float> %v1, zeroinitializer
%v3 = call i1 @llvm.vector.reduce.or.i1.v16i1 (<16 x i1> %v2)
ret i1 %v3
}

define i1 @ptest_or_v16i1_512bit_min_sve(float* %a, float * %b) vscale_range(4, 0) {
define i1 @ptest_or_v16i1_512bit_min_sve(ptr %a, ptr %b) vscale_range(4, 0) {
; CHECK-LABEL: ptest_or_v16i1_512bit_min_sve:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl16
@@ -88,11 +85,9 @@ define i1 @ptest_or_v16i1_512bit_min_sve(float* %a, float * %b) vscale_range(4,
; CHECK-NEXT: fmov w8, s0
; CHECK-NEXT: and w0, w8, #0x1
; CHECK-NEXT: ret
%v0 = bitcast float* %a to <16 x float>*
%v1 = load <16 x float>, <16 x float>* %v0, align 4
%v1 = load <16 x float>, ptr %a, align 4
%v2 = fcmp une <16 x float> %v1, zeroinitializer
%v3 = bitcast float* %b to <16 x float>*
%v4 = load <16 x float>, <16 x float>* %v3, align 4
%v4 = load <16 x float>, ptr %b, align 4
%v5 = fcmp une <16 x float> %v4, zeroinitializer
%v6 = or <16 x i1> %v2, %v5
%v7 = call i1 @llvm.vector.reduce.or.i1.v16i1 (<16 x i1> %v6)
@@ -105,7 +100,7 @@ declare i1 @llvm.vector.reduce.or.i1.v16i1(<16 x i1>)
; AND reduction.
;

define i1 @ptest_and_v16i1_512bit_sve(float* %a, float * %b) vscale_range(4, 4) {
define i1 @ptest_and_v16i1_512bit_sve(ptr %a, ptr %b) vscale_range(4, 4) {
; CHECK-LABEL: ptest_and_v16i1_512bit_sve:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
@@ -121,18 +116,16 @@ define i1 @ptest_and_v16i1_512bit_sve(float* %a, float * %b) vscale_range(4, 4)
; CHECK-NEXT: fmov w8, s0
; CHECK-NEXT: and w0, w8, #0x1
; CHECK-NEXT: ret
%v0 = bitcast float* %a to <16 x float>*
%v1 = load <16 x float>, <16 x float>* %v0, align 4
%v1 = load <16 x float>, ptr %a, align 4
%v2 = fcmp une <16 x float> %v1, zeroinitializer
%v3 = bitcast float* %b to <16 x float>*
%v4 = load <16 x float>, <16 x float>* %v3, align 4
%v4 = load <16 x float>, ptr %b, align 4
%v5 = fcmp une <16 x float> %v4, zeroinitializer
%v6 = and <16 x i1> %v2, %v5
%v7 = call i1 @llvm.vector.reduce.and.i1.v16i1 (<16 x i1> %v6)
ret i1 %v7
}

define i1 @ptest_and_v16i1_512bit_min_sve(float* %a, float * %b) vscale_range(4, 0) {
define i1 @ptest_and_v16i1_512bit_min_sve(ptr %a, ptr %b) vscale_range(4, 0) {
; CHECK-LABEL: ptest_and_v16i1_512bit_min_sve:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl16
@@ -149,11 +142,9 @@ define i1 @ptest_and_v16i1_512bit_min_sve(float* %a, float * %b) vscale_range(4,
; CHECK-NEXT: fmov w8, s0
; CHECK-NEXT: and w0, w8, #0x1
; CHECK-NEXT: ret
%v0 = bitcast float* %a to <16 x float>*
%v1 = load <16 x float>, <16 x float>* %v0, align 4
%v1 = load <16 x float>, ptr %a, align 4
%v2 = fcmp une <16 x float> %v1, zeroinitializer
%v3 = bitcast float* %b to <16 x float>*
%v4 = load <16 x float>, <16 x float>* %v3, align 4
%v4 = load <16 x float>, ptr %b, align 4
%v5 = fcmp une <16 x float> %v4, zeroinitializer
%v6 = and <16 x i1> %v2, %v5
%v7 = call i1 @llvm.vector.reduce.and.i1.v16i1 (<16 x i1> %v6)
168 changes: 84 additions & 84 deletions llvm/test/CodeGen/AArch64/sve-fixed-length-rev.ll

Large diffs are not rendered by default.

96 changes: 48 additions & 48 deletions llvm/test/CodeGen/AArch64/sve-fixed-length-sdiv-pow2.ll
@@ -29,21 +29,21 @@ define <16 x i8> @sdiv_v16i8(<16 x i8> %op1) vscale_range(2,0) #0 {
ret <16 x i8> %res
}

define void @sdiv_v32i8(<32 x i8>* %a) vscale_range(2,0) #0 {
define void @sdiv_v32i8(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: sdiv_v32i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl32
; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0]
; CHECK-NEXT: asrd z0.b, p0/m, z0.b, #5
; CHECK-NEXT: st1b { z0.b }, p0, [x0]
; CHECK-NEXT: ret
%op1 = load <32 x i8>, <32 x i8>* %a
%op1 = load <32 x i8>, ptr %a
%res = sdiv <32 x i8> %op1, shufflevector (<32 x i8> insertelement (<32 x i8> poison, i8 32, i32 0), <32 x i8> poison, <32 x i32> zeroinitializer)
store <32 x i8> %res, <32 x i8>* %a
store <32 x i8> %res, ptr %a
ret void
}
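
A note on the constant idiom repeated throughout this file: an expression of the form
shufflevector (insertelement (<N x iK> poison, iK 32, i32 0), <N x iK> poison, zeroinitializer)
is simply the splat whose lanes are all 32, so every test divides by 32 = 2^5, and
the expected lowering is a single asrd (arithmetic shift right for divide) rather
than a real division. An illustrative four-lane equivalent, not taken from the file:

; sdiv by an all-32 splat of the kind spelled out above ...
%res = sdiv <4 x i8> %x, <i8 32, i8 32, i8 32, i8 32>
; ... which the SVE backend folds to one predicated shift, matching the CHECK lines:
;   asrd z0.b, p0/m, z0.b, #5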

define void @sdiv_v64i8(<64 x i8>* %a) #0 {
define void @sdiv_v64i8(ptr %a) #0 {
; VBITS_GE_256-LABEL: sdiv_v64i8:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov w8, #32
@@ -63,37 +63,37 @@ define void @sdiv_v64i8(<64 x i8>* %a) #0 {
; VBITS_GE_512-NEXT: asrd z0.b, p0/m, z0.b, #5
; VBITS_GE_512-NEXT: st1b { z0.b }, p0, [x0]
; VBITS_GE_512-NEXT: ret
%op1 = load <64 x i8>, <64 x i8>* %a
%op1 = load <64 x i8>, ptr %a
%res = sdiv <64 x i8> %op1, shufflevector (<64 x i8> insertelement (<64 x i8> poison, i8 32, i32 0), <64 x i8> poison, <64 x i32> zeroinitializer)
store <64 x i8> %res, <64 x i8>* %a
store <64 x i8> %res, ptr %a
ret void
}

define void @sdiv_v128i8(<128 x i8>* %a) vscale_range(8,0) #0 {
define void @sdiv_v128i8(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: sdiv_v128i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl128
; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0]
; CHECK-NEXT: asrd z0.b, p0/m, z0.b, #5
; CHECK-NEXT: st1b { z0.b }, p0, [x0]
; CHECK-NEXT: ret
%op1 = load <128 x i8>, <128 x i8>* %a
%op1 = load <128 x i8>, ptr %a
%res = sdiv <128 x i8> %op1, shufflevector (<128 x i8> insertelement (<128 x i8> poison, i8 32, i32 0), <128 x i8> poison, <128 x i32> zeroinitializer)
store <128 x i8> %res, <128 x i8>* %a
store <128 x i8> %res, ptr %a
ret void
}

define void @sdiv_v256i8(<256 x i8>* %a) vscale_range(16,0) #0 {
define void @sdiv_v256i8(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: sdiv_v256i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl256
; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0]
; CHECK-NEXT: asrd z0.b, p0/m, z0.b, #5
; CHECK-NEXT: st1b { z0.b }, p0, [x0]
; CHECK-NEXT: ret
%op1 = load <256 x i8>, <256 x i8>* %a
%op1 = load <256 x i8>, ptr %a
%res = sdiv <256 x i8> %op1, shufflevector (<256 x i8> insertelement (<256 x i8> poison, i8 32, i32 0), <256 x i8> poison, <256 x i32> zeroinitializer)
store <256 x i8> %res, <256 x i8>* %a
store <256 x i8> %res, ptr %a
ret void
}

@@ -121,21 +121,21 @@ define <8 x i16> @sdiv_v8i16(<8 x i16> %op1) vscale_range(2,0) #0 {
ret <8 x i16> %res
}

define void @sdiv_v16i16(<16 x i16>* %a) vscale_range(2,0) #0 {
define void @sdiv_v16i16(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: sdiv_v16i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl16
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT: asrd z0.h, p0/m, z0.h, #5
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
%op1 = load <16 x i16>, <16 x i16>* %a
%op1 = load <16 x i16>, ptr %a
%res = sdiv <16 x i16> %op1, shufflevector (<16 x i16> insertelement (<16 x i16> poison, i16 32, i32 0), <16 x i16> poison, <16 x i32> zeroinitializer)
store <16 x i16> %res, <16 x i16>* %a
store <16 x i16> %res, ptr %a
ret void
}

define void @sdiv_v32i16(<32 x i16>* %a) #0 {
define void @sdiv_v32i16(ptr %a) #0 {
; VBITS_GE_256-LABEL: sdiv_v32i16:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #16
@@ -155,37 +155,37 @@ define void @sdiv_v32i16(<32 x i16>* %a) #0 {
; VBITS_GE_512-NEXT: asrd z0.h, p0/m, z0.h, #5
; VBITS_GE_512-NEXT: st1h { z0.h }, p0, [x0]
; VBITS_GE_512-NEXT: ret
%op1 = load <32 x i16>, <32 x i16>* %a
%op1 = load <32 x i16>, ptr %a
%res = sdiv <32 x i16> %op1, shufflevector (<32 x i16> insertelement (<32 x i16> poison, i16 32, i32 0), <32 x i16> poison, <32 x i32> zeroinitializer)
store <32 x i16> %res, <32 x i16>* %a
store <32 x i16> %res, ptr %a
ret void
}

define void @sdiv_v64i16(<64 x i16>* %a) vscale_range(8,0) #0 {
define void @sdiv_v64i16(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: sdiv_v64i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl64
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT: asrd z0.h, p0/m, z0.h, #5
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
%op1 = load <64 x i16>, <64 x i16>* %a
%op1 = load <64 x i16>, ptr %a
%res = sdiv <64 x i16> %op1, shufflevector (<64 x i16> insertelement (<64 x i16> poison, i16 32, i32 0), <64 x i16> poison, <64 x i32> zeroinitializer)
store <64 x i16> %res, <64 x i16>* %a
store <64 x i16> %res, ptr %a
ret void
}

define void @sdiv_v128i16(<128 x i16>* %a) vscale_range(16,0) #0 {
define void @sdiv_v128i16(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: sdiv_v128i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl128
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT: asrd z0.h, p0/m, z0.h, #5
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
%op1 = load <128 x i16>, <128 x i16>* %a
%op1 = load <128 x i16>, ptr %a
%res = sdiv <128 x i16> %op1, shufflevector (<128 x i16> insertelement (<128 x i16> poison, i16 32, i32 0), <128 x i16> poison, <128 x i32> zeroinitializer)
store <128 x i16> %res, <128 x i16>* %a
store <128 x i16> %res, ptr %a
ret void
}

@@ -213,21 +213,21 @@ define <4 x i32> @sdiv_v4i32(<4 x i32> %op1) vscale_range(2,0) #0 {
ret <4 x i32> %res
}

define void @sdiv_v8i32(<8 x i32>* %a) vscale_range(2,0) #0 {
define void @sdiv_v8i32(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: sdiv_v8i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl8
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT: asrd z0.s, p0/m, z0.s, #5
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
%op1 = load <8 x i32>, <8 x i32>* %a
%op1 = load <8 x i32>, ptr %a
%res = sdiv <8 x i32> %op1, shufflevector (<8 x i32> insertelement (<8 x i32> poison, i32 32, i32 0), <8 x i32> poison, <8 x i32> zeroinitializer)
store <8 x i32> %res, <8 x i32>* %a
store <8 x i32> %res, ptr %a
ret void
}

define void @sdiv_v16i32(<16 x i32>* %a) #0 {
define void @sdiv_v16i32(ptr %a) #0 {
; VBITS_GE_256-LABEL: sdiv_v16i32:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #8
@@ -247,37 +247,37 @@ define void @sdiv_v16i32(<16 x i32>* %a) #0 {
; VBITS_GE_512-NEXT: asrd z0.s, p0/m, z0.s, #5
; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x0]
; VBITS_GE_512-NEXT: ret
%op1 = load <16 x i32>, <16 x i32>* %a
%op1 = load <16 x i32>, ptr %a
%res = sdiv <16 x i32> %op1, shufflevector (<16 x i32> insertelement (<16 x i32> poison, i32 32, i32 0), <16 x i32> poison, <16 x i32> zeroinitializer)
store <16 x i32> %res, <16 x i32>* %a
store <16 x i32> %res, ptr %a
ret void
}

define void @sdiv_v32i32(<32 x i32>* %a) vscale_range(8,0) #0 {
define void @sdiv_v32i32(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: sdiv_v32i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl32
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT: asrd z0.s, p0/m, z0.s, #5
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
%op1 = load <32 x i32>, <32 x i32>* %a
%op1 = load <32 x i32>, ptr %a
%res = sdiv <32 x i32> %op1, shufflevector (<32 x i32> insertelement (<32 x i32> poison, i32 32, i32 0), <32 x i32> poison, <32 x i32> zeroinitializer)
store <32 x i32> %res, <32 x i32>* %a
store <32 x i32> %res, ptr %a
ret void
}

define void @sdiv_v64i32(<64 x i32>* %a) vscale_range(16,0) #0 {
define void @sdiv_v64i32(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: sdiv_v64i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl64
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT: asrd z0.s, p0/m, z0.s, #5
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
%op1 = load <64 x i32>, <64 x i32>* %a
%op1 = load <64 x i32>, ptr %a
%res = sdiv <64 x i32> %op1, shufflevector (<64 x i32> insertelement (<64 x i32> poison, i32 32, i32 0), <64 x i32> poison, <64 x i32> zeroinitializer)
store <64 x i32> %res, <64 x i32>* %a
store <64 x i32> %res, ptr %a
ret void
}

@@ -306,21 +306,21 @@ define <2 x i64> @sdiv_v2i64(<2 x i64> %op1) vscale_range(2,0) #0 {
ret <2 x i64> %res
}

define void @sdiv_v4i64(<4 x i64>* %a) vscale_range(2,0) #0 {
define void @sdiv_v4i64(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: sdiv_v4i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT: asrd z0.d, p0/m, z0.d, #5
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
%op1 = load <4 x i64>, <4 x i64>* %a
%op1 = load <4 x i64>, ptr %a
%res = sdiv <4 x i64> %op1, shufflevector (<4 x i64> insertelement (<4 x i64> poison, i64 32, i32 0), <4 x i64> poison, <4 x i32> zeroinitializer)
store <4 x i64> %res, <4 x i64>* %a
store <4 x i64> %res, ptr %a
ret void
}

define void @sdiv_v8i64(<8 x i64>* %a) #0 {
define void @sdiv_v8i64(ptr %a) #0 {
; VBITS_GE_256-LABEL: sdiv_v8i64:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #4
@@ -340,37 +340,37 @@ define void @sdiv_v8i64(<8 x i64>* %a) #0 {
; VBITS_GE_512-NEXT: asrd z0.d, p0/m, z0.d, #5
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x0]
; VBITS_GE_512-NEXT: ret
%op1 = load <8 x i64>, <8 x i64>* %a
%op1 = load <8 x i64>, ptr %a
%res = sdiv <8 x i64> %op1, shufflevector (<8 x i64> insertelement (<8 x i64> poison, i64 32, i32 0), <8 x i64> poison, <8 x i32> zeroinitializer)
store <8 x i64> %res, <8 x i64>* %a
store <8 x i64> %res, ptr %a
ret void
}

define void @sdiv_v16i64(<16 x i64>* %a) vscale_range(8,0) #0 {
define void @sdiv_v16i64(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: sdiv_v16i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl16
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT: asrd z0.d, p0/m, z0.d, #5
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
%op1 = load <16 x i64>, <16 x i64>* %a
%op1 = load <16 x i64>, ptr %a
%res = sdiv <16 x i64> %op1, shufflevector (<16 x i64> insertelement (<16 x i64> poison, i64 32, i32 0), <16 x i64> poison, <16 x i32> zeroinitializer)
store <16 x i64> %res, <16 x i64>* %a
store <16 x i64> %res, ptr %a
ret void
}

define void @sdiv_v32i64(<32 x i64>* %a) vscale_range(16,0) #0 {
define void @sdiv_v32i64(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: sdiv_v32i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl32
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT: asrd z0.d, p0/m, z0.d, #5
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
%op1 = load <32 x i64>, <32 x i64>* %a
%op1 = load <32 x i64>, ptr %a
%res = sdiv <32 x i64> %op1, shufflevector (<32 x i64> insertelement (<32 x i64> poison, i64 32, i32 0), <32 x i64> poison, <32 x i32> zeroinitializer)
store <32 x i64> %res, <32 x i64>* %a
store <32 x i64> %res, ptr %a
ret void
}

10 changes: 5 additions & 5 deletions llvm/test/CodeGen/AArch64/sve-fixed-length-shuffles.ll
@@ -7,7 +7,7 @@ target triple = "aarch64-unknown-linux-gnu"
; bigger than NEON. However, having no support opens us up to a code generator
; hang when expanding BUILD_VECTOR. Here we just validate the promblematic case
; successfully exits code generation.
define void @hang_when_merging_stores_after_legalisation(<8 x i32>* %a, <2 x i32> %b) vscale_range(2,2) #0 {
define void @hang_when_merging_stores_after_legalisation(ptr %a, <2 x i32> %b) vscale_range(2,2) #0 {
; CHECK-LABEL: hang_when_merging_stores_after_legalisation:
; CHECK: // %bb.0:
; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
@@ -33,12 +33,12 @@ define void @hang_when_merging_stores_after_legalisation(<8 x i32>* %a, <2 x i32
; CHECK-NEXT: ret
%splat = shufflevector <2 x i32> %b, <2 x i32> undef, <8 x i32> zeroinitializer
%interleaved.vec = shufflevector <8 x i32> %splat, <8 x i32> undef, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
store <8 x i32> %interleaved.vec, <8 x i32>* %a, align 4
store <8 x i32> %interleaved.vec, ptr %a, align 4
ret void
}

; Ensure we don't crash when trying to lower a shuffle via an extract
define void @crash_when_lowering_extract_shuffle(<32 x i32>* %dst, i1 %cond) vscale_range(2,2) #0 {
define void @crash_when_lowering_extract_shuffle(ptr %dst, i1 %cond) vscale_range(2,2) #0 {
; CHECK-LABEL: crash_when_lowering_extract_shuffle:
; CHECK: // %bb.0:
; CHECK-NEXT: tbnz w1, #0, .LBB1_2
@@ -123,9 +123,9 @@ define void @crash_when_lowering_extract_shuffle(<32 x i32>* %dst, i1 %cond) vsc
br i1 %cond, label %exit, label %vector.body

vector.body:
%1 = load <32 x i32>, <32 x i32>* %dst, align 16
%1 = load <32 x i32>, ptr %dst, align 16
%predphi = select <32 x i1> %broadcast.splat, <32 x i32> zeroinitializer, <32 x i32> %1
store <32 x i32> %predphi, <32 x i32>* %dst, align 16
store <32 x i32> %predphi, ptr %dst, align 16
br label %exit

exit:
140 changes: 70 additions & 70 deletions llvm/test/CodeGen/AArch64/sve-fixed-length-splat-vector.ll

Large diffs are not rendered by default.

24 changes: 12 additions & 12 deletions llvm/test/CodeGen/AArch64/sve-fixed-length-stores.ll
@@ -19,37 +19,37 @@
target triple = "aarch64-unknown-linux-gnu"

; Don't use SVE for 64-bit vectors.
define void @store_v2f32(<2 x float>* %a) #0 {
define void @store_v2f32(ptr %a) #0 {
; CHECK-LABEL: store_v2f32:
; CHECK: // %bb.0:
; CHECK-NEXT: str xzr, [x0]
; CHECK-NEXT: ret
store <2 x float> zeroinitializer, <2 x float>* %a
store <2 x float> zeroinitializer, ptr %a
ret void
}

; Don't use SVE for 128-bit vectors.
define void @store_v4f32(<4 x float>* %a) #0 {
define void @store_v4f32(ptr %a) #0 {
; CHECK-LABEL: store_v4f32:
; CHECK: // %bb.0:
; CHECK-NEXT: stp xzr, xzr, [x0]
; CHECK-NEXT: ret
store <4 x float> zeroinitializer, <4 x float>* %a
store <4 x float> zeroinitializer, ptr %a
ret void
}

define void @store_v8f32(<8 x float>* %a) #0 {
define void @store_v8f32(ptr %a) #0 {
; CHECK-LABEL: store_v8f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl8
; CHECK-NEXT: mov z0.s, #0 // =0x0
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
store <8 x float> zeroinitializer, <8 x float>* %a
store <8 x float> zeroinitializer, ptr %a
ret void
}

define void @store_v16f32(<16 x float>* %a) #0 {
define void @store_v16f32(ptr %a) #0 {
; VBITS_GE_256-LABEL: store_v16f32:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #8
@@ -79,11 +79,11 @@ define void @store_v16f32(<16 x float>* %a) #0 {
; VBITS_GE_2048-NEXT: mov z0.s, #0 // =0x0
; VBITS_GE_2048-NEXT: st1w { z0.s }, p0, [x0]
; VBITS_GE_2048-NEXT: ret
store <16 x float> zeroinitializer, <16 x float>* %a
store <16 x float> zeroinitializer, ptr %a
ret void
}
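; NOTE: in the VBITS_GE_256 configuration a <16 x float> spans two 256-bit
; registers, so the zero store above is split into two VL8 stores, with the
; "mov x8, #8" supplying the element offset of the second half.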

define void @store_v32f32(<32 x float>* %a) #0 {
define void @store_v32f32(ptr %a) #0 {
; VBITS_GE_256-LABEL: store_v32f32:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #24
Expand Down Expand Up @@ -119,11 +119,11 @@ define void @store_v32f32(<32 x float>* %a) #0 {
; VBITS_GE_2048-NEXT: mov z0.s, #0 // =0x0
; VBITS_GE_2048-NEXT: st1w { z0.s }, p0, [x0]
; VBITS_GE_2048-NEXT: ret
store <32 x float> zeroinitializer, <32 x float>* %a
store <32 x float> zeroinitializer, ptr %a
ret void
}

define void @store_v64f32(<64 x float>* %a) #0 {
define void @store_v64f32(ptr %a) #0 {
; VBITS_GE_256-LABEL: store_v64f32:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #56
Expand Down Expand Up @@ -173,7 +173,7 @@ define void @store_v64f32(<64 x float>* %a) #0 {
; VBITS_GE_2048-NEXT: mov z0.s, #0 // =0x0
; VBITS_GE_2048-NEXT: st1w { z0.s }, p0, [x0]
; VBITS_GE_2048-NEXT: ret
store <64 x float> zeroinitializer, <64 x float>* %a
store <64 x float> zeroinitializer, ptr %a
ret void
}
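; NOTE: the same pattern scales up: at VBITS_GE_256 the <64 x float> store is
; emitted in VL8 chunks ("mov x8, #56" is the final element offset), while at
; VBITS_GE_2048 a single predicated st1w covers all 64 elements.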

Expand Down
144 changes: 72 additions & 72 deletions llvm/test/CodeGen/AArch64/sve-fixed-length-subvector.ll

Large diffs are not rendered by default.

60 changes: 30 additions & 30 deletions llvm/test/CodeGen/AArch64/sve-fixed-length-trunc-stores.ll
Original file line number Diff line number Diff line change
Expand Up @@ -5,33 +5,33 @@

target triple = "aarch64-unknown-linux-gnu"

define void @store_trunc_v2i64i8(<2 x i64>* %ap, <2 x i8>* %dest) vscale_range(2,0) #0 {
define void @store_trunc_v2i64i8(ptr %ap, ptr %dest) vscale_range(2,0) #0 {
; CHECK-LABEL: store_trunc_v2i64i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ptrue p0.d, vl2
; CHECK-NEXT: st1b { z0.d }, p0, [x1]
; CHECK-NEXT: ret
%a = load <2 x i64>, <2 x i64>* %ap
%a = load <2 x i64>, ptr %ap
%val = trunc <2 x i64> %a to <2 x i8>
store <2 x i8> %val, <2 x i8>* %dest
store <2 x i8> %val, ptr %dest
ret void
}
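; NOTE: SVE stores truncate implicitly: st1b with a .d source writes only the
; low byte of each 64-bit element, so no separate trunc instructions are
; needed for these i64 -> i8 cases.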

define void @store_trunc_v4i64i8(<4 x i64>* %ap, <4 x i8>* %dest) vscale_range(2,0) #0 {
define void @store_trunc_v4i64i8(ptr %ap, ptr %dest) vscale_range(2,0) #0 {
; CHECK-LABEL: store_trunc_v4i64i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT: st1b { z0.d }, p0, [x1]
; CHECK-NEXT: ret
%a = load <4 x i64>, <4 x i64>* %ap
%a = load <4 x i64>, ptr %ap
%val = trunc <4 x i64> %a to <4 x i8>
store <4 x i8> %val, <4 x i8>* %dest
store <4 x i8> %val, ptr %dest
ret void
}

define void @store_trunc_v8i64i8(<8 x i64>* %ap, <8 x i8>* %dest) #0 {
define void @store_trunc_v8i64i8(ptr %ap, ptr %dest) #0 {
; VBITS_GE_256-LABEL: store_trunc_v8i64i8:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #4
Expand All @@ -52,39 +52,39 @@ define void @store_trunc_v8i64i8(<8 x i64>* %ap, <8 x i8>* %dest) #0 {
; VBITS_GE_512-NEXT: ld1d { z0.d }, p0/z, [x0]
; VBITS_GE_512-NEXT: st1b { z0.d }, p0, [x1]
; VBITS_GE_512-NEXT: ret
%a = load <8 x i64>, <8 x i64>* %ap
%a = load <8 x i64>, ptr %ap
%val = trunc <8 x i64> %a to <8 x i8>
store <8 x i8> %val, <8 x i8>* %dest
store <8 x i8> %val, ptr %dest
ret void
}

define void @store_trunc_v16i64i8(<16 x i64>* %ap, <16 x i8>* %dest) vscale_range(8,0) #0 {
define void @store_trunc_v16i64i8(ptr %ap, ptr %dest) vscale_range(8,0) #0 {
; CHECK-LABEL: store_trunc_v16i64i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl16
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT: st1b { z0.d }, p0, [x1]
; CHECK-NEXT: ret
%a = load <16 x i64>, <16 x i64>* %ap
%a = load <16 x i64>, ptr %ap
%val = trunc <16 x i64> %a to <16 x i8>
store <16 x i8> %val, <16 x i8>* %dest
store <16 x i8> %val, ptr %dest
ret void
}

define void @store_trunc_v32i64i8(<32 x i64>* %ap, <32 x i8>* %dest) vscale_range(16,0) #0 {
define void @store_trunc_v32i64i8(ptr %ap, ptr %dest) vscale_range(16,0) #0 {
; CHECK-LABEL: store_trunc_v32i64i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl32
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT: st1b { z0.d }, p0, [x1]
; CHECK-NEXT: ret
%a = load <32 x i64>, <32 x i64>* %ap
%a = load <32 x i64>, ptr %ap
%val = trunc <32 x i64> %a to <32 x i8>
store <32 x i8> %val, <32 x i8>* %dest
store <32 x i8> %val, ptr %dest
ret void
}

define void @store_trunc_v8i64i16(<8 x i64>* %ap, <8 x i16>* %dest) #0 {
define void @store_trunc_v8i64i16(ptr %ap, ptr %dest) #0 {
; Currently does not use the truncating store
; VBITS_GE_256-LABEL: store_trunc_v8i64i16:
; VBITS_GE_256: // %bb.0:
Expand All @@ -106,13 +106,13 @@ define void @store_trunc_v8i64i16(<8 x i64>* %ap, <8 x i16>* %dest) #0 {
; VBITS_GE_512-NEXT: ld1d { z0.d }, p0/z, [x0]
; VBITS_GE_512-NEXT: st1h { z0.d }, p0, [x1]
; VBITS_GE_512-NEXT: ret
%a = load <8 x i64>, <8 x i64>* %ap
%a = load <8 x i64>, ptr %ap
%val = trunc <8 x i64> %a to <8 x i16>
store <8 x i16> %val, <8 x i16>* %dest
store <8 x i16> %val, ptr %dest
ret void
}

define void @store_trunc_v8i64i32(<8 x i64>* %ap, <8 x i32>* %dest) #0 {
define void @store_trunc_v8i64i32(ptr %ap, ptr %dest) #0 {
; VBITS_GE_256-LABEL: store_trunc_v8i64i32:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #4
Expand All @@ -133,13 +133,13 @@ define void @store_trunc_v8i64i32(<8 x i64>* %ap, <8 x i32>* %dest) #0 {
; VBITS_GE_512-NEXT: ld1d { z0.d }, p0/z, [x0]
; VBITS_GE_512-NEXT: st1w { z0.d }, p0, [x1]
; VBITS_GE_512-NEXT: ret
%a = load <8 x i64>, <8 x i64>* %ap
%a = load <8 x i64>, ptr %ap
%val = trunc <8 x i64> %a to <8 x i32>
store <8 x i32> %val, <8 x i32>* %dest
store <8 x i32> %val, ptr %dest
ret void
}

define void @store_trunc_v16i32i8(<16 x i32>* %ap, <16 x i8>* %dest) #0 {
define void @store_trunc_v16i32i8(ptr %ap, ptr %dest) #0 {
; Currently does not use the truncating store
; VBITS_GE_256-LABEL: store_trunc_v16i32i8:
; VBITS_GE_256: // %bb.0:
Expand All @@ -161,13 +161,13 @@ define void @store_trunc_v16i32i8(<16 x i32>* %ap, <16 x i8>* %dest) #0 {
; VBITS_GE_512-NEXT: ld1w { z0.s }, p0/z, [x0]
; VBITS_GE_512-NEXT: st1b { z0.s }, p0, [x1]
; VBITS_GE_512-NEXT: ret
%a = load <16 x i32>, <16 x i32>* %ap
%a = load <16 x i32>, ptr %ap
%val = trunc <16 x i32> %a to <16 x i8>
store <16 x i8> %val, <16 x i8>* %dest
store <16 x i8> %val, ptr %dest
ret void
}

define void @store_trunc_v16i32i16(<16 x i32>* %ap, <16 x i16>* %dest) #0 {
define void @store_trunc_v16i32i16(ptr %ap, ptr %dest) #0 {
; VBITS_GE_256-LABEL: store_trunc_v16i32i16:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #8
Expand All @@ -188,13 +188,13 @@ define void @store_trunc_v16i32i16(<16 x i32>* %ap, <16 x i16>* %dest) #0 {
; VBITS_GE_512-NEXT: ld1w { z0.s }, p0/z, [x0]
; VBITS_GE_512-NEXT: st1h { z0.s }, p0, [x1]
; VBITS_GE_512-NEXT: ret
%a = load <16 x i32>, <16 x i32>* %ap
%a = load <16 x i32>, ptr %ap
%val = trunc <16 x i32> %a to <16 x i16>
store <16 x i16> %val, <16 x i16>* %dest
store <16 x i16> %val, ptr %dest
ret void
}

define void @store_trunc_v32i16i8(<32 x i16>* %ap, <32 x i8>* %dest) #0 {
define void @store_trunc_v32i16i8(ptr %ap, ptr %dest) #0 {
; VBITS_GE_256-LABEL: store_trunc_v32i16i8:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #16
Expand All @@ -215,9 +215,9 @@ define void @store_trunc_v32i16i8(<32 x i16>* %ap, <32 x i8>* %dest) #0 {
; VBITS_GE_512-NEXT: ld1h { z0.h }, p0/z, [x0]
; VBITS_GE_512-NEXT: st1b { z0.h }, p0, [x1]
; VBITS_GE_512-NEXT: ret
%a = load <32 x i16>, <32 x i16>* %ap
%a = load <32 x i16>, ptr %ap
%val = trunc <32 x i16> %a to <32 x i8>
store <32 x i8> %val, <32 x i8>* %dest
store <32 x i8> %val, ptr %dest
ret void
}

Expand Down
124 changes: 62 additions & 62 deletions llvm/test/CodeGen/AArch64/sve-fixed-length-trunc.ll

Large diffs are not rendered by default.

240 changes: 120 additions & 120 deletions llvm/test/CodeGen/AArch64/sve-fixed-length-vector-shuffle.ll

Large diffs are not rendered by default.