121 changes: 121 additions & 0 deletions llvm/test/CodeGen/AArch64/sve2p1-intrinsics-pmov-to-pred.ll
@@ -0,0 +1,121 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2p1 < %s | FileCheck %s
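; Tests for the @llvm.aarch64.sve.pmov.to.pred.lane intrinsics (vector-to-predicate
; moves). The 16/32/64-bit tests call the intrinsic with lane index 0 and with a
; non-zero index, then combine the results so that neither call is folded away.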

define <vscale x 16 x i1> @test_pmov_to_pred_i8(<vscale x 16 x i8> %zn) {
; CHECK-LABEL: test_pmov_to_pred_i8:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: .cfi_offset w30, -16
; CHECK-NEXT: mov w0, wzr
; CHECK-NEXT: bl llvm.aarch64.sve.pmov.to.pred.lane.nxv16i8
; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
entry:
%res = call <vscale x 16 x i1> @llvm.aarch64.sve.pmov.to.pred.lane.nxv16i8(<vscale x 16 x i8> %zn, i32 0)
ret <vscale x 16 x i1> %res
}

define <vscale x 8 x i1> @test_pmov_to_pred_i16(<vscale x 8 x i16> %zn) {
; CHECK-LABEL: test_pmov_to_pred_i16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-2
; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: str z8, [sp, #1, mul vl] // 16-byte Folded Spill
; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 16 * VG
; CHECK-NEXT: .cfi_offset w30, -8
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 16 - 8 * VG
; CHECK-NEXT: mov w0, wzr
; CHECK-NEXT: mov z8.d, z0.d
; CHECK-NEXT: bl llvm.aarch64.sve.pmov.to.pred.lane.nxv8i16
; CHECK-NEXT: mov z0.d, z8.d
; CHECK-NEXT: mov w0, #1 // =0x1
; CHECK-NEXT: mov p4.b, p0.b
; CHECK-NEXT: bl llvm.aarch64.sve.pmov.to.pred.lane.nxv8i16
; CHECK-NEXT: ptrue p1.h
; CHECK-NEXT: ldr z8, [sp, #1, mul vl] // 16-byte Folded Reload
; CHECK-NEXT: eor p0.b, p1/z, p4.b, p0.b
; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: addvl sp, sp, #2
; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
; CHECK-NEXT: ret
entry:
%res1 = call <vscale x 8 x i1> @llvm.aarch64.sve.pmov.to.pred.lane.nxv8i16(<vscale x 8 x i16> %zn, i32 0)
%res2 = call <vscale x 8 x i1> @llvm.aarch64.sve.pmov.to.pred.lane.nxv8i16(<vscale x 8 x i16> %zn, i32 1)

%res = add <vscale x 8 x i1> %res1, %res2
ret <vscale x 8 x i1> %res
}

define <vscale x 4 x i1> @test_pmov_to_pred_i32(<vscale x 4 x i32> %zn) {
; CHECK-LABEL: test_pmov_to_pred_i32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-2
; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: str z8, [sp, #1, mul vl] // 16-byte Folded Spill
; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 16 * VG
; CHECK-NEXT: .cfi_offset w30, -8
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 16 - 8 * VG
; CHECK-NEXT: mov w0, wzr
; CHECK-NEXT: mov z8.d, z0.d
; CHECK-NEXT: bl llvm.aarch64.sve.pmov.to.pred.lane.nxv4i32
; CHECK-NEXT: mov z0.d, z8.d
; CHECK-NEXT: mov w0, #3 // =0x3
; CHECK-NEXT: mov p4.b, p0.b
; CHECK-NEXT: bl llvm.aarch64.sve.pmov.to.pred.lane.nxv4i32
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: ldr z8, [sp, #1, mul vl] // 16-byte Folded Reload
; CHECK-NEXT: eor p0.b, p1/z, p4.b, p0.b
; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: addvl sp, sp, #2
; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
; CHECK-NEXT: ret
entry:
%res1 = call <vscale x 4 x i1> @llvm.aarch64.sve.pmov.to.pred.lane.nxv4i32(<vscale x 4 x i32> %zn, i32 0)
%res2 = call <vscale x 4 x i1> @llvm.aarch64.sve.pmov.to.pred.lane.nxv4i32(<vscale x 4 x i32> %zn, i32 3)

%res = add <vscale x 4 x i1> %res1, %res2
ret <vscale x 4 x i1> %res
}

define <vscale x 2 x i1> @test_pmov_to_pred_i64(<vscale x 2 x i64> %zn) {
; CHECK-LABEL: test_pmov_to_pred_i64:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-2
; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: str z8, [sp, #1, mul vl] // 16-byte Folded Spill
; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 16 * VG
; CHECK-NEXT: .cfi_offset w30, -8
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 16 - 8 * VG
; CHECK-NEXT: mov w0, wzr
; CHECK-NEXT: mov z8.d, z0.d
; CHECK-NEXT: bl llvm.aarch64.sve.pmov.to.pred.lane.nxv2i64
; CHECK-NEXT: mov z0.d, z8.d
; CHECK-NEXT: mov w0, #7 // =0x7
; CHECK-NEXT: mov p4.b, p0.b
; CHECK-NEXT: bl llvm.aarch64.sve.pmov.to.pred.lane.nxv2i64
; CHECK-NEXT: ptrue p1.d
; CHECK-NEXT: ldr z8, [sp, #1, mul vl] // 16-byte Folded Reload
; CHECK-NEXT: eor p0.b, p1/z, p4.b, p0.b
; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: addvl sp, sp, #2
; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
; CHECK-NEXT: ret
entry:
%res1 = call <vscale x 2 x i1> @llvm.aarch64.sve.pmov.to.pred.lane.nxv2i64(<vscale x 2 x i64> %zn, i32 0)
%res2 = call <vscale x 2 x i1> @llvm.aarch64.sve.pmov.to.pred.lane.nxv2i64(<vscale x 2 x i64> %zn, i32 7)

%res = add <vscale x 2 x i1> %res1, %res2
ret <vscale x 2 x i1> %res
}

declare <vscale x 16 x i1> @llvm.aarch64.sve.pmov.to.pred.lane.nxv16i8(<vscale x 16 x i8>, i32)
declare <vscale x 8 x i1> @llvm.aarch64.sve.pmov.to.pred.lane.nxv8i16(<vscale x 8 x i16>, i32)
declare <vscale x 4 x i1> @llvm.aarch64.sve.pmov.to.pred.lane.nxv4i32(<vscale x 4 x i32>, i32)
declare <vscale x 2 x i1> @llvm.aarch64.sve.pmov.to.pred.lane.nxv2i64(<vscale x 2 x i64>, i32)
117 changes: 117 additions & 0 deletions llvm/test/CodeGen/AArch64/sve2p1-intrinsics-pmov-to-vector.ll
@@ -0,0 +1,117 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2p1 < %s | FileCheck %s
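; Tests for the @llvm.aarch64.sve.pmov.to.vector.lane intrinsics (predicate-to-vector
; moves), covering both the merging form (vector passthru plus lane index) and the
; zeroing form (predicate operand only).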

; Merge

define <vscale x 8 x i16> @test_pmov_to_vector_i16(<vscale x 8 x i16> %zn, <vscale x 8 x i1> %pn) {
; CHECK-LABEL: test_pmov_to_vector_i16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: .cfi_offset w30, -16
; CHECK-NEXT: mov w0, #1 // =0x1
; CHECK-NEXT: bl llvm.aarch64.sve.pmov.to.vector.lane.merging.nxv8i16
; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
entry:
%res = call <vscale x 8 x i16> @llvm.aarch64.sve.pmov.to.vector.lane.merging.nxv8i16(<vscale x 8 x i16> %zn, <vscale x 8 x i1> %pn, i32 1)
ret <vscale x 8 x i16> %res
}

define <vscale x 4 x i32> @test_pmov_to_vector_i32(<vscale x 4 x i32> %zn, <vscale x 4 x i1> %pn) {
; CHECK-LABEL: test_pmov_to_vector_i32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: .cfi_offset w30, -16
; CHECK-NEXT: mov w0, #3 // =0x3
; CHECK-NEXT: bl llvm.aarch64.sve.pmov.to.vector.lane.merging.nxv4i32
; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
entry:
%res = call <vscale x 4 x i32> @llvm.aarch64.sve.pmov.to.vector.lane.merging.nxv4i32(<vscale x 4 x i32> %zn, <vscale x 4 x i1> %pn, i32 3)
ret <vscale x 4 x i32> %res
}

define <vscale x 2 x i64> @test_pmov_to_vector_i64(<vscale x 2 x i64> %zn, <vscale x 2 x i1> %pn) {
; CHECK-LABEL: test_pmov_to_vector_i64:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: .cfi_offset w30, -16
; CHECK-NEXT: mov w0, #7 // =0x7
; CHECK-NEXT: bl llvm.aarch64.sve.pmov.to.vector.lane.merging.nxv2i64
; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
entry:
%res = call <vscale x 2 x i64> @llvm.aarch64.sve.pmov.to.vector.lane.merging.nxv2i64(<vscale x 2 x i64> %zn, <vscale x 2 x i1> %pn, i32 7)
ret <vscale x 2 x i64> %res
}


; Zero

define <vscale x 16 x i8> @test_pmov_to_vector_zero_i8(<vscale x 16 x i1> %pn) {
; CHECK-LABEL: test_pmov_to_vector_zero_i8:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: .cfi_offset w30, -16
; CHECK-NEXT: bl llvm.aarch64.sve.pmov.to.vector.lane.zeroing.nxv16i8
; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
entry:
%res = call <vscale x 16 x i8> @llvm.aarch64.sve.pmov.to.vector.lane.zeroing.nxv16i8(<vscale x 16 x i1> %pn)
ret <vscale x 16 x i8> %res
}

define <vscale x 8 x i16> @test_pmov_to_vector_zero_i16(<vscale x 8 x i1> %pn) {
; CHECK-LABEL: test_pmov_to_vector_zero_i16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: .cfi_offset w30, -16
; CHECK-NEXT: bl llvm.aarch64.sve.pmov.to.vector.lane.zeroing.nxv8i16
; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
entry:
%res = call <vscale x 8 x i16> @llvm.aarch64.sve.pmov.to.vector.lane.zeroing.nxv8i16(<vscale x 8 x i1> %pn)
ret <vscale x 8 x i16> %res
}

define <vscale x 4 x i32> @test_pmov_to_vector_zero_i32(<vscale x 4 x i1> %pn) {
; CHECK-LABEL: test_pmov_to_vector_zero_i32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: .cfi_offset w30, -16
; CHECK-NEXT: bl llvm.aarch64.sve.pmov.to.vector.lane.zeroing.nxv4i32
; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
entry:
%res = call <vscale x 4 x i32> @llvm.aarch64.sve.pmov.to.vector.lane.zeroing.nxv4i32(<vscale x 4 x i1> %pn)
ret <vscale x 4 x i32> %res
}

define <vscale x 2 x i64> @test_pmov_to_vector_zero_i64(<vscale x 2 x i1> %pn) {
; CHECK-LABEL: test_pmov_to_vector_zero_i64:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: .cfi_offset w30, -16
; CHECK-NEXT: bl llvm.aarch64.sve.pmov.to.vector.lane.zeroing.nxv2i64
; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
entry:
%res = call <vscale x 2 x i64> @llvm.aarch64.sve.pmov.to.vector.lane.zeroing.nxv2i64(<vscale x 2 x i1> %pn)
ret <vscale x 2 x i64> %res
}

declare <vscale x 8 x i16> @llvm.aarch64.sve.pmov.to.vector.lane.merging.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, i32)
declare <vscale x 4 x i32> @llvm.aarch64.sve.pmov.to.vector.lane.merging.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, i32)
declare <vscale x 2 x i64> @llvm.aarch64.sve.pmov.to.vector.lane.merging.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, i32)

declare <vscale x 16 x i8> @llvm.aarch64.sve.pmov.to.vector.lane.zeroing.nxv16i8(<vscale x 16 x i1>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.pmov.to.vector.lane.zeroing.nxv8i16(<vscale x 8 x i1>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.pmov.to.vector.lane.zeroing.nxv4i32(<vscale x 4 x i1>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.pmov.to.vector.lane.zeroing.nxv2i64(<vscale x 2 x i1>)
83 changes: 83 additions & 0 deletions llvm/test/CodeGen/AArch64/sve2p1-intrinsics-tblq.ll
@@ -0,0 +1,83 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve2p1,+bf16 < %s | FileCheck %s
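; Tests for the @llvm.aarch64.sve.tblq intrinsics, which select to the SVE2.1 TBLQ
; instruction (table lookup within each 128-bit segment) for each element type.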

define <vscale x 16 x i8> @test_tblq_i8(<vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm) {
; CHECK-LABEL: test_tblq_i8:
; CHECK: // %bb.0:
; CHECK-NEXT: tblq z0.b, { z0.b }, z1.b
; CHECK-NEXT: ret
%res = call <vscale x 16 x i8> @llvm.aarch64.sve.tblq.nxv16i8(<vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm)
ret <vscale x 16 x i8> %res
}

define <vscale x 8 x i16> @test_tblq_i16(<vscale x 8 x i16> %zn, <vscale x 8 x i16> %zm) {
; CHECK-LABEL: test_tblq_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: tblq z0.h, { z0.h }, z1.h
; CHECK-NEXT: ret
%res = call <vscale x 8 x i16> @llvm.aarch64.sve.tblq.nxv8i16(<vscale x 8 x i16> %zn, <vscale x 8 x i16> %zm)
ret <vscale x 8 x i16> %res
}

define <vscale x 4 x i32> @test_tblq_i32(<vscale x 4 x i32> %zn, <vscale x 4 x i32> %zm) {
; CHECK-LABEL: test_tblq_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: tblq z0.s, { z0.s }, z1.s
; CHECK-NEXT: ret
%res = call <vscale x 4 x i32> @llvm.aarch64.sve.tblq.nxv4i32(<vscale x 4 x i32> %zn, <vscale x 4 x i32> %zm)
ret <vscale x 4 x i32> %res
}

define <vscale x 2 x i64> @test_tblq_i64(<vscale x 2 x i64> %zn, <vscale x 2 x i64> %zm) {
; CHECK-LABEL: test_tblq_i64:
; CHECK: // %bb.0:
; CHECK-NEXT: tblq z0.d, { z0.d }, z1.d
; CHECK-NEXT: ret
%res = call <vscale x 2 x i64> @llvm.aarch64.sve.tblq.nxv2i64(<vscale x 2 x i64> %zn, <vscale x 2 x i64> %zm)
ret <vscale x 2 x i64> %res
}

define <vscale x 8 x half> @test_tblq_f16(<vscale x 8 x half> %zn, <vscale x 8 x i16> %zm) {
; CHECK-LABEL: test_tblq_f16:
; CHECK: // %bb.0:
; CHECK-NEXT: tblq z0.h, { z0.h }, z1.h
; CHECK-NEXT: ret
%res = call <vscale x 8 x half> @llvm.aarch64.sve.tblq.nxv8f16(<vscale x 8 x half> %zn, <vscale x 8 x i16> %zm)
ret <vscale x 8 x half> %res
}

define <vscale x 4 x float> @test_tblq_f32(<vscale x 4 x float> %zn, <vscale x 4 x i32> %zm) {
; CHECK-LABEL: test_tblq_f32:
; CHECK: // %bb.0:
; CHECK-NEXT: tblq z0.s, { z0.s }, z1.s
; CHECK-NEXT: ret
%res = call <vscale x 4 x float> @llvm.aarch64.sve.tblq.nxv4f32(<vscale x 4 x float> %zn, <vscale x 4 x i32> %zm)
ret <vscale x 4 x float> %res
}

define <vscale x 2 x double> @test_tblq_f64(<vscale x 2 x double> %zn, <vscale x 2 x i64> %zm) {
; CHECK-LABEL: test_tblq_f64:
; CHECK: // %bb.0:
; CHECK-NEXT: tblq z0.d, { z0.d }, z1.d
; CHECK-NEXT: ret
%res = call <vscale x 2 x double> @llvm.aarch64.sve.tblq.nxv2f64(<vscale x 2 x double> %zn, <vscale x 2 x i64> %zm)
ret <vscale x 2 x double> %res
}

define <vscale x 8 x bfloat> @test_tblq_bf16(<vscale x 8 x bfloat> %zn, <vscale x 8 x i16> %zm) {
; CHECK-LABEL: test_tblq_bf16:
; CHECK: // %bb.0:
; CHECK-NEXT: tblq z0.h, { z0.h }, z1.h
; CHECK-NEXT: ret
%res = call <vscale x 8 x bfloat> @llvm.aarch64.sve.tblq.nxv8bf16(<vscale x 8 x bfloat> %zn, <vscale x 8 x i16> %zm)
ret <vscale x 8 x bfloat> %res
}

declare <vscale x 16 x i8> @llvm.aarch64.sve.tblq.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.tblq.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.tblq.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.tblq.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)
declare <vscale x 8 x half> @llvm.aarch64.sve.tblq.nxv8f16(<vscale x 8 x half>, <vscale x 8 x i16>)
declare <vscale x 4 x float> @llvm.aarch64.sve.tblq.nxv4f32(<vscale x 4 x float>, <vscale x 4 x i32>)
declare <vscale x 2 x double> @llvm.aarch64.sve.tblq.nxv2f64(<vscale x 2 x double>, <vscale x 2 x i64>)
declare <vscale x 8 x bfloat> @llvm.aarch64.sve.tblq.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x i16>)
83 changes: 83 additions & 0 deletions llvm/test/CodeGen/AArch64/sve2p1-intrinsics-tbxq.ll
@@ -0,0 +1,83 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve2p1,+bf16 < %s | FileCheck %s
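; Tests for the @llvm.aarch64.sve.tbxq intrinsics, which select to the SVE2.1 TBXQ
; instruction (quadword table lookup that falls back to the passthru vector for
; out-of-range indices).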

define <vscale x 16 x i8> @test_tbxq_i8(<vscale x 16 x i8> %passthru, <vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm) {
; CHECK-LABEL: test_tbxq_i8:
; CHECK: // %bb.0:
; CHECK-NEXT: tbxq z0.b, z1.b, z2.b
; CHECK-NEXT: ret
%res = call <vscale x 16 x i8> @llvm.aarch64.sve.tbxq.nxv16i8(<vscale x 16 x i8> %passthru, <vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm)
ret <vscale x 16 x i8> %res
}

define <vscale x 8 x i16> @test_tbxq_i16(<vscale x 8 x i16> %passthru, <vscale x 8 x i16> %zn, <vscale x 8 x i16> %zm) {
; CHECK-LABEL: test_tbxq_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: tbxq z0.h, z1.h, z2.h
; CHECK-NEXT: ret
%res = call <vscale x 8 x i16> @llvm.aarch64.sve.tbxq.nxv8i16(<vscale x 8 x i16> %passthru, <vscale x 8 x i16> %zn, <vscale x 8 x i16> %zm)
ret <vscale x 8 x i16> %res
}

define <vscale x 4 x i32> @test_tbxq_i32(<vscale x 4 x i32> %passthru, <vscale x 4 x i32> %zn, <vscale x 4 x i32> %zm) {
; CHECK-LABEL: test_tbxq_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: tbxq z0.s, z1.s, z2.s
; CHECK-NEXT: ret
%res = call <vscale x 4 x i32> @llvm.aarch64.sve.tbxq.nxv4i32(<vscale x 4 x i32> %passthru, <vscale x 4 x i32> %zn, <vscale x 4 x i32> %zm)
ret <vscale x 4 x i32> %res
}

define <vscale x 2 x i64> @test_tbxq_i64(<vscale x 2 x i64> %passthru, <vscale x 2 x i64> %zn, <vscale x 2 x i64> %zm) {
; CHECK-LABEL: test_tbxq_i64:
; CHECK: // %bb.0:
; CHECK-NEXT: tbxq z0.d, z1.d, z2.d
; CHECK-NEXT: ret
%res = call <vscale x 2 x i64> @llvm.aarch64.sve.tbxq.nxv2i64(<vscale x 2 x i64> %passthru, <vscale x 2 x i64> %zn, <vscale x 2 x i64> %zm)
ret <vscale x 2 x i64> %res
}

define <vscale x 8 x half> @test_tbxq_f16(<vscale x 8 x half> %passthru, <vscale x 8 x half> %zn, <vscale x 8 x i16> %zm) {
; CHECK-LABEL: test_tbxq_f16:
; CHECK: // %bb.0:
; CHECK-NEXT: tbxq z0.h, z1.h, z2.h
; CHECK-NEXT: ret
%res = call <vscale x 8 x half> @llvm.aarch64.sve.tbxq.nxv8f16(<vscale x 8 x half> %passthru, <vscale x 8 x half> %zn, <vscale x 8 x i16> %zm)
ret <vscale x 8 x half> %res
}

define <vscale x 4 x float> @test_tbxq_f32(<vscale x 4 x float> %passthru, <vscale x 4 x float> %zn, <vscale x 4 x i32> %zm) {
; CHECK-LABEL: test_tbxq_f32:
; CHECK: // %bb.0:
; CHECK-NEXT: tbxq z0.s, z1.s, z2.s
; CHECK-NEXT: ret
%res = call <vscale x 4 x float> @llvm.aarch64.sve.tbxq.nxv4f32(<vscale x 4 x float> %passthru, <vscale x 4 x float> %zn, <vscale x 4 x i32> %zm)
ret <vscale x 4 x float> %res
}

define <vscale x 2 x double> @test_tbxq_f64(<vscale x 2 x double> %passthru, <vscale x 2 x double> %zn, <vscale x 2 x i64> %zm) {
; CHECK-LABEL: test_tbxq_f64:
; CHECK: // %bb.0:
; CHECK-NEXT: tbxq z0.d, z1.d, z2.d
; CHECK-NEXT: ret
%res = call <vscale x 2 x double> @llvm.aarch64.sve.tbxq.nxv2f64(<vscale x 2 x double> %passthru, <vscale x 2 x double> %zn, <vscale x 2 x i64> %zm)
ret <vscale x 2 x double> %res
}

define <vscale x 8 x bfloat> @test_tbxq_bf16(<vscale x 8 x bfloat> %passthru, <vscale x 8 x bfloat> %zn, <vscale x 8 x i16> %zm) {
; CHECK-LABEL: test_tbxq_bf16:
; CHECK: // %bb.0:
; CHECK-NEXT: tbxq z0.h, z1.h, z2.h
; CHECK-NEXT: ret
%res = call <vscale x 8 x bfloat> @llvm.aarch64.sve.tbxq.nxv8bf16(<vscale x 8 x bfloat> %passthru, <vscale x 8 x bfloat> %zn, <vscale x 8 x i16> %zm)
ret <vscale x 8 x bfloat> %res
}

declare <vscale x 16 x i8> @llvm.aarch64.sve.tbxq.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.tbxq.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.tbxq.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.tbxq.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)
declare <vscale x 8 x half> @llvm.aarch64.sve.tbxq.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x i16>)
declare <vscale x 4 x float> @llvm.aarch64.sve.tbxq.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x i32>)
declare <vscale x 2 x double> @llvm.aarch64.sve.tbxq.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x i64>)
declare <vscale x 8 x bfloat> @llvm.aarch64.sve.tbxq.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x i16>)
85 changes: 85 additions & 0 deletions llvm/test/CodeGen/AArch64/sve2p1-intrinsics-uzpq1.ll
@@ -0,0 +1,85 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2p1,+bf16 < %s | FileCheck %s
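; Tests for the @llvm.aarch64.sve.uzpq1 intrinsics, which select to the SVE2.1 UZPQ1
; instruction (concatenate even-numbered elements within each 128-bit segment).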

define <vscale x 16 x i8> @test_uzpq1_i8(<vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm) {
; CHECK-LABEL: test_uzpq1_i8:
; CHECK: // %bb.0:
; CHECK-NEXT: uzpq1 z0.b, z0.b, z1.b
; CHECK-NEXT: ret
%res = call <vscale x 16 x i8> @llvm.aarch64.sve.uzpq1.nxv16i8(<vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm)
ret <vscale x 16 x i8> %res
}

define <vscale x 8 x i16> @test_uzpq1_i16(<vscale x 8 x i16> %zn, <vscale x 8 x i16> %zm) {
; CHECK-LABEL: test_uzpq1_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: uzpq1 z0.h, z0.h, z1.h
; CHECK-NEXT: ret
%res = call <vscale x 8 x i16> @llvm.aarch64.sve.uzpq1.nxv8i16(<vscale x 8 x i16> %zn, <vscale x 8 x i16> %zm)
ret <vscale x 8 x i16> %res
}

define <vscale x 4 x i32> @test_uzpq1_i32(<vscale x 4 x i32> %zn, <vscale x 4 x i32> %zm) {
; CHECK-LABEL: test_uzpq1_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: uzpq1 z0.s, z0.s, z1.s
; CHECK-NEXT: ret
%res = call <vscale x 4 x i32> @llvm.aarch64.sve.uzpq1.nxv4i32(<vscale x 4 x i32> %zn, <vscale x 4 x i32> %zm)
ret <vscale x 4 x i32> %res
}

define <vscale x 2 x i64> @test_uzpq1_i64(<vscale x 2 x i64> %zn, <vscale x 2 x i64> %zm) {
; CHECK-LABEL: test_uzpq1_i64:
; CHECK: // %bb.0:
; CHECK-NEXT: uzpq1 z0.d, z0.d, z1.d
; CHECK-NEXT: ret
%res = call <vscale x 2 x i64> @llvm.aarch64.sve.uzpq1.nxv2i64(<vscale x 2 x i64> %zn, <vscale x 2 x i64> %zm)
ret <vscale x 2 x i64> %res
}

define <vscale x 8 x half> @test_uzpq1_f16(<vscale x 8 x half> %zn, <vscale x 8 x half> %zm) {
; CHECK-LABEL: test_uzpq1_f16:
; CHECK: // %bb.0:
; CHECK-NEXT: uzpq1 z0.h, z0.h, z1.h
; CHECK-NEXT: ret
%res = call <vscale x 8 x half> @llvm.aarch64.sve.uzpq1.nxv8f16(<vscale x 8 x half> %zn, <vscale x 8 x half> %zm)
ret <vscale x 8 x half> %res
}

define <vscale x 4 x float> @test_uzpq1_f32(<vscale x 4 x float> %zn, <vscale x 4 x float> %zm) {
; CHECK-LABEL: test_uzpq1_f32:
; CHECK: // %bb.0:
; CHECK-NEXT: uzpq1 z0.s, z0.s, z1.s
; CHECK-NEXT: ret
%res = call <vscale x 4 x float> @llvm.aarch64.sve.uzpq1.nxv4f32(<vscale x 4 x float> %zn, <vscale x 4 x float> %zm)
ret <vscale x 4 x float> %res
}

define <vscale x 2 x double> @test_uzpq1_f64(<vscale x 2 x double> %zn, <vscale x 2 x double> %zm) {
; CHECK-LABEL: test_uzpq1_f64:
; CHECK: // %bb.0:
; CHECK-NEXT: uzpq1 z0.d, z0.d, z1.d
; CHECK-NEXT: ret
%res = call <vscale x 2 x double> @llvm.aarch64.sve.uzpq1.nxv2f64(<vscale x 2 x double> %zn, <vscale x 2 x double> %zm)
ret <vscale x 2 x double> %res
}

define <vscale x 8 x bfloat> @test_uzpq1_bf16(<vscale x 8 x bfloat> %zn, <vscale x 8 x bfloat> %zm) {
; CHECK-LABEL: test_uzpq1_bf16:
; CHECK: // %bb.0:
; CHECK-NEXT: uzpq1 z0.h, z0.h, z1.h
; CHECK-NEXT: ret
%res = call <vscale x 8 x bfloat> @llvm.aarch64.sve.uzpq1.nxv8bf16(<vscale x 8 x bfloat> %zn, <vscale x 8 x bfloat> %zm)
ret <vscale x 8 x bfloat> %res
}


declare <vscale x 16 x i8> @llvm.aarch64.sve.uzpq1.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.uzpq1.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.uzpq1.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.uzpq1.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 8 x half> @llvm.aarch64.sve.uzpq1.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>)
declare <vscale x 4 x float> @llvm.aarch64.sve.uzpq1.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>)
declare <vscale x 2 x double> @llvm.aarch64.sve.uzpq1.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>)
declare <vscale x 8 x bfloat> @llvm.aarch64.sve.uzpq1.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>)
85 changes: 85 additions & 0 deletions llvm/test/CodeGen/AArch64/sve2p1-intrinsics-uzpq2.ll
@@ -0,0 +1,85 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2p1,+bf16 < %s | FileCheck %s
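; Tests for the @llvm.aarch64.sve.uzpq2 intrinsics, which select to the SVE2.1 UZPQ2
; instruction (concatenate odd-numbered elements within each 128-bit segment).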

define <vscale x 16 x i8> @test_uzpq2_i8(<vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm) {
; CHECK-LABEL: test_uzpq2_i8:
; CHECK: // %bb.0:
; CHECK-NEXT: uzpq2 z0.b, z0.b, z1.b
; CHECK-NEXT: ret
%res = call <vscale x 16 x i8> @llvm.aarch64.sve.uzpq2.nxv16i8(<vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm)
ret <vscale x 16 x i8> %res
}

define <vscale x 8 x i16> @test_uzpq2_i16(<vscale x 8 x i16> %zn, <vscale x 8 x i16> %zm) {
; CHECK-LABEL: test_uzpq2_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: uzpq2 z0.h, z0.h, z1.h
; CHECK-NEXT: ret
%res = call <vscale x 8 x i16> @llvm.aarch64.sve.uzpq2.nxv8i16(<vscale x 8 x i16> %zn, <vscale x 8 x i16> %zm)
ret <vscale x 8 x i16> %res
}

define <vscale x 4 x i32> @test_uzpq2_i32(<vscale x 4 x i32> %zn, <vscale x 4 x i32> %zm) {
; CHECK-LABEL: test_uzpq2_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: uzpq2 z0.s, z0.s, z1.s
; CHECK-NEXT: ret
%res = call <vscale x 4 x i32> @llvm.aarch64.sve.uzpq2.nxv4i32(<vscale x 4 x i32> %zn, <vscale x 4 x i32> %zm)
ret <vscale x 4 x i32> %res
}

define <vscale x 2 x i64> @test_uzpq2_i64(<vscale x 2 x i64> %zn, <vscale x 2 x i64> %zm) {
; CHECK-LABEL: test_uzpq2_i64:
; CHECK: // %bb.0:
; CHECK-NEXT: uzpq2 z0.d, z0.d, z1.d
; CHECK-NEXT: ret
%res = call <vscale x 2 x i64> @llvm.aarch64.sve.uzpq2.nxv2i64(<vscale x 2 x i64> %zn, <vscale x 2 x i64> %zm)
ret <vscale x 2 x i64> %res
}

define <vscale x 8 x half> @test_uzpq2_f16(<vscale x 8 x half> %zn, <vscale x 8 x half> %zm) {
; CHECK-LABEL: test_uzpq2_f16:
; CHECK: // %bb.0:
; CHECK-NEXT: uzpq2 z0.h, z0.h, z1.h
; CHECK-NEXT: ret
%res = call <vscale x 8 x half> @llvm.aarch64.sve.uzpq2.nxv8f16(<vscale x 8 x half> %zn, <vscale x 8 x half> %zm)
ret <vscale x 8 x half> %res
}

define <vscale x 4 x float> @test_uzpq2_f32(<vscale x 4 x float> %zn, <vscale x 4 x float> %zm) {
; CHECK-LABEL: test_uzpq2_f32:
; CHECK: // %bb.0:
; CHECK-NEXT: uzpq2 z0.s, z0.s, z1.s
; CHECK-NEXT: ret
%res = call <vscale x 4 x float> @llvm.aarch64.sve.uzpq2.nxv4f32(<vscale x 4 x float> %zn, <vscale x 4 x float> %zm)
ret <vscale x 4 x float> %res
}

define <vscale x 2 x double> @test_uzpq2_f64(<vscale x 2 x double> %zn, <vscale x 2 x double> %zm) {
; CHECK-LABEL: test_uzpq2_f64:
; CHECK: // %bb.0:
; CHECK-NEXT: uzpq2 z0.d, z0.d, z1.d
; CHECK-NEXT: ret
%res = call <vscale x 2 x double> @llvm.aarch64.sve.uzpq2.nxv2f64(<vscale x 2 x double> %zn, <vscale x 2 x double> %zm)
ret <vscale x 2 x double> %res
}

define <vscale x 8 x bfloat> @test_uzpq2_bf16(<vscale x 8 x bfloat> %zn, <vscale x 8 x bfloat> %zm) {
; CHECK-LABEL: test_uzpq2_bf16:
; CHECK: // %bb.0:
; CHECK-NEXT: uzpq2 z0.h, z0.h, z1.h
; CHECK-NEXT: ret
%res = call <vscale x 8 x bfloat> @llvm.aarch64.sve.uzpq2.nxv8bf16(<vscale x 8 x bfloat> %zn, <vscale x 8 x bfloat> %zm)
ret <vscale x 8 x bfloat> %res
}


declare <vscale x 16 x i8> @llvm.aarch64.sve.uzpq2.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.uzpq2.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.uzpq2.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.uzpq2.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 8 x half> @llvm.aarch64.sve.uzpq2.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>)
declare <vscale x 4 x float> @llvm.aarch64.sve.uzpq2.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>)
declare <vscale x 2 x double> @llvm.aarch64.sve.uzpq2.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>)
declare <vscale x 8 x bfloat> @llvm.aarch64.sve.uzpq2.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>)
85 changes: 85 additions & 0 deletions llvm/test/CodeGen/AArch64/sve2p1-intrinsics-zipq1.ll
@@ -0,0 +1,85 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2p1,+bf16 < %s | FileCheck %s
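; Tests for the @llvm.aarch64.sve.zipq1 intrinsics, which select to the SVE2.1 ZIPQ1
; instruction (interleave elements from the low halves of each 128-bit segment).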

define <vscale x 16 x i8> @test_zipq1_i8(<vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm) {
; CHECK-LABEL: test_zipq1_i8:
; CHECK: // %bb.0:
; CHECK-NEXT: zipq1 z0.b, z0.b, z1.b
; CHECK-NEXT: ret
%res = call <vscale x 16 x i8> @llvm.aarch64.sve.zipq1.nxv16i8(<vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm)
ret <vscale x 16 x i8> %res
}

define <vscale x 8 x i16> @test_zipq1_i16(<vscale x 8 x i16> %zn, <vscale x 8 x i16> %zm) {
; CHECK-LABEL: test_zipq1_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: zipq1 z0.h, z0.h, z1.h
; CHECK-NEXT: ret
%res = call <vscale x 8 x i16> @llvm.aarch64.sve.zipq1.nxv8i16(<vscale x 8 x i16> %zn, <vscale x 8 x i16> %zm)
ret <vscale x 8 x i16> %res
}

define <vscale x 4 x i32> @test_zipq1_i32(<vscale x 4 x i32> %zn, <vscale x 4 x i32> %zm) {
; CHECK-LABEL: test_zipq1_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: zipq1 z0.s, z0.s, z1.s
; CHECK-NEXT: ret
%res = call <vscale x 4 x i32> @llvm.aarch64.sve.zipq1.nxv4i32(<vscale x 4 x i32> %zn, <vscale x 4 x i32> %zm)
ret <vscale x 4 x i32> %res
}

define <vscale x 2 x i64> @test_zipq1_i64(<vscale x 2 x i64> %zn, <vscale x 2 x i64> %zm) {
; CHECK-LABEL: test_zipq1_i64:
; CHECK: // %bb.0:
; CHECK-NEXT: zipq1 z0.d, z0.d, z1.d
; CHECK-NEXT: ret
%res = call <vscale x 2 x i64> @llvm.aarch64.sve.zipq1.nxv2i64(<vscale x 2 x i64> %zn, <vscale x 2 x i64> %zm)
ret <vscale x 2 x i64> %res
}

define <vscale x 8 x half> @test_zipq1_f16(<vscale x 8 x half> %zn, <vscale x 8 x half> %zm) {
; CHECK-LABEL: test_zipq1_f16:
; CHECK: // %bb.0:
; CHECK-NEXT: zipq1 z0.h, z0.h, z1.h
; CHECK-NEXT: ret
%res = call <vscale x 8 x half> @llvm.aarch64.sve.zipq1.nxv8f16(<vscale x 8 x half> %zn, <vscale x 8 x half> %zm)
ret <vscale x 8 x half> %res
}

define <vscale x 4 x float> @test_zipq1_f32(<vscale x 4 x float> %zn, <vscale x 4 x float> %zm) {
; CHECK-LABEL: test_zipq1_f32:
; CHECK: // %bb.0:
; CHECK-NEXT: zipq1 z0.s, z0.s, z1.s
; CHECK-NEXT: ret
%res = call <vscale x 4 x float> @llvm.aarch64.sve.zipq1.nxv4f32(<vscale x 4 x float> %zn, <vscale x 4 x float> %zm)
ret <vscale x 4 x float> %res
}

define <vscale x 2 x double> @test_zipq1_f64(<vscale x 2 x double> %zn, <vscale x 2 x double> %zm) {
; CHECK-LABEL: test_zipq1_f64:
; CHECK: // %bb.0:
; CHECK-NEXT: zipq1 z0.d, z0.d, z1.d
; CHECK-NEXT: ret
%res = call <vscale x 2 x double> @llvm.aarch64.sve.zipq1.nxv2f64(<vscale x 2 x double> %zn, <vscale x 2 x double> %zm)
ret <vscale x 2 x double> %res
}

define <vscale x 8 x bfloat> @test_zipq1_bf16(<vscale x 8 x bfloat> %zn, <vscale x 8 x bfloat> %zm) {
; CHECK-LABEL: test_zipq1_bf16:
; CHECK: // %bb.0:
; CHECK-NEXT: zipq1 z0.h, z0.h, z1.h
; CHECK-NEXT: ret
%res = call <vscale x 8 x bfloat> @llvm.aarch64.sve.zipq1.nxv8bf16(<vscale x 8 x bfloat> %zn, <vscale x 8 x bfloat> %zm)
ret <vscale x 8 x bfloat> %res
}


declare <vscale x 16 x i8> @llvm.aarch64.sve.zipq1.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.zipq1.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.zipq1.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.zipq1.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 8 x half> @llvm.aarch64.sve.zipq1.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>)
declare <vscale x 4 x float> @llvm.aarch64.sve.zipq1.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>)
declare <vscale x 2 x double> @llvm.aarch64.sve.zipq1.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>)
declare <vscale x 8 x bfloat> @llvm.aarch64.sve.zipq1.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>)
85 changes: 85 additions & 0 deletions llvm/test/CodeGen/AArch64/sve2p1-intrinsics-zipq2.ll
@@ -0,0 +1,85 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2p1,+bf16 < %s | FileCheck %s
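; Tests for the @llvm.aarch64.sve.zipq2 intrinsics, which select to the SVE2.1 ZIPQ2
; instruction (interleave elements from the high halves of each 128-bit segment).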

define <vscale x 16 x i8> @test_zipq2_i8(<vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm) {
; CHECK-LABEL: test_zipq2_i8:
; CHECK: // %bb.0:
; CHECK-NEXT: zipq2 z0.b, z0.b, z1.b
; CHECK-NEXT: ret
%res = call <vscale x 16 x i8> @llvm.aarch64.sve.zipq2.nxv16i8(<vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm)
ret <vscale x 16 x i8> %res
}

define <vscale x 8 x i16> @test_zipq2_i16(<vscale x 8 x i16> %zn, <vscale x 8 x i16> %zm) {
; CHECK-LABEL: test_zipq2_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: zipq2 z0.h, z0.h, z1.h
; CHECK-NEXT: ret
%res = call <vscale x 8 x i16> @llvm.aarch64.sve.zipq2.nxv8i16(<vscale x 8 x i16> %zn, <vscale x 8 x i16> %zm)
ret <vscale x 8 x i16> %res
}

define <vscale x 4 x i32> @test_zipq2_i32(<vscale x 4 x i32> %zn, <vscale x 4 x i32> %zm) {
; CHECK-LABEL: test_zipq2_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: zipq2 z0.s, z0.s, z1.s
; CHECK-NEXT: ret
%res = call <vscale x 4 x i32> @llvm.aarch64.sve.zipq2.nxv4i32(<vscale x 4 x i32> %zn, <vscale x 4 x i32> %zm)
ret <vscale x 4 x i32> %res
}

define <vscale x 2 x i64> @test_zipq2_i64(<vscale x 2 x i64> %zn, <vscale x 2 x i64> %zm) {
; CHECK-LABEL: test_zipq2_i64:
; CHECK: // %bb.0:
; CHECK-NEXT: zipq2 z0.d, z0.d, z1.d
; CHECK-NEXT: ret
%res = call <vscale x 2 x i64> @llvm.aarch64.sve.zipq2.nxv2i64(<vscale x 2 x i64> %zn, <vscale x 2 x i64> %zm)
ret <vscale x 2 x i64> %res
}

define <vscale x 8 x half> @test_zipq2_f16(<vscale x 8 x half> %zn, <vscale x 8 x half> %zm) {
; CHECK-LABEL: test_zipq2_f16:
; CHECK: // %bb.0:
; CHECK-NEXT: zipq2 z0.h, z0.h, z1.h
; CHECK-NEXT: ret
%res = call <vscale x 8 x half> @llvm.aarch64.sve.zipq2.nxv8f16(<vscale x 8 x half> %zn, <vscale x 8 x half> %zm)
ret <vscale x 8 x half> %res
}

define <vscale x 4 x float> @test_zipq2_f32(<vscale x 4 x float> %zn, <vscale x 4 x float> %zm) {
; CHECK-LABEL: test_zipq2_f32:
; CHECK: // %bb.0:
; CHECK-NEXT: zipq2 z0.s, z0.s, z1.s
; CHECK-NEXT: ret
%res = call <vscale x 4 x float> @llvm.aarch64.sve.zipq2.nxv4f32(<vscale x 4 x float> %zn, <vscale x 4 x float> %zm)
ret <vscale x 4 x float> %res
}

define <vscale x 2 x double> @test_zipq2_f64(<vscale x 2 x double> %zn, <vscale x 2 x double> %zm) {
; CHECK-LABEL: test_zipq2_f64:
; CHECK: // %bb.0:
; CHECK-NEXT: zipq2 z0.d, z0.d, z1.d
; CHECK-NEXT: ret
%res = call <vscale x 2 x double> @llvm.aarch64.sve.zipq2.nxv2f64(<vscale x 2 x double> %zn, <vscale x 2 x double> %zm)
ret <vscale x 2 x double> %res
}

define <vscale x 8 x bfloat> @test_zipq2_bf16(<vscale x 8 x bfloat> %zn, <vscale x 8 x bfloat> %zm) {
; CHECK-LABEL: test_zipq2_bf16:
; CHECK: // %bb.0:
; CHECK-NEXT: zipq2 z0.h, z0.h, z1.h
; CHECK-NEXT: ret
%res = call <vscale x 8 x bfloat> @llvm.aarch64.sve.zipq2.nxv8bf16(<vscale x 8 x bfloat> %zn, <vscale x 8 x bfloat> %zm)
ret <vscale x 8 x bfloat> %res
}


declare <vscale x 16 x i8> @llvm.aarch64.sve.zipq2.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.zipq2.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.zipq2.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.zipq2.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 8 x half> @llvm.aarch64.sve.zipq2.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>)
declare <vscale x 4 x float> @llvm.aarch64.sve.zipq2.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>)
declare <vscale x 2 x double> @llvm.aarch64.sve.zipq2.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>)
declare <vscale x 8 x bfloat> @llvm.aarch64.sve.zipq2.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>)