diff --git a/clang/include/clang/Basic/arm_sme.td b/clang/include/clang/Basic/arm_sme.td index 1ac6d5170ea28..10aa0d1709a74 100644 --- a/clang/include/clang/Basic/arm_sme.td +++ b/clang/include/clang/Basic/arm_sme.td @@ -674,3 +674,21 @@ let TargetGuard = "sme2" in { def SVLUTI2_LANE_ZT_X2 : Inst<"svluti2_lane_zt_{d}_x2", "2.di[i", "cUcsUsiUibhf", MergeNone, "aarch64_sme_luti2_lane_zt_x2", [IsStreaming, IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_7>]>; def SVLUTI4_LANE_ZT_X2 : Inst<"svluti4_lane_zt_{d}_x2", "2.di[i", "cUcsUsiUibhf", MergeNone, "aarch64_sme_luti4_lane_zt_x2", [IsStreaming, IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_3>]>; } + +multiclass ZAReadz ch> { + let TargetGuard = "sme2p1" in { + def NAME # _H : SInst<"svreadz_hor_" # n_suffix # "_{d}", "dim", t, + MergeNone, i_prefix # "_horiz", + [IsStreaming, IsInOutZA], ch>; + + def NAME # _V : SInst<"svreadz_ver_" # n_suffix # "_{d}", "dim", t, + MergeNone, i_prefix # "_vert", + [IsStreaming, IsInOutZA], ch>; + } +} + +defm SVREADZ_ZA8 : ZAReadz<"za8", "cUc", "aarch64_sme_readz", [ImmCheck<0, ImmCheck0_0>]>; +defm SVREADZ_ZA16 : ZAReadz<"za16", "sUshb", "aarch64_sme_readz", [ImmCheck<0, ImmCheck0_1>]>; +defm SVREADZ_ZA32 : ZAReadz<"za32", "iUif", "aarch64_sme_readz", [ImmCheck<0, ImmCheck0_3>]>; +defm SVREADZ_ZA64 : ZAReadz<"za64", "lUld", "aarch64_sme_readz", [ImmCheck<0, ImmCheck0_7>]>; +defm SVREADZ_ZA128 : ZAReadz<"za128", "csilUcUiUsUlbhfd", "aarch64_sme_readz_q", [ImmCheck<0, ImmCheck0_15>]>; diff --git a/clang/test/CodeGen/aarch64-sme2p1-intrinsics/acle_sme2p1_movaz.c b/clang/test/CodeGen/aarch64-sme2p1-intrinsics/acle_sme2p1_movaz.c new file mode 100644 index 0000000000000..a0b5a882d53b2 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sme2p1-intrinsics/acle_sme2p1_movaz.c @@ -0,0 +1,417 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2p1 -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2p1 -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2p1 -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s + +#include + + +// CHECK-LABEL: define dso_local @test_svreadz_hor_za8_s8( +// CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.readz.horiz.nxv16i8(i32 0, i32 [[SLICE]]) +// CHECK-NEXT: ret [[TMP0]] +// +// CPP-CHECK-LABEL: define dso_local @_Z23test_svreadz_hor_za8_s8j( +// CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0:[0-9]+]] { +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.readz.horiz.nxv16i8(i32 0, i32 [[SLICE]]) +// CPP-CHECK-NEXT: ret [[TMP0]] +// +svint8_t test_svreadz_hor_za8_s8(uint32_t slice) __arm_streaming __arm_inout("za") +{ + return svreadz_hor_za8_s8(0, slice); +} + +// CHECK-LABEL: define dso_local @test_svreadz_hor_za8_u8( +// CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.readz.horiz.nxv16i8(i32 0, i32 [[SLICE]]) +// CHECK-NEXT: ret [[TMP0]] +// +// CPP-CHECK-LABEL: define dso_local @_Z23test_svreadz_hor_za8_u8j( +// CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.readz.horiz.nxv16i8(i32 0, i32 [[SLICE]]) +// CPP-CHECK-NEXT: ret [[TMP0]] +// +svuint8_t test_svreadz_hor_za8_u8(uint32_t slice) __arm_streaming __arm_inout("za") +{ + return svreadz_hor_za8_u8(0, slice); +} + +// CHECK-LABEL: define dso_local @test_svreadz_hor_za16_s16( +// CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.readz.horiz.nxv8i16(i32 0, i32 [[SLICE]]) +// CHECK-NEXT: ret [[TMP0]] +// +// CPP-CHECK-LABEL: define dso_local @_Z25test_svreadz_hor_za16_s16j( +// CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.readz.horiz.nxv8i16(i32 0, i32 [[SLICE]]) +// CPP-CHECK-NEXT: ret [[TMP0]] +// +svint16_t test_svreadz_hor_za16_s16(uint32_t slice) __arm_streaming __arm_inout("za") +{ + return svreadz_hor_za16_s16(0, slice); +} + +// CHECK-LABEL: define dso_local @test_svreadz_hor_za16_u16( +// CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.readz.horiz.nxv8i16(i32 1, i32 [[SLICE]]) +// CHECK-NEXT: ret [[TMP0]] +// +// CPP-CHECK-LABEL: define dso_local @_Z25test_svreadz_hor_za16_u16j( +// CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.readz.horiz.nxv8i16(i32 1, i32 [[SLICE]]) +// CPP-CHECK-NEXT: ret [[TMP0]] +// +svuint16_t test_svreadz_hor_za16_u16(uint32_t slice) __arm_streaming __arm_inout("za") +{ + return svreadz_hor_za16_u16(1, slice); +} + +// CHECK-LABEL: define dso_local @test_svreadz_hor_za16_f16( +// CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.readz.horiz.nxv8f16(i32 0, i32 [[SLICE]]) +// CHECK-NEXT: ret [[TMP0]] +// +// CPP-CHECK-LABEL: define dso_local @_Z25test_svreadz_hor_za16_f16j( +// CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.readz.horiz.nxv8f16(i32 0, i32 [[SLICE]]) +// CPP-CHECK-NEXT: ret [[TMP0]] +// +svfloat16_t test_svreadz_hor_za16_f16(uint32_t slice) __arm_streaming __arm_inout("za") +{ + return svreadz_hor_za16_f16(0, slice); +} + +// CHECK-LABEL: define dso_local @test_svreadz_hor_za16_bf16( +// CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.readz.horiz.nxv8bf16(i32 1, i32 [[SLICE]]) +// CHECK-NEXT: ret [[TMP0]] +// +// CPP-CHECK-LABEL: define dso_local @_Z26test_svreadz_hor_za16_bf16j( +// CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.readz.horiz.nxv8bf16(i32 1, i32 [[SLICE]]) +// CPP-CHECK-NEXT: ret [[TMP0]] +// +svbfloat16_t test_svreadz_hor_za16_bf16(uint32_t slice) __arm_streaming __arm_inout("za") +{ + return svreadz_hor_za16_bf16(1, slice); +} + + +// CHECK-LABEL: define dso_local @test_svreadz_hor_za32_s32( +// CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.readz.horiz.nxv4i32(i32 0, i32 [[SLICE]]) +// CHECK-NEXT: ret [[TMP0]] +// +// CPP-CHECK-LABEL: define dso_local @_Z25test_svreadz_hor_za32_s32j( +// CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.readz.horiz.nxv4i32(i32 0, i32 [[SLICE]]) +// CPP-CHECK-NEXT: ret [[TMP0]] +// +svint32_t test_svreadz_hor_za32_s32(uint32_t slice) __arm_streaming __arm_inout("za") +{ + return svreadz_hor_za32_s32(0, slice); +} + +// CHECK-LABEL: define dso_local @test_svreadz_hor_za32_u32( +// CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.readz.horiz.nxv4i32(i32 2, i32 [[SLICE]]) +// CHECK-NEXT: ret [[TMP0]] +// +// CPP-CHECK-LABEL: define dso_local @_Z25test_svreadz_hor_za32_u32j( +// CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.readz.horiz.nxv4i32(i32 2, i32 [[SLICE]]) +// CPP-CHECK-NEXT: ret [[TMP0]] +// +svuint32_t test_svreadz_hor_za32_u32(uint32_t slice) __arm_streaming __arm_inout("za") +{ + return svreadz_hor_za32_u32(2, slice); +} + +// CHECK-LABEL: define dso_local @test_svreadz_hor_za32_f32( +// CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.readz.horiz.nxv4f32(i32 3, i32 [[SLICE]]) +// CHECK-NEXT: ret [[TMP0]] +// +// CPP-CHECK-LABEL: define dso_local @_Z25test_svreadz_hor_za32_f32j( +// CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.readz.horiz.nxv4f32(i32 3, i32 [[SLICE]]) +// CPP-CHECK-NEXT: ret [[TMP0]] +// +svfloat32_t test_svreadz_hor_za32_f32(uint32_t slice) __arm_streaming __arm_inout("za") +{ + return svreadz_hor_za32_f32(3, slice); +} + +// CHECK-LABEL: define dso_local @test_svreadz_hor_za64_s64( +// CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.readz.horiz.nxv2i64(i32 0, i32 [[SLICE]]) +// CHECK-NEXT: ret [[TMP0]] +// +// CPP-CHECK-LABEL: define dso_local @_Z25test_svreadz_hor_za64_s64j( +// CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.readz.horiz.nxv2i64(i32 0, i32 [[SLICE]]) +// CPP-CHECK-NEXT: ret [[TMP0]] +// +svint64_t test_svreadz_hor_za64_s64(uint32_t slice) __arm_streaming __arm_inout("za") +{ + return svreadz_hor_za64_s64(0, slice); +} + +// CHECK-LABEL: define dso_local @test_svreadz_hor_za64_u64( +// CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.readz.horiz.nxv2i64(i32 4, i32 [[SLICE]]) +// CHECK-NEXT: ret [[TMP0]] +// +// CPP-CHECK-LABEL: define dso_local @_Z25test_svreadz_hor_za64_u64j( +// CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.readz.horiz.nxv2i64(i32 4, i32 [[SLICE]]) +// CPP-CHECK-NEXT: ret [[TMP0]] +// +svuint64_t test_svreadz_hor_za64_u64(uint32_t slice) __arm_streaming __arm_inout("za") +{ + return svreadz_hor_za64_u64(4, slice); +} + +// CHECK-LABEL: define dso_local @test_svreadz_hor_za64_f64( +// CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.readz.horiz.nxv2f64(i32 7, i32 [[SLICE]]) +// CHECK-NEXT: ret [[TMP0]] +// +// CPP-CHECK-LABEL: define dso_local @_Z25test_svreadz_hor_za64_f64j( +// CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.readz.horiz.nxv2f64(i32 7, i32 [[SLICE]]) +// CPP-CHECK-NEXT: ret [[TMP0]] +// +svfloat64_t test_svreadz_hor_za64_f64(uint32_t slice) __arm_streaming __arm_inout("za") +{ + return svreadz_hor_za64_f64(7, slice); +} + +// ZA128 +// CHECK-LABEL: define dso_local @test_svreadz_hor_za128_s8( +// CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.readz.q.horiz.nxv16i8(i32 0, i32 [[SLICE]]) +// CHECK-NEXT: ret [[TMP0]] +// +// CPP-CHECK-LABEL: define dso_local @_Z25test_svreadz_hor_za128_s8j( +// CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.readz.q.horiz.nxv16i8(i32 0, i32 [[SLICE]]) +// CPP-CHECK-NEXT: ret [[TMP0]] +// +svint8_t test_svreadz_hor_za128_s8(uint32_t slice) __arm_streaming __arm_inout("za") +{ + return svreadz_hor_za128_s8(0, slice); +} + +// CHECK-LABEL: define dso_local @test_svreadz_hor_za128_u8( +// CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.readz.q.horiz.nxv16i8(i32 1, i32 [[SLICE]]) +// CHECK-NEXT: ret [[TMP0]] +// +// CPP-CHECK-LABEL: define dso_local @_Z25test_svreadz_hor_za128_u8j( +// CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.readz.q.horiz.nxv16i8(i32 1, i32 [[SLICE]]) +// CPP-CHECK-NEXT: ret [[TMP0]] +// +svuint8_t test_svreadz_hor_za128_u8(uint32_t slice) __arm_streaming __arm_inout("za") +{ + return svreadz_hor_za128_u8(1, slice); +} + +// CHECK-LABEL: define dso_local @test_svreadz_hor_za128_s16( +// CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.readz.q.horiz.nxv8i16(i32 2, i32 [[SLICE]]) +// CHECK-NEXT: ret [[TMP0]] +// +// CPP-CHECK-LABEL: define dso_local @_Z26test_svreadz_hor_za128_s16j( +// CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.readz.q.horiz.nxv8i16(i32 2, i32 [[SLICE]]) +// CPP-CHECK-NEXT: ret [[TMP0]] +// +svint16_t test_svreadz_hor_za128_s16(uint32_t slice) __arm_streaming __arm_inout("za") +{ + return svreadz_hor_za128_s16(2, slice); +} + +// CHECK-LABEL: define dso_local @test_svreadz_hor_za128_u16( +// CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.readz.q.horiz.nxv8i16(i32 3, i32 [[SLICE]]) +// CHECK-NEXT: ret [[TMP0]] +// +// CPP-CHECK-LABEL: define dso_local @_Z26test_svreadz_hor_za128_u16j( +// CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.readz.q.horiz.nxv8i16(i32 3, i32 [[SLICE]]) +// CPP-CHECK-NEXT: ret [[TMP0]] +// +svuint16_t test_svreadz_hor_za128_u16(uint32_t slice) __arm_streaming __arm_inout("za") +{ + return svreadz_hor_za128_u16(3, slice); +} + +// CHECK-LABEL: define dso_local @test_svreadz_hor_za128_f16( +// CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.readz.q.horiz.nxv8f16(i32 4, i32 [[SLICE]]) +// CHECK-NEXT: ret [[TMP0]] +// +// CPP-CHECK-LABEL: define dso_local @_Z26test_svreadz_hor_za128_f16j( +// CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.readz.q.horiz.nxv8f16(i32 4, i32 [[SLICE]]) +// CPP-CHECK-NEXT: ret [[TMP0]] +// +svfloat16_t test_svreadz_hor_za128_f16(uint32_t slice) __arm_streaming __arm_inout("za") +{ + return svreadz_hor_za128_f16(4, slice); +} + +// CHECK-LABEL: define dso_local @test_svreadz_hor_za128_bf16( +// CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.readz.q.horiz.nxv8bf16(i32 5, i32 [[SLICE]]) +// CHECK-NEXT: ret [[TMP0]] +// +// CPP-CHECK-LABEL: define dso_local @_Z27test_svreadz_hor_za128_bf16j( +// CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.readz.q.horiz.nxv8bf16(i32 5, i32 [[SLICE]]) +// CPP-CHECK-NEXT: ret [[TMP0]] +// +svbfloat16_t test_svreadz_hor_za128_bf16(uint32_t slice) __arm_streaming __arm_inout("za") +{ + return svreadz_hor_za128_bf16(5, slice); +} + + +// CHECK-LABEL: define dso_local @test_svreadz_hor_za128_s32( +// CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.readz.q.horiz.nxv4i32(i32 6, i32 [[SLICE]]) +// CHECK-NEXT: ret [[TMP0]] +// +// CPP-CHECK-LABEL: define dso_local @_Z26test_svreadz_hor_za128_s32j( +// CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.readz.q.horiz.nxv4i32(i32 6, i32 [[SLICE]]) +// CPP-CHECK-NEXT: ret [[TMP0]] +// +svint32_t test_svreadz_hor_za128_s32(uint32_t slice) __arm_streaming __arm_inout("za") +{ + return svreadz_hor_za128_s32(6, slice); +} + +// CHECK-LABEL: define dso_local @test_svreadz_hor_za128_u32( +// CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.readz.q.horiz.nxv4i32(i32 7, i32 [[SLICE]]) +// CHECK-NEXT: ret [[TMP0]] +// +// CPP-CHECK-LABEL: define dso_local @_Z26test_svreadz_hor_za128_u32j( +// CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.readz.q.horiz.nxv4i32(i32 7, i32 [[SLICE]]) +// CPP-CHECK-NEXT: ret [[TMP0]] +// +svuint32_t test_svreadz_hor_za128_u32(uint32_t slice) __arm_streaming __arm_inout("za") +{ + return svreadz_hor_za128_u32(7, slice); +} + +// CHECK-LABEL: define dso_local @test_svreadz_hor_za128_f32( +// CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.readz.q.horiz.nxv4f32(i32 8, i32 [[SLICE]]) +// CHECK-NEXT: ret [[TMP0]] +// +// CPP-CHECK-LABEL: define dso_local @_Z26test_svreadz_hor_za128_f32j( +// CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.readz.q.horiz.nxv4f32(i32 8, i32 [[SLICE]]) +// CPP-CHECK-NEXT: ret [[TMP0]] +// +svfloat32_t test_svreadz_hor_za128_f32(uint32_t slice) __arm_streaming __arm_inout("za") +{ + return svreadz_hor_za128_f32(8, slice); +} + +// CHECK-LABEL: define dso_local @test_svreadz_hor_za128_s64( +// CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.readz.q.horiz.nxv2i64(i32 13, i32 [[SLICE]]) +// CHECK-NEXT: ret [[TMP0]] +// +// CPP-CHECK-LABEL: define dso_local @_Z26test_svreadz_hor_za128_s64j( +// CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.readz.q.horiz.nxv2i64(i32 13, i32 [[SLICE]]) +// CPP-CHECK-NEXT: ret [[TMP0]] +// +svint64_t test_svreadz_hor_za128_s64(uint32_t slice) __arm_streaming __arm_inout("za") +{ + return svreadz_hor_za128_s64(13, slice); +} +// CHECK-LABEL: define dso_local @test_svreadz_hor_za128_u64( +// CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.readz.q.horiz.nxv2i64(i32 14, i32 [[SLICE]]) +// CHECK-NEXT: ret [[TMP0]] +// +// CPP-CHECK-LABEL: define dso_local @_Z26test_svreadz_hor_za128_u64j( +// CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.readz.q.horiz.nxv2i64(i32 14, i32 [[SLICE]]) +// CPP-CHECK-NEXT: ret [[TMP0]] +// +svuint64_t test_svreadz_hor_za128_u64(uint32_t slice) __arm_streaming __arm_inout("za") +{ + return svreadz_hor_za128_u64(14, slice); +} + +// CHECK-LABEL: define dso_local @test_svreadz_hor_za128_f64( +// CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.readz.q.horiz.nxv2f64(i32 15, i32 [[SLICE]]) +// CHECK-NEXT: ret [[TMP0]] +// +// CPP-CHECK-LABEL: define dso_local @_Z26test_svreadz_hor_za128_f64j( +// CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.readz.q.horiz.nxv2f64(i32 15, i32 [[SLICE]]) +// CPP-CHECK-NEXT: ret [[TMP0]] +// +svfloat64_t test_svreadz_hor_za128_f64(uint32_t slice) __arm_streaming __arm_inout("za") +{ + return svreadz_hor_za128_f64(15, slice); +} diff --git a/clang/test/Sema/aarch64-sme2p1-intrinsics/acle_sme2p1_imm.cpp b/clang/test/Sema/aarch64-sme2p1-intrinsics/acle_sme2p1_imm.cpp new file mode 100644 index 0000000000000..a97790d0be7f1 --- /dev/null +++ b/clang/test/Sema/aarch64-sme2p1-intrinsics/acle_sme2p1_imm.cpp @@ -0,0 +1,21 @@ +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu \ +// RUN: -target-feature +sve2 -target-feature +sme2p1 -target-feature +bf16 -fsyntax-only -verify %s + +// REQUIRES: aarch64-registered-target + +#include + +void tests_readz_tile_to_vector_single(uint32_t slice) __arm_streaming __arm_inout("za") { + svreadz_hor_za8_s8(-1, slice); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 0]}} + svreadz_hor_za16_s16(-1, slice); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 1]}} + svreadz_hor_za32_s32(-1, slice); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}} + svreadz_hor_za64_s64(-1, slice); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 7]}} + svreadz_hor_za128_s8(-1, slice); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 15]}} + svreadz_hor_za128_s16(-1, slice); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 15]}} + svreadz_hor_za128_s32(-1, slice); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 15]}} + svreadz_hor_za128_s64(-1, slice); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 15]}} + svreadz_hor_za128_bf16(-1, slice); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 15]}} + return; +} + + diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td index bcaa37de74b63..c2984f2928c92 100644 --- a/llvm/include/llvm/IR/IntrinsicsAArch64.td +++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td @@ -2839,6 +2839,18 @@ let TargetPrefix = "aarch64" in { def int_aarch64_sme_writeq_horiz : SME_VectorToTile_Intrinsic; def int_aarch64_sme_writeq_vert : SME_VectorToTile_Intrinsic; + + class SME_MOVAZ_TileToVector_Intrinsic + : DefaultAttrsIntrinsic<[llvm_anyvector_ty], + [llvm_i32_ty, llvm_i32_ty], + [IntrNoMem, IntrHasSideEffects, ImmArg>]>; + + def int_aarch64_sme_readz_horiz : SME_MOVAZ_TileToVector_Intrinsic; + def int_aarch64_sme_readz_vert : SME_MOVAZ_TileToVector_Intrinsic; + + def int_aarch64_sme_readz_q_horiz : SME_MOVAZ_TileToVector_Intrinsic; + def int_aarch64_sme_readz_q_vert : SME_MOVAZ_TileToVector_Intrinsic; + def int_aarch64_sme_zero : DefaultAttrsIntrinsic<[], [llvm_i32_ty], [ImmArg>]>; class SME_OuterProduct_Intrinsic @@ -3646,4 +3658,4 @@ def int_aarch64_sve_pmov_to_pred_lane_zero : SVE2_1VectorArg_Pred_Intrinsic; def int_aarch64_sve_pmov_to_vector_lane_merging : SVE2_Pred_1VectorArgIndexed_Intrinsic; -def int_aarch64_sve_pmov_to_vector_lane_zeroing : SVE2_Pred_1VectorArg_Intrinsic; \ No newline at end of file +def int_aarch64_sve_pmov_to_vector_lane_zeroing : SVE2_Pred_1VectorArg_Intrinsic; diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 80181a77c9d23..16f8ab7111741 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -2832,6 +2832,23 @@ AArch64TargetLowering::EmitTileLoad(unsigned Opc, unsigned BaseReg, return BB; } +MachineBasicBlock * +AArch64TargetLowering::EmitTileMovaz(unsigned Opc, unsigned BaseReg, + MachineInstr &MI, + MachineBasicBlock *BB) const { + const TargetInstrInfo *TII = Subtarget->getInstrInfo(); + MachineInstrBuilder MIB = BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(Opc)); + + MIB.add(MI.getOperand(0)); // Output ZPR + MIB.addReg(BaseReg + MI.getOperand(1).getImm(), + RegState::Define); // Output ZA Tile + MIB.addReg(BaseReg + MI.getOperand(1).getImm()); // Input Za Tile + MIB.add(MI.getOperand(2)); // slice index register + MIB.add(MI.getOperand(3)); // slice index offset + MI.eraseFromParent(); // The pseudo is gone now. + return BB; +} + MachineBasicBlock * AArch64TargetLowering::EmitFill(MachineInstr &MI, MachineBasicBlock *BB) const { const TargetInstrInfo *TII = Subtarget->getInstrInfo(); @@ -2866,19 +2883,27 @@ MachineBasicBlock *AArch64TargetLowering::EmitZTInstr(MachineInstr &MI, MachineBasicBlock * AArch64TargetLowering::EmitZAInstr(unsigned Opc, unsigned BaseReg, - MachineInstr &MI, - MachineBasicBlock *BB, bool HasTile) const { + MachineInstr &MI, MachineBasicBlock *BB, + bool HasTile, bool HasZPROut) const { const TargetInstrInfo *TII = Subtarget->getInstrInfo(); MachineInstrBuilder MIB = BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(Opc)); unsigned StartIdx = 0; - - if (HasTile) { - MIB.addReg(BaseReg + MI.getOperand(0).getImm(), RegState::Define); - MIB.addReg(BaseReg + MI.getOperand(0).getImm()); - StartIdx = 1; - } else - MIB.addReg(BaseReg, RegState::Define).addReg(BaseReg); - + if (HasZPROut) { + if (HasTile) { + MIB.add(MI.getOperand(0)); // Output ZPR + MIB.addReg(BaseReg + MI.getOperand(1).getImm(), + RegState::Define); // Output ZA Tile + MIB.addReg(BaseReg + MI.getOperand(1).getImm()); // Input Za Tile + StartIdx = 2; + } + } else { + if (HasTile) { + MIB.addReg(BaseReg + MI.getOperand(0).getImm(), RegState::Define); + MIB.addReg(BaseReg + MI.getOperand(0).getImm()); + StartIdx = 1; + } else + MIB.addReg(BaseReg, RegState::Define).addReg(BaseReg); + } for (unsigned I = StartIdx; I < MI.getNumOperands(); ++I) MIB.add(MI.getOperand(I)); @@ -2913,17 +2938,58 @@ MachineBasicBlock *AArch64TargetLowering::EmitInstrWithCustomInserter( TII->get(MI.getOpcode()).TSFlags & AArch64::SMEMatrixTypeMask; switch (SMEMatrixType) { case (AArch64::SMEMatrixArray): - return EmitZAInstr(SMEOrigInstr, AArch64::ZA, MI, BB, /*HasTile*/ false); + return EmitZAInstr(SMEOrigInstr, AArch64::ZA, MI, BB, /*HasTile*/ false, + /*HasZPROut*/ false); case (AArch64::SMEMatrixTileB): - return EmitZAInstr(SMEOrigInstr, AArch64::ZAB0, MI, BB, /*HasTile*/ true); + switch (MI.getOpcode()) { + case AArch64::MOVAZ_ZMI_H_B_PSEUDO: + case AArch64::MOVAZ_ZMI_V_B_PSEUDO: + return EmitZAInstr(SMEOrigInstr, AArch64::ZAB0, MI, BB, + /*HasTile*/ true, /*HasZPROut*/ true); + default: + return EmitZAInstr(SMEOrigInstr, AArch64::ZAB0, MI, BB, + /*HasTile*/ true, /*HasZPROut*/ false); + } case (AArch64::SMEMatrixTileH): - return EmitZAInstr(SMEOrigInstr, AArch64::ZAH0, MI, BB, /*HasTile*/ true); + switch (MI.getOpcode()) { + case AArch64::MOVAZ_ZMI_H_H_PSEUDO: + case AArch64::MOVAZ_ZMI_V_H_PSEUDO: + return EmitZAInstr(SMEOrigInstr, AArch64::ZAH0, MI, BB, + /*HasTile*/ true, /*HasZPROut*/ true); + default: + return EmitZAInstr(SMEOrigInstr, AArch64::ZAH0, MI, BB, + /*HasTile*/ true, /*HasZPROut*/ false); + } case (AArch64::SMEMatrixTileS): - return EmitZAInstr(SMEOrigInstr, AArch64::ZAS0, MI, BB, /*HasTile*/ true); + switch (MI.getOpcode()) { + case AArch64::MOVAZ_ZMI_H_S_PSEUDO: + case AArch64::MOVAZ_ZMI_V_S_PSEUDO: + return EmitZAInstr(SMEOrigInstr, AArch64::ZAS0, MI, BB, + /*HasTile*/ true, /*HasZPROut*/ true); + default: + return EmitZAInstr(SMEOrigInstr, AArch64::ZAS0, MI, BB, + /*HasTile*/ true, /*HasZPROut*/ false); + } case (AArch64::SMEMatrixTileD): - return EmitZAInstr(SMEOrigInstr, AArch64::ZAD0, MI, BB, /*HasTile*/ true); + switch (MI.getOpcode()) { + case AArch64::MOVAZ_ZMI_H_D_PSEUDO: + case AArch64::MOVAZ_ZMI_V_D_PSEUDO: + return EmitZAInstr(SMEOrigInstr, AArch64::ZAD0, MI, BB, + /*HasTile*/ true, /*HasZPROut*/ true); + default: + return EmitZAInstr(SMEOrigInstr, AArch64::ZAD0, MI, BB, + /*HasTile*/ true, /*HasZPROut*/ false); + } case (AArch64::SMEMatrixTileQ): - return EmitZAInstr(SMEOrigInstr, AArch64::ZAQ0, MI, BB, /*HasTile*/ true); + switch (MI.getOpcode()) { + case AArch64::MOVAZ_ZMI_H_Q_PSEUDO: + case AArch64::MOVAZ_ZMI_V_Q_PSEUDO: + return EmitZAInstr(SMEOrigInstr, AArch64::ZAQ0, MI, BB, + /*HasTile*/ true, /*HasZPROut*/ true); + default: + return EmitZAInstr(SMEOrigInstr, AArch64::ZAQ0, MI, BB, + /*HasTile*/ true, /*HasZPROut*/ false); + } } } diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h index 18439dc7f0102..da1ab2a10add4 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -635,10 +635,13 @@ class AArch64TargetLowering : public TargetLowering { MachineBasicBlock *EmitTileLoad(unsigned Opc, unsigned BaseReg, MachineInstr &MI, MachineBasicBlock *BB) const; + MachineBasicBlock *EmitTileMovaz(unsigned Opc, unsigned BaseReg, + MachineInstr &MI, + MachineBasicBlock *BB) const; MachineBasicBlock *EmitFill(MachineInstr &MI, MachineBasicBlock *BB) const; MachineBasicBlock *EmitZAInstr(unsigned Opc, unsigned BaseReg, MachineInstr &MI, MachineBasicBlock *BB, - bool HasTile) const; + bool HasTile, bool HasZPROut) const; MachineBasicBlock *EmitZTInstr(MachineInstr &MI, MachineBasicBlock *BB, unsigned Opcode, bool Op0IsDef) const; MachineBasicBlock *EmitZero(MachineInstr &MI, MachineBasicBlock *BB) const; diff --git a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td index 2db0fa2534345..e6c800d55077b 100644 --- a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td @@ -777,7 +777,8 @@ defm FSUB_VG4_M4Z_D : sme2_multivec_accum_add_sub_vg4<"fsub", 0b1001, MatrixOp64 } let Predicates = [HasSME2p1] in { -defm MOVAZ_ZMI : sme2p1_movaz_tile_to_vec<"movaz">; +defm MOVAZ_ZMI : sme2p1_movaz_tile_to_vec<"movaz", int_aarch64_sme_readz_horiz, int_aarch64_sme_readz_vert, + int_aarch64_sme_readz_q_horiz, int_aarch64_sme_readz_q_vert>; defm MOVAZ_2ZMI : sme2p1_movaz_tile_to_vec_vg2<"movaz">; defm MOVAZ_4ZMI : sme2p1_movaz_tile_to_vec_vg4<"movaz">; defm MOVAZ_VG2_2ZM : sme2_mova_array_to_vec_vg2_multi<0b010, "movaz">; diff --git a/llvm/lib/Target/AArch64/SMEInstrFormats.td b/llvm/lib/Target/AArch64/SMEInstrFormats.td index 3363aab4b093c..65228910f1d9d 100644 --- a/llvm/lib/Target/AArch64/SMEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SMEInstrFormats.td @@ -104,6 +104,13 @@ class sme2_move_to_tile_pseudo + : SMEPseudo2Instr, + Pseudo<(outs vector_ty:$Zn), (ins tile_imm:$tile, MatrixIndexGPR32Op12_15:$Rs, imm_ty:$imm), []> { + let SMEMatrixType = za_flag; + let usesCustomInserter = 1; +} + //===----------------------------------------------------------------------===// // SME pattern match helpers. //===----------------------------------------------------------------------===// @@ -189,6 +196,11 @@ class SME2_Tile_VG4_Multi_Pat(name # _PSEUDO) $tile, $base, $offset, (REG_SEQUENCE ZPR4Mul4, vt:$Zn1, zsub0, vt:$Zn2, zsub1, vt:$Zn3, zsub2, vt:$Zn4, zsub3))>; + +class SME2_Tile_Movaz_Pat + : Pat<(out_vt (intrinsic tile_imm:$tile, (i32 (tileslice MatrixIndexGPR32Op12_15:$base, index_ty:$offset)))), + (!cast(name # _PSEUDO) $tile, $base, $offset)>; + //===----------------------------------------------------------------------===// // SME pattern match helpers. //===----------------------------------------------------------------------===// @@ -4029,6 +4041,7 @@ multiclass sme2_mova_tile_to_vec_vg2_multi{ defm _V : sme2_mova_tile_to_vec_vg2_multi_inst<0b1, 0b000, mnemonic>; } + // SME2p1 move tile to vector and zero tile, two registers multiclass sme2p1_movaz_tile_to_vec_vg2{ defm _H : sme2_mova_tile_to_vec_vg2_multi_inst<0b0, 0b010, mnemonic>; @@ -4697,14 +4710,14 @@ class sme2p1_movaz_tile_to_vec_base sz, bit q, bit v, ZPRRegOp vector_ty multiclass sme2p1_movaz_tile_to_vec_base { def _B : sme2p1_movaz_tile_to_vec_base<0b00, 0b0, v, ZPR8, !if(v, TileVectorOpV8, TileVectorOpH8), - sme_elm_idx0_15, mnemonic> { + sme_elm_idx0_15, mnemonic>, SMEPseudo2Instr { bits<4> imm; let Inst{8-5} = imm; } def _H : sme2p1_movaz_tile_to_vec_base<0b01, 0b0, v, ZPR16, !if(v, TileVectorOpV16, TileVectorOpH16), - sme_elm_idx0_7, mnemonic> { + sme_elm_idx0_7, mnemonic>, SMEPseudo2Instr { bits<1> ZAn; bits<3> imm; let Inst{8} = ZAn; @@ -4713,7 +4726,7 @@ multiclass sme2p1_movaz_tile_to_vec_base { def _S : sme2p1_movaz_tile_to_vec_base<0b10, 0b0, v, ZPR32, !if(v, TileVectorOpV32, TileVectorOpH32), - sme_elm_idx0_3, mnemonic> { + sme_elm_idx0_3, mnemonic>, SMEPseudo2Instr { bits<2> ZAn; bits<2> imm; let Inst{8-7} = ZAn; @@ -4722,7 +4735,7 @@ multiclass sme2p1_movaz_tile_to_vec_base { def _D : sme2p1_movaz_tile_to_vec_base<0b11, 0b0, v, ZPR64, !if(v, TileVectorOpV64, TileVectorOpH64), - sme_elm_idx0_1, mnemonic> { + sme_elm_idx0_1, mnemonic>, SMEPseudo2Instr { bits<3> ZAn; bits<1> imm; let Inst{8-6} = ZAn; @@ -4731,15 +4744,66 @@ multiclass sme2p1_movaz_tile_to_vec_base { def _Q : sme2p1_movaz_tile_to_vec_base<0b11, 0b1, v, ZPR128, !if(v, TileVectorOpV128, TileVectorOpH128), - sme_elm_idx0_0, mnemonic> { + sme_elm_idx0_0, mnemonic>, SMEPseudo2Instr { bits<4> ZAn; let Inst{8-5} = ZAn; } } -multiclass sme2p1_movaz_tile_to_vec{ +multiclass sme2p1_movaz_tile_to_vec{ defm _H : sme2p1_movaz_tile_to_vec_base<0b0, mnemonic>; defm _V : sme2p1_movaz_tile_to_vec_base<0b1, mnemonic>; + + def NAME # _H_B_PSEUDO : sme2_movez_to_tile_pseudo; + def NAME # _H_H_PSEUDO : sme2_movez_to_tile_pseudo; + def NAME # _H_S_PSEUDO : sme2_movez_to_tile_pseudo; + def NAME # _H_D_PSEUDO : sme2_movez_to_tile_pseudo; + def NAME # _H_Q_PSEUDO : sme2_movez_to_tile_pseudo; + + def NAME # _V_B_PSEUDO : sme2_movez_to_tile_pseudo; + def NAME # _V_H_PSEUDO : sme2_movez_to_tile_pseudo; + def NAME # _V_S_PSEUDO : sme2_movez_to_tile_pseudo; + def NAME # _V_D_PSEUDO : sme2_movez_to_tile_pseudo; + def NAME # _V_Q_PSEUDO : sme2_movez_to_tile_pseudo; + + def : SME2_Tile_Movaz_Pat; + def : SME2_Tile_Movaz_Pat; + def : SME2_Tile_Movaz_Pat; + def : SME2_Tile_Movaz_Pat; + def : SME2_Tile_Movaz_Pat; + def : SME2_Tile_Movaz_Pat; + def : SME2_Tile_Movaz_Pat; + def : SME2_Tile_Movaz_Pat; + + def : SME2_Tile_Movaz_Pat; + def : SME2_Tile_Movaz_Pat; + def : SME2_Tile_Movaz_Pat; + def : SME2_Tile_Movaz_Pat; + def : SME2_Tile_Movaz_Pat; + def : SME2_Tile_Movaz_Pat; + def : SME2_Tile_Movaz_Pat; + def : SME2_Tile_Movaz_Pat; + + // H_Q + def : SME2_Tile_Movaz_Pat; + def : SME2_Tile_Movaz_Pat; + def : SME2_Tile_Movaz_Pat; + def : SME2_Tile_Movaz_Pat; + def : SME2_Tile_Movaz_Pat; + def : SME2_Tile_Movaz_Pat; + def : SME2_Tile_Movaz_Pat; + def : SME2_Tile_Movaz_Pat; + + // _V_Q + def : SME2_Tile_Movaz_Pat; + def : SME2_Tile_Movaz_Pat; + def : SME2_Tile_Movaz_Pat; + def : SME2_Tile_Movaz_Pat; + def : SME2_Tile_Movaz_Pat; + def : SME2_Tile_Movaz_Pat; + def : SME2_Tile_Movaz_Pat; + def : SME2_Tile_Movaz_Pat; } //===----------------------------------------------------------------------===// diff --git a/llvm/test/CodeGen/AArch64/sme2p1-intrinsics-movaz.ll b/llvm/test/CodeGen/AArch64/sme2p1-intrinsics-movaz.ll new file mode 100644 index 0000000000000..7c556e09c2e96 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sme2p1-intrinsics-movaz.ll @@ -0,0 +1,445 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2p1 -verify-machineinstrs < %s | FileCheck %s + +;MOVAZ (tile to vector, single) + +;; +; Horiz +;; +define @test_readz_hor_z8_i8(i32 %tile, i32 %slice) { +; CHECK-LABEL: test_readz_hor_z8_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w12, w1 +; CHECK-NEXT: movaz z0.b, za0h.b[w12, 0] +; CHECK-NEXT: movaz z0.b, za0h.b[w12, 14] +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sme.readz.horiz.nxv16i8(i32 0, i32 %slice) + %slice.max = add i32 %slice, 14 + %res2 = call @llvm.aarch64.sme.readz.horiz.nxv16i8(i32 0, i32 %slice.max) + ret %res2 +} + +define @test_readz_hor_z16_i16(i32 %tile, i32 %slice) { +; CHECK-LABEL: test_readz_hor_z16_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w12, w1 +; CHECK-NEXT: movaz z0.h, za0h.h[w12, 0] +; CHECK-NEXT: movaz z0.h, za1h.h[w12, 7] +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sme.readz.horiz.nxv8i16(i32 0, i32 %slice) + %slice.max = add i32 %slice, 7 + %res2 = call @llvm.aarch64.sme.readz.horiz.nxv8i16(i32 1, i32 %slice.max) + ret %res2 +} + +define @test_readz_hor_z32_i32(i32 %tile, i32 %slice) { +; CHECK-LABEL: test_readz_hor_z32_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w12, w1 +; CHECK-NEXT: movaz z0.s, za0h.s[w12, 0] +; CHECK-NEXT: movaz z0.s, za3h.s[w12, 3] +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sme.readz.horiz.nxv4i32(i32 0, i32 %slice) + %slice.max = add i32 %slice, 3 + %res2 = call @llvm.aarch64.sme.readz.horiz.nxv4i32(i32 3, i32 %slice.max) + ret %res2 +} + +define @test_readz_hor_z64_i64(i32 %tile, i32 %slice) { +; CHECK-LABEL: test_readz_hor_z64_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w12, w1 +; CHECK-NEXT: movaz z0.d, za0h.d[w12, 0] +; CHECK-NEXT: movaz z1.d, za7h.d[w12, 1] +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sme.readz.horiz.nxv2i64(i32 0, i32 %slice) + %slice.max = add i32 %slice, 1 + %res2 = call @llvm.aarch64.sme.readz.horiz.nxv2i64(i32 7, i32 %slice.max) + ret %res +} + +define @test_readz_hor_z16_bf16(i32 %tile, i32 %slice) { +; CHECK-LABEL: test_readz_hor_z16_bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w12, w1 +; CHECK-NEXT: movaz z0.h, za0h.h[w12, 0] +; CHECK-NEXT: movaz z0.h, za1h.h[w12, 7] +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sme.readz.horiz.nxv8bf16(i32 0, i32 %slice) + %slice.max = add i32 %slice, 7 + %res2 = call @llvm.aarch64.sme.readz.horiz.nxv8bf16(i32 1, i32 %slice.max) + ret %res2 +} + +define @test_readz_hor_z16_f16(i32 %tile, i32 %slice) { +; CHECK-LABEL: test_readz_hor_z16_f16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w12, w1 +; CHECK-NEXT: movaz z0.h, za0h.h[w12, 0] +; CHECK-NEXT: movaz z0.h, za1h.h[w12, 7] +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sme.readz.horiz.nxv8f16(i32 0, i32 %slice) + %slice.max = add i32 %slice, 7 + %res2 = call @llvm.aarch64.sme.readz.horiz.nxv8f16(i32 1, i32 %slice.max) + ret %res2 +} + +define @test_readz_hor_z32_f32(i32 %tile, i32 %slice) { +; CHECK-LABEL: test_readz_hor_z32_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w12, w1 +; CHECK-NEXT: movaz z0.s, za0h.s[w12, 0] +; CHECK-NEXT: movaz z0.s, za3h.s[w12, 3] +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sme.readz.horiz.nxv4f32(i32 0, i32 %slice) + %slice.max = add i32 %slice, 3 + %res2 = call @llvm.aarch64.sme.readz.horiz.nxv4f32(i32 3, i32 %slice.max) + ret %res2 +} + +define @test_readz_hor_z64_f64(i32 %tile, i32 %slice) { +; CHECK-LABEL: test_readz_hor_z64_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w12, w1 +; CHECK-NEXT: movaz z0.d, za0h.d[w12, 0] +; CHECK-NEXT: movaz z1.d, za7h.d[w12, 1] +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sme.readz.horiz.nxv2f64(i32 0, i32 %slice) + %slice.max = add i32 %slice, 1 + %res2 = call @llvm.aarch64.sme.readz.horiz.nxv2f64(i32 7, i32 %slice.max) + ret %res +} + +define @test_readz_hor_z128_i8(i32 %tile, i32 %slice) { +; CHECK-LABEL: test_readz_hor_z128_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w12, w1 +; CHECK-NEXT: movaz z0.q, za0h.q[w12, 0] +; CHECK-NEXT: movaz z0.q, za15h.q[w12, 0] +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sme.readz.q.horiz.nxv16i8(i32 0, i32 %slice) + %res2 = call @llvm.aarch64.sme.readz.q.horiz.nxv16i8(i32 15, i32 %slice) + ret %res2 +} + +define @test_readz_hor_z128_i16(i32 %tile, i32 %slice) { +; CHECK-LABEL: test_readz_hor_z128_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w12, w1 +; CHECK-NEXT: movaz z0.q, za0h.q[w12, 0] +; CHECK-NEXT: movaz z0.q, za15h.q[w12, 0] +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sme.readz.q.horiz.nxv8i16(i32 0, i32 %slice) + %res2 = call @llvm.aarch64.sme.readz.q.horiz.nxv8i16(i32 15, i32 %slice) + ret %res2 +} + +define @test_readz_hor_z128_i32(i32 %tile, i32 %slice) { +; CHECK-LABEL: test_readz_hor_z128_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w12, w1 +; CHECK-NEXT: movaz z0.q, za0h.q[w12, 0] +; CHECK-NEXT: movaz z0.q, za15h.q[w12, 0] +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sme.readz.q.horiz.nxv4i32(i32 0, i32 %slice) + %res2 = call @llvm.aarch64.sme.readz.q.horiz.nxv4i32(i32 15, i32 %slice) + ret %res2 +} + +define @test_readz_hor_z128_i64(i32 %tile, i32 %slice) { +; CHECK-LABEL: test_readz_hor_z128_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w12, w1 +; CHECK-NEXT: movaz z0.q, za0h.q[w12, 0] +; CHECK-NEXT: movaz z1.q, za15h.q[w12, 0] +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sme.readz.q.horiz.nxv2i64(i32 0, i32 %slice) + %res2 = call @llvm.aarch64.sme.readz.q.horiz.nxv2i64(i32 15, i32 %slice) + ret %res +} + +define @test_readz_hor_z128_bf16(i32 %tile, i32 %slice) { +; CHECK-LABEL: test_readz_hor_z128_bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w12, w1 +; CHECK-NEXT: movaz z0.q, za0h.q[w12, 0] +; CHECK-NEXT: movaz z0.q, za15h.q[w12, 0] +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sme.readz.q.horiz.nxv8bf16(i32 0, i32 %slice) + %res2 = call @llvm.aarch64.sme.readz.q.horiz.nxv8bf16(i32 15, i32 %slice) + ret %res2 +} + +define @test_readz_hor_z128_f16(i32 %tile, i32 %slice) { +; CHECK-LABEL: test_readz_hor_z128_f16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w12, w1 +; CHECK-NEXT: movaz z0.q, za0h.q[w12, 0] +; CHECK-NEXT: movaz z0.q, za15h.q[w12, 0] +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sme.readz.q.horiz.nxv8f16(i32 0, i32 %slice) + %res2 = call @llvm.aarch64.sme.readz.q.horiz.nxv8f16(i32 15, i32 %slice) + ret %res2 +} + +define @test_readz_hor_z128_f32(i32 %tile, i32 %slice) { +; CHECK-LABEL: test_readz_hor_z128_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w12, w1 +; CHECK-NEXT: movaz z0.q, za0h.q[w12, 0] +; CHECK-NEXT: movaz z0.q, za15h.q[w12, 0] +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sme.readz.q.horiz.nxv4f32(i32 0, i32 %slice) + %res2 = call @llvm.aarch64.sme.readz.q.horiz.nxv4f32(i32 15, i32 %slice) + ret %res2 +} + +define @test_readz_hor_z128_f64(i32 %tile, i32 %slice) { +; CHECK-LABEL: test_readz_hor_z128_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w12, w1 +; CHECK-NEXT: movaz z0.q, za0h.q[w12, 0] +; CHECK-NEXT: movaz z1.q, za15h.q[w12, 0] +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sme.readz.q.horiz.nxv2f64(i32 0, i32 %slice) + %res2 = call @llvm.aarch64.sme.readz.q.horiz.nxv2f64(i32 15, i32 %slice) + ret %res +} + +;; +; Vert +;; +define @test_readz_ver_z8_i8(i32 %tile, i32 %slice) { +; CHECK-LABEL: test_readz_ver_z8_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w12, w1 +; CHECK-NEXT: movaz z0.b, za0v.b[w12, 0] +; CHECK-NEXT: movaz z0.b, za0v.b[w12, 14] +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sme.readz.vert.nxv16i8(i32 0, i32 %slice) + %slice.max = add i32 %slice, 14 + %res2 = call @llvm.aarch64.sme.readz.vert.nxv16i8(i32 0, i32 %slice.max) + ret %res2 +} + +define @test_readz_ver_z16_i16(i32 %tile, i32 %slice) { +; CHECK-LABEL: test_readz_ver_z16_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w12, w1 +; CHECK-NEXT: movaz z0.h, za0v.h[w12, 0] +; CHECK-NEXT: movaz z0.h, za1v.h[w12, 7] +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sme.readz.vert.nxv8i16(i32 0, i32 %slice) + %slice.max = add i32 %slice, 7 + %res2 = call @llvm.aarch64.sme.readz.vert.nxv8i16(i32 1, i32 %slice.max) + ret %res2 +} + +define @test_readz_ver_z32_i32(i32 %tile, i32 %slice) { +; CHECK-LABEL: test_readz_ver_z32_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w12, w1 +; CHECK-NEXT: movaz z0.s, za0v.s[w12, 0] +; CHECK-NEXT: movaz z0.s, za3v.s[w12, 3] +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sme.readz.vert.nxv4i32(i32 0, i32 %slice) + %slice.max = add i32 %slice, 3 + %res2 = call @llvm.aarch64.sme.readz.vert.nxv4i32(i32 3, i32 %slice.max) + ret %res2 +} + +define @test_readz_ver_z64_i64(i32 %tile, i32 %slice) { +; CHECK-LABEL: test_readz_ver_z64_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w12, w1 +; CHECK-NEXT: movaz z0.d, za0v.d[w12, 0] +; CHECK-NEXT: movaz z1.d, za7v.d[w12, 1] +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sme.readz.vert.nxv2i64(i32 0, i32 %slice) + %slice.max = add i32 %slice, 1 + %res2 = call @llvm.aarch64.sme.readz.vert.nxv2i64(i32 7, i32 %slice.max) + ret %res +} + +define @test_readz_ver_z16_bf16(i32 %tile, i32 %slice) { +; CHECK-LABEL: test_readz_ver_z16_bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w12, w1 +; CHECK-NEXT: movaz z0.h, za0v.h[w12, 0] +; CHECK-NEXT: movaz z0.h, za1v.h[w12, 7] +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sme.readz.vert.nxv8bf16(i32 0, i32 %slice) + %slice.max = add i32 %slice, 7 + %res2 = call @llvm.aarch64.sme.readz.vert.nxv8bf16(i32 1, i32 %slice.max) + ret %res2 +} + +define @test_readz_ver_z16_f16(i32 %tile, i32 %slice) { +; CHECK-LABEL: test_readz_ver_z16_f16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w12, w1 +; CHECK-NEXT: movaz z0.h, za0v.h[w12, 0] +; CHECK-NEXT: movaz z0.h, za1v.h[w12, 7] +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sme.readz.vert.nxv8f16(i32 0, i32 %slice) + %slice.max = add i32 %slice, 7 + %res2 = call @llvm.aarch64.sme.readz.vert.nxv8f16(i32 1, i32 %slice.max) + ret %res2 +} + +define @test_readz_ver_z32_f32(i32 %tile, i32 %slice) { +; CHECK-LABEL: test_readz_ver_z32_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w12, w1 +; CHECK-NEXT: movaz z0.s, za0v.s[w12, 0] +; CHECK-NEXT: movaz z0.s, za3v.s[w12, 3] +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sme.readz.vert.nxv4f32(i32 0, i32 %slice) + %slice.max = add i32 %slice, 3 + %res2 = call @llvm.aarch64.sme.readz.vert.nxv4f32(i32 3, i32 %slice.max) + ret %res2 +} + +define @test_readz_ver_z64_f64(i32 %tile, i32 %slice) { +; CHECK-LABEL: test_readz_ver_z64_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w12, w1 +; CHECK-NEXT: movaz z0.d, za0v.d[w12, 0] +; CHECK-NEXT: movaz z1.d, za7v.d[w12, 1] +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sme.readz.vert.nxv2f64(i32 0, i32 %slice) + %slice.max = add i32 %slice, 1 + %res2 = call @llvm.aarch64.sme.readz.vert.nxv2f64(i32 7, i32 %slice.max) + ret %res +} + +define @test_readz_ver_z128_i8(i32 %tile, i32 %slice) { +; CHECK-LABEL: test_readz_ver_z128_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w12, w1 +; CHECK-NEXT: movaz z0.q, za0v.q[w12, 0] +; CHECK-NEXT: movaz z0.q, za15v.q[w12, 0] +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sme.readz.q.vert.nxv16i8(i32 0, i32 %slice) + %res2 = call @llvm.aarch64.sme.readz.q.vert.nxv16i8(i32 15, i32 %slice) + ret %res2 +} + +define @test_readz_ver_z128_i16(i32 %tile, i32 %slice) { +; CHECK-LABEL: test_readz_ver_z128_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w12, w1 +; CHECK-NEXT: movaz z0.q, za0v.q[w12, 0] +; CHECK-NEXT: movaz z0.q, za15v.q[w12, 0] +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sme.readz.q.vert.nxv8i16(i32 0, i32 %slice) + %res2 = call @llvm.aarch64.sme.readz.q.vert.nxv8i16(i32 15, i32 %slice) + ret %res2 +} + +define @test_readz_ver_z128_i32(i32 %tile, i32 %slice) { +; CHECK-LABEL: test_readz_ver_z128_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w12, w1 +; CHECK-NEXT: movaz z0.q, za0v.q[w12, 0] +; CHECK-NEXT: movaz z0.q, za15v.q[w12, 0] +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sme.readz.q.vert.nxv4i32(i32 0, i32 %slice) + %res2 = call @llvm.aarch64.sme.readz.q.vert.nxv4i32(i32 15, i32 %slice) + ret %res2 +} + +define @test_readz_ver_z128_i64(i32 %tile, i32 %slice) { +; CHECK-LABEL: test_readz_ver_z128_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w12, w1 +; CHECK-NEXT: movaz z0.q, za0v.q[w12, 0] +; CHECK-NEXT: movaz z1.q, za15v.q[w12, 0] +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sme.readz.q.vert.nxv2i64(i32 0, i32 %slice) + %res2 = call @llvm.aarch64.sme.readz.q.vert.nxv2i64(i32 15, i32 %slice) + ret %res +} + +define @test_readz_ver_z128_bf16(i32 %tile, i32 %slice) { +; CHECK-LABEL: test_readz_ver_z128_bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w12, w1 +; CHECK-NEXT: movaz z0.q, za0v.q[w12, 0] +; CHECK-NEXT: movaz z0.q, za15v.q[w12, 0] +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sme.readz.q.vert.nxv8bf16(i32 0, i32 %slice) + %res2 = call @llvm.aarch64.sme.readz.q.vert.nxv8bf16(i32 15, i32 %slice) + ret %res2 +} + +define @test_readz_ver_z128_f16(i32 %tile, i32 %slice) { +; CHECK-LABEL: test_readz_ver_z128_f16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w12, w1 +; CHECK-NEXT: movaz z0.q, za0v.q[w12, 0] +; CHECK-NEXT: movaz z0.q, za15v.q[w12, 0] +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sme.readz.q.vert.nxv8f16(i32 0, i32 %slice) + %res2 = call @llvm.aarch64.sme.readz.q.vert.nxv8f16(i32 15, i32 %slice) + ret %res2 +} + +define @test_readz_ver_z128_f32(i32 %tile, i32 %slice) { +; CHECK-LABEL: test_readz_ver_z128_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w12, w1 +; CHECK-NEXT: movaz z0.q, za0v.q[w12, 0] +; CHECK-NEXT: movaz z0.q, za15v.q[w12, 0] +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sme.readz.q.vert.nxv4f32(i32 0, i32 %slice) + %res2 = call @llvm.aarch64.sme.readz.q.vert.nxv4f32(i32 15, i32 %slice) + ret %res2 +} + +define @test_readz_ver_z128_f64(i32 %tile, i32 %slice) { +; CHECK-LABEL: test_readz_ver_z128_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w12, w1 +; CHECK-NEXT: movaz z0.q, za0v.q[w12, 0] +; CHECK-NEXT: movaz z1.q, za15v.q[w12, 0] +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sme.readz.q.vert.nxv2f64(i32 0, i32 %slice) + %res2 = call @llvm.aarch64.sme.readz.q.vert.nxv2f64(i32 15, i32 %slice) + ret %res +} + +declare @llvm.aarch64.sme.readz.horiz.nxv16i8(i32, i32) +declare @llvm.aarch64.sme.readz.horiz.nxv8i16(i32, i32) +declare @llvm.aarch64.sme.readz.horiz.nxv4i32(i32, i32) +declare @llvm.aarch64.sme.readz.horiz.nxv2i64(i32, i32) +declare @llvm.aarch64.sme.readz.horiz.nxv8bf16(i32, i32) +declare @llvm.aarch64.sme.readz.horiz.nxv8f16(i32, i32) +declare @llvm.aarch64.sme.readz.horiz.nxv4f32(i32, i32) +declare @llvm.aarch64.sme.readz.horiz.nxv2f64(i32, i32) +declare @llvm.aarch64.sme.readz.q.horiz.nxv16i8(i32, i32) +declare @llvm.aarch64.sme.readz.q.horiz.nxv8i16(i32, i32) +declare @llvm.aarch64.sme.readz.q.horiz.nxv4i32(i32, i32) +declare @llvm.aarch64.sme.readz.q.horiz.nxv2i64(i32, i32) +declare @llvm.aarch64.sme.readz.q.horiz.nxv8bf16(i32, i32) +declare @llvm.aarch64.sme.readz.q.horiz.nxv8f16(i32, i32) +declare @llvm.aarch64.sme.readz.q.horiz.nxv4f32(i32, i32) +declare @llvm.aarch64.sme.readz.q.horiz.nxv2f64(i32, i32) + + +declare @llvm.aarch64.sme.readz.vert.nxv16i8(i32, i32) +declare @llvm.aarch64.sme.readz.vert.nxv8i16(i32, i32) +declare @llvm.aarch64.sme.readz.vert.nxv4i32(i32, i32) +declare @llvm.aarch64.sme.readz.vert.nxv2i64(i32, i32) +declare @llvm.aarch64.sme.readz.vert.nxv8bf16(i32, i32) +declare @llvm.aarch64.sme.readz.vert.nxv8f16(i32, i32) +declare @llvm.aarch64.sme.readz.vert.nxv4f32(i32, i32) +declare @llvm.aarch64.sme.readz.vert.nxv2f64(i32, i32) +declare @llvm.aarch64.sme.readz.q.vert.nxv16i8(i32, i32) +declare @llvm.aarch64.sme.readz.q.vert.nxv8i16(i32, i32) +declare @llvm.aarch64.sme.readz.q.vert.nxv4i32(i32, i32) +declare @llvm.aarch64.sme.readz.q.vert.nxv2i64(i32, i32) +declare @llvm.aarch64.sme.readz.q.vert.nxv8bf16(i32, i32) +declare @llvm.aarch64.sme.readz.q.vert.nxv8f16(i32, i32) +declare @llvm.aarch64.sme.readz.q.vert.nxv4f32(i32, i32) +declare @llvm.aarch64.sme.readz.q.vert.nxv2f64(i32, i32)