Skip to content

Commit

Permalink
[Clang][SME2] Add builtins for moving multi-vectors to/from ZA (#71191)
Browse files Browse the repository at this point in the history
Adds the following SME2 builtins:
 - svread_hor/ver,
 - svwrite_hor/ver,
 - svread_za64,
 - svwrite_za64

See ARM-software/acle#217
  • Loading branch information
kmclaughlin-arm committed Dec 19, 2023
1 parent 6905438 commit e9af57d
Show file tree
Hide file tree
Showing 7 changed files with 3,814 additions and 4 deletions.
38 changes: 38 additions & 0 deletions clang/include/clang/Basic/arm_sme.td
Original file line number Diff line number Diff line change
Expand Up @@ -299,6 +299,44 @@ multiclass ZAAddSub<string n_suffix> {
defm SVADD : ZAAddSub<"add">;
defm SVSUB : ZAAddSub<"sub">;

// SME2 - MOVA

//
// Single, 2 and 4 vector-group read/write intrinsics.
//

multiclass ZAWrite_VG<string n, string t, string i, list<ImmCheck> checks> {
def NAME # _VG2_H : Inst<"svwrite_hor_" # n # "[_{d}]_vg2", "vim2", t, MergeNone, i # "_hor_vg2", [IsSharedZA, IsStreaming], checks>;
def NAME # _VG2_V : Inst<"svwrite_ver_" # n # "[_{d}]_vg2", "vim2", t, MergeNone, i # "_ver_vg2", [IsSharedZA, IsStreaming], checks>;
def NAME # _VG4_H : Inst<"svwrite_hor_" # n # "[_{d}]_vg4", "vim4", t, MergeNone, i # "_hor_vg4", [IsSharedZA, IsStreaming], checks>;
def NAME # _VG4_V : Inst<"svwrite_ver_" # n # "[_{d}]_vg4", "vim4", t, MergeNone, i # "_ver_vg4", [IsSharedZA, IsStreaming], checks>;
def NAME # _VG1x2 : Inst<"svwrite_" # n # "[_{d}]_vg1x2", "vm2", t, MergeNone, i # "_vg1x2", [IsSharedZA, IsStreaming], []>;
def NAME # _VG1x4 : Inst<"svwrite_" # n # "[_{d}]_vg1x4", "vm4", t, MergeNone, i # "_vg1x4", [IsSharedZA, IsStreaming], []>;
}

let TargetGuard = "sme2" in {
defm SVWRITE_ZA8 : ZAWrite_VG<"za8", "cUc", "aarch64_sme_write", [ImmCheck<0, ImmCheck0_0>]>;
defm SVWRITE_ZA16 : ZAWrite_VG<"za16", "sUshb", "aarch64_sme_write", [ImmCheck<0, ImmCheck0_1>]>;
defm SVWRITE_ZA32 : ZAWrite_VG<"za32", "iUif", "aarch64_sme_write", [ImmCheck<0, ImmCheck0_3>]>;
defm SVWRITE_ZA64 : ZAWrite_VG<"za64", "lUld", "aarch64_sme_write", [ImmCheck<0, ImmCheck0_7>]>;
}

multiclass ZARead_VG<string n, string t, string i, list<ImmCheck> checks> {
def NAME # _VG2_H : Inst<"svread_hor_" # n # "_{d}_vg2", "2im", t, MergeNone, i # "_hor_vg2", [IsSharedZA, IsPreservesZA, IsStreaming], checks>;
def NAME # _VG2_V : Inst<"svread_ver_" # n # "_{d}_vg2", "2im", t, MergeNone, i # "_ver_vg2", [IsSharedZA, IsPreservesZA, IsStreaming], checks>;
def NAME # _VG4_H : Inst<"svread_hor_" # n # "_{d}_vg4", "4im", t, MergeNone, i # "_hor_vg4", [IsSharedZA, IsPreservesZA, IsStreaming], checks>;
def NAME # _VG4_V : Inst<"svread_ver_" # n # "_{d}_vg4", "4im", t, MergeNone, i # "_ver_vg4", [IsSharedZA, IsPreservesZA, IsStreaming], checks>;
def NAME # _VG1x2 : Inst<"svread_" # n # "_{d}_vg1x2", "2m", t, MergeNone, i # "_vg1x2", [IsSharedZA, IsPreservesZA, IsStreaming], []>;
def NAME # _VG1x4 : Inst<"svread_" # n # "_{d}_vg1x4", "4m", t, MergeNone, i # "_vg1x4", [IsSharedZA, IsPreservesZA, IsStreaming], []>;
}

let TargetGuard = "sme2" in {
defm SVREAD_ZA8 : ZARead_VG<"za8", "cUc", "aarch64_sme_read", [ImmCheck<0, ImmCheck0_0>]>;
defm SVREAD_ZA16 : ZARead_VG<"za16", "sUshb", "aarch64_sme_read", [ImmCheck<0, ImmCheck0_1>]>;
defm SVREAD_ZA32 : ZARead_VG<"za32", "iUif", "aarch64_sme_read", [ImmCheck<0, ImmCheck0_3>]>;
defm SVREAD_ZA64 : ZARead_VG<"za64", "lUld", "aarch64_sme_read", [ImmCheck<0, ImmCheck0_7>]>;
}

//
// Outer product and accumulate/subtract
//
Expand Down
1,880 changes: 1,880 additions & 0 deletions clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_read.c

Large diffs are not rendered by default.

1,457 changes: 1,457 additions & 0 deletions clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_write.c

Large diffs are not rendered by default.

51 changes: 51 additions & 0 deletions clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_imm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,57 @@

#include <arm_sme_draft_spec_subject_to_change.h>

void test_multivector_read(uint32_t base) __arm_streaming __arm_shared_za __arm_preserves_za {

// Test Tile Range
svread_hor_za8_u8_vg2(1, base); // expected-error {{argument value 1 is outside the valid range [0, 0]}}
svread_ver_za8_u8_vg2(1, base); // expected-error {{argument value 1 is outside the valid range [0, 0]}}
svread_hor_za8_u8_vg4(1, base); // expected-error {{argument value 1 is outside the valid range [0, 0]}}
svread_ver_za8_u8_vg4(1, base); // expected-error {{argument value 1 is outside the valid range [0, 0]}}

svread_hor_za16_u16_vg2(2, base); // expected-error {{argument value 2 is outside the valid range [0, 1]}}
svread_ver_za16_u16_vg2(2, base); // expected-error {{argument value 2 is outside the valid range [0, 1]}}
svread_hor_za16_u16_vg4(2, base); // expected-error {{argument value 2 is outside the valid range [0, 1]}}
svread_ver_za16_u16_vg4(2, base); // expected-error {{argument value 2 is outside the valid range [0, 1]}}

svread_hor_za32_u32_vg2(4, base); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
svread_ver_za32_u32_vg2(4, base); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
svread_hor_za32_u32_vg4(4, base); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
svread_ver_za32_u32_vg4(4, base); // expected-error {{argument value 4 is outside the valid range [0, 3]}}

svread_hor_za64_u64_vg2(8, base); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
svread_ver_za64_u64_vg2(8, base); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
svread_hor_za64_u64_vg4(8, base); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
svread_ver_za64_u64_vg4(8, base); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
}

void test_multivector_write(uint32_t base, svuint8x2_t v8x2, svuint8x4_t v8x4,
svuint16x2_t v16x2, svuint16x4_t v16x4,
svuint32x2_t v32x2, svuint32x4_t v32x4,
svuint64x2_t v64x2, svuint64x4_t v64x4) __arm_streaming __arm_shared_za {

// Test Tile Range
svwrite_hor_za8_u8_vg2(1, base, v8x2); // expected-error {{argument value 1 is outside the valid range [0, 0]}}
svwrite_ver_za8_u8_vg2(1, base, v8x2); // expected-error {{argument value 1 is outside the valid range [0, 0]}}
svwrite_hor_za8_u8_vg4(1, base, v8x4); // expected-error {{argument value 1 is outside the valid range [0, 0]}}
svwrite_ver_za8_u8_vg4(1, base, v8x4); // expected-error {{argument value 1 is outside the valid range [0, 0]}}

svwrite_hor_za16_u16_vg2(2, base, v16x2); // expected-error {{argument value 2 is outside the valid range [0, 1]}}
svwrite_ver_za16_u16_vg2(2, base, v16x2); // expected-error {{argument value 2 is outside the valid range [0, 1]}}
svwrite_hor_za16_u16_vg4(2, base, v16x4); // expected-error {{argument value 2 is outside the valid range [0, 1]}}
svwrite_ver_za16_u16_vg4(2, base, v16x4); // expected-error {{argument value 2 is outside the valid range [0, 1]}}

svwrite_hor_za32_u32_vg2(4, base, v32x2); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
svwrite_ver_za32_u32_vg2(4, base, v32x2); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
svwrite_hor_za32_u32_vg4(4, base, v32x4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
svwrite_ver_za32_u32_vg4(4, base, v32x4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}

svwrite_hor_za64_u64_vg2(8, base, v64x2); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
svwrite_ver_za64_u64_vg2(8, base, v64x2); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
svwrite_hor_za64_u64_vg4(8, base, v64x4); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
svwrite_ver_za64_u64_vg4(8, base, v64x4); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
}

void test_outer_product(svbool_t pred, svint16_t s16, svuint16_t u16, svint32_t s32, svuint32_t u32) __arm_streaming __arm_shared_za {
// Test Tile Range
svmopa_za32_u16_m(4, pred, pred, u16, u16); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
Expand Down
20 changes: 16 additions & 4 deletions llvm/lib/Target/AArch64/SMEInstrFormats.td
Original file line number Diff line number Diff line change
Expand Up @@ -3718,8 +3718,14 @@ multiclass sme2_mova_vec_to_array_vg2_multi<string mnemonic, SDPatternOperator i

def NAME # _PSEUDO : sme2_move_to_za_pseudo<NAME, sme_elm_idx0_7, ZZ_d_mul_r, SMEMatrixArray>;

def : SME2_ZA_VG1x2_Multi_Pat<NAME, intrinsic, nxv2i64, sme_elm_idx0_7, tileslice16>;
def : SME2_ZA_VG1x2_Multi_Pat<NAME, intrinsic, nxv2f64, sme_elm_idx0_7, tileslice16>;
def : SME2_ZA_VG1x2_Multi_Pat<NAME, intrinsic, nxv16i8, sme_elm_idx0_7, tileslice16>;
def : SME2_ZA_VG1x2_Multi_Pat<NAME, intrinsic, nxv8i16, sme_elm_idx0_7, tileslice16>;
def : SME2_ZA_VG1x2_Multi_Pat<NAME, intrinsic, nxv8f16, sme_elm_idx0_7, tileslice16>;
def : SME2_ZA_VG1x2_Multi_Pat<NAME, intrinsic, nxv8bf16, sme_elm_idx0_7, tileslice16>;
def : SME2_ZA_VG1x2_Multi_Pat<NAME, intrinsic, nxv4i32, sme_elm_idx0_7, tileslice16>;
def : SME2_ZA_VG1x2_Multi_Pat<NAME, intrinsic, nxv4f32, sme_elm_idx0_7, tileslice16>;
def : SME2_ZA_VG1x2_Multi_Pat<NAME, intrinsic, nxv2i64, sme_elm_idx0_7, tileslice16>;
def : SME2_ZA_VG1x2_Multi_Pat<NAME, intrinsic, nxv2f64, sme_elm_idx0_7, tileslice16>;

defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME),
MatrixOp8,
Expand Down Expand Up @@ -3811,8 +3817,14 @@ multiclass sme2_mova_vec_to_array_vg4_multi<string mnemonic, SDPatternOperator i

def NAME # _PSEUDO : sme2_move_to_za_pseudo<NAME, sme_elm_idx0_7, ZZZZ_d_mul_r, SMEMatrixArray>;

def : SME2_ZA_VG1x4_Multi_Pat<NAME, intrinsic, nxv2i64, sme_elm_idx0_7, tileslice16>;
def : SME2_ZA_VG1x4_Multi_Pat<NAME, intrinsic, nxv2f64, sme_elm_idx0_7, tileslice16>;
def : SME2_ZA_VG1x4_Multi_Pat<NAME, intrinsic, nxv16i8, sme_elm_idx0_7, tileslice16>;
def : SME2_ZA_VG1x4_Multi_Pat<NAME, intrinsic, nxv8i16, sme_elm_idx0_7, tileslice16>;
def : SME2_ZA_VG1x4_Multi_Pat<NAME, intrinsic, nxv8f16, sme_elm_idx0_7, tileslice16>;
def : SME2_ZA_VG1x4_Multi_Pat<NAME, intrinsic, nxv8bf16, sme_elm_idx0_7, tileslice16>;
def : SME2_ZA_VG1x4_Multi_Pat<NAME, intrinsic, nxv4i32, sme_elm_idx0_7, tileslice16>;
def : SME2_ZA_VG1x4_Multi_Pat<NAME, intrinsic, nxv4f32, sme_elm_idx0_7, tileslice16>;
def : SME2_ZA_VG1x4_Multi_Pat<NAME, intrinsic, nxv2i64, sme_elm_idx0_7, tileslice16>;
def : SME2_ZA_VG1x4_Multi_Pat<NAME, intrinsic, nxv2f64, sme_elm_idx0_7, tileslice16>;

defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME),
MatrixOp8,
Expand Down

0 comments on commit e9af57d

Please sign in to comment.