diff --git a/clang/include/clang/AST/Type.h b/clang/include/clang/AST/Type.h index d4e5310fb3abc..a7efe78591635 100644 --- a/clang/include/clang/AST/Type.h +++ b/clang/include/clang/AST/Type.h @@ -4036,12 +4036,27 @@ class FunctionType : public Type { SME_NormalFunction = 0, SME_PStateSMEnabledMask = 1 << 0, SME_PStateSMCompatibleMask = 1 << 1, - SME_PStateZASharedMask = 1 << 2, - SME_PStateZAPreservedMask = 1 << 3, - SME_AttributeMask = 0b111'111 // We only support maximum 6 bits because of the - // bitmask in FunctionTypeExtraBitfields. + + // Describes the value of the state using ArmStateValue. + SME_ZAShift = 2, + SME_ZAMask = 0b111 << SME_ZAShift, + + SME_AttributeMask = 0b111'111 // We only support maximum 6 bits because of + // the bitmask in FunctionTypeExtraBitfields. + }; + + enum ArmStateValue : unsigned { + ARM_None = 0, + ARM_Preserves = 1, + ARM_In = 2, + ARM_Out = 3, + ARM_InOut = 4, }; + static ArmStateValue getArmZAState(unsigned AttrBits) { + return (ArmStateValue)((AttrBits & SME_ZAMask) >> SME_ZAShift); + } + /// A simple holder for various uncommon bits which do not fit in /// FunctionTypeBitfields. Aligned to alignof(void *) to maintain the /// alignment of subsequent objects in TrailingObjects. 
diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td index a03b0e44e15f7..b9ec720dd9e19 100644 --- a/clang/include/clang/Basic/Attr.td +++ b/clang/include/clang/Basic/Attr.td @@ -2539,16 +2539,45 @@ def ArmStreamingCompatible : TypeAttr, TargetSpecificAttr { let Documentation = [ArmSmeStreamingCompatibleDocs]; } -def ArmSharedZA : TypeAttr, TargetSpecificAttr { - let Spellings = [RegularKeyword<"__arm_shared_za">]; +def ArmNew : InheritableAttr, TargetSpecificAttr { + let Spellings = [RegularKeyword<"__arm_new">]; + let Args = [VariadicStringArgument<"NewArgs">]; + let Subjects = SubjectList<[Function], ErrorDiag>; + let Documentation = [ArmNewDocs]; + + let AdditionalMembers = [{ + bool isNewZA() const { + return llvm::is_contained(newArgs(), "za"); + } + }]; +} + +def ArmIn : TypeAttr, TargetSpecificAttr { + let Spellings = [RegularKeyword<"__arm_in">]; + let Args = [VariadicStringArgument<"InArgs">]; + let Subjects = SubjectList<[HasFunctionProto], ErrorDiag>; + let Documentation = [ArmInDocs]; +} + +def ArmOut : TypeAttr, TargetSpecificAttr { + let Spellings = [RegularKeyword<"__arm_out">]; + let Args = [VariadicStringArgument<"OutArgs">]; + let Subjects = SubjectList<[HasFunctionProto], ErrorDiag>; + let Documentation = [ArmOutDocs]; +} + +def ArmInOut : TypeAttr, TargetSpecificAttr { + let Spellings = [RegularKeyword<"__arm_inout">]; + let Args = [VariadicStringArgument<"InOutArgs">]; let Subjects = SubjectList<[HasFunctionProto], ErrorDiag>; - let Documentation = [ArmSmeSharedZADocs]; + let Documentation = [ArmInOutDocs]; } -def ArmPreservesZA : TypeAttr, TargetSpecificAttr { - let Spellings = [RegularKeyword<"__arm_preserves_za">]; +def ArmPreserves : TypeAttr, TargetSpecificAttr { + let Spellings = [RegularKeyword<"__arm_preserves">]; + let Args = [VariadicStringArgument<"PreserveArgs">]; let Subjects = SubjectList<[HasFunctionProto], ErrorDiag>; - let Documentation = [ArmSmePreservesZADocs]; + let Documentation = 
[ArmPreservesDocs]; } def ArmLocallyStreaming : InheritableAttr, TargetSpecificAttr { @@ -2557,14 +2586,6 @@ def ArmLocallyStreaming : InheritableAttr, TargetSpecificAttr { let Documentation = [ArmSmeLocallyStreamingDocs]; } -def ArmNewZA : InheritableAttr, TargetSpecificAttr { - let Spellings = [RegularKeyword<"__arm_new_za">]; - let Subjects = SubjectList<[Function], ErrorDiag>; - let Documentation = [ArmSmeNewZADocs]; -} -def : MutualExclusions<[ArmNewZA, ArmSharedZA]>; -def : MutualExclusions<[ArmNewZA, ArmPreservesZA]>; - def Pure : InheritableAttr { let Spellings = [GCC<"pure">]; diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td index 2e8d7752c9751..9e8190614fbe8 100644 --- a/clang/include/clang/Basic/AttrDocs.td +++ b/clang/include/clang/Basic/AttrDocs.td @@ -6861,30 +6861,73 @@ without changing modes. }]; } -def ArmSmeSharedZADocs : Documentation { +def ArmInDocs : Documentation { let Category = DocCatArmSmeAttributes; let Content = [{ -The ``__arm_shared_za`` keyword applies to prototyped function types and specifies -that the function shares SME's matrix storage (ZA) with its caller. This -means that: +The ``__arm_in`` keyword applies to prototyped function types and specifies +that the function shares a given state S with its caller. For ``__arm_in``, the +function takes the state S as input and returns with the state S unchanged. -* the function requires that the processor implements the Scalable Matrix - Extension (SME). +The attribute takes string arguments to instruct the compiler which state +is shared. The supported states for S are: -* the function enters with ZA in an active state. +* ``"za"`` for Matrix Storage (requires SME) -* the function returns with ZA in an active state. +The attributes ``__arm_in(S)``, ``__arm_out(S)``, ``__arm_inout(S)`` and +``__arm_preserves(S)`` are all mutually exclusive for the same state S. 
}]; } -def ArmSmePreservesZADocs : Documentation { +def ArmOutDocs : Documentation { let Category = DocCatArmSmeAttributes; let Content = [{ -The ``__arm_preserves_za`` keyword applies to prototyped function types and -specifies that the function does not modify ZA state. +The ``__arm_out`` keyword applies to prototyped function types and specifies +that the function shares a given state S with its caller. For ``__arm_out``, +the function ignores the incoming state for S and returns new state for S. + +The attribute takes string arguments to instruct the compiler which state +is shared. The supported states for S are: + +* ``"za"`` for Matrix Storage (requires SME) + +The attributes ``__arm_in(S)``, ``__arm_out(S)``, ``__arm_inout(S)`` and +``__arm_preserves(S)`` are all mutually exclusive for the same state S. }]; } +def ArmInOutDocs : Documentation { + let Category = DocCatArmSmeAttributes; + let Content = [{ +The ``__arm_inout`` keyword applies to prototyped function types and specifies +that the function shares a given state S with its caller. For ``__arm_inout``, +the function takes the state S as input and returns new state for S. + +The attribute takes string arguments to instruct the compiler which state +is shared. The supported states for S are: + +* ``"za"`` for Matrix Storage (requires SME) + +The attributes ``__arm_in(S)``, ``__arm_out(S)``, ``__arm_inout(S)`` and +``__arm_preserves(S)`` are all mutually exclusive for the same state S. + }]; +} + +def ArmPreservesDocs : Documentation { + let Category = DocCatArmSmeAttributes; + let Content = [{ +The ``__arm_preserves`` keyword applies to prototyped function types and +specifies that the function does not read a given state S and returns +with state S unchanged. + +The attribute takes string arguments to instruct the compiler which state +is shared. 
The supported states for S are: + +* ``"za"`` for Matrix Storage (requires SME) + +The attributes ``__arm_in(S)``, ``__arm_out(S)``, ``__arm_inout(S)`` and +``__arm_preserves(S)`` are all mutually exclusive for the same state S. + }]; +} def ArmSmeLocallyStreamingDocs : Documentation { let Category = DocCatArmSmeAttributes; @@ -6907,13 +6950,18 @@ at the end of the function. }]; } -def ArmSmeNewZADocs : Documentation { +def ArmNewDocs : Documentation { let Category = DocCatArmSmeAttributes; let Content = [{ -The ``__arm_new_za`` keyword applies to function declarations and specifies -that the function will be set up with a fresh ZA context. +The ``__arm_new`` keyword applies to function declarations and specifies +that the function will create a new scope for state S. + +The attribute takes string arguments to instruct the compiler for which state +to create a new scope. The supported states for S are: + +* ``"za"`` for Matrix Storage (requires SME) -This means that: +For state ``"za"``, this means that: * the function requires that the target processor implements the Scalable Matrix Extension (SME). @@ -6924,8 +6972,8 @@ This means that: * the function will disable PSTATE.ZA (by setting it to 0) before returning. -For ``__arm_new_za`` functions Clang will set up the ZA context automatically -on entry to the function, and disable it before returning. For example, if ZA is +For ``__arm_new("za")`` functions Clang will set up the ZA context automatically +on entry to the function and disable it before returning. For example, if ZA is in a dormant state Clang will generate the code to commit a lazy-save and set up a new ZA state before executing user code. 
}]; diff --git a/clang/include/clang/Basic/AttributeCommonInfo.h b/clang/include/clang/Basic/AttributeCommonInfo.h index 018b92fdc11f5..d787e4959bfee 100644 --- a/clang/include/clang/Basic/AttributeCommonInfo.h +++ b/clang/include/clang/Basic/AttributeCommonInfo.h @@ -255,6 +255,19 @@ class AttributeCommonInfo { return SpellingIndex != SpellingNotCalculated; } }; + +inline bool doesKeywordAttributeTakeArgs(tok::TokenKind Kind) { + switch (Kind) { + default: + return false; +#define KEYWORD_ATTRIBUTE(NAME, HASARG) \ + case tok::kw_##NAME: \ + return HASARG; +#include "clang/Basic/RegularKeywordAttrInfo.inc" +#undef KEYWORD_ATTRIBUTE + } +} + } // namespace clang #endif // LLVM_CLANG_BASIC_ATTRIBUTECOMMONINFO_H diff --git a/clang/include/clang/Basic/CMakeLists.txt b/clang/include/clang/Basic/CMakeLists.txt index 28baa2e45e423..be216150c71bb 100644 --- a/clang/include/clang/Basic/CMakeLists.txt +++ b/clang/include/clang/Basic/CMakeLists.txt @@ -45,10 +45,10 @@ clang_tablegen(AttrSubMatchRulesList.inc -gen-clang-attr-subject-match-rule-list SOURCE Attr.td TARGET ClangAttrSubjectMatchRuleList) -clang_tablegen(AttrTokenKinds.inc -gen-clang-attr-token-kinds +clang_tablegen(RegularKeywordAttrInfo.inc -gen-clang-regular-keyword-attr-info -I ${CMAKE_CURRENT_SOURCE_DIR}/../../ SOURCE Attr.td - TARGET ClangAttrTokenKinds + TARGET ClangRegularKeywordAttrInfo ) clang_tablegen(AttrHasAttributeImpl.inc -gen-clang-attr-has-attribute-impl diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index 1a79892e40030..c50b188a1039a 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -3700,6 +3700,12 @@ def err_sme_definition_using_sm_in_non_sme_target : Error< "function executed in streaming-SVE mode requires 'sme'">; def err_sme_definition_using_za_in_non_sme_target : Error< "function using ZA state requires 'sme'">; +def err_conflicting_attributes_arm_state : 
Error< + "conflicting attributes for state '%0'">; +def err_unknown_arm_state : Error< + "unknown state '%0'">; +def err_missing_arm_state : Error< + "missing state for %0">; def err_cconv_change : Error< "function declared '%0' here was previously declared " "%select{'%2'|without calling convention}1">; diff --git a/clang/include/clang/Basic/TokenKinds.def b/clang/include/clang/Basic/TokenKinds.def index 3f0e1e1a7d45a..d15e4970b7d8f 100644 --- a/clang/include/clang/Basic/TokenKinds.def +++ b/clang/include/clang/Basic/TokenKinds.def @@ -761,9 +761,9 @@ KEYWORD(__builtin_sycl_unique_stable_name, KEYSYCL) // Keywords defined by Attr.td. #ifndef KEYWORD_ATTRIBUTE -#define KEYWORD_ATTRIBUTE(X) KEYWORD(X, KEYALL) +#define KEYWORD_ATTRIBUTE(X, ...) KEYWORD(X, KEYALL) #endif -#include "clang/Basic/AttrTokenKinds.inc" +#include "clang/Basic/RegularKeywordAttrInfo.inc" // Clang-specific keywords enabled only in testing. TESTING_KEYWORD(__unknown_anytype , KEYALL) diff --git a/clang/include/clang/Basic/TokenKinds.h b/clang/include/clang/Basic/TokenKinds.h index e4857405bc7f4..7529b922619ad 100644 --- a/clang/include/clang/Basic/TokenKinds.h +++ b/clang/include/clang/Basic/TokenKinds.h @@ -109,8 +109,8 @@ bool isPragmaAnnotation(TokenKind K); inline constexpr bool isRegularKeywordAttribute(TokenKind K) { return (false -#define KEYWORD_ATTRIBUTE(X) || (K == tok::kw_##X) -#include "clang/Basic/AttrTokenKinds.inc" +#define KEYWORD_ATTRIBUTE(X, ...) || (K == tok::kw_##X) +#include "clang/Basic/RegularKeywordAttrInfo.inc" ); } diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index cf2d4fbe6d3ba..4ef1fe542ea54 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -7110,15 +7110,7 @@ class Sema final { NestedNameSpecInfo &IdInfo, bool EnteringContext); - /// The kind of conversion to check for. Either all attributes must match exactly, - /// or the converted type may add/drop '__arm_preserves_za'. 
- enum class AArch64SMECallConversionKind { - MatchExactly, - MayAddPreservesZA, - MayDropPreservesZA, - }; - bool IsInvalidSMECallConversion(QualType FromType, QualType ToType, - AArch64SMECallConversionKind C); + bool IsInvalidSMECallConversion(QualType FromType, QualType ToType); /// The parser has parsed a nested-name-specifier /// 'template[opt] template-name < template-args >::'. diff --git a/clang/lib/AST/TypePrinter.cpp b/clang/lib/AST/TypePrinter.cpp index f694124292736..1baf895ebaec2 100644 --- a/clang/lib/AST/TypePrinter.cpp +++ b/clang/lib/AST/TypePrinter.cpp @@ -937,15 +937,20 @@ void TypePrinter::printFunctionProtoAfter(const FunctionProtoType *T, OS << ')'; FunctionType::ExtInfo Info = T->getExtInfo(); + unsigned SMEBits = T->getAArch64SMEAttributes(); - if ((T->getAArch64SMEAttributes() & FunctionType::SME_PStateSMCompatibleMask)) + if (SMEBits & FunctionType::SME_PStateSMCompatibleMask) OS << " __arm_streaming_compatible"; - if ((T->getAArch64SMEAttributes() & FunctionType::SME_PStateSMEnabledMask)) + if (SMEBits & FunctionType::SME_PStateSMEnabledMask) OS << " __arm_streaming"; - if ((T->getAArch64SMEAttributes() & FunctionType::SME_PStateZASharedMask)) - OS << " __arm_shared_za"; - if ((T->getAArch64SMEAttributes() & FunctionType::SME_PStateZAPreservedMask)) - OS << " __arm_preserves_za"; + if (FunctionType::getArmZAState(SMEBits) == FunctionType::ARM_Preserves) + OS << " __arm_preserves(\"za\")"; + if (FunctionType::getArmZAState(SMEBits) == FunctionType::ARM_In) + OS << " __arm_in(\"za\")"; + if (FunctionType::getArmZAState(SMEBits) == FunctionType::ARM_Out) + OS << " __arm_out(\"za\")"; + if (FunctionType::getArmZAState(SMEBits) == FunctionType::ARM_InOut) + OS << " __arm_inout(\"za\")"; printFunctionAfter(Info, OS); @@ -1788,14 +1793,6 @@ void TypePrinter::printAttributedAfter(const AttributedType *T, OS << "__arm_streaming_compatible"; return; } - if (T->getAttrKind() == attr::ArmSharedZA) { - OS << "__arm_shared_za"; - return; - } - if 
(T->getAttrKind() == attr::ArmPreservesZA) { - OS << "__arm_preserves_za"; - return; - } OS << " __attribute__(("; switch (T->getAttrKind()) { @@ -1839,8 +1836,10 @@ void TypePrinter::printAttributedAfter(const AttributedType *T, case attr::WebAssemblyFuncref: case attr::ArmStreaming: case attr::ArmStreamingCompatible: - case attr::ArmSharedZA: - case attr::ArmPreservesZA: + case attr::ArmIn: + case attr::ArmOut: + case attr::ArmInOut: + case attr::ArmPreserves: llvm_unreachable("This attribute should have been handled already"); case attr::NSReturnsRetained: diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp index 13677cf150aed..acf6cbad1c748 100644 --- a/clang/lib/CodeGen/CGCall.cpp +++ b/clang/lib/CodeGen/CGCall.cpp @@ -1767,14 +1767,21 @@ static void AddAttributesFromFunctionProtoType(ASTContext &Ctx, FPT->isNothrow()) FuncAttrs.addAttribute(llvm::Attribute::NoUnwind); - if (FPT->getAArch64SMEAttributes() & FunctionType::SME_PStateSMEnabledMask) + unsigned SMEBits = FPT->getAArch64SMEAttributes(); + if (SMEBits & FunctionType::SME_PStateSMEnabledMask) FuncAttrs.addAttribute("aarch64_pstate_sm_enabled"); - if (FPT->getAArch64SMEAttributes() & FunctionType::SME_PStateSMCompatibleMask) + if (SMEBits & FunctionType::SME_PStateSMCompatibleMask) FuncAttrs.addAttribute("aarch64_pstate_sm_compatible"); - if (FPT->getAArch64SMEAttributes() & FunctionType::SME_PStateZASharedMask) + + // ZA + if (FunctionType::getArmZAState(SMEBits) == FunctionType::ARM_Out || + FunctionType::getArmZAState(SMEBits) == FunctionType::ARM_InOut) + FuncAttrs.addAttribute("aarch64_pstate_za_shared"); + if (FunctionType::getArmZAState(SMEBits) == FunctionType::ARM_Preserves || + FunctionType::getArmZAState(SMEBits) == FunctionType::ARM_In) { FuncAttrs.addAttribute("aarch64_pstate_za_shared"); - if (FPT->getAArch64SMEAttributes() & FunctionType::SME_PStateZAPreservedMask) FuncAttrs.addAttribute("aarch64_pstate_za_preserved"); + } } static void 
AddAttributesFromAssumes(llvm::AttrBuilder &FuncAttrs, @@ -2446,9 +2453,6 @@ void CodeGenModule::ConstructAttributeList(StringRef Name, if (TargetDecl->hasAttr()) FuncAttrs.addAttribute("aarch64_pstate_sm_body"); - - if (TargetDecl->hasAttr()) - FuncAttrs.addAttribute("aarch64_pstate_za_new"); } // Attach "no-builtins" attributes to: diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index 0cfe7a0133b7e..01b042ce5dd13 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -2380,8 +2380,10 @@ void CodeGenModule::SetLLVMFunctionAttributesForDefinition(const Decl *D, if (D->hasAttr()) B.addAttribute("aarch64_pstate_sm_body"); - if (D->hasAttr()) - B.addAttribute("aarch64_pstate_za_new"); + if (auto *Attr = D->getAttr()) { + if (Attr->isNewZA()) + B.addAttribute("aarch64_pstate_za_new"); + } // Track whether we need to add the optnone LLVM attribute, // starting with the default for this optimization level. diff --git a/clang/lib/Parse/ParseDecl.cpp b/clang/lib/Parse/ParseDecl.cpp index ed684c5d57b1e..8d856cc2cf831 100644 --- a/clang/lib/Parse/ParseDecl.cpp +++ b/clang/lib/Parse/ParseDecl.cpp @@ -6791,7 +6791,13 @@ void Parser::ParseDirectDeclarator(Declarator &D) { } else if (Tok.isRegularKeywordAttribute()) { // For consistency with attribute parsing. 
Diag(Tok, diag::err_keyword_not_allowed) << Tok.getIdentifierInfo(); + bool TakesArgs = doesKeywordAttributeTakeArgs(Tok.getKind()); ConsumeToken(); + if (TakesArgs) { + BalancedDelimiterTracker T(*this, tok::l_paren); + if (!T.consumeOpen()) + T.skipToEnd(); + } } else if (Tok.is(tok::kw_requires) && D.hasGroupingParens()) { // This declarator is declaring a function, but the requires clause is // in the wrong place: diff --git a/clang/lib/Parse/ParseDeclCXX.cpp b/clang/lib/Parse/ParseDeclCXX.cpp index d97081da4200d..5576be9e717a9 100644 --- a/clang/lib/Parse/ParseDeclCXX.cpp +++ b/clang/lib/Parse/ParseDeclCXX.cpp @@ -1890,7 +1890,13 @@ void Parser::ParseClassSpecifier(tok::TokenKind TagTokKind, if (!SkipUntil(tok::r_paren, StopAtSemi)) break; } else if (Tok.isRegularKeywordAttribute()) { + bool TakesArgs = doesKeywordAttributeTakeArgs(Tok.getKind()); ConsumeToken(); + if (TakesArgs) { + BalancedDelimiterTracker T(*this, tok::l_paren); + if (!T.consumeOpen()) + T.skipToEnd(); + } } else { break; } @@ -4539,8 +4545,18 @@ void Parser::ParseCXX11AttributeSpecifierInternal(ParsedAttributes &Attrs, if (Tok.isRegularKeywordAttribute()) { SourceLocation Loc = Tok.getLocation(); IdentifierInfo *AttrName = Tok.getIdentifierInfo(); - Attrs.addNew(AttrName, Loc, nullptr, Loc, nullptr, 0, Tok.getKind()); + ParsedAttr::Form Form = ParsedAttr::Form(Tok.getKind()); + bool TakesArgs = doesKeywordAttributeTakeArgs(Tok.getKind()); ConsumeToken(); + if (TakesArgs) { + if (!Tok.is(tok::l_paren)) + Diag(Tok.getLocation(), diag::err_expected_lparen_after) << AttrName; + else + ParseAttributeArgsCommon(AttrName, Loc, Attrs, EndLoc, + /*ScopeName*/ nullptr, + /*ScopeLoc*/ Loc, Form); + } else + Attrs.addNew(AttrName, Loc, nullptr, Loc, nullptr, 0, Form); return; } @@ -4706,11 +4722,13 @@ SourceLocation Parser::SkipCXX11Attributes() { T.consumeOpen(); T.skipToEnd(); EndLoc = T.getCloseLocation(); - } else if (Tok.isRegularKeywordAttribute()) { + } else if (Tok.isRegularKeywordAttribute() 
&& + !doesKeywordAttributeTakeArgs(Tok.getKind())) { EndLoc = Tok.getLocation(); ConsumeToken(); } else { - assert(Tok.is(tok::kw_alignas) && "not an attribute specifier"); + assert((Tok.is(tok::kw_alignas) || Tok.isRegularKeywordAttribute()) && + "not an attribute specifier"); ConsumeToken(); BalancedDelimiterTracker T(*this, tok::l_paren); if (!T.consumeOpen()) diff --git a/clang/lib/Parse/ParseTentative.cpp b/clang/lib/Parse/ParseTentative.cpp index 242741c15b5ff..5bfabf55f50c4 100644 --- a/clang/lib/Parse/ParseTentative.cpp +++ b/clang/lib/Parse/ParseTentative.cpp @@ -894,7 +894,8 @@ bool Parser::TrySkipAttributes() { // Note that explicitly checking for `[[` and `]]` allows to fail as // expected in the case of the Objective-C message send syntax. ConsumeBracket(); - } else if (Tok.isRegularKeywordAttribute()) { + } else if (Tok.isRegularKeywordAttribute() && + !doesKeywordAttributeTakeArgs(Tok.getKind())) { ConsumeToken(); } else { ConsumeToken(); diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index 74f8f626fb163..ace3e386988f0 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -3190,11 +3190,15 @@ static void checkArmStreamingBuiltin(Sema &S, CallExpr *TheCall, } static bool hasSMEZAState(const FunctionDecl *FD) { - if (FD->hasAttr()) - return true; - if (const auto *T = FD->getType()->getAs()) - if (T->getAArch64SMEAttributes() & FunctionType::SME_PStateZASharedMask) + if (auto *Attr = FD->getAttr()) + if (Attr->isNewZA()) + return true; + if (const auto *T = FD->getType()->getAs()) { + FunctionType::ArmStateValue State = + FunctionType::getArmZAState(T->getAArch64SMEAttributes()); + if (State != FunctionType::ARM_None) return true; + } return false; } @@ -7522,14 +7526,19 @@ void Sema::checkCall(NamedDecl *FDecl, const FunctionProtoType *Proto, // If the callee uses AArch64 SME ZA state but the caller doesn't define // any, then this is an error. 
- if (ExtInfo.AArch64SMEAttributes & FunctionType::SME_PStateZASharedMask) { + FunctionType::ArmStateValue ArmZAState = + FunctionType::getArmZAState(ExtInfo.AArch64SMEAttributes); + if (ArmZAState != FunctionType::ARM_None) { bool CallerHasZAState = false; if (const auto *CallerFD = dyn_cast(CurContext)) { - if (CallerFD->hasAttr()) + auto *Attr = CallerFD->getAttr(); + if (Attr && Attr->isNewZA()) CallerHasZAState = true; - else if (const auto *FPT = CallerFD->getType()->getAs()) - CallerHasZAState = FPT->getExtProtoInfo().AArch64SMEAttributes & - FunctionType::SME_PStateZASharedMask; + else if (const auto *FPT = + CallerFD->getType()->getAs()) + CallerHasZAState = FunctionType::getArmZAState( + FPT->getExtProtoInfo().AArch64SMEAttributes) != + FunctionType::ARM_None; } if (!CallerHasZAState) diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp index e92fd104d78eb..4e7049571eeb7 100644 --- a/clang/lib/Sema/SemaDecl.cpp +++ b/clang/lib/Sema/SemaDecl.cpp @@ -3819,8 +3819,7 @@ bool Sema::MergeFunctionDecl(FunctionDecl *New, NamedDecl *&OldD, Scope *S, // It is not permitted to redeclare an SME function with different SME // attributes. - if (IsInvalidSMECallConversion(Old->getType(), New->getType(), - AArch64SMECallConversionKind::MatchExactly)) { + if (IsInvalidSMECallConversion(Old->getType(), New->getType())) { Diag(New->getLocation(), diag::err_sme_attr_mismatch) << New->getType() << Old->getType(); Diag(OldLocation, diag::note_previous_declaration); @@ -12180,13 +12179,15 @@ bool Sema::CheckFunctionDeclaration(Scope *S, FunctionDecl *NewFD, // Check if the function definition uses any AArch64 SME features without // having the '+sme' feature enabled. 
if (DeclIsDefn) { + const auto *Attr = NewFD->getAttr(); bool UsesSM = NewFD->hasAttr(); - bool UsesZA = NewFD->hasAttr(); + bool UsesZA = Attr && Attr->isNewZA(); if (const auto *FPT = NewFD->getType()->getAs()) { FunctionProtoType::ExtProtoInfo EPI = FPT->getExtProtoInfo(); UsesSM |= EPI.AArch64SMEAttributes & FunctionType::SME_PStateSMEnabledMask; - UsesZA |= EPI.AArch64SMEAttributes & FunctionType::SME_PStateZASharedMask; + UsesZA |= FunctionType::getArmZAState(EPI.AArch64SMEAttributes) != + FunctionType::ARM_None; } if (UsesSM || UsesZA) { diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp index 1a58cfd8e4179..7e6881049d8d9 100644 --- a/clang/lib/Sema/SemaDeclAttr.cpp +++ b/clang/lib/Sema/SemaDeclAttr.cpp @@ -8951,26 +8951,74 @@ static bool MustDelayAttributeArguments(const ParsedAttr &AL) { return false; } +static bool checkArmNewAttrMutualExclusion( + Sema &S, const ParsedAttr &AL, const FunctionProtoType *FPT, + FunctionType::ArmStateValue CurrentState, StringRef StateName) { + auto CheckForIncompatibleAttr = + [&](FunctionType::ArmStateValue IncompatibleState, + StringRef IncompatibleStateName) { + if (CurrentState == IncompatibleState) { + S.Diag(AL.getLoc(), diag::err_attributes_are_not_compatible) + << (std::string("'__arm_new(\"") + StateName.str() + "\")'") + << (std::string("'") + IncompatibleStateName.str() + "(\"" + + StateName.str() + "\")'") + << true; + AL.setInvalid(); + } + }; -static void handleArmNewZaAttr(Sema &S, Decl *D, const ParsedAttr &AL) { - if (auto *FPT = dyn_cast(D->getFunctionType())) { - if (FPT->getAArch64SMEAttributes() & - FunctionType::SME_PStateZASharedMask) { - S.Diag(AL.getLoc(), diag::err_attributes_are_not_compatible) - << AL << "'__arm_shared_za'" << true; + CheckForIncompatibleAttr(FunctionType::ARM_In, "__arm_in"); + CheckForIncompatibleAttr(FunctionType::ARM_Out, "__arm_out"); + CheckForIncompatibleAttr(FunctionType::ARM_InOut, "__arm_inout"); + 
CheckForIncompatibleAttr(FunctionType::ARM_Preserves, "__arm_preserves"); + return AL.isInvalid(); +} + +static void handleArmNewAttr(Sema &S, Decl *D, const ParsedAttr &AL) { + if (!AL.getNumArgs()) { + S.Diag(AL.getLoc(), diag::err_missing_arm_state) << AL; + AL.setInvalid(); + return; + } + + std::vector NewState; + if (const auto *ExistingAttr = D->getAttr()) { + for (StringRef S : ExistingAttr->newArgs()) + NewState.push_back(S); + } + + bool HasZA = false; + for (unsigned I = 0, E = AL.getNumArgs(); I != E; ++I) { + StringRef StateName; + SourceLocation LiteralLoc; + if (!S.checkStringLiteralArgumentAttr(AL, I, StateName, &LiteralLoc)) + return; + + if (StateName == "za") + HasZA = true; + else { + S.Diag(LiteralLoc, diag::err_unknown_arm_state) << StateName; AL.setInvalid(); + return; } - if (FPT->getAArch64SMEAttributes() & - FunctionType::SME_PStateZAPreservedMask) { - S.Diag(AL.getLoc(), diag::err_attributes_are_not_compatible) - << AL << "'__arm_preserves_za'" << true; - AL.setInvalid(); + + if (std::find(NewState.begin(), NewState.end(), StateName) == + NewState.end()) { // Avoid adding duplicates. 
+ NewState.push_back(StateName); } - if (AL.isInvalid()) + } + + if (auto *FPT = dyn_cast(D->getFunctionType())) { + FunctionType::ArmStateValue ZAState = + FunctionType::getArmZAState(FPT->getAArch64SMEAttributes()); + if (HasZA && ZAState != FunctionType::ARM_None && + checkArmNewAttrMutualExclusion(S, AL, FPT, ZAState, "za")) return; } - handleSimpleAttribute(S, D, AL); + D->dropAttr(); + D->addAttr(::new (S.Context) + ArmNewAttr(S.Context, AL, NewState.data(), NewState.size())); } /// ProcessDeclAttribute - Apply the specific attribute to the specified decl if @@ -9752,8 +9800,8 @@ ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D, const ParsedAttr &AL, handleSimpleAttribute(S, D, AL); break; - case ParsedAttr::AT_ArmNewZA: - handleArmNewZaAttr(S, D, AL); + case ParsedAttr::AT_ArmNew: + handleArmNewAttr(S, D, AL); break; case ParsedAttr::AT_AcquireHandle: diff --git a/clang/lib/Sema/SemaDeclCXX.cpp b/clang/lib/Sema/SemaDeclCXX.cpp index 36e53c684ac4d..f229e734d06b2 100644 --- a/clang/lib/Sema/SemaDeclCXX.cpp +++ b/clang/lib/Sema/SemaDeclCXX.cpp @@ -18328,9 +18328,7 @@ bool Sema::CheckOverridingFunctionAttributes(const CXXMethodDecl *New, } // SME attributes must match when overriding a function declaration. - if (IsInvalidSMECallConversion( - Old->getType(), New->getType(), - AArch64SMECallConversionKind::MayAddPreservesZA)) { + if (IsInvalidSMECallConversion(Old->getType(), New->getType())) { Diag(New->getLocation(), diag::err_conflicting_overriding_attributes) << New << New->getType() << Old->getType(); Diag(Old->getLocation(), diag::note_overridden_virtual_function); diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp index 2f48ea237cdfa..049fdae09bb18 100644 --- a/clang/lib/Sema/SemaExpr.cpp +++ b/clang/lib/Sema/SemaExpr.cpp @@ -9803,8 +9803,7 @@ ExprResult Sema::ActOnConditionalOp(SourceLocation QuestionLoc, } // Check that the SME attributes for PSTATE.ZA and PSTATE.SM are compatible. 
-bool Sema::IsInvalidSMECallConversion(QualType FromType, QualType ToType, - AArch64SMECallConversionKind C) { +bool Sema::IsInvalidSMECallConversion(QualType FromType, QualType ToType) { unsigned FromAttributes = 0, ToAttributes = 0; if (const auto *FromFn = dyn_cast(Context.getCanonicalType(FromType))) @@ -9815,25 +9814,7 @@ bool Sema::IsInvalidSMECallConversion(QualType FromType, QualType ToType, ToAttributes = ToFn->getAArch64SMEAttributes() & FunctionType::SME_AttributeMask; - if (FromAttributes == ToAttributes) - return false; - - // If the '__arm_preserves_za' is the only difference between the types, - // check whether we're allowed to add or remove it. - if ((FromAttributes ^ ToAttributes) == - FunctionType::SME_PStateZAPreservedMask) { - switch (C) { - case AArch64SMECallConversionKind::MatchExactly: - return true; - case AArch64SMECallConversionKind::MayAddPreservesZA: - return !(ToAttributes & FunctionType::SME_PStateZAPreservedMask); - case AArch64SMECallConversionKind::MayDropPreservesZA: - return !(FromAttributes & FunctionType::SME_PStateZAPreservedMask); - } - } - - // There has been a mismatch of attributes - return true; + return FromAttributes != ToAttributes; } // Check if we have a conversion between incompatible cmse function pointer @@ -10002,9 +9983,7 @@ checkPointerTypesForAssignment(Sema &S, QualType LHSType, QualType RHSType, return Sema::IncompatibleFunctionPointer; if (IsInvalidCmseNSCallConversion(S, ltrans, rtrans)) return Sema::IncompatibleFunctionPointer; - if (S.IsInvalidSMECallConversion( - rtrans, ltrans, - Sema::AArch64SMECallConversionKind::MayDropPreservesZA)) + if (S.IsInvalidSMECallConversion(rtrans, ltrans)) return Sema::IncompatibleFunctionPointer; return ConvTy; } diff --git a/clang/lib/Sema/SemaOverload.cpp b/clang/lib/Sema/SemaOverload.cpp index 23b9bc0fe2d6e..31111222e0437 100644 --- a/clang/lib/Sema/SemaOverload.cpp +++ b/clang/lib/Sema/SemaOverload.cpp @@ -1784,26 +1784,6 @@ bool Sema::IsFunctionConversion(QualType 
FromType, QualType ToType, Changed = true; } - // Drop the 'arm_preserves_za' if not present in the target type (we can do - // that because it is merely a hint). - if (const auto *FromFPT = dyn_cast(FromFn)) { - FunctionProtoType::ExtProtoInfo ExtInfo = FromFPT->getExtProtoInfo(); - if (ExtInfo.AArch64SMEAttributes & - FunctionType::SME_PStateZAPreservedMask) { - unsigned ToFlags = 0; - if (const auto *ToFPT = dyn_cast(ToFn)) - ToFlags = ToFPT->getExtProtoInfo().AArch64SMEAttributes; - if (!(ToFlags & FunctionType::SME_PStateZAPreservedMask)) { - ExtInfo.setArmSMEAttribute(FunctionType::SME_PStateZAPreservedMask, - false); - QualType QT = Context.getFunctionType( - FromFPT->getReturnType(), FromFPT->getParamTypes(), ExtInfo); - FromFn = QT->getAs(); - Changed = true; - } - } - } - // Drop 'noexcept' if not present in target type. if (const auto *FromFPT = dyn_cast(FromFn)) { const auto *ToFPT = cast(ToFn); diff --git a/clang/lib/Sema/SemaType.cpp b/clang/lib/Sema/SemaType.cpp index a376f20fa4f4e..78702b41ab820 100644 --- a/clang/lib/Sema/SemaType.cpp +++ b/clang/lib/Sema/SemaType.cpp @@ -147,8 +147,10 @@ static void diagnoseBadTypeAttribute(Sema &S, const ParsedAttr &attr, case ParsedAttr::AT_CmseNSCall: \ case ParsedAttr::AT_ArmStreaming: \ case ParsedAttr::AT_ArmStreamingCompatible: \ - case ParsedAttr::AT_ArmSharedZA: \ - case ParsedAttr::AT_ArmPreservesZA: \ + case ParsedAttr::AT_ArmPreserves: \ + case ParsedAttr::AT_ArmIn: \ + case ParsedAttr::AT_ArmOut: \ + case ParsedAttr::AT_ArmInOut: \ case ParsedAttr::AT_AnyX86NoCallerSavedRegisters: \ case ParsedAttr::AT_AnyX86NoCfCheck: \ CALLING_CONV_ATTRS_CASELIST @@ -7876,6 +7878,49 @@ static bool checkMutualExclusion(TypeProcessingState &state, return true; } +static bool handleArmStateAttribute(Sema &S, + FunctionProtoType::ExtProtoInfo &EPI, + ParsedAttr &Attr, + FunctionType::ArmStateValue State) { + if (!Attr.getNumArgs()) { + S.Diag(Attr.getLoc(), diag::err_missing_arm_state) << Attr; + Attr.setInvalid(); + 
return true; + } + + for (unsigned I = 0; I < Attr.getNumArgs(); ++I) { + StringRef StateName; + SourceLocation LiteralLoc; + if (!S.checkStringLiteralArgumentAttr(Attr, I, StateName, &LiteralLoc)) + return true; + + unsigned Shift; + FunctionType::ArmStateValue ExistingState; + if (StateName == "za") { + Shift = FunctionType::SME_ZAShift; + ExistingState = FunctionType::getArmZAState(EPI.AArch64SMEAttributes); + } else { + S.Diag(LiteralLoc, diag::err_unknown_arm_state) << StateName; + Attr.setInvalid(); + return true; + } + + // __arm_in(S), __arm_out(S), __arm_inout(S) and __arm_preserves(S) + // are all mutually exclusive for the same S, so check if there are + // conflicting attributes. + if (ExistingState != FunctionType::ARM_None && ExistingState != State) { + S.Diag(LiteralLoc, diag::err_conflicting_attributes_arm_state) + << StateName; + Attr.setInvalid(); + return true; + } + + EPI.setArmSMEAttribute( + (FunctionType::AArch64SMETypeAttributes)((State << Shift))); + } + return false; +} + /// Process an individual function attribute. Returns true to /// indicate that the attribute was handled, false if it wasn't. 
static bool handleFunctionTypeAttr(TypeProcessingState &state, ParsedAttr &attr, @@ -8008,11 +8053,18 @@ static bool handleFunctionTypeAttr(TypeProcessingState &state, ParsedAttr &attr, if (attr.getKind() == ParsedAttr::AT_ArmStreaming || attr.getKind() == ParsedAttr::AT_ArmStreamingCompatible || - attr.getKind() == ParsedAttr::AT_ArmSharedZA || - attr.getKind() == ParsedAttr::AT_ArmPreservesZA){ - if (S.CheckAttrTarget(attr) || S.CheckAttrNoArgs(attr)) + attr.getKind() == ParsedAttr::AT_ArmPreserves || + attr.getKind() == ParsedAttr::AT_ArmIn || + attr.getKind() == ParsedAttr::AT_ArmOut || + attr.getKind() == ParsedAttr::AT_ArmInOut) { + if (S.CheckAttrTarget(attr)) return true; + if (attr.getKind() == ParsedAttr::AT_ArmStreaming || + attr.getKind() == ParsedAttr::AT_ArmStreamingCompatible) + if (S.CheckAttrNoArgs(attr)) + return true; + if (!unwrapped.isFunctionType()) return false; @@ -8039,11 +8091,21 @@ static bool handleFunctionTypeAttr(TypeProcessingState &state, ParsedAttr &attr, return true; EPI.setArmSMEAttribute(FunctionType::SME_PStateSMCompatibleMask); break; - case ParsedAttr::AT_ArmSharedZA: - EPI.setArmSMEAttribute(FunctionType::SME_PStateZASharedMask); + case ParsedAttr::AT_ArmPreserves: + if (handleArmStateAttribute(S, EPI, attr, FunctionType::ARM_Preserves)) + return true; + break; + case ParsedAttr::AT_ArmIn: + if (handleArmStateAttribute(S, EPI, attr, FunctionType::ARM_In)) + return true; break; - case ParsedAttr::AT_ArmPreservesZA: - EPI.setArmSMEAttribute(FunctionType::SME_PStateZAPreservedMask); + case ParsedAttr::AT_ArmOut: + if (handleArmStateAttribute(S, EPI, attr, FunctionType::ARM_Out)) + return true; + break; + case ParsedAttr::AT_ArmInOut: + if (handleArmStateAttribute(S, EPI, attr, FunctionType::ARM_InOut)) + return true; break; default: llvm_unreachable("Unsupported attribute"); diff --git a/clang/test/AST/ast-dump-sme-attributes.cpp b/clang/test/AST/ast-dump-sme-attributes.cpp index 6581fd4baba9e..133648d90a157 100644 --- 
a/clang/test/AST/ast-dump-sme-attributes.cpp +++ b/clang/test/AST/ast-dump-sme-attributes.cpp @@ -13,16 +13,16 @@ struct Foo { // CHECK-NEXT: |-CXXMethodDecl {{.*}} f_streaming_compatible 'void () __arm_streaming_compatible' // CHECK-NEXT: |-CXXMethodDecl {{.*}} f_locally_streaming 'void ()' // CHECK-NEXT: | `-ArmLocallyStreamingAttr -// CHECK-NEXT: |-CXXMethodDecl {{.*}} f_shared_za 'void () __arm_shared_za' +// CHECK-NEXT: |-CXXMethodDecl {{.*}} f_shared_za 'void () __arm_inout("za")' // CHECK-NEXT: |-CXXMethodDecl {{.*}} f_new_za 'void ()' -// CHECK-NEXT: | `-ArmNewZAAttr -// CHECK-NEXT: |-CXXMethodDecl {{.*}} f_preserves_za 'void () __arm_preserves_za' +// CHECK-NEXT: | `-ArmNewAttr {{.*}} za +// CHECK-NEXT: |-CXXMethodDecl {{.*}} f_preserves_za 'void () __arm_preserves("za")' void f_streaming() __arm_streaming; void f_streaming_compatible() __arm_streaming_compatible; __arm_locally_streaming void f_locally_streaming(); - void f_shared_za() __arm_shared_za; - __arm_new_za void f_new_za(); - void f_preserves_za() __arm_preserves_za; + void f_shared_za() __arm_inout("za"); + __arm_new("za") void f_new_za(); + void f_preserves_za() __arm_preserves("za"); // CHECK: |-CXXMethodDecl {{.*}} test_lambda 'int (int)' implicit-inline diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/aarch64-sme-attrs.cpp b/clang/test/CodeGen/aarch64-sme-intrinsics/aarch64-sme-attrs.cpp index 0768bfc332387..f69703a8a7d89 100644 --- a/clang/test/CodeGen/aarch64-sme-intrinsics/aarch64-sme-attrs.cpp +++ b/clang/test/CodeGen/aarch64-sme-intrinsics/aarch64-sme-attrs.cpp @@ -12,8 +12,8 @@ extern int normal_callee(); int streaming_decl(void) __arm_streaming; int streaming_compatible_decl(void) __arm_streaming_compatible; -int shared_za_decl(void) __arm_shared_za; -int preserves_za_decl(void) __arm_preserves_za; +int shared_za_decl(void) __arm_inout("za"); +int preserves_za_decl(void) __arm_preserves("za"); int private_za_decl(void); // == FUNCTION DEFINITIONS == @@ -78,7 +78,7 @@ 
__arm_locally_streaming int locally_streaming_callee() { // CHECK-SAME: #[[ZA_SHARED:[0-9]+]] // CHECK: call i32 @normal_callee() // - int shared_za_caller() __arm_shared_za { + int shared_za_caller() __arm_inout("za") { return normal_callee(); } @@ -86,7 +86,7 @@ __arm_locally_streaming int locally_streaming_callee() { // CHECK-SAME: #[[ZA_SHARED]] // CHECK: call i32 @shared_za_decl() #[[ZA_SHARED_CALL:[0-9]+]] // - int shared_za_callee() __arm_shared_za { + int shared_za_callee() __arm_inout("za") { return shared_za_decl(); } @@ -97,7 +97,7 @@ __arm_locally_streaming int locally_streaming_callee() { // CHECK-SAME: #[[ZA_PRESERVED:[0-9]+]] // CHECK: call i32 @normal_callee() // - int preserves_za_caller() __arm_preserves_za { + int preserves_za_caller() __arm_preserves("za") { return normal_callee(); } @@ -105,7 +105,7 @@ __arm_locally_streaming int locally_streaming_callee() { // CHECK-SAME: #[[ZA_PRESERVED]] // CHECK: call i32 @preserves_za_decl() #[[ZA_PRESERVED_CALL:[0-9]+]] // - int preserves_za_callee() __arm_preserves_za { + int preserves_za_callee() __arm_preserves("za") { return preserves_za_decl(); } @@ -116,7 +116,7 @@ __arm_locally_streaming int locally_streaming_callee() { // CHECK-SAME: #[[ZA_NEW:[0-9]+]] // CHECK: call i32 @normal_callee() // -__arm_new_za int new_za_caller() { +__arm_new("za") int new_za_caller() { return normal_callee(); } @@ -124,7 +124,7 @@ __arm_new_za int new_za_caller() { // CHECK-SAME: #[[ZA_NEW]] // CHECK: call i32 @private_za_decl() // -__arm_new_za int new_za_callee() { +__arm_new("za") int new_za_callee() { return private_za_decl(); } @@ -135,8 +135,8 @@ __arm_new_za int new_za_callee() { // and also to callsites. 
typedef void (*s_ptrty) (int, int) __arm_streaming; typedef void (*sc_ptrty) (int, int) __arm_streaming_compatible; -typedef void (*sz_ptrty) (int, int) __arm_shared_za; -typedef void (*pz_ptrty) (int, int) __arm_preserves_za; +typedef void (*sz_ptrty) (int, int) __arm_inout("za"); +typedef void (*pz_ptrty) (int, int) __arm_preserves("za"); // CHECK-LABEL: @test_streaming_ptrty( // CHECK-SAME: #[[NORMAL_DEF:[0-9]+]] @@ -152,12 +152,12 @@ void test_streaming_compatible_ptrty(sc_ptrty f, int x, int y) { return f(x, y); // CHECK-SAME: #[[ZA_SHARED]] // CHECK: call void [[F:%.*]](i32 noundef [[X:%.*]], i32 noundef [[Y:%.*]]) #[[ZA_SHARED_CALL]] // -void test_shared_za(sz_ptrty f, int x, int y) __arm_shared_za { return f(x, y); } +void test_shared_za(sz_ptrty f, int x, int y) __arm_inout("za") { return f(x, y); } // CHECK-LABEL: @test_preserved_za( // CHECK-SAME: #[[ZA_SHARED]] // CHECK: call void [[F:%.*]](i32 noundef [[X:%.*]], i32 noundef [[Y:%.*]]) #[[ZA_PRESERVED_CALL]] // -void test_preserved_za(pz_ptrty f, int x, int y) __arm_shared_za { return f(x, y); } +void test_preserved_za(pz_ptrty f, int x, int y) __arm_inout("za") { return f(x, y); } // CHECK-LABEL: @test_indirect_streaming_ptrty( // CHECK-SAME: #[[NORMAL_DEF:[0-9]+]] @@ -255,7 +255,7 @@ int call() { return 0; } template __attribute__((always_inline)) -int call(T f, Other... other) __arm_shared_za { +int call(T f, Other... other) __arm_inout("za") { return f() + call(other...); } @@ -270,7 +270,7 @@ int call(T f, Other... 
other) __arm_shared_za { // CHECK-NEXT: add nsw // CHECK-NEXT: add nsw // CHECK-NEXT: ret -int test_variadic_template() __arm_shared_za { +int test_variadic_template() __arm_inout("za") { return call(normal_callee, streaming_decl, streaming_compatible_decl, @@ -286,18 +286,18 @@ int test_variadic_template() __arm_shared_za { // CHECK: attributes #[[SM_BODY]] = { mustprogress noinline nounwind "aarch64_pstate_sm_body" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bf16,+sme" } // CHECK: attributes #[[ZA_SHARED]] = { mustprogress noinline nounwind "aarch64_pstate_za_shared" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bf16,+sme" } // CHECK: attributes #[[ZA_SHARED_DECL]] = { "aarch64_pstate_za_shared" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bf16,+sme" } -// CHECK: attributes #[[ZA_PRESERVED]] = { mustprogress noinline nounwind "aarch64_pstate_za_preserved" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bf16,+sme" } -// CHECK: attributes #[[ZA_PRESERVED_DECL]] = { "aarch64_pstate_za_preserved" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bf16,+sme" } +// CHECK: attributes #[[ZA_PRESERVED]] = { mustprogress noinline nounwind "aarch64_pstate_za_preserved" "aarch64_pstate_za_shared" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bf16,+sme" } +// CHECK: attributes #[[ZA_PRESERVED_DECL]] = { "aarch64_pstate_za_preserved" "aarch64_pstate_za_shared" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bf16,+sme" } // CHECK: attributes #[[ZA_NEW]] = { mustprogress noinline nounwind "aarch64_pstate_za_new" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bf16,+sme" } // CHECK: attributes #[[NORMAL_DEF]] = { mustprogress noinline nounwind "no-trapping-math"="true" "stack-protector-buffer-size"="8" 
"target-features"="+bf16,+sme" } // CHECK: attributes #[[SM_ENABLED_CALL]] = { "aarch64_pstate_sm_enabled" } // CHECK: attributes #[[SM_COMPATIBLE_CALL]] = { "aarch64_pstate_sm_compatible" } // CHECK: attributes #[[SM_BODY_CALL]] = { "aarch64_pstate_sm_body" } // CHECK: attributes #[[ZA_SHARED_CALL]] = { "aarch64_pstate_za_shared" } -// CHECK: attributes #[[ZA_PRESERVED_CALL]] = { "aarch64_pstate_za_preserved" } +// CHECK: attributes #[[ZA_PRESERVED_CALL]] = { "aarch64_pstate_za_preserved" "aarch64_pstate_za_shared" } // CHECK: attributes #[[NOUNWIND_CALL]] = { nounwind } // CHECK: attributes #[[NOUNWIND_SM_ENABLED_CALL]] = { nounwind "aarch64_pstate_sm_enabled" } // CHECK: attributes #[[NOUNWIND_SM_COMPATIBLE_CALL]] = { nounwind "aarch64_pstate_sm_compatible" } // CHECK: attributes #[[NOUNWIND_ZA_SHARED_CALL]] = { nounwind "aarch64_pstate_za_shared" } -// CHECK: attributes #[[NOUNWIND_ZA_PRESERVED_CALL]] = { nounwind "aarch64_pstate_za_preserved" } +// CHECK: attributes #[[NOUNWIND_ZA_PRESERVED_CALL]] = { nounwind "aarch64_pstate_za_preserved" "aarch64_pstate_za_shared" } diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_add-i32.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_add-i32.c index 55d2e355897f7..2ee14f6a7e882 100644 --- a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_add-i32.c +++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_add-i32.c @@ -30,7 +30,7 @@ // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.addha.nxv4i32(i32 0, [[TMP0]], [[TMP1]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svaddha_za32_u32(svbool_t pn, svbool_t pm, svuint32_t zn) __arm_streaming __arm_shared_za { +void test_svaddha_za32_u32(svbool_t pn, svbool_t pm, svuint32_t zn) __arm_streaming __arm_inout("za") { SME_ACLE_FUNC(svaddha_za32, _u32, _m)(0, pn, pm, zn); } @@ -50,7 +50,7 @@ void test_svaddha_za32_u32(svbool_t pn, svbool_t pm, svuint32_t zn) __arm_stream // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.addha.nxv4i32(i32 3, [[TMP0]], 
[[TMP1]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svaddha_za32_u32_1(svbool_t pn, svbool_t pm, svuint32_t zn) __arm_streaming __arm_shared_za { +void test_svaddha_za32_u32_1(svbool_t pn, svbool_t pm, svuint32_t zn) __arm_streaming __arm_inout("za") { SME_ACLE_FUNC(svaddha_za32, _u32, _m)(3, pn, pm, zn); } @@ -70,7 +70,7 @@ void test_svaddha_za32_u32_1(svbool_t pn, svbool_t pm, svuint32_t zn) __arm_stre // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.addha.nxv4i32(i32 0, [[TMP0]], [[TMP1]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svaddha_za32_s32(svbool_t pn, svbool_t pm, svint32_t zn) __arm_streaming __arm_shared_za { +void test_svaddha_za32_s32(svbool_t pn, svbool_t pm, svint32_t zn) __arm_streaming __arm_inout("za") { SME_ACLE_FUNC(svaddha_za32, _s32, _m)(0, pn, pm, zn); } @@ -90,7 +90,7 @@ void test_svaddha_za32_s32(svbool_t pn, svbool_t pm, svint32_t zn) __arm_streami // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.addha.nxv4i32(i32 3, [[TMP0]], [[TMP1]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svaddha_za32_s32_1(svbool_t pn, svbool_t pm, svint32_t zn) __arm_streaming __arm_shared_za { +void test_svaddha_za32_s32_1(svbool_t pn, svbool_t pm, svint32_t zn) __arm_streaming __arm_inout("za") { SME_ACLE_FUNC(svaddha_za32, _s32, _m)(3, pn, pm, zn); } @@ -110,7 +110,7 @@ void test_svaddha_za32_s32_1(svbool_t pn, svbool_t pm, svint32_t zn) __arm_strea // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.addva.nxv4i32(i32 0, [[TMP0]], [[TMP1]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svaddva_za32_u32(svbool_t pn, svbool_t pm, svuint32_t zn) __arm_streaming __arm_shared_za { +void test_svaddva_za32_u32(svbool_t pn, svbool_t pm, svuint32_t zn) __arm_streaming __arm_inout("za") { SME_ACLE_FUNC(svaddva_za32, _u32, _m)(0, pn, pm, zn); } @@ -130,7 +130,7 @@ void test_svaddva_za32_u32(svbool_t pn, svbool_t pm, svuint32_t zn) __arm_stream // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.addva.nxv4i32(i32 3, [[TMP0]], [[TMP1]], 
[[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svaddva_za32_u32_1(svbool_t pn, svbool_t pm, svuint32_t zn) __arm_streaming __arm_shared_za { +void test_svaddva_za32_u32_1(svbool_t pn, svbool_t pm, svuint32_t zn) __arm_streaming __arm_inout("za") { SME_ACLE_FUNC(svaddva_za32, _u32, _m)(3, pn, pm, zn); } @@ -150,7 +150,7 @@ void test_svaddva_za32_u32_1(svbool_t pn, svbool_t pm, svuint32_t zn) __arm_stre // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.addva.nxv4i32(i32 0, [[TMP0]], [[TMP1]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svaddva_za32_s32(svbool_t pn, svbool_t pm, svint32_t zn) __arm_streaming __arm_shared_za { +void test_svaddva_za32_s32(svbool_t pn, svbool_t pm, svint32_t zn) __arm_streaming __arm_inout("za") { SME_ACLE_FUNC(svaddva_za32, _s32, _m)(0, pn, pm, zn); } @@ -170,7 +170,7 @@ void test_svaddva_za32_s32(svbool_t pn, svbool_t pm, svint32_t zn) __arm_streami // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.addva.nxv4i32(i32 3, [[TMP0]], [[TMP1]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svaddva_za32_s32_1(svbool_t pn, svbool_t pm, svint32_t zn) __arm_streaming __arm_shared_za { +void test_svaddva_za32_s32_1(svbool_t pn, svbool_t pm, svint32_t zn) __arm_streaming __arm_inout("za") { SME_ACLE_FUNC(svaddva_za32, _s32, _m)(3, pn, pm, zn); } //// NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_add-i64.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_add-i64.c index 8e9c2e7da46a3..a0fb9bdb4e25a 100644 --- a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_add-i64.c +++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_add-i64.c @@ -30,7 +30,7 @@ // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.addha.nxv2i64(i32 0, [[TMP0]], [[TMP1]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svaddha_za64_u64(svbool_t pn, svbool_t pm, svuint64_t zn) __arm_streaming __arm_shared_za { +void test_svaddha_za64_u64(svbool_t pn, svbool_t pm, svuint64_t zn) __arm_streaming __arm_inout("za") { SME_ACLE_FUNC(svaddha_za64, _u64, _m)(0, pn, pm, zn); } @@ -50,7 +50,7 @@ void test_svaddha_za64_u64(svbool_t pn, svbool_t pm, svuint64_t zn) __arm_stream // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.addha.nxv2i64(i32 7, [[TMP0]], [[TMP1]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svaddha_za64_u64_1(svbool_t pn, svbool_t pm, svuint64_t zn) __arm_streaming __arm_shared_za { +void test_svaddha_za64_u64_1(svbool_t pn, svbool_t pm, svuint64_t zn) __arm_streaming __arm_inout("za") { SME_ACLE_FUNC(svaddha_za64, _u64, _m)(7, pn, pm, zn); } @@ -70,7 +70,7 @@ void test_svaddha_za64_u64_1(svbool_t pn, svbool_t pm, svuint64_t zn) __arm_stre // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.addha.nxv2i64(i32 0, [[TMP0]], [[TMP1]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svaddha_za64_s64(svbool_t pn, svbool_t pm, svint64_t zn) __arm_streaming __arm_shared_za { +void test_svaddha_za64_s64(svbool_t pn, svbool_t pm, svint64_t zn) __arm_streaming __arm_inout("za") { SME_ACLE_FUNC(svaddha_za64, _s64, _m)(0, pn, pm, zn); } @@ -90,7 +90,7 @@ void test_svaddha_za64_s64(svbool_t pn, svbool_t pm, svint64_t zn) __arm_streami // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.addha.nxv2i64(i32 7, [[TMP0]], [[TMP1]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void 
test_svaddha_za64_s64_1(svbool_t pn, svbool_t pm, svint64_t zn) __arm_streaming __arm_shared_za { +void test_svaddha_za64_s64_1(svbool_t pn, svbool_t pm, svint64_t zn) __arm_streaming __arm_inout("za") { SME_ACLE_FUNC(svaddha_za64, _s64, _m)(7, pn, pm, zn); } @@ -110,7 +110,7 @@ void test_svaddha_za64_s64_1(svbool_t pn, svbool_t pm, svint64_t zn) __arm_strea // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.addva.nxv2i64(i32 0, [[TMP0]], [[TMP1]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svaddva_za64_u64(svbool_t pn, svbool_t pm, svuint64_t zn) __arm_streaming __arm_shared_za { +void test_svaddva_za64_u64(svbool_t pn, svbool_t pm, svuint64_t zn) __arm_streaming __arm_inout("za") { SME_ACLE_FUNC(svaddva_za64, _u64, _m)(0, pn, pm, zn); } @@ -130,7 +130,7 @@ void test_svaddva_za64_u64(svbool_t pn, svbool_t pm, svuint64_t zn) __arm_stream // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.addva.nxv2i64(i32 7, [[TMP0]], [[TMP1]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svaddva_za64_u64_1(svbool_t pn, svbool_t pm, svuint64_t zn) __arm_streaming __arm_shared_za { +void test_svaddva_za64_u64_1(svbool_t pn, svbool_t pm, svuint64_t zn) __arm_streaming __arm_inout("za") { SME_ACLE_FUNC(svaddva_za64, _u64, _m)(7, pn, pm, zn); } @@ -150,7 +150,7 @@ void test_svaddva_za64_u64_1(svbool_t pn, svbool_t pm, svuint64_t zn) __arm_stre // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.addva.nxv2i64(i32 0, [[TMP0]], [[TMP1]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svaddva_za64_s64(svbool_t pn, svbool_t pm, svint64_t zn) __arm_streaming __arm_shared_za { +void test_svaddva_za64_s64(svbool_t pn, svbool_t pm, svint64_t zn) __arm_streaming __arm_inout("za") { SME_ACLE_FUNC(svaddva_za64, _s64, _m)(0, pn, pm, zn); } @@ -170,7 +170,7 @@ void test_svaddva_za64_s64(svbool_t pn, svbool_t pm, svint64_t zn) __arm_streami // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.addva.nxv2i64(i32 7, [[TMP0]], [[TMP1]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void 
test_svaddva_za64_s64_1(svbool_t pn, svbool_t pm, svint64_t zn) __arm_streaming __arm_shared_za { +void test_svaddva_za64_s64_1(svbool_t pn, svbool_t pm, svint64_t zn) __arm_streaming __arm_inout("za") { SME_ACLE_FUNC(svaddva_za64, _s64, _m)(7, pn, pm, zn); } //// NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ld1.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ld1.c index e17782db222b6..183a3986212bc 100644 --- a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ld1.c +++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ld1.c @@ -22,7 +22,7 @@ // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1b.horiz( [[PG]], ptr [[PTR]], i32 0, i32 [[ADD]]) // CHECK-CXX-NEXT: ret void // -void test_svld1_hor_za8(uint32_t slice_base, svbool_t pg, const void *ptr) __arm_streaming __arm_shared_za { +void test_svld1_hor_za8(uint32_t slice_base, svbool_t pg, const void *ptr) __arm_streaming __arm_out("za") { svld1_hor_za8(0, slice_base, pg, ptr); svld1_hor_za8(0, slice_base + 15, pg, ptr); } @@ -45,7 +45,7 @@ void test_svld1_hor_za8(uint32_t slice_base, svbool_t pg, const void *ptr) __arm // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1h.horiz( [[TMP0]], ptr [[PTR]], i32 1, i32 [[ADD]]) // CHECK-CXX-NEXT: ret void // -void test_svld1_hor_za16(uint32_t slice_base, svbool_t pg, const void *ptr) __arm_streaming __arm_shared_za { +void test_svld1_hor_za16(uint32_t slice_base, svbool_t pg, const void *ptr) __arm_streaming __arm_out("za") { svld1_hor_za16(0, slice_base, pg, ptr); svld1_hor_za16(1, slice_base + 7, pg, ptr); } @@ -68,7 +68,7 @@ void test_svld1_hor_za16(uint32_t slice_base, svbool_t pg, const void *ptr) __ar // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1w.horiz( [[TMP0]], ptr [[PTR]], i32 3, i32 [[ADD]]) // CHECK-CXX-NEXT: ret void // -void test_svld1_hor_za32(uint32_t slice_base, svbool_t pg, const void *ptr) __arm_streaming 
__arm_shared_za { +void test_svld1_hor_za32(uint32_t slice_base, svbool_t pg, const void *ptr) __arm_streaming __arm_out("za") { svld1_hor_za32(0, slice_base, pg, ptr); svld1_hor_za32(3, slice_base + 3, pg, ptr); } @@ -91,7 +91,7 @@ void test_svld1_hor_za32(uint32_t slice_base, svbool_t pg, const void *ptr) __ar // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1d.horiz( [[TMP0]], ptr [[PTR]], i32 7, i32 [[ADD]]) // CHECK-CXX-NEXT: ret void // -void test_svld1_hor_za64(uint32_t slice_base, svbool_t pg, const void *ptr) __arm_streaming __arm_shared_za { +void test_svld1_hor_za64(uint32_t slice_base, svbool_t pg, const void *ptr) __arm_streaming __arm_out("za") { svld1_hor_za64(0, slice_base, pg, ptr); svld1_hor_za64(7, slice_base + 1, pg, ptr); } @@ -112,7 +112,7 @@ void test_svld1_hor_za64(uint32_t slice_base, svbool_t pg, const void *ptr) __ar // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1q.horiz( [[TMP0]], ptr [[PTR]], i32 15, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT: ret void // -void test_svld1_hor_za128(uint32_t slice_base, svbool_t pg, const void *ptr) __arm_streaming __arm_shared_za { +void test_svld1_hor_za128(uint32_t slice_base, svbool_t pg, const void *ptr) __arm_streaming __arm_out("za") { svld1_hor_za128(0, slice_base, pg, ptr); svld1_hor_za128(15, slice_base, pg, ptr); } @@ -133,7 +133,7 @@ void test_svld1_hor_za128(uint32_t slice_base, svbool_t pg, const void *ptr) __a // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1b.vert( [[PG]], ptr [[PTR]], i32 0, i32 [[ADD]]) // CHECK-CXX-NEXT: ret void // -void test_svld1_ver_za8(uint32_t slice_base, svbool_t pg, const void *ptr) __arm_streaming __arm_shared_za { +void test_svld1_ver_za8(uint32_t slice_base, svbool_t pg, const void *ptr) __arm_streaming __arm_out("za") { svld1_ver_za8(0, slice_base, pg, ptr); svld1_ver_za8(0, slice_base + 15, pg, ptr); } @@ -156,7 +156,7 @@ void test_svld1_ver_za8(uint32_t slice_base, svbool_t pg, const void *ptr) __arm // CHECK-CXX-NEXT: tail call void 
@llvm.aarch64.sme.ld1h.vert( [[TMP0]], ptr [[PTR]], i32 1, i32 [[ADD]]) // CHECK-CXX-NEXT: ret void // -void test_svld1_ver_za16(uint32_t slice_base, svbool_t pg, const void *ptr) __arm_streaming __arm_shared_za { +void test_svld1_ver_za16(uint32_t slice_base, svbool_t pg, const void *ptr) __arm_streaming __arm_out("za") { svld1_ver_za16(0, slice_base, pg, ptr); svld1_ver_za16(1, slice_base + 7, pg, ptr); } @@ -179,7 +179,7 @@ void test_svld1_ver_za16(uint32_t slice_base, svbool_t pg, const void *ptr) __ar // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1w.vert( [[TMP0]], ptr [[PTR]], i32 3, i32 [[ADD]]) // CHECK-CXX-NEXT: ret void // -void test_svld1_ver_za32(uint32_t slice_base, svbool_t pg, const void *ptr) __arm_streaming __arm_shared_za { +void test_svld1_ver_za32(uint32_t slice_base, svbool_t pg, const void *ptr) __arm_streaming __arm_out("za") { svld1_ver_za32(0, slice_base, pg, ptr); svld1_ver_za32(3, slice_base + 3, pg, ptr); } @@ -202,7 +202,7 @@ void test_svld1_ver_za32(uint32_t slice_base, svbool_t pg, const void *ptr) __ar // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1d.vert( [[TMP0]], ptr [[PTR]], i32 7, i32 [[ADD]]) // CHECK-CXX-NEXT: ret void // -void test_svld1_ver_za64(uint32_t slice_base, svbool_t pg, const void *ptr) __arm_streaming __arm_shared_za { +void test_svld1_ver_za64(uint32_t slice_base, svbool_t pg, const void *ptr) __arm_streaming __arm_out("za") { svld1_ver_za64(0, slice_base, pg, ptr); svld1_ver_za64(7, slice_base + 1, pg, ptr); } @@ -223,7 +223,7 @@ void test_svld1_ver_za64(uint32_t slice_base, svbool_t pg, const void *ptr) __ar // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1q.vert( [[TMP0]], ptr [[PTR]], i32 15, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT: ret void // -void test_svld1_ver_za128(uint32_t slice_base, svbool_t pg, const void *ptr) __arm_streaming __arm_shared_za { +void test_svld1_ver_za128(uint32_t slice_base, svbool_t pg, const void *ptr) __arm_streaming __arm_out("za") { svld1_ver_za128(0, 
slice_base, pg, ptr); svld1_ver_za128(15, slice_base, pg, ptr); } diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ld1_vnum.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ld1_vnum.c index 0fa77e1144a7d..68a294b1a237a 100644 --- a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ld1_vnum.c +++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ld1_vnum.c @@ -28,7 +28,7 @@ // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1b.horiz( [[PG]], ptr [[TMP1]], i32 0, i32 [[ADD]]) // CHECK-CXX-NEXT: ret void // -void test_svld1_hor_vnum_za8(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) __arm_streaming __arm_shared_za { +void test_svld1_hor_vnum_za8(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) __arm_streaming __arm_out("za") { svld1_hor_vnum_za8(0, slice_base, pg, ptr, vnum); svld1_hor_vnum_za8(0, slice_base + 15, pg, ptr, vnum); } @@ -57,7 +57,7 @@ void test_svld1_hor_vnum_za8(uint32_t slice_base, svbool_t pg, const void *ptr, // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1h.horiz( [[TMP0]], ptr [[TMP2]], i32 1, i32 [[ADD]]) // CHECK-CXX-NEXT: ret void // -void test_svld1_hor_vnum_za16(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) __arm_streaming __arm_shared_za { +void test_svld1_hor_vnum_za16(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) __arm_streaming __arm_out("za") { svld1_hor_vnum_za16(0, slice_base, pg, ptr, vnum); svld1_hor_vnum_za16(1, slice_base + 7, pg, ptr, vnum); } @@ -86,7 +86,7 @@ void test_svld1_hor_vnum_za16(uint32_t slice_base, svbool_t pg, const void *ptr, // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1w.horiz( [[TMP0]], ptr [[TMP2]], i32 3, i32 [[ADD]]) // CHECK-CXX-NEXT: ret void // -void test_svld1_hor_vnum_za32(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) __arm_streaming __arm_shared_za { +void test_svld1_hor_vnum_za32(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) __arm_streaming 
__arm_out("za") { svld1_hor_vnum_za32(0, slice_base, pg, ptr, vnum); svld1_hor_vnum_za32(3, slice_base + 3, pg, ptr, vnum); } @@ -115,7 +115,7 @@ void test_svld1_hor_vnum_za32(uint32_t slice_base, svbool_t pg, const void *ptr, // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1d.horiz( [[TMP0]], ptr [[TMP2]], i32 7, i32 [[ADD]]) // CHECK-CXX-NEXT: ret void // -void test_svld1_hor_vnum_za64(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) __arm_streaming __arm_shared_za { +void test_svld1_hor_vnum_za64(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) __arm_streaming __arm_out("za") { svld1_hor_vnum_za64(0, slice_base, pg, ptr, vnum); svld1_hor_vnum_za64(7, slice_base + 1, pg, ptr, vnum); } @@ -142,7 +142,7 @@ void test_svld1_hor_vnum_za64(uint32_t slice_base, svbool_t pg, const void *ptr, // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1q.horiz( [[TMP0]], ptr [[TMP2]], i32 15, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT: ret void // -void test_svld1_hor_vnum_za128(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) __arm_streaming __arm_shared_za { +void test_svld1_hor_vnum_za128(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) __arm_streaming __arm_out("za") { svld1_hor_vnum_za128(0, slice_base, pg, ptr, vnum); svld1_hor_vnum_za128(15, slice_base, pg, ptr, vnum); } @@ -169,7 +169,7 @@ void test_svld1_hor_vnum_za128(uint32_t slice_base, svbool_t pg, const void *ptr // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1b.vert( [[PG]], ptr [[TMP1]], i32 0, i32 [[ADD]]) // CHECK-CXX-NEXT: ret void // -void test_svld1_ver_hor_za8(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) __arm_streaming __arm_shared_za { +void test_svld1_ver_hor_za8(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) __arm_streaming __arm_out("za") { svld1_ver_vnum_za8(0, slice_base, pg, ptr, vnum); svld1_ver_vnum_za8(0, slice_base + 15, pg, ptr, vnum); } @@ -198,7 +198,7 @@ void 
test_svld1_ver_hor_za8(uint32_t slice_base, svbool_t pg, const void *ptr, i // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1h.vert( [[TMP0]], ptr [[TMP2]], i32 1, i32 [[ADD]]) // CHECK-CXX-NEXT: ret void // -void test_svld1_ver_vnum_za16(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) __arm_streaming __arm_shared_za { +void test_svld1_ver_vnum_za16(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) __arm_streaming __arm_out("za") { svld1_ver_vnum_za16(0, slice_base, pg, ptr, vnum); svld1_ver_vnum_za16(1, slice_base + 7, pg, ptr, vnum); } @@ -227,7 +227,7 @@ void test_svld1_ver_vnum_za16(uint32_t slice_base, svbool_t pg, const void *ptr, // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1w.vert( [[TMP0]], ptr [[TMP2]], i32 3, i32 [[ADD]]) // CHECK-CXX-NEXT: ret void // -void test_svld1_ver_vnum_za32(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) __arm_streaming __arm_shared_za { +void test_svld1_ver_vnum_za32(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) __arm_streaming __arm_out("za") { svld1_ver_vnum_za32(0, slice_base, pg, ptr, vnum); svld1_ver_vnum_za32(3, slice_base + 3, pg, ptr, vnum); } @@ -256,7 +256,7 @@ void test_svld1_ver_vnum_za32(uint32_t slice_base, svbool_t pg, const void *ptr, // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1d.vert( [[TMP0]], ptr [[TMP2]], i32 7, i32 [[ADD]]) // CHECK-CXX-NEXT: ret void // -void test_svld1_ver_vnum_za64(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) __arm_streaming __arm_shared_za { +void test_svld1_ver_vnum_za64(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) __arm_streaming __arm_out("za") { svld1_ver_vnum_za64(0, slice_base, pg, ptr, vnum); svld1_ver_vnum_za64(7, slice_base + 1, pg, ptr, vnum); } @@ -283,7 +283,7 @@ void test_svld1_ver_vnum_za64(uint32_t slice_base, svbool_t pg, const void *ptr, // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1q.vert( [[TMP0]], ptr [[TMP2]], i32 15, i32 
[[SLICE_BASE]]) // CHECK-CXX-NEXT: ret void // -void test_svld1_ver_vnum_za128(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) __arm_streaming __arm_shared_za { +void test_svld1_ver_vnum_za128(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) __arm_streaming __arm_out("za") { svld1_ver_vnum_za128(0, slice_base, pg, ptr, vnum); svld1_ver_vnum_za128(15, slice_base, pg, ptr, vnum); } diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ldr.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ldr.c index 314c9645dd4f7..56eb7f45f6d9a 100644 --- a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ldr.c +++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ldr.c @@ -12,7 +12,7 @@ // CHECK-NEXT: tail call void @llvm.aarch64.sme.ldr(i32 [[SLICE_BASE:%.*]], ptr [[PTR:%.*]], i32 0) // CHECK-NEXT: ret void // -void test_svldr_vnum_za(uint32_t slice_base, const void *ptr) __arm_shared_za { +void test_svldr_vnum_za(uint32_t slice_base, const void *ptr) __arm_out("za") { svldr_vnum_za(slice_base, ptr, 0); } @@ -22,7 +22,7 @@ void test_svldr_vnum_za(uint32_t slice_base, const void *ptr) __arm_shared_za { // CHECK-NEXT: tail call void @llvm.aarch64.sme.ldr(i32 [[SLICE_BASE:%.*]], ptr [[PTR:%.*]], i32 15) // CHECK-NEXT: ret void // -void test_svldr_vnum_za_1(uint32_t slice_base, const void *ptr) __arm_shared_za { +void test_svldr_vnum_za_1(uint32_t slice_base, const void *ptr) __arm_out("za") { svldr_vnum_za(slice_base, ptr, 15); } @@ -32,7 +32,7 @@ void test_svldr_vnum_za_1(uint32_t slice_base, const void *ptr) __arm_shared_za // CHECK-NEXT: tail call void @llvm.aarch64.sme.ldr(i32 [[SLICE_BASE:%.*]], ptr [[PTR:%.*]], i32 0) // CHECK-NEXT: ret void // -void test_svldr_za(uint32_t slice_base, const void *ptr) __arm_shared_za { +void test_svldr_za(uint32_t slice_base, const void *ptr) __arm_out("za") { svldr_za(slice_base, ptr); } @@ -43,7 +43,7 @@ void test_svldr_za(uint32_t slice_base, const void *ptr) __arm_shared_za { // 
CHECK-NEXT: tail call void @llvm.aarch64.sme.ldr(i32 [[SLICE_BASE:%.*]], ptr [[PTR:%.*]], i32 [[TMP0:%.*]]) // CHECK-NEXT: ret void // -void test_svldr_vnum_za_var(uint32_t slice_base, const void *ptr, int64_t vnum) __arm_shared_za { +void test_svldr_vnum_za_var(uint32_t slice_base, const void *ptr, int64_t vnum) __arm_out("za") { svldr_vnum_za(slice_base, ptr, vnum); } @@ -53,6 +53,6 @@ void test_svldr_vnum_za_var(uint32_t slice_base, const void *ptr, int64_t vnum) // CHECK-NEXT: tail call void @llvm.aarch64.sme.ldr(i32 [[SLICE_BASE:%.*]], ptr [[PTR:%.*]], i32 16) // CHECK-NEXT: ret void // -void test_svldr_vnum_za_2(uint32_t slice_base, const void *ptr) __arm_shared_za { +void test_svldr_vnum_za_2(uint32_t slice_base, const void *ptr) __arm_out("za") { svldr_vnum_za(slice_base, ptr, 16); } diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_mopa-za32.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_mopa-za32.c index e84f31c2dfa92..5015f9bc9f41e 100644 --- a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_mopa-za32.c +++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_mopa-za32.c @@ -26,7 +26,7 @@ // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.smopa.wide.nxv16i8(i32 0, [[PN]], [[PM]], [[ZN]], [[ZM]]) // CHECK-CXX-NEXT: ret void // -void test_svmopa_za32_s8(svbool_t pn, svbool_t pm, svint8_t zn, svint8_t zm) __arm_streaming __arm_shared_za { +void test_svmopa_za32_s8(svbool_t pn, svbool_t pm, svint8_t zn, svint8_t zm) __arm_streaming __arm_inout("za") { SME_ACLE_FUNC(svmopa_za32, _s8, _m)(0, pn, pm, zn, zm); } @@ -42,7 +42,7 @@ void test_svmopa_za32_s8(svbool_t pn, svbool_t pm, svint8_t zn, svint8_t zm) __a // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.umopa.wide.nxv16i8(i32 0, [[PN]], [[PM]], [[ZN]], [[ZM]]) // CHECK-CXX-NEXT: ret void // -void test_svmopa_za32_u8(svbool_t pn, svbool_t pm, svuint8_t zn, svuint8_t zm) __arm_streaming __arm_shared_za { +void test_svmopa_za32_u8(svbool_t pn, svbool_t pm, svuint8_t zn, svuint8_t 
zm) __arm_streaming __arm_inout("za") { SME_ACLE_FUNC(svmopa_za32, _u8, _m)(0, pn, pm, zn, zm); } @@ -62,7 +62,7 @@ void test_svmopa_za32_u8(svbool_t pn, svbool_t pm, svuint8_t zn, svuint8_t zm) _ // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.mopa.wide.nxv8bf16(i32 0, [[TMP0]], [[TMP1]], [[ZN]], [[ZM]]) // CHECK-CXX-NEXT: ret void // -void test_svmopa_za32_bf16(svbool_t pn, svbool_t pm, svbfloat16_t zn, svbfloat16_t zm) __arm_streaming __arm_shared_za { +void test_svmopa_za32_bf16(svbool_t pn, svbool_t pm, svbfloat16_t zn, svbfloat16_t zm) __arm_streaming __arm_inout("za") { SME_ACLE_FUNC(svmopa_za32, _bf16, _m)(0, pn, pm, zn, zm); } @@ -82,7 +82,7 @@ void test_svmopa_za32_bf16(svbool_t pn, svbool_t pm, svbfloat16_t zn, svbfloat16 // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.mopa.wide.nxv8f16(i32 1, [[TMP0]], [[TMP1]], [[ZN]], [[ZM]]) // CHECK-CXX-NEXT: ret void // -void test_svmopa_za32_f16(svbool_t pn, svbool_t pm, svfloat16_t zn, svfloat16_t zm) __arm_streaming __arm_shared_za { +void test_svmopa_za32_f16(svbool_t pn, svbool_t pm, svfloat16_t zn, svfloat16_t zm) __arm_streaming __arm_inout("za") { SME_ACLE_FUNC(svmopa_za32, _f16, _m)(1, pn, pm, zn, zm); } @@ -102,7 +102,7 @@ void test_svmopa_za32_f16(svbool_t pn, svbool_t pm, svfloat16_t zn, svfloat16_t // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.mopa.nxv4f32(i32 1, [[TMP0]], [[TMP1]], [[ZN]], [[ZM]]) // CHECK-CXX-NEXT: ret void // -void test_svmopa_za32_f32(svbool_t pn, svbool_t pm, svfloat32_t zn, svfloat32_t zm) __arm_streaming __arm_shared_za { +void test_svmopa_za32_f32(svbool_t pn, svbool_t pm, svfloat32_t zn, svfloat32_t zm) __arm_streaming __arm_inout("za") { SME_ACLE_FUNC(svmopa_za32, _f32, _m)(1, pn, pm, zn, zm); } @@ -118,7 +118,7 @@ void test_svmopa_za32_f32(svbool_t pn, svbool_t pm, svfloat32_t zn, svfloat32_t // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.sumopa.wide.nxv16i8(i32 0, [[PN]], [[PM]], [[ZN]], [[ZM]]) // CHECK-CXX-NEXT: ret void // -void 
test_svsumopa_za32_s8(svbool_t pn, svbool_t pm, svint8_t zn, svuint8_t zm) __arm_streaming __arm_shared_za { +void test_svsumopa_za32_s8(svbool_t pn, svbool_t pm, svint8_t zn, svuint8_t zm) __arm_streaming __arm_inout("za") { SME_ACLE_FUNC(svsumopa_za32, _s8, _m)(0, pn, pm, zn, zm); } @@ -134,7 +134,7 @@ void test_svsumopa_za32_s8(svbool_t pn, svbool_t pm, svint8_t zn, svuint8_t zm) // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.usmopa.wide.nxv16i8(i32 0, [[PN]], [[PM]], [[ZN]], [[ZM]]) // CHECK-CXX-NEXT: ret void // -void test_svusmopa_za32_u8(svbool_t pn, svbool_t pm, svuint8_t zn, svint8_t zm) __arm_streaming __arm_shared_za { +void test_svusmopa_za32_u8(svbool_t pn, svbool_t pm, svuint8_t zn, svint8_t zm) __arm_streaming __arm_inout("za") { SME_ACLE_FUNC(svusmopa_za32, _u8, _m)(0, pn, pm, zn, zm); } //// NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_mopa-za64.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_mopa-za64.c index 1b22eb64e9e36..80457359e2a4e 100644 --- a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_mopa-za64.c +++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_mopa-za64.c @@ -30,7 +30,7 @@ // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.smopa.wide.nxv8i16(i32 7, [[TMP0]], [[TMP1]], [[ZN]], [[ZM]]) // CHECK-CXX-NEXT: ret void // -void test_svmopa_za64_s16(svbool_t pn, svbool_t pm, svint16_t zn, svint16_t zm) __arm_streaming __arm_shared_za { +void test_svmopa_za64_s16(svbool_t pn, svbool_t pm, svint16_t zn, svint16_t zm) __arm_streaming __arm_inout("za") { SME_ACLE_FUNC(svmopa_za64, _s16, _m)(7, pn, pm, zn, zm); } @@ -50,7 +50,7 @@ void test_svmopa_za64_s16(svbool_t pn, svbool_t pm, svint16_t zn, svint16_t zm) // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.umopa.wide.nxv8i16(i32 0, [[TMP0]], [[TMP1]], [[ZN]], [[ZM]]) // CHECK-CXX-NEXT: ret void // -void test_svmopa_za64_u16(svbool_t pn, svbool_t pm, 
svuint16_t zn, svuint16_t zm) __arm_streaming __arm_shared_za { +void test_svmopa_za64_u16(svbool_t pn, svbool_t pm, svuint16_t zn, svuint16_t zm) __arm_streaming __arm_inout("za") { SME_ACLE_FUNC(svmopa_za64, _u16, _m)(0, pn, pm, zn, zm); } @@ -70,7 +70,7 @@ void test_svmopa_za64_u16(svbool_t pn, svbool_t pm, svuint16_t zn, svuint16_t zm // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.mopa.nxv2f64(i32 7, [[TMP0]], [[TMP1]], [[ZN]], [[ZM]]) // CHECK-CXX-NEXT: ret void // -void test_svmopa_za64_f64(svbool_t pn, svbool_t pm, svfloat64_t zn, svfloat64_t zm) __arm_streaming __arm_shared_za { +void test_svmopa_za64_f64(svbool_t pn, svbool_t pm, svfloat64_t zn, svfloat64_t zm) __arm_streaming __arm_inout("za") { SME_ACLE_FUNC(svmopa_za64, _f64, _m)(7, pn, pm, zn, zm); } @@ -90,7 +90,7 @@ void test_svmopa_za64_f64(svbool_t pn, svbool_t pm, svfloat64_t zn, svfloat64_t // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.sumopa.wide.nxv8i16(i32 0, [[TMP0]], [[TMP1]], [[ZN]], [[ZM]]) // CHECK-CXX-NEXT: ret void // -void test_svsumopa_za64_s16(svbool_t pn, svbool_t pm, svint16_t zn, svuint16_t zm) __arm_streaming __arm_shared_za { +void test_svsumopa_za64_s16(svbool_t pn, svbool_t pm, svint16_t zn, svuint16_t zm) __arm_streaming __arm_inout("za") { SME_ACLE_FUNC(svsumopa_za64, _s16, _m)(0, pn, pm, zn, zm); } @@ -110,7 +110,7 @@ void test_svsumopa_za64_s16(svbool_t pn, svbool_t pm, svint16_t zn, svuint16_t z // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.usmopa.wide.nxv8i16(i32 7, [[TMP0]], [[TMP1]], [[ZN]], [[ZM]]) // CHECK-CXX-NEXT: ret void // -void test_svusmopa_za64_u16(svbool_t pn, svbool_t pm, svuint16_t zn, svint16_t zm) __arm_streaming __arm_shared_za { +void test_svusmopa_za64_u16(svbool_t pn, svbool_t pm, svuint16_t zn, svint16_t zm) __arm_streaming __arm_inout("za") { SME_ACLE_FUNC(svusmopa_za64, _u16, _m)(7, pn, pm, zn, zm); } //// NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_mops-za32.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_mops-za32.c index 0ff97ff92f714..7dad7289d98ad 100644 --- a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_mops-za32.c +++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_mops-za32.c @@ -26,7 +26,7 @@ // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.smops.wide.nxv16i8(i32 0, [[PN]], [[PM]], [[ZN]], [[ZM]]) // CHECK-CXX-NEXT: ret void // -void test_svmops_za32_s8(svbool_t pn, svbool_t pm, svint8_t zn, svint8_t zm) __arm_streaming __arm_shared_za { +void test_svmops_za32_s8(svbool_t pn, svbool_t pm, svint8_t zn, svint8_t zm) __arm_streaming __arm_inout("za") { SME_ACLE_FUNC(svmops_za32, _s8, _m)(0, pn, pm, zn, zm); } @@ -42,7 +42,7 @@ void test_svmops_za32_s8(svbool_t pn, svbool_t pm, svint8_t zn, svint8_t zm) __a // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.umops.wide.nxv16i8(i32 0, [[PN]], [[PM]], [[ZN]], [[ZM]]) // CHECK-CXX-NEXT: ret void // -void test_svmops_za32_u8(svbool_t pn, svbool_t pm, svuint8_t zn, svuint8_t zm) __arm_streaming __arm_shared_za { +void test_svmops_za32_u8(svbool_t pn, svbool_t pm, svuint8_t zn, svuint8_t zm) __arm_streaming __arm_inout("za") { SME_ACLE_FUNC(svmops_za32, _u8, _m)(0, pn, pm, zn, zm); } @@ -62,7 +62,7 @@ void test_svmops_za32_u8(svbool_t pn, svbool_t pm, svuint8_t zn, svuint8_t zm) _ // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.mops.wide.nxv8bf16(i32 0, [[TMP0]], [[TMP1]], [[ZN]], [[ZM]]) // CHECK-CXX-NEXT: ret void // -void test_svmops_za32_bf16(svbool_t pn, svbool_t pm, svbfloat16_t zn, svbfloat16_t zm) __arm_streaming __arm_shared_za { +void test_svmops_za32_bf16(svbool_t pn, svbool_t pm, svbfloat16_t zn, svbfloat16_t zm) __arm_streaming __arm_inout("za") { SME_ACLE_FUNC(svmops_za32, _bf16, _m)(0, pn, pm, zn, zm); } @@ -82,7 +82,7 @@ void test_svmops_za32_bf16(svbool_t pn, svbool_t pm, svbfloat16_t zn, svbfloat16 // CHECK-CXX-NEXT: tail 
call void @llvm.aarch64.sme.mops.wide.nxv8f16(i32 1, [[TMP0]], [[TMP1]], [[ZN]], [[ZM]]) // CHECK-CXX-NEXT: ret void // -void test_svmops_za32_f16(svbool_t pn, svbool_t pm, svfloat16_t zn, svfloat16_t zm) __arm_streaming __arm_shared_za { +void test_svmops_za32_f16(svbool_t pn, svbool_t pm, svfloat16_t zn, svfloat16_t zm) __arm_streaming __arm_inout("za") { SME_ACLE_FUNC(svmops_za32, _f16, _m)(1, pn, pm, zn, zm); } @@ -102,7 +102,7 @@ void test_svmops_za32_f16(svbool_t pn, svbool_t pm, svfloat16_t zn, svfloat16_t // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.mops.nxv4f32(i32 1, [[TMP0]], [[TMP1]], [[ZN]], [[ZM]]) // CHECK-CXX-NEXT: ret void // -void test_svmops_za32_f32(svbool_t pn, svbool_t pm, svfloat32_t zn, svfloat32_t zm) __arm_streaming __arm_shared_za { +void test_svmops_za32_f32(svbool_t pn, svbool_t pm, svfloat32_t zn, svfloat32_t zm) __arm_streaming __arm_inout("za") { SME_ACLE_FUNC(svmops_za32, _f32, _m)(1, pn, pm, zn, zm); } @@ -118,7 +118,7 @@ void test_svmops_za32_f32(svbool_t pn, svbool_t pm, svfloat32_t zn, svfloat32_t // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.sumops.wide.nxv16i8(i32 0, [[PN]], [[PM]], [[ZN]], [[ZM]]) // CHECK-CXX-NEXT: ret void // -void test_svsumops_za32_s8(svbool_t pn, svbool_t pm, svint8_t zn, svuint8_t zm) __arm_streaming __arm_shared_za { +void test_svsumops_za32_s8(svbool_t pn, svbool_t pm, svint8_t zn, svuint8_t zm) __arm_streaming __arm_inout("za") { SME_ACLE_FUNC(svsumops_za32, _s8, _m)(0, pn, pm, zn, zm); } @@ -134,7 +134,7 @@ void test_svsumops_za32_s8(svbool_t pn, svbool_t pm, svint8_t zn, svuint8_t zm) // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.usmops.wide.nxv16i8(i32 0, [[PN]], [[PM]], [[ZN]], [[ZM]]) // CHECK-CXX-NEXT: ret void // -void test_svusmops_za32_u8(svbool_t pn, svbool_t pm, svuint8_t zn, svint8_t zm) __arm_streaming __arm_shared_za { +void test_svusmops_za32_u8(svbool_t pn, svbool_t pm, svuint8_t zn, svint8_t zm) __arm_streaming __arm_inout("za") { SME_ACLE_FUNC(svusmops_za32, _u8, 
_m)(0, pn, pm, zn, zm); } //// NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_mops-za64.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_mops-za64.c index 5b190a7f9b748..e95a5ea2e6941 100644 --- a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_mops-za64.c +++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_mops-za64.c @@ -30,7 +30,7 @@ // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.smops.wide.nxv8i16(i32 7, [[TMP0]], [[TMP1]], [[ZN]], [[ZM]]) // CHECK-CXX-NEXT: ret void // -void test_svmops_za64_s16(svbool_t pn, svbool_t pm, svint16_t zn, svint16_t zm) __arm_streaming __arm_shared_za { +void test_svmops_za64_s16(svbool_t pn, svbool_t pm, svint16_t zn, svint16_t zm) __arm_streaming __arm_inout("za") { SME_ACLE_FUNC(svmops_za64, _s16, _m)(7, pn, pm, zn, zm); } @@ -50,7 +50,7 @@ void test_svmops_za64_s16(svbool_t pn, svbool_t pm, svint16_t zn, svint16_t zm) // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.umops.wide.nxv8i16(i32 0, [[TMP0]], [[TMP1]], [[ZN]], [[ZM]]) // CHECK-CXX-NEXT: ret void // -void test_svmops_za64_u16(svbool_t pn, svbool_t pm, svuint16_t zn, svuint16_t zm) __arm_streaming __arm_shared_za { +void test_svmops_za64_u16(svbool_t pn, svbool_t pm, svuint16_t zn, svuint16_t zm) __arm_streaming __arm_inout("za") { SME_ACLE_FUNC(svmops_za64, _u16, _m)(0, pn, pm, zn, zm); } @@ -70,7 +70,7 @@ void test_svmops_za64_u16(svbool_t pn, svbool_t pm, svuint16_t zn, svuint16_t zm // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.mops.nxv2f64(i32 7, [[TMP0]], [[TMP1]], [[ZN]], [[ZM]]) // CHECK-CXX-NEXT: ret void // -void test_svmops_za64_f64(svbool_t pn, svbool_t pm, svfloat64_t zn, svfloat64_t zm) __arm_streaming __arm_shared_za { +void test_svmops_za64_f64(svbool_t pn, svbool_t pm, svfloat64_t zn, svfloat64_t zm) __arm_streaming __arm_inout("za") { SME_ACLE_FUNC(svmops_za64, _f64, _m)(7, pn, pm, zn, zm); } @@ -90,7 +90,7 @@ 
void test_svmops_za64_f64(svbool_t pn, svbool_t pm, svfloat64_t zn, svfloat64_t // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.sumops.wide.nxv8i16(i32 0, [[TMP0]], [[TMP1]], [[ZN]], [[ZM]]) // CHECK-CXX-NEXT: ret void // -void test_svsumops_za64_s16(svbool_t pn, svbool_t pm, svint16_t zn, svuint16_t zm) __arm_streaming __arm_shared_za { +void test_svsumops_za64_s16(svbool_t pn, svbool_t pm, svint16_t zn, svuint16_t zm) __arm_streaming __arm_inout("za") { SME_ACLE_FUNC(svsumops_za64, _s16, _m)(0, pn, pm, zn, zm); } @@ -110,7 +110,7 @@ void test_svsumops_za64_s16(svbool_t pn, svbool_t pm, svint16_t zn, svuint16_t z // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.usmops.wide.nxv8i16(i32 7, [[TMP0]], [[TMP1]], [[ZN]], [[ZM]]) // CHECK-CXX-NEXT: ret void // -void test_svusmops_za64_u16(svbool_t pn, svbool_t pm, svuint16_t zn, svint16_t zm) __arm_streaming __arm_shared_za { +void test_svusmops_za64_u16(svbool_t pn, svbool_t pm, svuint16_t zn, svint16_t zm) __arm_streaming __arm_inout("za") { SME_ACLE_FUNC(svusmops_za64, _u16, _m)(7, pn, pm, zn, zm); } //// NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_read.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_read.c index 843a96da90278..700564c1532e0 100644 --- a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_read.c +++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_read.c @@ -26,7 +26,7 @@ // CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv16i8( [[ZD]], [[PG]], i32 0, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT: ret [[TMP0]] // -svint8_t test_svread_hor_za8_s8(svint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { +svint8_t test_svread_hor_za8_s8(svint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { return SME_ACLE_FUNC(svread_hor_za8, _s8, _m)(zd, pg, 0, slice_base); } @@ -44,7 +44,7 @@ svint8_t test_svread_hor_za8_s8(svint8_t zd, svbool_t pg, uint32_t slice_base) _ // CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv16i8( [[ZD]], [[PG]], i32 0, i32 [[ADD]]) // CHECK-CXX-NEXT: ret [[TMP0]] // -svint8_t test_svread_hor_za8_s8_1(svint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { +svint8_t test_svread_hor_za8_s8_1(svint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { uint32_t slice = slice_base + 15; return SME_ACLE_FUNC(svread_hor_za8, _s8, _m)(zd, pg, 0, slice); } @@ -63,7 +63,7 @@ svint8_t test_svread_hor_za8_s8_1(svint8_t zd, svbool_t pg, uint32_t slice_base) // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv8i16( [[ZD]], [[TMP0]], i32 0, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT: ret [[TMP1]] // -svint16_t test_svread_hor_za16_s16(svint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { +svint16_t test_svread_hor_za16_s16(svint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { return SME_ACLE_FUNC(svread_hor_za16, _s16, _m)(zd, pg, 0, slice_base); } @@ -83,7 +83,7 @@ svint16_t 
test_svread_hor_za16_s16(svint16_t zd, svbool_t pg, uint32_t slice_bas // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv8i16( [[ZD]], [[TMP0]], i32 1, i32 [[ADD]]) // CHECK-CXX-NEXT: ret [[TMP1]] // -svint16_t test_svread_hor_za16_s16_1(svint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { +svint16_t test_svread_hor_za16_s16_1(svint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { uint32_t slice = slice_base + 7; return SME_ACLE_FUNC(svread_hor_za16, _s16, _m)(zd, pg, 1, slice); } @@ -102,7 +102,7 @@ svint16_t test_svread_hor_za16_s16_1(svint16_t zd, svbool_t pg, uint32_t slice_b // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv4i32( [[ZD]], [[TMP0]], i32 0, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT: ret [[TMP1]] // -svint32_t test_svread_hor_za32_s32(svint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { +svint32_t test_svread_hor_za32_s32(svint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { return SME_ACLE_FUNC(svread_hor_za32, _s32, _m)(zd, pg, 0, slice_base); } @@ -122,7 +122,7 @@ svint32_t test_svread_hor_za32_s32(svint32_t zd, svbool_t pg, uint32_t slice_bas // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv4i32( [[ZD]], [[TMP0]], i32 3, i32 [[ADD]]) // CHECK-CXX-NEXT: ret [[TMP1]] // -svint32_t test_svread_hor_za32_s32_1(svint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { +svint32_t test_svread_hor_za32_s32_1(svint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { uint32_t slice = slice_base + 3; return SME_ACLE_FUNC(svread_hor_za32, _s32, _m)(zd, pg, 3, slice); } @@ -141,7 +141,7 @@ svint32_t test_svread_hor_za32_s32_1(svint32_t zd, svbool_t pg, uint32_t slice_b // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv2i64( [[ZD]], [[TMP0]], i32 0, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT: ret [[TMP1]] // 
-svint64_t test_svread_hor_za64_s64(svint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { +svint64_t test_svread_hor_za64_s64(svint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { return SME_ACLE_FUNC(svread_hor_za64, _s64, _m)(zd, pg, 0, slice_base); } @@ -161,7 +161,7 @@ svint64_t test_svread_hor_za64_s64(svint64_t zd, svbool_t pg, uint32_t slice_bas // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv2i64( [[ZD]], [[TMP0]], i32 7, i32 [[ADD]]) // CHECK-CXX-NEXT: ret [[TMP1]] // -svint64_t test_svread_hor_za64_s64_1(svint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { +svint64_t test_svread_hor_za64_s64_1(svint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { uint32_t slice = slice_base + 1; return SME_ACLE_FUNC(svread_hor_za64, _s64, _m)(zd, pg, 7, slice); } @@ -178,7 +178,7 @@ svint64_t test_svread_hor_za64_s64_1(svint64_t zd, svbool_t pg, uint32_t slice_b // CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv16i8( [[ZD]], [[PG]], i32 0, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT: ret [[TMP0]] // -svuint8_t test_svread_hor_za8_u8(svuint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { +svuint8_t test_svread_hor_za8_u8(svuint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { return SME_ACLE_FUNC(svread_hor_za8, _u8, _m)(zd, pg, 0, slice_base); } @@ -196,7 +196,7 @@ svuint8_t test_svread_hor_za8_u8(svuint8_t zd, svbool_t pg, uint32_t slice_base) // CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv16i8( [[ZD]], [[PG]], i32 0, i32 [[ADD]]) // CHECK-CXX-NEXT: ret [[TMP0]] // -svuint8_t test_svread_hor_za8_u8_1(svuint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { +svuint8_t test_svread_hor_za8_u8_1(svuint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { uint32_t slice = slice_base + 15; return 
SME_ACLE_FUNC(svread_hor_za8, _u8, _m)(zd, pg, 0, slice); } @@ -215,7 +215,7 @@ svuint8_t test_svread_hor_za8_u8_1(svuint8_t zd, svbool_t pg, uint32_t slice_bas // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv8i16( [[ZD]], [[TMP0]], i32 0, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT: ret [[TMP1]] // -svuint16_t test_svread_hor_za16_u16(svuint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { +svuint16_t test_svread_hor_za16_u16(svuint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { return SME_ACLE_FUNC(svread_hor_za16, _u16, _m)(zd, pg, 0, slice_base); } @@ -235,7 +235,7 @@ svuint16_t test_svread_hor_za16_u16(svuint16_t zd, svbool_t pg, uint32_t slice_b // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv8i16( [[ZD]], [[TMP0]], i32 1, i32 [[ADD]]) // CHECK-CXX-NEXT: ret [[TMP1]] // -svuint16_t test_svread_hor_za16_u16_1(svuint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { +svuint16_t test_svread_hor_za16_u16_1(svuint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { uint32_t slice = slice_base + 7; return SME_ACLE_FUNC(svread_hor_za16, _u16, _m)(zd, pg, 1, slice); } @@ -254,7 +254,7 @@ svuint16_t test_svread_hor_za16_u16_1(svuint16_t zd, svbool_t pg, uint32_t slice // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv4i32( [[ZD]], [[TMP0]], i32 0, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT: ret [[TMP1]] // -svuint32_t test_svread_hor_za32_u32(svuint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { +svuint32_t test_svread_hor_za32_u32(svuint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { return SME_ACLE_FUNC(svread_hor_za32, _u32, _m)(zd, pg, 0, slice_base); } @@ -274,7 +274,7 @@ svuint32_t test_svread_hor_za32_u32(svuint32_t zd, svbool_t pg, uint32_t slice_b // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv4i32( 
[[ZD]], [[TMP0]], i32 3, i32 [[ADD]]) // CHECK-CXX-NEXT: ret [[TMP1]] // -svuint32_t test_svread_hor_za32_u32_1(svuint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { +svuint32_t test_svread_hor_za32_u32_1(svuint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { uint32_t slice = slice_base + 3; return SME_ACLE_FUNC(svread_hor_za32, _u32, _m)(zd, pg, 3, slice); } @@ -293,7 +293,7 @@ svuint32_t test_svread_hor_za32_u32_1(svuint32_t zd, svbool_t pg, uint32_t slice // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv2i64( [[ZD]], [[TMP0]], i32 0, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT: ret [[TMP1]] // -svuint64_t test_svread_hor_za64_u64(svuint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { +svuint64_t test_svread_hor_za64_u64(svuint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { return SME_ACLE_FUNC(svread_hor_za64, _u64, _m)(zd, pg, 0, slice_base); } @@ -313,7 +313,7 @@ svuint64_t test_svread_hor_za64_u64(svuint64_t zd, svbool_t pg, uint32_t slice_b // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv2i64( [[ZD]], [[TMP0]], i32 7, i32 [[ADD]]) // CHECK-CXX-NEXT: ret [[TMP1]] // -svuint64_t test_svread_hor_za64_u64_1(svuint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { +svuint64_t test_svread_hor_za64_u64_1(svuint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { uint32_t slice = slice_base + 1; return SME_ACLE_FUNC(svread_hor_za64, _u64, _m)(zd, pg, 7, slice); } @@ -332,7 +332,7 @@ svuint64_t test_svread_hor_za64_u64_1(svuint64_t zd, svbool_t pg, uint32_t slice // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv8f16( [[ZD]], [[TMP0]], i32 0, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT: ret [[TMP1]] // -svfloat16_t test_svread_hor_za16_f16(svfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { +svfloat16_t 
test_svread_hor_za16_f16(svfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { return SME_ACLE_FUNC(svread_hor_za16, _f16, _m)(zd, pg, 0, slice_base); } @@ -352,7 +352,7 @@ svfloat16_t test_svread_hor_za16_f16(svfloat16_t zd, svbool_t pg, uint32_t slice // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv8f16( [[ZD]], [[TMP0]], i32 1, i32 [[ADD]]) // CHECK-CXX-NEXT: ret [[TMP1]] // -svfloat16_t test_svread_hor_za16_f16_1(svfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { +svfloat16_t test_svread_hor_za16_f16_1(svfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { uint32_t slice = slice_base + 7; return SME_ACLE_FUNC(svread_hor_za16, _f16, _m)(zd, pg, 1, slice); } @@ -371,7 +371,7 @@ svfloat16_t test_svread_hor_za16_f16_1(svfloat16_t zd, svbool_t pg, uint32_t sli // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv8bf16( [[ZD]], [[TMP0]], i32 0, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT: ret [[TMP1]] // -svbfloat16_t test_svread_hor_za16_bf16(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { +svbfloat16_t test_svread_hor_za16_bf16(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { return SME_ACLE_FUNC(svread_hor_za16, _bf16, _m)(zd, pg, 0, slice_base); } @@ -391,7 +391,7 @@ svbfloat16_t test_svread_hor_za16_bf16(svbfloat16_t zd, svbool_t pg, uint32_t sl // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv8bf16( [[ZD]], [[TMP0]], i32 1, i32 [[ADD]]) // CHECK-CXX-NEXT: ret [[TMP1]] // -svbfloat16_t test_svread_hor_za16_bf16_1(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { +svbfloat16_t test_svread_hor_za16_bf16_1(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { uint32_t slice = slice_base + 7; return SME_ACLE_FUNC(svread_hor_za16, _bf16, _m)(zd, pg, 1, slice); } @@ -410,7 
+410,7 @@ svbfloat16_t test_svread_hor_za16_bf16_1(svbfloat16_t zd, svbool_t pg, uint32_t // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv4f32( [[ZD]], [[TMP0]], i32 0, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT: ret [[TMP1]] // -svfloat32_t test_svread_hor_za32_f32(svfloat32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { +svfloat32_t test_svread_hor_za32_f32(svfloat32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { return SME_ACLE_FUNC(svread_hor_za32, _f32, _m)(zd, pg, 0, slice_base); } @@ -430,7 +430,7 @@ svfloat32_t test_svread_hor_za32_f32(svfloat32_t zd, svbool_t pg, uint32_t slice // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv4f32( [[ZD]], [[TMP0]], i32 3, i32 [[ADD]]) // CHECK-CXX-NEXT: ret [[TMP1]] // -svfloat32_t test_svread_hor_za32_f32_1(svfloat32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { +svfloat32_t test_svread_hor_za32_f32_1(svfloat32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { uint32_t slice = slice_base + 3; return SME_ACLE_FUNC(svread_hor_za32, _f32, _m)(zd, pg, 3, slice); } @@ -449,7 +449,7 @@ svfloat32_t test_svread_hor_za32_f32_1(svfloat32_t zd, svbool_t pg, uint32_t sli // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv2f64( [[ZD]], [[TMP0]], i32 0, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT: ret [[TMP1]] // -svfloat64_t test_svread_hor_za64_f64(svfloat64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { +svfloat64_t test_svread_hor_za64_f64(svfloat64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { return SME_ACLE_FUNC(svread_hor_za64, _f64, _m)(zd, pg, 0, slice_base); } @@ -469,7 +469,7 @@ svfloat64_t test_svread_hor_za64_f64(svfloat64_t zd, svbool_t pg, uint32_t slice // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv2f64( [[ZD]], [[TMP0]], i32 7, i32 [[ADD]]) // CHECK-CXX-NEXT: ret 
[[TMP1]] // -svfloat64_t test_svread_hor_za64_f64_1(svfloat64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { +svfloat64_t test_svread_hor_za64_f64_1(svfloat64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { uint32_t slice = slice_base + 1; return SME_ACLE_FUNC(svread_hor_za64, _f64, _m)(zd, pg, 7, slice); } @@ -486,7 +486,7 @@ svfloat64_t test_svread_hor_za64_f64_1(svfloat64_t zd, svbool_t pg, uint32_t sli // CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv16i8( [[ZD]], [[PG]], i32 0, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT: ret [[TMP0]] // -svint8_t test_svread_hor_za128_s8(svint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { +svint8_t test_svread_hor_za128_s8(svint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { return SME_ACLE_FUNC(svread_hor_za128, _s8, _m)(zd, pg, 0, slice_base); } @@ -502,7 +502,7 @@ svint8_t test_svread_hor_za128_s8(svint8_t zd, svbool_t pg, uint32_t slice_base) // CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv16i8( [[ZD]], [[PG]], i32 15, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT: ret [[TMP0]] // -svint8_t test_svread_hor_za128_s8_1(svint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { +svint8_t test_svread_hor_za128_s8_1(svint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { return SME_ACLE_FUNC(svread_hor_za128, _s8, _m)(zd, pg, 15, slice_base); } @@ -520,7 +520,7 @@ svint8_t test_svread_hor_za128_s8_1(svint8_t zd, svbool_t pg, uint32_t slice_bas // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv8i16( [[ZD]], [[TMP0]], i32 0, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT: ret [[TMP1]] // -svint16_t test_svread_hor_za128_s16(svint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { +svint16_t test_svread_hor_za128_s16(svint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") 
{ return SME_ACLE_FUNC(svread_hor_za128, _s16, _m)(zd, pg, 0, slice_base); } @@ -538,7 +538,7 @@ svint16_t test_svread_hor_za128_s16(svint16_t zd, svbool_t pg, uint32_t slice_ba // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv8i16( [[ZD]], [[TMP0]], i32 15, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT: ret [[TMP1]] // -svint16_t test_svread_hor_za128_s16_1(svint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { +svint16_t test_svread_hor_za128_s16_1(svint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { return SME_ACLE_FUNC(svread_hor_za128, _s16, _m)(zd, pg, 15, slice_base); } @@ -556,7 +556,7 @@ svint16_t test_svread_hor_za128_s16_1(svint16_t zd, svbool_t pg, uint32_t slice_ // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv4i32( [[ZD]], [[TMP0]], i32 0, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT: ret [[TMP1]] // -svint32_t test_svread_hor_za128_s32(svint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { +svint32_t test_svread_hor_za128_s32(svint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { return SME_ACLE_FUNC(svread_hor_za128, _s32, _m)(zd, pg, 0, slice_base); } @@ -574,7 +574,7 @@ svint32_t test_svread_hor_za128_s32(svint32_t zd, svbool_t pg, uint32_t slice_ba // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv4i32( [[ZD]], [[TMP0]], i32 15, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT: ret [[TMP1]] // -svint32_t test_svread_hor_za128_s32_1(svint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { +svint32_t test_svread_hor_za128_s32_1(svint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { return SME_ACLE_FUNC(svread_hor_za128, _s32, _m)(zd, pg, 15, slice_base); } @@ -592,7 +592,7 @@ svint32_t test_svread_hor_za128_s32_1(svint32_t zd, svbool_t pg, uint32_t slice_ // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call 
@llvm.aarch64.sme.readq.horiz.nxv2i64( [[ZD]], [[TMP0]], i32 0, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT: ret [[TMP1]] // -svint64_t test_svread_hor_za128_s64(svint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { +svint64_t test_svread_hor_za128_s64(svint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { return SME_ACLE_FUNC(svread_hor_za128, _s64, _m)(zd, pg, 0, slice_base); } @@ -610,7 +610,7 @@ svint64_t test_svread_hor_za128_s64(svint64_t zd, svbool_t pg, uint32_t slice_ba // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv2i64( [[ZD]], [[TMP0]], i32 15, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT: ret [[TMP1]] // -svint64_t test_svread_hor_za128_s64_1(svint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { +svint64_t test_svread_hor_za128_s64_1(svint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { return SME_ACLE_FUNC(svread_hor_za128, _s64, _m)(zd, pg, 15, slice_base); } @@ -626,7 +626,7 @@ svint64_t test_svread_hor_za128_s64_1(svint64_t zd, svbool_t pg, uint32_t slice_ // CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv16i8( [[ZD]], [[PG]], i32 0, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT: ret [[TMP0]] // -svuint8_t test_svread_hor_za128_u8(svuint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { +svuint8_t test_svread_hor_za128_u8(svuint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { return SME_ACLE_FUNC(svread_hor_za128, _u8, _m)(zd, pg, 0, slice_base); } @@ -642,7 +642,7 @@ svuint8_t test_svread_hor_za128_u8(svuint8_t zd, svbool_t pg, uint32_t slice_bas // CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv16i8( [[ZD]], [[PG]], i32 15, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT: ret [[TMP0]] // -svuint8_t test_svread_hor_za128_u8_1(svuint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { +svuint8_t 
test_svread_hor_za128_u8_1(svuint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { return SME_ACLE_FUNC(svread_hor_za128, _u8, _m)(zd, pg, 15, slice_base); } @@ -660,7 +660,7 @@ svuint8_t test_svread_hor_za128_u8_1(svuint8_t zd, svbool_t pg, uint32_t slice_b // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv8i16( [[ZD]], [[TMP0]], i32 0, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT: ret [[TMP1]] // -svuint16_t test_svread_hor_za128_u16(svuint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { +svuint16_t test_svread_hor_za128_u16(svuint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { return SME_ACLE_FUNC(svread_hor_za128, _u16, _m)(zd, pg, 0, slice_base); } @@ -678,7 +678,7 @@ svuint16_t test_svread_hor_za128_u16(svuint16_t zd, svbool_t pg, uint32_t slice_ // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv8i16( [[ZD]], [[TMP0]], i32 15, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT: ret [[TMP1]] // -svuint16_t test_svread_hor_za128_u16_1(svuint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { +svuint16_t test_svread_hor_za128_u16_1(svuint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { return SME_ACLE_FUNC(svread_hor_za128, _u16, _m)(zd, pg, 15, slice_base); } @@ -696,7 +696,7 @@ svuint16_t test_svread_hor_za128_u16_1(svuint16_t zd, svbool_t pg, uint32_t slic // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv4i32( [[ZD]], [[TMP0]], i32 0, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT: ret [[TMP1]] // -svuint32_t test_svread_hor_za128_u32(svuint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { +svuint32_t test_svread_hor_za128_u32(svuint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { return SME_ACLE_FUNC(svread_hor_za128, _u32, _m)(zd, pg, 0, slice_base); } @@ -714,7 +714,7 @@ svuint32_t test_svread_hor_za128_u32(svuint32_t 
zd, svbool_t pg, uint32_t slice_ // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv4i32( [[ZD]], [[TMP0]], i32 15, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT: ret [[TMP1]] // -svuint32_t test_svread_hor_za128_u32_1(svuint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { +svuint32_t test_svread_hor_za128_u32_1(svuint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { return SME_ACLE_FUNC(svread_hor_za128, _u32, _m)(zd, pg, 15, slice_base); } @@ -732,7 +732,7 @@ svuint32_t test_svread_hor_za128_u32_1(svuint32_t zd, svbool_t pg, uint32_t slic // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv2i64( [[ZD]], [[TMP0]], i32 0, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT: ret [[TMP1]] // -svuint64_t test_svread_hor_za128_u64(svuint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { +svuint64_t test_svread_hor_za128_u64(svuint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { return SME_ACLE_FUNC(svread_hor_za128, _u64, _m)(zd, pg, 0, slice_base); } @@ -750,7 +750,7 @@ svuint64_t test_svread_hor_za128_u64(svuint64_t zd, svbool_t pg, uint32_t slice_ // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv2i64( [[ZD]], [[TMP0]], i32 15, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT: ret [[TMP1]] // -svuint64_t test_svread_hor_za128_u64_1(svuint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { +svuint64_t test_svread_hor_za128_u64_1(svuint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { return SME_ACLE_FUNC(svread_hor_za128, _u64, _m)(zd, pg, 15, slice_base); } @@ -768,7 +768,7 @@ svuint64_t test_svread_hor_za128_u64_1(svuint64_t zd, svbool_t pg, uint32_t slic // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv8f16( [[ZD]], [[TMP0]], i32 0, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT: ret [[TMP1]] // -svfloat16_t test_svread_hor_za128_f16(svfloat16_t 
zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { +svfloat16_t test_svread_hor_za128_f16(svfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { return SME_ACLE_FUNC(svread_hor_za128, _f16, _m)(zd, pg, 0, slice_base); } @@ -786,7 +786,7 @@ svfloat16_t test_svread_hor_za128_f16(svfloat16_t zd, svbool_t pg, uint32_t slic // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv8f16( [[ZD]], [[TMP0]], i32 15, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT: ret [[TMP1]] // -svfloat16_t test_svread_hor_za128_f16_1(svfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { +svfloat16_t test_svread_hor_za128_f16_1(svfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { return SME_ACLE_FUNC(svread_hor_za128, _f16, _m)(zd, pg, 15, slice_base); } @@ -804,7 +804,7 @@ svfloat16_t test_svread_hor_za128_f16_1(svfloat16_t zd, svbool_t pg, uint32_t sl // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv8bf16( [[ZD]], [[TMP0]], i32 0, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT: ret [[TMP1]] // -svbfloat16_t test_svread_hor_za128_bf16(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { +svbfloat16_t test_svread_hor_za128_bf16(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { return SME_ACLE_FUNC(svread_hor_za128, _bf16, _m)(zd, pg, 0, slice_base); } @@ -822,7 +822,7 @@ svbfloat16_t test_svread_hor_za128_bf16(svbfloat16_t zd, svbool_t pg, uint32_t s // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv8bf16( [[ZD]], [[TMP0]], i32 15, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT: ret [[TMP1]] // -svbfloat16_t test_svread_hor_za128_bf16_1(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { +svbfloat16_t test_svread_hor_za128_bf16_1(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { return 
SME_ACLE_FUNC(svread_hor_za128, _bf16, _m)(zd, pg, 15, slice_base); } @@ -840,7 +840,7 @@ svbfloat16_t test_svread_hor_za128_bf16_1(svbfloat16_t zd, svbool_t pg, uint32_t // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv4f32( [[ZD]], [[TMP0]], i32 0, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT: ret [[TMP1]] // -svfloat32_t test_svread_hor_za128_f32(svfloat32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { +svfloat32_t test_svread_hor_za128_f32(svfloat32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { return SME_ACLE_FUNC(svread_hor_za128, _f32, _m)(zd, pg, 0, slice_base); } @@ -858,7 +858,7 @@ svfloat32_t test_svread_hor_za128_f32(svfloat32_t zd, svbool_t pg, uint32_t slic // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv4f32( [[ZD]], [[TMP0]], i32 15, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT: ret [[TMP1]] // -svfloat32_t test_svread_hor_za128_f32_1(svfloat32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { +svfloat32_t test_svread_hor_za128_f32_1(svfloat32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { return SME_ACLE_FUNC(svread_hor_za128, _f32, _m)(zd, pg, 15, slice_base); } @@ -876,7 +876,7 @@ svfloat32_t test_svread_hor_za128_f32_1(svfloat32_t zd, svbool_t pg, uint32_t sl // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv2f64( [[ZD]], [[TMP0]], i32 0, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT: ret [[TMP1]] // -svfloat64_t test_svread_hor_za128_f64(svfloat64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { +svfloat64_t test_svread_hor_za128_f64(svfloat64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { return SME_ACLE_FUNC(svread_hor_za128, _f64, _m)(zd, pg, 0, slice_base); } @@ -894,7 +894,7 @@ svfloat64_t test_svread_hor_za128_f64(svfloat64_t zd, svbool_t pg, uint32_t slic // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call 
@llvm.aarch64.sme.readq.horiz.nxv2f64( [[ZD]], [[TMP0]], i32 15, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT: ret [[TMP1]] // -svfloat64_t test_svread_hor_za128_f64_1(svfloat64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { +svfloat64_t test_svread_hor_za128_f64_1(svfloat64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { return SME_ACLE_FUNC(svread_hor_za128, _f64, _m)(zd, pg, 15, slice_base); } @@ -910,7 +910,7 @@ svfloat64_t test_svread_hor_za128_f64_1(svfloat64_t zd, svbool_t pg, uint32_t sl // CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv16i8( [[ZD]], [[PG]], i32 0, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT: ret [[TMP0]] // -svint8_t test_svread_ver_za8_s8(svint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { +svint8_t test_svread_ver_za8_s8(svint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { return SME_ACLE_FUNC(svread_ver_za8, _s8, _m)(zd, pg, 0, slice_base); } @@ -928,7 +928,7 @@ svint8_t test_svread_ver_za8_s8(svint8_t zd, svbool_t pg, uint32_t slice_base) _ // CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv16i8( [[ZD]], [[PG]], i32 0, i32 [[ADD]]) // CHECK-CXX-NEXT: ret [[TMP0]] // -svint8_t test_svread_ver_za8_s8_1(svint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { +svint8_t test_svread_ver_za8_s8_1(svint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { uint32_t slice = slice_base + 15; return SME_ACLE_FUNC(svread_ver_za8, _s8, _m)(zd, pg, 0, slice); } @@ -947,7 +947,7 @@ svint8_t test_svread_ver_za8_s8_1(svint8_t zd, svbool_t pg, uint32_t slice_base) // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv8i16( [[ZD]], [[TMP0]], i32 0, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT: ret [[TMP1]] // -svint16_t test_svread_ver_za16_s16(svint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { +svint16_t 
test_svread_ver_za16_s16(svint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { return SME_ACLE_FUNC(svread_ver_za16, _s16, _m)(zd, pg, 0, slice_base); } @@ -967,7 +967,7 @@ svint16_t test_svread_ver_za16_s16(svint16_t zd, svbool_t pg, uint32_t slice_bas // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv8i16( [[ZD]], [[TMP0]], i32 1, i32 [[ADD]]) // CHECK-CXX-NEXT: ret [[TMP1]] // -svint16_t test_svread_ver_za16_s16_1(svint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { +svint16_t test_svread_ver_za16_s16_1(svint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { uint32_t slice = slice_base + 7; return SME_ACLE_FUNC(svread_ver_za16, _s16, _m)(zd, pg, 1, slice); } @@ -986,7 +986,7 @@ svint16_t test_svread_ver_za16_s16_1(svint16_t zd, svbool_t pg, uint32_t slice_b // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv4i32( [[ZD]], [[TMP0]], i32 0, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT: ret [[TMP1]] // -svint32_t test_svread_ver_za32_s32(svint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { +svint32_t test_svread_ver_za32_s32(svint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { return SME_ACLE_FUNC(svread_ver_za32, _s32, _m)(zd, pg, 0, slice_base); } @@ -1006,7 +1006,7 @@ svint32_t test_svread_ver_za32_s32(svint32_t zd, svbool_t pg, uint32_t slice_bas // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv4i32( [[ZD]], [[TMP0]], i32 3, i32 [[ADD]]) // CHECK-CXX-NEXT: ret [[TMP1]] // -svint32_t test_svread_ver_za32_s32_1(svint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { +svint32_t test_svread_ver_za32_s32_1(svint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { uint32_t slice = slice_base + 3; return SME_ACLE_FUNC(svread_ver_za32, _s32, _m)(zd, pg, 3, slice); } @@ -1025,7 +1025,7 @@ svint32_t 
test_svread_ver_za32_s32_1(svint32_t zd, svbool_t pg, uint32_t slice_b // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv2i64( [[ZD]], [[TMP0]], i32 0, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT: ret [[TMP1]] // -svint64_t test_svread_ver_za64_s64(svint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { +svint64_t test_svread_ver_za64_s64(svint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { return SME_ACLE_FUNC(svread_ver_za64, _s64, _m)(zd, pg, 0, slice_base); } @@ -1045,7 +1045,7 @@ svint64_t test_svread_ver_za64_s64(svint64_t zd, svbool_t pg, uint32_t slice_bas // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv2i64( [[ZD]], [[TMP0]], i32 7, i32 [[ADD]]) // CHECK-CXX-NEXT: ret [[TMP1]] // -svint64_t test_svread_ver_za64_s64_1(svint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { +svint64_t test_svread_ver_za64_s64_1(svint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { uint32_t slice = slice_base + 1; return SME_ACLE_FUNC(svread_ver_za64, _s64, _m)(zd, pg, 7, slice); } @@ -1062,7 +1062,7 @@ svint64_t test_svread_ver_za64_s64_1(svint64_t zd, svbool_t pg, uint32_t slice_b // CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv16i8( [[ZD]], [[PG]], i32 0, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT: ret [[TMP0]] // -svuint8_t test_svread_ver_za8_u8(svuint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { +svuint8_t test_svread_ver_za8_u8(svuint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { return SME_ACLE_FUNC(svread_ver_za8, _u8, _m)(zd, pg, 0, slice_base); } @@ -1080,7 +1080,7 @@ svuint8_t test_svread_ver_za8_u8(svuint8_t zd, svbool_t pg, uint32_t slice_base) // CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv16i8( [[ZD]], [[PG]], i32 0, i32 [[ADD]]) // CHECK-CXX-NEXT: ret [[TMP0]] // -svuint8_t 
test_svread_ver_za8_u8_1(svuint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { +svuint8_t test_svread_ver_za8_u8_1(svuint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { uint32_t slice = slice_base + 15; return SME_ACLE_FUNC(svread_ver_za8, _u8, _m)(zd, pg, 0, slice); } @@ -1099,7 +1099,7 @@ svuint8_t test_svread_ver_za8_u8_1(svuint8_t zd, svbool_t pg, uint32_t slice_bas // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv8i16( [[ZD]], [[TMP0]], i32 0, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT: ret [[TMP1]] // -svuint16_t test_svread_ver_za16_u16(svuint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { +svuint16_t test_svread_ver_za16_u16(svuint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { return SME_ACLE_FUNC(svread_ver_za16, _u16, _m)(zd, pg, 0, slice_base); } @@ -1119,7 +1119,7 @@ svuint16_t test_svread_ver_za16_u16(svuint16_t zd, svbool_t pg, uint32_t slice_b // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv8i16( [[ZD]], [[TMP0]], i32 1, i32 [[ADD]]) // CHECK-CXX-NEXT: ret [[TMP1]] // -svuint16_t test_svread_ver_za16_u16_1(svuint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { +svuint16_t test_svread_ver_za16_u16_1(svuint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { uint32_t slice = slice_base + 7; return SME_ACLE_FUNC(svread_ver_za16, _u16, _m)(zd, pg, 1, slice); } @@ -1138,7 +1138,7 @@ svuint16_t test_svread_ver_za16_u16_1(svuint16_t zd, svbool_t pg, uint32_t slice // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv4i32( [[ZD]], [[TMP0]], i32 0, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT: ret [[TMP1]] // -svuint32_t test_svread_ver_za32_u32(svuint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { +svuint32_t test_svread_ver_za32_u32(svuint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming 
__arm_in("za") { return SME_ACLE_FUNC(svread_ver_za32, _u32, _m)(zd, pg, 0, slice_base); } @@ -1158,7 +1158,7 @@ svuint32_t test_svread_ver_za32_u32(svuint32_t zd, svbool_t pg, uint32_t slice_b // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv4i32( [[ZD]], [[TMP0]], i32 3, i32 [[ADD]]) // CHECK-CXX-NEXT: ret [[TMP1]] // -svuint32_t test_svread_ver_za32_u32_1(svuint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { +svuint32_t test_svread_ver_za32_u32_1(svuint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { uint32_t slice = slice_base + 3; return SME_ACLE_FUNC(svread_ver_za32, _u32, _m)(zd, pg, 3, slice); } @@ -1177,7 +1177,7 @@ svuint32_t test_svread_ver_za32_u32_1(svuint32_t zd, svbool_t pg, uint32_t slice // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv2i64( [[ZD]], [[TMP0]], i32 0, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT: ret [[TMP1]] // -svuint64_t test_svread_ver_za64_u64(svuint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { +svuint64_t test_svread_ver_za64_u64(svuint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { return SME_ACLE_FUNC(svread_ver_za64, _u64, _m)(zd, pg, 0, slice_base); } @@ -1197,7 +1197,7 @@ svuint64_t test_svread_ver_za64_u64(svuint64_t zd, svbool_t pg, uint32_t slice_b // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv2i64( [[ZD]], [[TMP0]], i32 7, i32 [[ADD]]) // CHECK-CXX-NEXT: ret [[TMP1]] // -svuint64_t test_svread_ver_za64_u64_1(svuint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { +svuint64_t test_svread_ver_za64_u64_1(svuint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { uint32_t slice = slice_base + 1; return SME_ACLE_FUNC(svread_ver_za64, _u64, _m)(zd, pg, 7, slice); } @@ -1216,7 +1216,7 @@ svuint64_t test_svread_ver_za64_u64_1(svuint64_t zd, svbool_t pg, uint32_t slice // CHECK-CXX-NEXT: 
[[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv8f16( [[ZD]], [[TMP0]], i32 0, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT: ret [[TMP1]] // -svfloat16_t test_svread_ver_za16_f16(svfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { +svfloat16_t test_svread_ver_za16_f16(svfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { return SME_ACLE_FUNC(svread_ver_za16, _f16, _m)(zd, pg, 0, slice_base); } @@ -1236,7 +1236,7 @@ svfloat16_t test_svread_ver_za16_f16(svfloat16_t zd, svbool_t pg, uint32_t slice // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv8f16( [[ZD]], [[TMP0]], i32 1, i32 [[ADD]]) // CHECK-CXX-NEXT: ret [[TMP1]] // -svfloat16_t test_svread_ver_za16_f16_1(svfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { +svfloat16_t test_svread_ver_za16_f16_1(svfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { uint32_t slice = slice_base + 7; return SME_ACLE_FUNC(svread_ver_za16, _f16, _m)(zd, pg, 1, slice); } @@ -1255,7 +1255,7 @@ svfloat16_t test_svread_ver_za16_f16_1(svfloat16_t zd, svbool_t pg, uint32_t sli // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv8bf16( [[ZD]], [[TMP0]], i32 0, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT: ret [[TMP1]] // -svbfloat16_t test_svread_ver_za16_bf16(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { +svbfloat16_t test_svread_ver_za16_bf16(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { return SME_ACLE_FUNC(svread_ver_za16, _bf16, _m)(zd, pg, 0, slice_base); } @@ -1275,7 +1275,7 @@ svbfloat16_t test_svread_ver_za16_bf16(svbfloat16_t zd, svbool_t pg, uint32_t sl // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv8bf16( [[ZD]], [[TMP0]], i32 1, i32 [[ADD]]) // CHECK-CXX-NEXT: ret [[TMP1]] // -svbfloat16_t test_svread_ver_za16_bf16_1(svbfloat16_t zd, svbool_t pg, uint32_t 
slice_base) __arm_streaming __arm_shared_za { +svbfloat16_t test_svread_ver_za16_bf16_1(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { uint32_t slice = slice_base + 7; return SME_ACLE_FUNC(svread_ver_za16, _bf16, _m)(zd, pg, 1, slice); } @@ -1294,7 +1294,7 @@ svbfloat16_t test_svread_ver_za16_bf16_1(svbfloat16_t zd, svbool_t pg, uint32_t // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv4f32( [[ZD]], [[TMP0]], i32 0, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT: ret [[TMP1]] // -svfloat32_t test_svread_ver_za32_f32(svfloat32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { +svfloat32_t test_svread_ver_za32_f32(svfloat32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { return SME_ACLE_FUNC(svread_ver_za32, _f32, _m)(zd, pg, 0, slice_base); } @@ -1314,7 +1314,7 @@ svfloat32_t test_svread_ver_za32_f32(svfloat32_t zd, svbool_t pg, uint32_t slice // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv4f32( [[ZD]], [[TMP0]], i32 3, i32 [[ADD]]) // CHECK-CXX-NEXT: ret [[TMP1]] // -svfloat32_t test_svread_ver_za32_f32_1(svfloat32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { +svfloat32_t test_svread_ver_za32_f32_1(svfloat32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { uint32_t slice = slice_base + 3; return SME_ACLE_FUNC(svread_ver_za32, _f32, _m)(zd, pg, 3, slice); } @@ -1333,7 +1333,7 @@ svfloat32_t test_svread_ver_za32_f32_1(svfloat32_t zd, svbool_t pg, uint32_t sli // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv2f64( [[ZD]], [[TMP0]], i32 0, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT: ret [[TMP1]] // -svfloat64_t test_svread_ver_za64_f64(svfloat64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { +svfloat64_t test_svread_ver_za64_f64(svfloat64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { return 
SME_ACLE_FUNC(svread_ver_za64, _f64, _m)(zd, pg, 0, slice_base); } @@ -1353,7 +1353,7 @@ svfloat64_t test_svread_ver_za64_f64(svfloat64_t zd, svbool_t pg, uint32_t slice // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv2f64( [[ZD]], [[TMP0]], i32 7, i32 [[ADD]]) // CHECK-CXX-NEXT: ret [[TMP1]] // -svfloat64_t test_svread_ver_za64_f64_1(svfloat64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { +svfloat64_t test_svread_ver_za64_f64_1(svfloat64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { uint32_t slice = slice_base + 1; return SME_ACLE_FUNC(svread_ver_za64, _f64, _m)(zd, pg, 7, slice); } @@ -1370,7 +1370,7 @@ svfloat64_t test_svread_ver_za64_f64_1(svfloat64_t zd, svbool_t pg, uint32_t sli // CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv16i8( [[ZD]], [[PG]], i32 0, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT: ret [[TMP0]] // -svint8_t test_svread_ver_za128_s8(svint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { +svint8_t test_svread_ver_za128_s8(svint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { return SME_ACLE_FUNC(svread_ver_za128, _s8, _m)(zd, pg, 0, slice_base); } @@ -1386,7 +1386,7 @@ svint8_t test_svread_ver_za128_s8(svint8_t zd, svbool_t pg, uint32_t slice_base) // CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv16i8( [[ZD]], [[PG]], i32 15, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT: ret [[TMP0]] // -svint8_t test_svread_ver_za128_s8_1(svint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { +svint8_t test_svread_ver_za128_s8_1(svint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { return SME_ACLE_FUNC(svread_ver_za128, _s8, _m)(zd, pg, 15, slice_base); } @@ -1404,7 +1404,7 @@ svint8_t test_svread_ver_za128_s8_1(svint8_t zd, svbool_t pg, uint32_t slice_bas // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv8i16( 
[[ZD]], [[TMP0]], i32 0, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT: ret [[TMP1]] // -svint16_t test_svread_ver_za128_s16(svint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { +svint16_t test_svread_ver_za128_s16(svint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { return SME_ACLE_FUNC(svread_ver_za128, _s16, _m)(zd, pg, 0, slice_base); } @@ -1422,7 +1422,7 @@ svint16_t test_svread_ver_za128_s16(svint16_t zd, svbool_t pg, uint32_t slice_ba // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv8i16( [[ZD]], [[TMP0]], i32 15, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT: ret [[TMP1]] // -svint16_t test_svread_ver_za128_s16_1(svint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { +svint16_t test_svread_ver_za128_s16_1(svint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { return SME_ACLE_FUNC(svread_ver_za128, _s16, _m)(zd, pg, 15, slice_base); } @@ -1440,7 +1440,7 @@ svint16_t test_svread_ver_za128_s16_1(svint16_t zd, svbool_t pg, uint32_t slice_ // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv4i32( [[ZD]], [[TMP0]], i32 0, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT: ret [[TMP1]] // -svint32_t test_svread_ver_za128_s32(svint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { +svint32_t test_svread_ver_za128_s32(svint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { return SME_ACLE_FUNC(svread_ver_za128, _s32, _m)(zd, pg, 0, slice_base); } @@ -1458,7 +1458,7 @@ svint32_t test_svread_ver_za128_s32(svint32_t zd, svbool_t pg, uint32_t slice_ba // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv4i32( [[ZD]], [[TMP0]], i32 15, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT: ret [[TMP1]] // -svint32_t test_svread_ver_za128_s32_1(svint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { +svint32_t test_svread_ver_za128_s32_1(svint32_t zd, 
svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { return SME_ACLE_FUNC(svread_ver_za128, _s32, _m)(zd, pg, 15, slice_base); } @@ -1476,7 +1476,7 @@ svint32_t test_svread_ver_za128_s32_1(svint32_t zd, svbool_t pg, uint32_t slice_ // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv2i64( [[ZD]], [[TMP0]], i32 0, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT: ret [[TMP1]] // -svint64_t test_svread_ver_za128_s64(svint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { +svint64_t test_svread_ver_za128_s64(svint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { return SME_ACLE_FUNC(svread_ver_za128, _s64, _m)(zd, pg, 0, slice_base); } @@ -1494,7 +1494,7 @@ svint64_t test_svread_ver_za128_s64(svint64_t zd, svbool_t pg, uint32_t slice_ba // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv2i64( [[ZD]], [[TMP0]], i32 15, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT: ret [[TMP1]] // -svint64_t test_svread_ver_za128_s64_1(svint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { +svint64_t test_svread_ver_za128_s64_1(svint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { return SME_ACLE_FUNC(svread_ver_za128, _s64, _m)(zd, pg, 15, slice_base); } @@ -1510,7 +1510,7 @@ svint64_t test_svread_ver_za128_s64_1(svint64_t zd, svbool_t pg, uint32_t slice_ // CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv16i8( [[ZD]], [[PG]], i32 0, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT: ret [[TMP0]] // -svuint8_t test_svread_ver_za128_u8(svuint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { +svuint8_t test_svread_ver_za128_u8(svuint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { return SME_ACLE_FUNC(svread_ver_za128, _u8, _m)(zd, pg, 0, slice_base); } @@ -1526,7 +1526,7 @@ svuint8_t test_svread_ver_za128_u8(svuint8_t zd, svbool_t pg, uint32_t slice_bas // CHECK-CXX-NEXT: 
[[TMP0:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv16i8( [[ZD]], [[PG]], i32 15, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT: ret [[TMP0]] // -svuint8_t test_svread_ver_za128_u8_1(svuint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { +svuint8_t test_svread_ver_za128_u8_1(svuint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { return SME_ACLE_FUNC(svread_ver_za128, _u8, _m)(zd, pg, 15, slice_base); } @@ -1544,7 +1544,7 @@ svuint8_t test_svread_ver_za128_u8_1(svuint8_t zd, svbool_t pg, uint32_t slice_b // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv8i16( [[ZD]], [[TMP0]], i32 0, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT: ret [[TMP1]] // -svuint16_t test_svread_ver_za128_u16(svuint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { +svuint16_t test_svread_ver_za128_u16(svuint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { return SME_ACLE_FUNC(svread_ver_za128, _u16, _m)(zd, pg, 0, slice_base); } @@ -1562,7 +1562,7 @@ svuint16_t test_svread_ver_za128_u16(svuint16_t zd, svbool_t pg, uint32_t slice_ // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv8i16( [[ZD]], [[TMP0]], i32 15, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT: ret [[TMP1]] // -svuint16_t test_svread_ver_za128_u16_1(svuint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { +svuint16_t test_svread_ver_za128_u16_1(svuint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { return SME_ACLE_FUNC(svread_ver_za128, _u16, _m)(zd, pg, 15, slice_base); } @@ -1580,7 +1580,7 @@ svuint16_t test_svread_ver_za128_u16_1(svuint16_t zd, svbool_t pg, uint32_t slic // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv4i32( [[ZD]], [[TMP0]], i32 0, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT: ret [[TMP1]] // -svuint32_t test_svread_ver_za128_u32(svuint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming 
__arm_shared_za { +svuint32_t test_svread_ver_za128_u32(svuint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { return SME_ACLE_FUNC(svread_ver_za128, _u32, _m)(zd, pg, 0, slice_base); } @@ -1598,7 +1598,7 @@ svuint32_t test_svread_ver_za128_u32(svuint32_t zd, svbool_t pg, uint32_t slice_ // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv4i32( [[ZD]], [[TMP0]], i32 15, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT: ret [[TMP1]] // -svuint32_t test_svread_ver_za128_u32_1(svuint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { +svuint32_t test_svread_ver_za128_u32_1(svuint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { return SME_ACLE_FUNC(svread_ver_za128, _u32, _m)(zd, pg, 15, slice_base); } @@ -1616,7 +1616,7 @@ svuint32_t test_svread_ver_za128_u32_1(svuint32_t zd, svbool_t pg, uint32_t slic // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv2i64( [[ZD]], [[TMP0]], i32 0, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT: ret [[TMP1]] // -svuint64_t test_svread_ver_za128_u64(svuint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { +svuint64_t test_svread_ver_za128_u64(svuint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { return SME_ACLE_FUNC(svread_ver_za128, _u64, _m)(zd, pg, 0, slice_base); } @@ -1634,7 +1634,7 @@ svuint64_t test_svread_ver_za128_u64(svuint64_t zd, svbool_t pg, uint32_t slice_ // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv2i64( [[ZD]], [[TMP0]], i32 15, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT: ret [[TMP1]] // -svuint64_t test_svread_ver_za128_u64_1(svuint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { +svuint64_t test_svread_ver_za128_u64_1(svuint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { return SME_ACLE_FUNC(svread_ver_za128, _u64, _m)(zd, pg, 15, slice_base); } @@ -1652,7 +1652,7 @@ 
svuint64_t test_svread_ver_za128_u64_1(svuint64_t zd, svbool_t pg, uint32_t slic // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv8f16( [[ZD]], [[TMP0]], i32 0, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT: ret [[TMP1]] // -svfloat16_t test_svread_ver_za128_f16(svfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { +svfloat16_t test_svread_ver_za128_f16(svfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { return SME_ACLE_FUNC(svread_ver_za128, _f16, _m)(zd, pg, 0, slice_base); } @@ -1670,7 +1670,7 @@ svfloat16_t test_svread_ver_za128_f16(svfloat16_t zd, svbool_t pg, uint32_t slic // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv8f16( [[ZD]], [[TMP0]], i32 15, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT: ret [[TMP1]] // -svfloat16_t test_svread_ver_za128_f16_1(svfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { +svfloat16_t test_svread_ver_za128_f16_1(svfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { return SME_ACLE_FUNC(svread_ver_za128, _f16, _m)(zd, pg, 15, slice_base); } @@ -1688,7 +1688,7 @@ svfloat16_t test_svread_ver_za128_f16_1(svfloat16_t zd, svbool_t pg, uint32_t sl // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv8bf16( [[ZD]], [[TMP0]], i32 0, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT: ret [[TMP1]] // -svbfloat16_t test_svread_ver_za128_bf16(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { +svbfloat16_t test_svread_ver_za128_bf16(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { return SME_ACLE_FUNC(svread_ver_za128, _bf16, _m)(zd, pg, 0, slice_base); } @@ -1706,7 +1706,7 @@ svbfloat16_t test_svread_ver_za128_bf16(svbfloat16_t zd, svbool_t pg, uint32_t s // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv8bf16( [[ZD]], [[TMP0]], i32 15, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT: 
ret [[TMP1]] // -svbfloat16_t test_svread_ver_za128_bf16_1(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { +svbfloat16_t test_svread_ver_za128_bf16_1(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { return SME_ACLE_FUNC(svread_ver_za128, _bf16, _m)(zd, pg, 15, slice_base); } @@ -1724,7 +1724,7 @@ svbfloat16_t test_svread_ver_za128_bf16_1(svbfloat16_t zd, svbool_t pg, uint32_t // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv4f32( [[ZD]], [[TMP0]], i32 0, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT: ret [[TMP1]] // -svfloat32_t test_svread_ver_za128_f32(svfloat32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { +svfloat32_t test_svread_ver_za128_f32(svfloat32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { return SME_ACLE_FUNC(svread_ver_za128, _f32, _m)(zd, pg, 0, slice_base); } @@ -1742,7 +1742,7 @@ svfloat32_t test_svread_ver_za128_f32(svfloat32_t zd, svbool_t pg, uint32_t slic // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv4f32( [[ZD]], [[TMP0]], i32 15, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT: ret [[TMP1]] // -svfloat32_t test_svread_ver_za128_f32_1(svfloat32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { +svfloat32_t test_svread_ver_za128_f32_1(svfloat32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { return SME_ACLE_FUNC(svread_ver_za128, _f32, _m)(zd, pg, 15, slice_base); } @@ -1760,7 +1760,7 @@ svfloat32_t test_svread_ver_za128_f32_1(svfloat32_t zd, svbool_t pg, uint32_t sl // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv2f64( [[ZD]], [[TMP0]], i32 0, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT: ret [[TMP1]] // -svfloat64_t test_svread_ver_za128_f64(svfloat64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { +svfloat64_t test_svread_ver_za128_f64(svfloat64_t zd, svbool_t pg, uint32_t 
slice_base) __arm_streaming __arm_in("za") { return SME_ACLE_FUNC(svread_ver_za128, _f64, _m)(zd, pg, 0, slice_base); } @@ -1778,7 +1778,7 @@ svfloat64_t test_svread_ver_za128_f64(svfloat64_t zd, svbool_t pg, uint32_t slic // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv2f64( [[ZD]], [[TMP0]], i32 15, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT: ret [[TMP1]] // -svfloat64_t test_svread_ver_za128_f64_1(svfloat64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_shared_za { +svfloat64_t test_svread_ver_za128_f64_1(svfloat64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { return SME_ACLE_FUNC(svread_ver_za128, _f64, _m)(zd, pg, 15, slice_base); } //// NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_st1.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_st1.c index 98ebbefc2e74c..97e20ab262902 100644 --- a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_st1.c +++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_st1.c @@ -22,7 +22,7 @@ // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.st1b.horiz( [[PG]], ptr [[PTR]], i32 0, i32 [[ADD]]) // CHECK-CXX-NEXT: ret void // -void test_svst1_hor_za8(uint32_t slice_base, svbool_t pg, void *ptr) __arm_streaming __arm_shared_za { +void test_svst1_hor_za8(uint32_t slice_base, svbool_t pg, void *ptr) __arm_streaming __arm_in("za") { svst1_hor_za8(0, slice_base, pg, ptr); svst1_hor_za8(0, slice_base + 15, pg, ptr); } @@ -45,7 +45,7 @@ void test_svst1_hor_za8(uint32_t slice_base, svbool_t pg, void *ptr) __arm_strea // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.st1h.horiz( [[TMP0]], ptr [[PTR]], i32 1, i32 [[ADD]]) // CHECK-CXX-NEXT: ret void // -void test_svst1_hor_za16(uint32_t slice_base, svbool_t pg, void *ptr) __arm_streaming __arm_shared_za { +void test_svst1_hor_za16(uint32_t slice_base, svbool_t pg, void *ptr) __arm_streaming __arm_in("za") 
{ svst1_hor_za16(0, slice_base, pg, ptr); svst1_hor_za16(1, slice_base + 7, pg, ptr); } @@ -68,7 +68,7 @@ void test_svst1_hor_za16(uint32_t slice_base, svbool_t pg, void *ptr) __arm_stre // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.st1w.horiz( [[TMP0]], ptr [[PTR]], i32 3, i32 [[ADD]]) // CHECK-CXX-NEXT: ret void // -void test_svst1_hor_za32(uint32_t slice_base, svbool_t pg, void *ptr) __arm_streaming __arm_shared_za { +void test_svst1_hor_za32(uint32_t slice_base, svbool_t pg, void *ptr) __arm_streaming __arm_in("za") { svst1_hor_za32(0, slice_base, pg, ptr); svst1_hor_za32(3, slice_base + 3, pg, ptr); } @@ -91,7 +91,7 @@ void test_svst1_hor_za32(uint32_t slice_base, svbool_t pg, void *ptr) __arm_stre // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.st1d.horiz( [[TMP0]], ptr [[PTR]], i32 7, i32 [[ADD]]) // CHECK-CXX-NEXT: ret void // -void test_svst1_hor_za64(uint32_t slice_base, svbool_t pg, void *ptr) __arm_streaming __arm_shared_za { +void test_svst1_hor_za64(uint32_t slice_base, svbool_t pg, void *ptr) __arm_streaming __arm_in("za") { svst1_hor_za64(0, slice_base, pg, ptr); svst1_hor_za64(7, slice_base + 1, pg, ptr); } @@ -112,7 +112,7 @@ void test_svst1_hor_za64(uint32_t slice_base, svbool_t pg, void *ptr) __arm_stre // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.st1q.horiz( [[TMP0]], ptr [[PTR]], i32 15, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT: ret void // -void test_svst1_hor_za128(uint32_t slice_base, svbool_t pg, void *ptr) __arm_streaming __arm_shared_za { +void test_svst1_hor_za128(uint32_t slice_base, svbool_t pg, void *ptr) __arm_streaming __arm_in("za") { svst1_hor_za128(0, slice_base, pg, ptr); svst1_hor_za128(15, slice_base, pg, ptr); } @@ -133,7 +133,7 @@ void test_svst1_hor_za128(uint32_t slice_base, svbool_t pg, void *ptr) __arm_str // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.st1b.vert( [[PG]], ptr [[PTR]], i32 0, i32 [[ADD]]) // CHECK-CXX-NEXT: ret void // -void test_svst1_ver_za8(uint32_t slice_base, svbool_t pg, void 
*ptr) __arm_streaming __arm_shared_za { +void test_svst1_ver_za8(uint32_t slice_base, svbool_t pg, void *ptr) __arm_streaming __arm_in("za") { svst1_ver_za8(0, slice_base, pg, ptr); svst1_ver_za8(0, slice_base + 15, pg, ptr); } @@ -156,7 +156,7 @@ void test_svst1_ver_za8(uint32_t slice_base, svbool_t pg, void *ptr) __arm_strea // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.st1h.vert( [[TMP0]], ptr [[PTR]], i32 1, i32 [[ADD]]) // CHECK-CXX-NEXT: ret void // -void test_svst1_ver_za16(uint32_t slice_base, svbool_t pg, void *ptr) __arm_streaming __arm_shared_za { +void test_svst1_ver_za16(uint32_t slice_base, svbool_t pg, void *ptr) __arm_streaming __arm_in("za") { svst1_ver_za16(0, slice_base, pg, ptr); svst1_ver_za16(1, slice_base + 7, pg, ptr); } @@ -179,7 +179,7 @@ void test_svst1_ver_za16(uint32_t slice_base, svbool_t pg, void *ptr) __arm_stre // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.st1w.vert( [[TMP0]], ptr [[PTR]], i32 3, i32 [[ADD]]) // CHECK-CXX-NEXT: ret void // -void test_svst1_ver_za32(uint32_t slice_base, svbool_t pg, void *ptr) __arm_streaming __arm_shared_za { +void test_svst1_ver_za32(uint32_t slice_base, svbool_t pg, void *ptr) __arm_streaming __arm_in("za") { svst1_ver_za32(0, slice_base, pg, ptr); svst1_ver_za32(3, slice_base + 3, pg, ptr); } @@ -202,7 +202,7 @@ void test_svst1_ver_za32(uint32_t slice_base, svbool_t pg, void *ptr) __arm_stre // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.st1d.vert( [[TMP0]], ptr [[PTR]], i32 7, i32 [[ADD]]) // CHECK-CXX-NEXT: ret void // -void test_svst1_ver_za64(uint32_t slice_base, svbool_t pg, void *ptr) __arm_streaming __arm_shared_za { +void test_svst1_ver_za64(uint32_t slice_base, svbool_t pg, void *ptr) __arm_streaming __arm_in("za") { svst1_ver_za64(0, slice_base, pg, ptr); svst1_ver_za64(7, slice_base + 1, pg, ptr); } @@ -223,7 +223,7 @@ void test_svst1_ver_za64(uint32_t slice_base, svbool_t pg, void *ptr) __arm_stre // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.st1q.vert( 
[[TMP0]], ptr [[PTR]], i32 15, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT: ret void // -void test_svst1_ver_za128(uint32_t slice_base, svbool_t pg, void *ptr) __arm_streaming __arm_shared_za { +void test_svst1_ver_za128(uint32_t slice_base, svbool_t pg, void *ptr) __arm_streaming __arm_in("za") { svst1_ver_za128(0, slice_base, pg, ptr); svst1_ver_za128(15, slice_base, pg, ptr); } diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_st1_vnum.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_st1_vnum.c index 938e62a15c771..7566ad8889e05 100644 --- a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_st1_vnum.c +++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_st1_vnum.c @@ -28,7 +28,7 @@ // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.st1b.horiz( [[PG]], ptr [[TMP1]], i32 0, i32 [[ADD]]) // CHECK-CXX-NEXT: ret void // -void test_svst1_hor_vnum_za8(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) __arm_streaming __arm_shared_za { +void test_svst1_hor_vnum_za8(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) __arm_streaming __arm_in("za") { svst1_hor_vnum_za8(0, slice_base, pg, ptr, vnum); svst1_hor_vnum_za8(0, slice_base + 15, pg, ptr, vnum); } @@ -57,7 +57,7 @@ void test_svst1_hor_vnum_za8(uint32_t slice_base, svbool_t pg, void *ptr, int64_ // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.st1h.horiz( [[TMP0]], ptr [[TMP2]], i32 1, i32 [[ADD]]) // CHECK-CXX-NEXT: ret void // -void test_svst1_hor_vnum_za16(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) __arm_streaming __arm_shared_za { +void test_svst1_hor_vnum_za16(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) __arm_streaming __arm_in("za") { svst1_hor_vnum_za16(0, slice_base, pg, ptr, vnum); svst1_hor_vnum_za16(1, slice_base + 7, pg, ptr, vnum); } @@ -86,7 +86,7 @@ void test_svst1_hor_vnum_za16(uint32_t slice_base, svbool_t pg, void *ptr, int64 // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.st1w.horiz( [[TMP0]], ptr [[TMP2]], i32 3, i32 
[[ADD]]) // CHECK-CXX-NEXT: ret void // -void test_svst1_hor_vnum_za32(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) __arm_streaming __arm_shared_za { +void test_svst1_hor_vnum_za32(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) __arm_streaming __arm_in("za") { svst1_hor_vnum_za32(0, slice_base, pg, ptr, vnum); svst1_hor_vnum_za32(3, slice_base + 3, pg, ptr, vnum); } @@ -115,7 +115,7 @@ void test_svst1_hor_vnum_za32(uint32_t slice_base, svbool_t pg, void *ptr, int64 // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.st1d.horiz( [[TMP0]], ptr [[TMP2]], i32 7, i32 [[ADD]]) // CHECK-CXX-NEXT: ret void // -void test_svst1_hor_vnum_za64(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) __arm_streaming __arm_shared_za { +void test_svst1_hor_vnum_za64(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) __arm_streaming __arm_in("za") { svst1_hor_vnum_za64(0, slice_base, pg, ptr, vnum); svst1_hor_vnum_za64(7, slice_base + 1, pg, ptr, vnum); } @@ -142,7 +142,7 @@ void test_svst1_hor_vnum_za64(uint32_t slice_base, svbool_t pg, void *ptr, int64 // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.st1q.horiz( [[TMP0]], ptr [[TMP2]], i32 15, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT: ret void // -void test_svst1_hor_vnum_za128(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) __arm_streaming __arm_shared_za { +void test_svst1_hor_vnum_za128(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) __arm_streaming __arm_in("za") { svst1_hor_vnum_za128(0, slice_base, pg, ptr, vnum); svst1_hor_vnum_za128(15, slice_base, pg, ptr, vnum); } @@ -169,7 +169,7 @@ void test_svst1_hor_vnum_za128(uint32_t slice_base, svbool_t pg, void *ptr, int6 // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.st1b.vert( [[PG]], ptr [[TMP1]], i32 0, i32 [[ADD]]) // CHECK-CXX-NEXT: ret void // -void test_svst1_ver_vnum_za8(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) __arm_streaming __arm_shared_za { +void test_svst1_ver_vnum_za8(uint32_t 
slice_base, svbool_t pg, void *ptr, int64_t vnum) __arm_streaming __arm_in("za") { svst1_ver_vnum_za8(0, slice_base, pg, ptr, vnum); svst1_ver_vnum_za8(0, slice_base + 15, pg, ptr, vnum); } @@ -198,7 +198,7 @@ void test_svst1_ver_vnum_za8(uint32_t slice_base, svbool_t pg, void *ptr, int64_ // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.st1h.vert( [[TMP0]], ptr [[TMP2]], i32 1, i32 [[ADD]]) // CHECK-CXX-NEXT: ret void // -void test_svst1_ver_vnum_za16(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) __arm_streaming __arm_shared_za { +void test_svst1_ver_vnum_za16(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) __arm_streaming __arm_in("za") { svst1_ver_vnum_za16(0, slice_base, pg, ptr, vnum); svst1_ver_vnum_za16(1, slice_base + 7, pg, ptr, vnum); } @@ -227,7 +227,7 @@ void test_svst1_ver_vnum_za16(uint32_t slice_base, svbool_t pg, void *ptr, int64 // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.st1w.vert( [[TMP0]], ptr [[TMP2]], i32 3, i32 [[ADD]]) // CHECK-CXX-NEXT: ret void // -void test_svst1_ver_vnum_za32(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) __arm_streaming __arm_shared_za { +void test_svst1_ver_vnum_za32(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) __arm_streaming __arm_in("za") { svst1_ver_vnum_za32(0, slice_base, pg, ptr, vnum); svst1_ver_vnum_za32(3, slice_base + 3, pg, ptr, vnum); } @@ -256,7 +256,7 @@ void test_svst1_ver_vnum_za32(uint32_t slice_base, svbool_t pg, void *ptr, int64 // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.st1d.vert( [[TMP0]], ptr [[TMP2]], i32 7, i32 [[ADD]]) // CHECK-CXX-NEXT: ret void // -void test_svst1_ver_vnum_za64(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) __arm_streaming __arm_shared_za { +void test_svst1_ver_vnum_za64(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) __arm_streaming __arm_in("za") { svst1_ver_vnum_za64(0, slice_base, pg, ptr, vnum); svst1_ver_vnum_za64(7, slice_base + 1, pg, ptr, vnum); } @@ -283,7 +283,7 @@ void 
test_svst1_ver_vnum_za64(uint32_t slice_base, svbool_t pg, void *ptr, int64 // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.st1q.vert( [[TMP0]], ptr [[TMP2]], i32 15, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT: ret void // -void test_svst1_ver_vnum_za128(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) __arm_streaming __arm_shared_za { +void test_svst1_ver_vnum_za128(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) __arm_streaming __arm_in("za") { svst1_ver_vnum_za128(0, slice_base, pg, ptr, vnum); svst1_ver_vnum_za128(15, slice_base, pg, ptr, vnum); } diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_state_funs.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_state_funs.c index 282819c8ca350..c3e4967bfe9b1 100644 --- a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_state_funs.c +++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_state_funs.c @@ -66,7 +66,7 @@ bool test_has_sme(void) __arm_streaming_compatible { // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: ret void // -void test_svundef_za(void) __arm_streaming_compatible __arm_shared_za { +void test_svundef_za(void) __arm_streaming_compatible __arm_out("za") { svundef_za(); } diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_str.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_str.c index bcf368bc8dce4..d21c1ce7a8cd9 100644 --- a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_str.c +++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_str.c @@ -12,7 +12,7 @@ // CHECK-NEXT: tail call void @llvm.aarch64.sme.str(i32 [[SLICE_BASE:%.*]], ptr [[PTR:%.*]], i32 0) // CHECK-NEXT: ret void // -void test_svstr_vnum_za(uint32_t slice_base, void *ptr) __arm_shared_za { +void test_svstr_vnum_za(uint32_t slice_base, void *ptr) __arm_in("za") { svstr_vnum_za(slice_base, ptr, 0); } @@ -22,7 +22,7 @@ void test_svstr_vnum_za(uint32_t slice_base, void *ptr) __arm_shared_za { // CHECK-NEXT: tail call void @llvm.aarch64.sme.str(i32 [[SLICE_BASE:%.*]], ptr 
[[PTR:%.*]], i32 15) // CHECK-NEXT: ret void // -void test_svstr_vnum_za_1(uint32_t slice_base, void *ptr) __arm_shared_za { +void test_svstr_vnum_za_1(uint32_t slice_base, void *ptr) __arm_in("za") { svstr_vnum_za(slice_base, ptr, 15); } @@ -32,7 +32,7 @@ void test_svstr_vnum_za_1(uint32_t slice_base, void *ptr) __arm_shared_za { // CHECK-NEXT: tail call void @llvm.aarch64.sme.str(i32 [[SLICE_BASE:%.*]], ptr [[PTR:%.*]], i32 0) // CHECK-NEXT: ret void // -void test_svstr_za(uint32_t slice_base, void *ptr) __arm_shared_za { +void test_svstr_za(uint32_t slice_base, void *ptr) __arm_in("za") { svstr_za(slice_base, ptr); } @@ -43,7 +43,7 @@ void test_svstr_za(uint32_t slice_base, void *ptr) __arm_shared_za { // CHECK-NEXT: tail call void @llvm.aarch64.sme.str(i32 [[SLICE_BASE:%.*]], ptr [[PTR:%.*]], i32 [[TMP0:%.*]]) // CHECK-NEXT: ret void // -void test_svstr_vnum_za_var(uint32_t slice_base, void *ptr, int64_t vnum) __arm_shared_za { +void test_svstr_vnum_za_var(uint32_t slice_base, void *ptr, int64_t vnum) __arm_in("za") { svstr_vnum_za(slice_base, ptr, vnum); } @@ -53,6 +53,6 @@ void test_svstr_vnum_za_var(uint32_t slice_base, void *ptr, int64_t vnum) __arm_ // CHECK-NEXT: tail call void @llvm.aarch64.sme.str(i32 [[SLICE_BASE:%.*]], ptr [[PTR:%.*]], i32 16) // CHECK-NEXT: ret void // -void test_svstr_vnum_za_2(uint32_t slice_base, void *ptr) __arm_shared_za { +void test_svstr_vnum_za_2(uint32_t slice_base, void *ptr) __arm_in("za") { svstr_vnum_za(slice_base, ptr, 16); } diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_write.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_write.c index 38c8402c3d0fa..5ddd90b95ce89 100644 --- a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_write.c +++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_write.c @@ -26,7 +26,7 @@ // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv16i8(i32 0, i32 [[SLICE_BASE]], [[PG]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void 
test_svwrite_hor_za8_s8(uint32_t slice_base, svbool_t pg, svint8_t zn) __arm_streaming __arm_shared_za { +void test_svwrite_hor_za8_s8(uint32_t slice_base, svbool_t pg, svint8_t zn) __arm_streaming __arm_inout("za") { SME_ACLE_FUNC(svwrite_hor_za8, _s8, _m)(0, slice_base, pg, zn); } @@ -44,7 +44,7 @@ void test_svwrite_hor_za8_s8(uint32_t slice_base, svbool_t pg, svint8_t zn) __ar // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv16i8(i32 0, i32 [[ADD]], [[PG]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_hor_za8_s8_1(uint32_t slice_base, svbool_t pg, svint8_t zn) __arm_streaming __arm_shared_za { +void test_svwrite_hor_za8_s8_1(uint32_t slice_base, svbool_t pg, svint8_t zn) __arm_streaming __arm_inout("za") { uint32_t slice = slice_base + 15; SME_ACLE_FUNC(svwrite_hor_za8, _s8, _m)(0, slice, pg, zn); } @@ -63,7 +63,7 @@ void test_svwrite_hor_za8_s8_1(uint32_t slice_base, svbool_t pg, svint8_t zn) __ // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv8i16(i32 0, i32 [[SLICE_BASE]], [[TMP0]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_hor_za16_s16(uint32_t slice_base, svbool_t pg, svint16_t zn) __arm_streaming __arm_shared_za { +void test_svwrite_hor_za16_s16(uint32_t slice_base, svbool_t pg, svint16_t zn) __arm_streaming __arm_inout("za") { SME_ACLE_FUNC(svwrite_hor_za16, _s16, _m)(0, slice_base, pg, zn); } @@ -83,7 +83,7 @@ void test_svwrite_hor_za16_s16(uint32_t slice_base, svbool_t pg, svint16_t zn) _ // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv8i16(i32 1, i32 [[ADD]], [[TMP0]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_hor_za16_s16_1(uint32_t slice_base, svbool_t pg, svint16_t zn) __arm_streaming __arm_shared_za { +void test_svwrite_hor_za16_s16_1(uint32_t slice_base, svbool_t pg, svint16_t zn) __arm_streaming __arm_inout("za") { uint32_t slice = slice_base + 7; SME_ACLE_FUNC(svwrite_hor_za16, _s16, _m)(1, slice, pg, zn); } @@ -102,7 +102,7 @@ void 
test_svwrite_hor_za16_s16_1(uint32_t slice_base, svbool_t pg, svint16_t zn) // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv4i32(i32 0, i32 [[SLICE_BASE]], [[TMP0]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_hor_za32_s32(uint32_t slice_base, svbool_t pg, svint32_t zn) __arm_streaming __arm_shared_za { +void test_svwrite_hor_za32_s32(uint32_t slice_base, svbool_t pg, svint32_t zn) __arm_streaming __arm_inout("za") { SME_ACLE_FUNC(svwrite_hor_za32, _s32, _m)(0, slice_base, pg, zn); } @@ -122,7 +122,7 @@ void test_svwrite_hor_za32_s32(uint32_t slice_base, svbool_t pg, svint32_t zn) _ // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv4i32(i32 3, i32 [[ADD]], [[TMP0]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_hor_za32_s32_1(uint32_t slice_base, svbool_t pg, svint32_t zn) __arm_streaming __arm_shared_za { +void test_svwrite_hor_za32_s32_1(uint32_t slice_base, svbool_t pg, svint32_t zn) __arm_streaming __arm_inout("za") { uint32_t slice = slice_base + 3; SME_ACLE_FUNC(svwrite_hor_za32, _s32, _m)(3, slice, pg, zn); } @@ -141,7 +141,7 @@ void test_svwrite_hor_za32_s32_1(uint32_t slice_base, svbool_t pg, svint32_t zn) // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv2i64(i32 0, i32 [[SLICE_BASE]], [[TMP0]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_hor_za64_s64(uint32_t slice_base, svbool_t pg, svint64_t zn) __arm_streaming __arm_shared_za { +void test_svwrite_hor_za64_s64(uint32_t slice_base, svbool_t pg, svint64_t zn) __arm_streaming __arm_inout("za") { SME_ACLE_FUNC(svwrite_hor_za64, _s64, _m)(0, slice_base, pg, zn); } @@ -161,7 +161,7 @@ void test_svwrite_hor_za64_s64(uint32_t slice_base, svbool_t pg, svint64_t zn) _ // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv2i64(i32 7, i32 [[ADD]], [[TMP0]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_hor_za64_s64_1(uint32_t slice_base, svbool_t pg, svint64_t zn) __arm_streaming __arm_shared_za { 
+void test_svwrite_hor_za64_s64_1(uint32_t slice_base, svbool_t pg, svint64_t zn) __arm_streaming __arm_inout("za") { uint32_t slice = slice_base + 1; SME_ACLE_FUNC(svwrite_hor_za64, _s64, _m)(7, slice, pg, zn); } @@ -178,7 +178,7 @@ void test_svwrite_hor_za64_s64_1(uint32_t slice_base, svbool_t pg, svint64_t zn) // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv16i8(i32 0, i32 [[SLICE_BASE]], [[PG]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_hor_za8_u8(uint32_t slice_base, svbool_t pg, svuint8_t zn) __arm_streaming __arm_shared_za { +void test_svwrite_hor_za8_u8(uint32_t slice_base, svbool_t pg, svuint8_t zn) __arm_streaming __arm_inout("za") { SME_ACLE_FUNC(svwrite_hor_za8, _u8, _m)(0, slice_base, pg, zn); } @@ -196,7 +196,7 @@ void test_svwrite_hor_za8_u8(uint32_t slice_base, svbool_t pg, svuint8_t zn) __a // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv16i8(i32 0, i32 [[ADD]], [[PG]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_hor_za8_u8_1(uint32_t slice_base, svbool_t pg, svuint8_t zn) __arm_streaming __arm_shared_za { +void test_svwrite_hor_za8_u8_1(uint32_t slice_base, svbool_t pg, svuint8_t zn) __arm_streaming __arm_inout("za") { uint32_t slice = slice_base + 15; SME_ACLE_FUNC(svwrite_hor_za8, _u8, _m)(0, slice, pg, zn); } @@ -215,7 +215,7 @@ void test_svwrite_hor_za8_u8_1(uint32_t slice_base, svbool_t pg, svuint8_t zn) _ // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv8i16(i32 0, i32 [[SLICE_BASE]], [[TMP0]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_hor_za16_u16(uint32_t slice_base, svbool_t pg, svuint16_t zn) __arm_streaming __arm_shared_za { +void test_svwrite_hor_za16_u16(uint32_t slice_base, svbool_t pg, svuint16_t zn) __arm_streaming __arm_inout("za") { SME_ACLE_FUNC(svwrite_hor_za16, _u16, _m)(0, slice_base, pg, zn); } @@ -235,7 +235,7 @@ void test_svwrite_hor_za16_u16(uint32_t slice_base, svbool_t pg, svuint16_t zn) // CHECK-CXX-NEXT: tail call 
void @llvm.aarch64.sme.write.horiz.nxv8i16(i32 1, i32 [[ADD]], [[TMP0]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_hor_za16_u16_1(uint32_t slice_base, svbool_t pg, svuint16_t zn) __arm_streaming __arm_shared_za { +void test_svwrite_hor_za16_u16_1(uint32_t slice_base, svbool_t pg, svuint16_t zn) __arm_streaming __arm_inout("za") { uint32_t slice = slice_base + 7; SME_ACLE_FUNC(svwrite_hor_za16, _u16, _m)(1, slice, pg, zn); } @@ -254,7 +254,7 @@ void test_svwrite_hor_za16_u16_1(uint32_t slice_base, svbool_t pg, svuint16_t zn // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv4i32(i32 0, i32 [[SLICE_BASE]], [[TMP0]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_hor_za32_u32(uint32_t slice_base, svbool_t pg, svuint32_t zn) __arm_streaming __arm_shared_za { +void test_svwrite_hor_za32_u32(uint32_t slice_base, svbool_t pg, svuint32_t zn) __arm_streaming __arm_inout("za") { SME_ACLE_FUNC(svwrite_hor_za32, _u32, _m)(0, slice_base, pg, zn); } @@ -274,7 +274,7 @@ void test_svwrite_hor_za32_u32(uint32_t slice_base, svbool_t pg, svuint32_t zn) // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv4i32(i32 3, i32 [[ADD]], [[TMP0]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_hor_za32_u32_1(uint32_t slice_base, svbool_t pg, svuint32_t zn) __arm_streaming __arm_shared_za { +void test_svwrite_hor_za32_u32_1(uint32_t slice_base, svbool_t pg, svuint32_t zn) __arm_streaming __arm_inout("za") { uint32_t slice = slice_base + 3; SME_ACLE_FUNC(svwrite_hor_za32, _u32, _m)(3, slice, pg, zn); } @@ -293,7 +293,7 @@ void test_svwrite_hor_za32_u32_1(uint32_t slice_base, svbool_t pg, svuint32_t zn // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv2i64(i32 0, i32 [[SLICE_BASE]], [[TMP0]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_hor_za64_u64(uint32_t slice_base, svbool_t pg, svuint64_t zn) __arm_streaming __arm_shared_za { +void test_svwrite_hor_za64_u64(uint32_t slice_base, svbool_t pg, 
svuint64_t zn) __arm_streaming __arm_inout("za") { SME_ACLE_FUNC(svwrite_hor_za64, _u64, _m)(0, slice_base, pg, zn); } @@ -313,7 +313,7 @@ void test_svwrite_hor_za64_u64(uint32_t slice_base, svbool_t pg, svuint64_t zn) // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv2i64(i32 7, i32 [[ADD]], [[TMP0]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_hor_za64_u64_1(uint32_t slice_base, svbool_t pg, svuint64_t zn) __arm_streaming __arm_shared_za { +void test_svwrite_hor_za64_u64_1(uint32_t slice_base, svbool_t pg, svuint64_t zn) __arm_streaming __arm_inout("za") { uint32_t slice = slice_base + 1; SME_ACLE_FUNC(svwrite_hor_za64, _u64, _m)(7, slice, pg, zn); } @@ -332,7 +332,7 @@ void test_svwrite_hor_za64_u64_1(uint32_t slice_base, svbool_t pg, svuint64_t zn // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv8f16(i32 0, i32 [[SLICE_BASE]], [[TMP0]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_hor_za16_f16(uint32_t slice_base, svbool_t pg, svfloat16_t zn) __arm_streaming __arm_shared_za { +void test_svwrite_hor_za16_f16(uint32_t slice_base, svbool_t pg, svfloat16_t zn) __arm_streaming __arm_inout("za") { SME_ACLE_FUNC(svwrite_hor_za16, _f16, _m)(0, slice_base, pg, zn); } @@ -352,7 +352,7 @@ void test_svwrite_hor_za16_f16(uint32_t slice_base, svbool_t pg, svfloat16_t zn) // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv8f16(i32 1, i32 [[ADD]], [[TMP0]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_hor_za16_f16_1(uint32_t slice_base, svbool_t pg, svfloat16_t zn) __arm_streaming __arm_shared_za { +void test_svwrite_hor_za16_f16_1(uint32_t slice_base, svbool_t pg, svfloat16_t zn) __arm_streaming __arm_inout("za") { uint32_t slice = slice_base + 7; SME_ACLE_FUNC(svwrite_hor_za16, _f16, _m)(1, slice, pg, zn); } @@ -371,7 +371,7 @@ void test_svwrite_hor_za16_f16_1(uint32_t slice_base, svbool_t pg, svfloat16_t z // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv8bf16(i32 0, 
i32 [[SLICE_BASE]], [[TMP0]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_hor_za16_bf16(uint32_t slice_base, svbool_t pg, svbfloat16_t zn) __arm_streaming __arm_shared_za { +void test_svwrite_hor_za16_bf16(uint32_t slice_base, svbool_t pg, svbfloat16_t zn) __arm_streaming __arm_inout("za") { SME_ACLE_FUNC(svwrite_hor_za16, _bf16, _m)(0, slice_base, pg, zn); } @@ -391,7 +391,7 @@ void test_svwrite_hor_za16_bf16(uint32_t slice_base, svbool_t pg, svbfloat16_t z // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv8bf16(i32 1, i32 [[ADD]], [[TMP0]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_hor_za16_bf16_1(uint32_t slice_base, svbool_t pg, svbfloat16_t zn) __arm_streaming __arm_shared_za { +void test_svwrite_hor_za16_bf16_1(uint32_t slice_base, svbool_t pg, svbfloat16_t zn) __arm_streaming __arm_inout("za") { uint32_t slice = slice_base + 7; SME_ACLE_FUNC(svwrite_hor_za16, _bf16, _m)(1, slice, pg, zn); } @@ -410,7 +410,7 @@ void test_svwrite_hor_za16_bf16_1(uint32_t slice_base, svbool_t pg, svbfloat16_t // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv4f32(i32 0, i32 [[SLICE_BASE]], [[TMP0]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_hor_za32_f32(uint32_t slice_base, svbool_t pg, svfloat32_t zn) __arm_streaming __arm_shared_za { +void test_svwrite_hor_za32_f32(uint32_t slice_base, svbool_t pg, svfloat32_t zn) __arm_streaming __arm_inout("za") { SME_ACLE_FUNC(svwrite_hor_za32, _f32, _m)(0, slice_base, pg, zn); } @@ -430,7 +430,7 @@ void test_svwrite_hor_za32_f32(uint32_t slice_base, svbool_t pg, svfloat32_t zn) // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv4f32(i32 3, i32 [[ADD]], [[TMP0]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_hor_za32_f32_1(uint32_t slice_base, svbool_t pg, svfloat32_t zn) __arm_streaming __arm_shared_za { +void test_svwrite_hor_za32_f32_1(uint32_t slice_base, svbool_t pg, svfloat32_t zn) __arm_streaming __arm_inout("za") { uint32_t 
slice = slice_base + 3; SME_ACLE_FUNC(svwrite_hor_za32, _f32, _m)(3, slice, pg, zn); } @@ -449,7 +449,7 @@ void test_svwrite_hor_za32_f32_1(uint32_t slice_base, svbool_t pg, svfloat32_t z // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv2f64(i32 0, i32 [[SLICE_BASE]], [[TMP0]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_hor_za64_f64(uint32_t slice_base, svbool_t pg, svfloat64_t zn) __arm_streaming __arm_shared_za { +void test_svwrite_hor_za64_f64(uint32_t slice_base, svbool_t pg, svfloat64_t zn) __arm_streaming __arm_inout("za") { SME_ACLE_FUNC(svwrite_hor_za64, _f64, _m)(0, slice_base, pg, zn); } @@ -469,7 +469,7 @@ void test_svwrite_hor_za64_f64(uint32_t slice_base, svbool_t pg, svfloat64_t zn) // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv2f64(i32 7, i32 [[ADD]], [[TMP0]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_hor_za64_f64_1(uint32_t slice_base, svbool_t pg, svfloat64_t zn) __arm_streaming __arm_shared_za { +void test_svwrite_hor_za64_f64_1(uint32_t slice_base, svbool_t pg, svfloat64_t zn) __arm_streaming __arm_inout("za") { uint32_t slice = slice_base + 1; SME_ACLE_FUNC(svwrite_hor_za64, _f64, _m)(7, slice, pg, zn); } @@ -486,7 +486,7 @@ void test_svwrite_hor_za64_f64_1(uint32_t slice_base, svbool_t pg, svfloat64_t z // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv16i8(i32 0, i32 [[SLICE_BASE]], [[PG]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_hor_za128_s8(uint32_t slice_base, svbool_t pg, svint8_t zn) __arm_streaming __arm_shared_za { +void test_svwrite_hor_za128_s8(uint32_t slice_base, svbool_t pg, svint8_t zn) __arm_streaming __arm_inout("za") { SME_ACLE_FUNC(svwrite_hor_za128, _s8, _m)(0, slice_base, pg, zn); } @@ -502,7 +502,7 @@ void test_svwrite_hor_za128_s8(uint32_t slice_base, svbool_t pg, svint8_t zn) __ // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv16i8(i32 15, i32 [[SLICE_BASE]], [[PG]], [[ZN]]) // CHECK-CXX-NEXT: ret 
void // -void test_svwrite_hor_za128_s8_1(uint32_t slice_base, svbool_t pg, svint8_t zn) __arm_streaming __arm_shared_za { +void test_svwrite_hor_za128_s8_1(uint32_t slice_base, svbool_t pg, svint8_t zn) __arm_streaming __arm_inout("za") { SME_ACLE_FUNC(svwrite_hor_za128, _s8, _m)(15, slice_base, pg, zn); } @@ -520,7 +520,7 @@ void test_svwrite_hor_za128_s8_1(uint32_t slice_base, svbool_t pg, svint8_t zn) // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv8i16(i32 0, i32 [[SLICE_BASE]], [[TMP0]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_hor_za128_s16(uint32_t slice_base, svbool_t pg, svint16_t zn) __arm_streaming __arm_shared_za { +void test_svwrite_hor_za128_s16(uint32_t slice_base, svbool_t pg, svint16_t zn) __arm_streaming __arm_inout("za") { SME_ACLE_FUNC(svwrite_hor_za128, _s16, _m)(0, slice_base, pg, zn); } @@ -538,7 +538,7 @@ void test_svwrite_hor_za128_s16(uint32_t slice_base, svbool_t pg, svint16_t zn) // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv8i16(i32 15, i32 [[SLICE_BASE]], [[TMP0]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_hor_za128_s16_1(uint32_t slice_base, svbool_t pg, svint16_t zn) __arm_streaming __arm_shared_za { +void test_svwrite_hor_za128_s16_1(uint32_t slice_base, svbool_t pg, svint16_t zn) __arm_streaming __arm_inout("za") { SME_ACLE_FUNC(svwrite_hor_za128, _s16, _m)(15, slice_base, pg, zn); } @@ -556,7 +556,7 @@ void test_svwrite_hor_za128_s16_1(uint32_t slice_base, svbool_t pg, svint16_t zn // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv4i32(i32 0, i32 [[SLICE_BASE]], [[TMP0]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_hor_za128_s32(uint32_t slice_base, svbool_t pg, svint32_t zn) __arm_streaming __arm_shared_za { +void test_svwrite_hor_za128_s32(uint32_t slice_base, svbool_t pg, svint32_t zn) __arm_streaming __arm_inout("za") { SME_ACLE_FUNC(svwrite_hor_za128, _s32, _m)(0, slice_base, pg, zn); } @@ -574,7 +574,7 @@ void 
test_svwrite_hor_za128_s32(uint32_t slice_base, svbool_t pg, svint32_t zn) // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv4i32(i32 15, i32 [[SLICE_BASE]], [[TMP0]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_hor_za128_s32_1(uint32_t slice_base, svbool_t pg, svint32_t zn) __arm_streaming __arm_shared_za { +void test_svwrite_hor_za128_s32_1(uint32_t slice_base, svbool_t pg, svint32_t zn) __arm_streaming __arm_inout("za") { SME_ACLE_FUNC(svwrite_hor_za128, _s32, _m)(15, slice_base, pg, zn); } @@ -592,7 +592,7 @@ void test_svwrite_hor_za128_s32_1(uint32_t slice_base, svbool_t pg, svint32_t zn // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv2i64(i32 0, i32 [[SLICE_BASE]], [[TMP0]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_hor_za128_s64(uint32_t slice_base, svbool_t pg, svint64_t zn) __arm_streaming __arm_shared_za { +void test_svwrite_hor_za128_s64(uint32_t slice_base, svbool_t pg, svint64_t zn) __arm_streaming __arm_inout("za") { SME_ACLE_FUNC(svwrite_hor_za128, _s64, _m)(0, slice_base, pg, zn); } @@ -610,7 +610,7 @@ void test_svwrite_hor_za128_s64(uint32_t slice_base, svbool_t pg, svint64_t zn) // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv2i64(i32 15, i32 [[SLICE_BASE]], [[TMP0]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_hor_za128_s64_1(uint32_t slice_base, svbool_t pg, svint64_t zn) __arm_streaming __arm_shared_za { +void test_svwrite_hor_za128_s64_1(uint32_t slice_base, svbool_t pg, svint64_t zn) __arm_streaming __arm_inout("za") { SME_ACLE_FUNC(svwrite_hor_za128, _s64, _m)(15, slice_base, pg, zn); } @@ -626,7 +626,7 @@ void test_svwrite_hor_za128_s64_1(uint32_t slice_base, svbool_t pg, svint64_t zn // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv16i8(i32 0, i32 [[SLICE_BASE]], [[PG]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_hor_za128_u8(uint32_t slice_base, svbool_t pg, svuint8_t zn) __arm_streaming __arm_shared_za { 
+void test_svwrite_hor_za128_u8(uint32_t slice_base, svbool_t pg, svuint8_t zn) __arm_streaming __arm_inout("za") { SME_ACLE_FUNC(svwrite_hor_za128, _u8, _m)(0, slice_base, pg, zn); } @@ -642,7 +642,7 @@ void test_svwrite_hor_za128_u8(uint32_t slice_base, svbool_t pg, svuint8_t zn) _ // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv16i8(i32 15, i32 [[SLICE_BASE]], [[PG]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_hor_za128_u8_1(uint32_t slice_base, svbool_t pg, svuint8_t zn) __arm_streaming __arm_shared_za { +void test_svwrite_hor_za128_u8_1(uint32_t slice_base, svbool_t pg, svuint8_t zn) __arm_streaming __arm_inout("za") { SME_ACLE_FUNC(svwrite_hor_za128, _u8, _m)(15, slice_base, pg, zn); } @@ -660,7 +660,7 @@ void test_svwrite_hor_za128_u8_1(uint32_t slice_base, svbool_t pg, svuint8_t zn) // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv8i16(i32 0, i32 [[SLICE_BASE]], [[TMP0]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_hor_za128_u16(uint32_t slice_base, svbool_t pg, svuint16_t zn) __arm_streaming __arm_shared_za { +void test_svwrite_hor_za128_u16(uint32_t slice_base, svbool_t pg, svuint16_t zn) __arm_streaming __arm_inout("za") { SME_ACLE_FUNC(svwrite_hor_za128, _u16, _m)(0, slice_base, pg, zn); } @@ -678,7 +678,7 @@ void test_svwrite_hor_za128_u16(uint32_t slice_base, svbool_t pg, svuint16_t zn) // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv8i16(i32 15, i32 [[SLICE_BASE]], [[TMP0]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_hor_za128_u16_1(uint32_t slice_base, svbool_t pg, svuint16_t zn) __arm_streaming __arm_shared_za { +void test_svwrite_hor_za128_u16_1(uint32_t slice_base, svbool_t pg, svuint16_t zn) __arm_streaming __arm_inout("za") { SME_ACLE_FUNC(svwrite_hor_za128, _u16, _m)(15, slice_base, pg, zn); } @@ -696,7 +696,7 @@ void test_svwrite_hor_za128_u16_1(uint32_t slice_base, svbool_t pg, svuint16_t z // CHECK-CXX-NEXT: tail call void 
@llvm.aarch64.sme.writeq.horiz.nxv4i32(i32 0, i32 [[SLICE_BASE]], [[TMP0]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_hor_za128_u32(uint32_t slice_base, svbool_t pg, svuint32_t zn) __arm_streaming __arm_shared_za { +void test_svwrite_hor_za128_u32(uint32_t slice_base, svbool_t pg, svuint32_t zn) __arm_streaming __arm_inout("za") { SME_ACLE_FUNC(svwrite_hor_za128, _u32, _m)(0, slice_base, pg, zn); } @@ -714,7 +714,7 @@ void test_svwrite_hor_za128_u32(uint32_t slice_base, svbool_t pg, svuint32_t zn) // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv4i32(i32 15, i32 [[SLICE_BASE]], [[TMP0]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_hor_za128_u32_1(uint32_t slice_base, svbool_t pg, svuint32_t zn) __arm_streaming __arm_shared_za { +void test_svwrite_hor_za128_u32_1(uint32_t slice_base, svbool_t pg, svuint32_t zn) __arm_streaming __arm_inout("za") { SME_ACLE_FUNC(svwrite_hor_za128, _u32, _m)(15, slice_base, pg, zn); } @@ -732,7 +732,7 @@ void test_svwrite_hor_za128_u32_1(uint32_t slice_base, svbool_t pg, svuint32_t z // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv2i64(i32 0, i32 [[SLICE_BASE]], [[TMP0]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_hor_za128_u64(uint32_t slice_base, svbool_t pg, svuint64_t zn) __arm_streaming __arm_shared_za { +void test_svwrite_hor_za128_u64(uint32_t slice_base, svbool_t pg, svuint64_t zn) __arm_streaming __arm_inout("za") { SME_ACLE_FUNC(svwrite_hor_za128, _u64, _m)(0, slice_base, pg, zn); } @@ -750,7 +750,7 @@ void test_svwrite_hor_za128_u64(uint32_t slice_base, svbool_t pg, svuint64_t zn) // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv2i64(i32 15, i32 [[SLICE_BASE]], [[TMP0]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_hor_za128_u64_1(uint32_t slice_base, svbool_t pg, svuint64_t zn) __arm_streaming __arm_shared_za { +void test_svwrite_hor_za128_u64_1(uint32_t slice_base, svbool_t pg, svuint64_t zn) __arm_streaming 
__arm_inout("za") { SME_ACLE_FUNC(svwrite_hor_za128, _u64, _m)(15, slice_base, pg, zn); } @@ -768,7 +768,7 @@ void test_svwrite_hor_za128_u64_1(uint32_t slice_base, svbool_t pg, svuint64_t z // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv8f16(i32 0, i32 [[SLICE_BASE]], [[TMP0]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_hor_za128_f16(uint32_t slice_base, svbool_t pg, svfloat16_t zn) __arm_streaming __arm_shared_za { +void test_svwrite_hor_za128_f16(uint32_t slice_base, svbool_t pg, svfloat16_t zn) __arm_streaming __arm_inout("za") { SME_ACLE_FUNC(svwrite_hor_za128, _f16, _m)(0, slice_base, pg, zn); } @@ -786,7 +786,7 @@ void test_svwrite_hor_za128_f16(uint32_t slice_base, svbool_t pg, svfloat16_t zn // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv8f16(i32 15, i32 [[SLICE_BASE]], [[TMP0]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_hor_za128_f16_1(uint32_t slice_base, svbool_t pg, svfloat16_t zn) __arm_streaming __arm_shared_za { +void test_svwrite_hor_za128_f16_1(uint32_t slice_base, svbool_t pg, svfloat16_t zn) __arm_streaming __arm_inout("za") { SME_ACLE_FUNC(svwrite_hor_za128, _f16, _m)(15, slice_base, pg, zn); } @@ -804,7 +804,7 @@ void test_svwrite_hor_za128_f16_1(uint32_t slice_base, svbool_t pg, svfloat16_t // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv8bf16(i32 0, i32 [[SLICE_BASE]], [[TMP0]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_hor_za128_bf16(uint32_t slice_base, svbool_t pg, svbfloat16_t zn) __arm_streaming __arm_shared_za { +void test_svwrite_hor_za128_bf16(uint32_t slice_base, svbool_t pg, svbfloat16_t zn) __arm_streaming __arm_inout("za") { SME_ACLE_FUNC(svwrite_hor_za128, _bf16, _m)(0, slice_base, pg, zn); } @@ -822,7 +822,7 @@ void test_svwrite_hor_za128_bf16(uint32_t slice_base, svbool_t pg, svbfloat16_t // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv8bf16(i32 15, i32 [[SLICE_BASE]], [[TMP0]], [[ZN]]) // 
CHECK-CXX-NEXT: ret void // -void test_svwrite_hor_za128_bf16_1(uint32_t slice_base, svbool_t pg, svbfloat16_t zn) __arm_streaming __arm_shared_za { +void test_svwrite_hor_za128_bf16_1(uint32_t slice_base, svbool_t pg, svbfloat16_t zn) __arm_streaming __arm_inout("za") { SME_ACLE_FUNC(svwrite_hor_za128, _bf16, _m)(15, slice_base, pg, zn); } @@ -840,7 +840,7 @@ void test_svwrite_hor_za128_bf16_1(uint32_t slice_base, svbool_t pg, svbfloat16_ // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv4f32(i32 0, i32 [[SLICE_BASE]], [[TMP0]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_hor_za128_f32(uint32_t slice_base, svbool_t pg, svfloat32_t zn) __arm_streaming __arm_shared_za { +void test_svwrite_hor_za128_f32(uint32_t slice_base, svbool_t pg, svfloat32_t zn) __arm_streaming __arm_inout("za") { SME_ACLE_FUNC(svwrite_hor_za128, _f32, _m)(0, slice_base, pg, zn); } @@ -858,7 +858,7 @@ void test_svwrite_hor_za128_f32(uint32_t slice_base, svbool_t pg, svfloat32_t zn // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv4f32(i32 15, i32 [[SLICE_BASE]], [[TMP0]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_hor_za128_f32_1(uint32_t slice_base, svbool_t pg, svfloat32_t zn) __arm_streaming __arm_shared_za { +void test_svwrite_hor_za128_f32_1(uint32_t slice_base, svbool_t pg, svfloat32_t zn) __arm_streaming __arm_inout("za") { SME_ACLE_FUNC(svwrite_hor_za128, _f32, _m)(15, slice_base, pg, zn); } @@ -876,7 +876,7 @@ void test_svwrite_hor_za128_f32_1(uint32_t slice_base, svbool_t pg, svfloat32_t // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv2f64(i32 0, i32 [[SLICE_BASE]], [[TMP0]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_hor_za128_f64(uint32_t slice_base, svbool_t pg, svfloat64_t zn) __arm_streaming __arm_shared_za { +void test_svwrite_hor_za128_f64(uint32_t slice_base, svbool_t pg, svfloat64_t zn) __arm_streaming __arm_inout("za") { SME_ACLE_FUNC(svwrite_hor_za128, _f64, _m)(0, 
slice_base, pg, zn); } @@ -894,7 +894,7 @@ void test_svwrite_hor_za128_f64(uint32_t slice_base, svbool_t pg, svfloat64_t zn // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv2f64(i32 15, i32 [[SLICE_BASE]], [[TMP0]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_hor_za128_f64_1(uint32_t slice_base, svbool_t pg, svfloat64_t zn) __arm_streaming __arm_shared_za { +void test_svwrite_hor_za128_f64_1(uint32_t slice_base, svbool_t pg, svfloat64_t zn) __arm_streaming __arm_inout("za") { SME_ACLE_FUNC(svwrite_hor_za128, _f64, _m)(15, slice_base, pg, zn); } @@ -910,7 +910,7 @@ void test_svwrite_hor_za128_f64_1(uint32_t slice_base, svbool_t pg, svfloat64_t // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv16i8(i32 0, i32 [[SLICE_BASE]], [[PG]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_ver_za8_s8(uint32_t slice_base, svbool_t pg, svint8_t zn) __arm_streaming __arm_shared_za { +void test_svwrite_ver_za8_s8(uint32_t slice_base, svbool_t pg, svint8_t zn) __arm_streaming __arm_inout("za") { SME_ACLE_FUNC(svwrite_ver_za8, _s8, _m)(0, slice_base, pg, zn); } @@ -928,7 +928,7 @@ void test_svwrite_ver_za8_s8(uint32_t slice_base, svbool_t pg, svint8_t zn) __ar // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv16i8(i32 0, i32 [[ADD]], [[PG]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_ver_za8_s8_1(uint32_t slice_base, svbool_t pg, svint8_t zn) __arm_streaming __arm_shared_za { +void test_svwrite_ver_za8_s8_1(uint32_t slice_base, svbool_t pg, svint8_t zn) __arm_streaming __arm_inout("za") { uint32_t slice = slice_base + 15; SME_ACLE_FUNC(svwrite_ver_za8, _s8, _m)(0, slice, pg, zn); } @@ -947,7 +947,7 @@ void test_svwrite_ver_za8_s8_1(uint32_t slice_base, svbool_t pg, svint8_t zn) __ // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv8i16(i32 0, i32 [[SLICE_BASE]], [[TMP0]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_ver_za16_s16(uint32_t slice_base, svbool_t pg, 
svint16_t zn) __arm_streaming __arm_shared_za { +void test_svwrite_ver_za16_s16(uint32_t slice_base, svbool_t pg, svint16_t zn) __arm_streaming __arm_inout("za") { SME_ACLE_FUNC(svwrite_ver_za16, _s16, _m)(0, slice_base, pg, zn); } @@ -967,7 +967,7 @@ void test_svwrite_ver_za16_s16(uint32_t slice_base, svbool_t pg, svint16_t zn) _ // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv8i16(i32 1, i32 [[ADD]], [[TMP0]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_ver_za16_s16_1(uint32_t slice_base, svbool_t pg, svint16_t zn) __arm_streaming __arm_shared_za { +void test_svwrite_ver_za16_s16_1(uint32_t slice_base, svbool_t pg, svint16_t zn) __arm_streaming __arm_inout("za") { uint32_t slice = slice_base + 7; SME_ACLE_FUNC(svwrite_ver_za16, _s16, _m)(1, slice, pg, zn); } @@ -986,7 +986,7 @@ void test_svwrite_ver_za16_s16_1(uint32_t slice_base, svbool_t pg, svint16_t zn) // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv4i32(i32 0, i32 [[SLICE_BASE]], [[TMP0]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_ver_za32_s32(uint32_t slice_base, svbool_t pg, svint32_t zn) __arm_streaming __arm_shared_za { +void test_svwrite_ver_za32_s32(uint32_t slice_base, svbool_t pg, svint32_t zn) __arm_streaming __arm_inout("za") { SME_ACLE_FUNC(svwrite_ver_za32, _s32, _m)(0, slice_base, pg, zn); } @@ -1006,7 +1006,7 @@ void test_svwrite_ver_za32_s32(uint32_t slice_base, svbool_t pg, svint32_t zn) _ // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv4i32(i32 3, i32 [[ADD]], [[TMP0]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_ver_za32_s32_1(uint32_t slice_base, svbool_t pg, svint32_t zn) __arm_streaming __arm_shared_za { +void test_svwrite_ver_za32_s32_1(uint32_t slice_base, svbool_t pg, svint32_t zn) __arm_streaming __arm_inout("za") { uint32_t slice = slice_base + 3; SME_ACLE_FUNC(svwrite_ver_za32, _s32, _m)(3, slice, pg, zn); } @@ -1025,7 +1025,7 @@ void test_svwrite_ver_za32_s32_1(uint32_t slice_base, 
svbool_t pg, svint32_t zn) // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv2i64(i32 0, i32 [[SLICE_BASE]], [[TMP0]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_ver_za64_s64(uint32_t slice_base, svbool_t pg, svint64_t zn) __arm_streaming __arm_shared_za { +void test_svwrite_ver_za64_s64(uint32_t slice_base, svbool_t pg, svint64_t zn) __arm_streaming __arm_inout("za") { SME_ACLE_FUNC(svwrite_ver_za64, _s64, _m)(0, slice_base, pg, zn); } @@ -1045,7 +1045,7 @@ void test_svwrite_ver_za64_s64(uint32_t slice_base, svbool_t pg, svint64_t zn) _ // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv2i64(i32 7, i32 [[ADD]], [[TMP0]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_ver_za64_s64_1(uint32_t slice_base, svbool_t pg, svint64_t zn) __arm_streaming __arm_shared_za { +void test_svwrite_ver_za64_s64_1(uint32_t slice_base, svbool_t pg, svint64_t zn) __arm_streaming __arm_inout("za") { uint32_t slice = slice_base + 1; SME_ACLE_FUNC(svwrite_ver_za64, _s64, _m)(7, slice, pg, zn); } @@ -1062,7 +1062,7 @@ void test_svwrite_ver_za64_s64_1(uint32_t slice_base, svbool_t pg, svint64_t zn) // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv16i8(i32 0, i32 [[SLICE_BASE]], [[PG]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_ver_za8_u8(uint32_t slice_base, svbool_t pg, svuint8_t zn) __arm_streaming __arm_shared_za { +void test_svwrite_ver_za8_u8(uint32_t slice_base, svbool_t pg, svuint8_t zn) __arm_streaming __arm_inout("za") { SME_ACLE_FUNC(svwrite_ver_za8, _u8, _m)(0, slice_base, pg, zn); } @@ -1080,7 +1080,7 @@ void test_svwrite_ver_za8_u8(uint32_t slice_base, svbool_t pg, svuint8_t zn) __a // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv16i8(i32 0, i32 [[ADD]], [[PG]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_ver_za8_u8_1(uint32_t slice_base, svbool_t pg, svuint8_t zn) __arm_streaming __arm_shared_za { +void test_svwrite_ver_za8_u8_1(uint32_t slice_base, 
svbool_t pg, svuint8_t zn) __arm_streaming __arm_inout("za") { uint32_t slice = slice_base + 15; SME_ACLE_FUNC(svwrite_ver_za8, _u8, _m)(0, slice, pg, zn); } @@ -1099,7 +1099,7 @@ void test_svwrite_ver_za8_u8_1(uint32_t slice_base, svbool_t pg, svuint8_t zn) _ // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv8i16(i32 0, i32 [[SLICE_BASE]], [[TMP0]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_ver_za16_u16(uint32_t slice_base, svbool_t pg, svuint16_t zn) __arm_streaming __arm_shared_za { +void test_svwrite_ver_za16_u16(uint32_t slice_base, svbool_t pg, svuint16_t zn) __arm_streaming __arm_inout("za") { SME_ACLE_FUNC(svwrite_ver_za16, _u16, _m)(0, slice_base, pg, zn); } @@ -1119,7 +1119,7 @@ void test_svwrite_ver_za16_u16(uint32_t slice_base, svbool_t pg, svuint16_t zn) // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv8i16(i32 1, i32 [[ADD]], [[TMP0]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_ver_za16_u16_1(uint32_t slice_base, svbool_t pg, svuint16_t zn) __arm_streaming __arm_shared_za { +void test_svwrite_ver_za16_u16_1(uint32_t slice_base, svbool_t pg, svuint16_t zn) __arm_streaming __arm_inout("za") { uint32_t slice = slice_base + 7; SME_ACLE_FUNC(svwrite_ver_za16, _u16, _m)(1, slice, pg, zn); } @@ -1138,7 +1138,7 @@ void test_svwrite_ver_za16_u16_1(uint32_t slice_base, svbool_t pg, svuint16_t zn // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv4i32(i32 0, i32 [[SLICE_BASE]], [[TMP0]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_ver_za32_u32(uint32_t slice_base, svbool_t pg, svuint32_t zn) __arm_streaming __arm_shared_za { +void test_svwrite_ver_za32_u32(uint32_t slice_base, svbool_t pg, svuint32_t zn) __arm_streaming __arm_inout("za") { SME_ACLE_FUNC(svwrite_ver_za32, _u32, _m)(0, slice_base, pg, zn); } @@ -1158,7 +1158,7 @@ void test_svwrite_ver_za32_u32(uint32_t slice_base, svbool_t pg, svuint32_t zn) // CHECK-CXX-NEXT: tail call void 
@llvm.aarch64.sme.write.vert.nxv4i32(i32 3, i32 [[ADD]], [[TMP0]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_ver_za32_u32_1(uint32_t slice_base, svbool_t pg, svuint32_t zn) __arm_streaming __arm_shared_za { +void test_svwrite_ver_za32_u32_1(uint32_t slice_base, svbool_t pg, svuint32_t zn) __arm_streaming __arm_inout("za") { uint32_t slice = slice_base + 3; SME_ACLE_FUNC(svwrite_ver_za32, _u32, _m)(3, slice, pg, zn); } @@ -1177,7 +1177,7 @@ void test_svwrite_ver_za32_u32_1(uint32_t slice_base, svbool_t pg, svuint32_t zn // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv2i64(i32 0, i32 [[SLICE_BASE]], [[TMP0]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_ver_za64_u64(uint32_t slice_base, svbool_t pg, svuint64_t zn) __arm_streaming __arm_shared_za { +void test_svwrite_ver_za64_u64(uint32_t slice_base, svbool_t pg, svuint64_t zn) __arm_streaming __arm_inout("za") { SME_ACLE_FUNC(svwrite_ver_za64, _u64, _m)(0, slice_base, pg, zn); } @@ -1197,7 +1197,7 @@ void test_svwrite_ver_za64_u64(uint32_t slice_base, svbool_t pg, svuint64_t zn) // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv2i64(i32 7, i32 [[ADD]], [[TMP0]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_ver_za64_u64_1(uint32_t slice_base, svbool_t pg, svuint64_t zn) __arm_streaming __arm_shared_za { +void test_svwrite_ver_za64_u64_1(uint32_t slice_base, svbool_t pg, svuint64_t zn) __arm_streaming __arm_inout("za") { uint32_t slice = slice_base + 1; SME_ACLE_FUNC(svwrite_ver_za64, _u64, _m)(7, slice, pg, zn); } @@ -1216,7 +1216,7 @@ void test_svwrite_ver_za64_u64_1(uint32_t slice_base, svbool_t pg, svuint64_t zn // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv8f16(i32 0, i32 [[SLICE_BASE]], [[TMP0]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_ver_za16_f16(uint32_t slice_base, svbool_t pg, svfloat16_t zn) __arm_streaming __arm_shared_za { +void test_svwrite_ver_za16_f16(uint32_t slice_base, svbool_t pg, 
svfloat16_t zn) __arm_streaming __arm_inout("za") { SME_ACLE_FUNC(svwrite_ver_za16, _f16, _m)(0, slice_base, pg, zn); } @@ -1236,7 +1236,7 @@ void test_svwrite_ver_za16_f16(uint32_t slice_base, svbool_t pg, svfloat16_t zn) // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv8f16(i32 1, i32 [[ADD]], [[TMP0]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_ver_za16_f16_1(uint32_t slice_base, svbool_t pg, svfloat16_t zn) __arm_streaming __arm_shared_za { +void test_svwrite_ver_za16_f16_1(uint32_t slice_base, svbool_t pg, svfloat16_t zn) __arm_streaming __arm_inout("za") { uint32_t slice = slice_base + 7; SME_ACLE_FUNC(svwrite_ver_za16, _f16, _m)(1, slice, pg, zn); } @@ -1255,7 +1255,7 @@ void test_svwrite_ver_za16_f16_1(uint32_t slice_base, svbool_t pg, svfloat16_t z // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv8bf16(i32 0, i32 [[SLICE_BASE]], [[TMP0]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_ver_za16_bf16(uint32_t slice_base, svbool_t pg, svbfloat16_t zn) __arm_streaming __arm_shared_za { +void test_svwrite_ver_za16_bf16(uint32_t slice_base, svbool_t pg, svbfloat16_t zn) __arm_streaming __arm_inout("za") { SME_ACLE_FUNC(svwrite_ver_za16, _bf16, _m)(0, slice_base, pg, zn); } @@ -1275,7 +1275,7 @@ void test_svwrite_ver_za16_bf16(uint32_t slice_base, svbool_t pg, svbfloat16_t z // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv8bf16(i32 1, i32 [[ADD]], [[TMP0]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_ver_za16_bf16_1(uint32_t slice_base, svbool_t pg, svbfloat16_t zn) __arm_streaming __arm_shared_za { +void test_svwrite_ver_za16_bf16_1(uint32_t slice_base, svbool_t pg, svbfloat16_t zn) __arm_streaming __arm_inout("za") { uint32_t slice = slice_base + 7; SME_ACLE_FUNC(svwrite_ver_za16, _bf16, _m)(1, slice, pg, zn); } @@ -1294,7 +1294,7 @@ void test_svwrite_ver_za16_bf16_1(uint32_t slice_base, svbool_t pg, svbfloat16_t // CHECK-CXX-NEXT: tail call void 
@llvm.aarch64.sme.write.vert.nxv4f32(i32 0, i32 [[SLICE_BASE]], [[TMP0]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_ver_za32_f32(uint32_t slice_base, svbool_t pg, svfloat32_t zn) __arm_streaming __arm_shared_za { +void test_svwrite_ver_za32_f32(uint32_t slice_base, svbool_t pg, svfloat32_t zn) __arm_streaming __arm_inout("za") { SME_ACLE_FUNC(svwrite_ver_za32, _f32, _m)(0, slice_base, pg, zn); } @@ -1314,7 +1314,7 @@ void test_svwrite_ver_za32_f32(uint32_t slice_base, svbool_t pg, svfloat32_t zn) // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv4f32(i32 3, i32 [[ADD]], [[TMP0]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_ver_za32_f32_1(uint32_t slice_base, svbool_t pg, svfloat32_t zn) __arm_streaming __arm_shared_za { +void test_svwrite_ver_za32_f32_1(uint32_t slice_base, svbool_t pg, svfloat32_t zn) __arm_streaming __arm_inout("za") { uint32_t slice = slice_base + 3; SME_ACLE_FUNC(svwrite_ver_za32, _f32, _m)(3, slice, pg, zn); } @@ -1333,7 +1333,7 @@ void test_svwrite_ver_za32_f32_1(uint32_t slice_base, svbool_t pg, svfloat32_t z // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv2f64(i32 0, i32 [[SLICE_BASE]], [[TMP0]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_ver_za64_f64(uint32_t slice_base, svbool_t pg, svfloat64_t zn) __arm_streaming __arm_shared_za { +void test_svwrite_ver_za64_f64(uint32_t slice_base, svbool_t pg, svfloat64_t zn) __arm_streaming __arm_inout("za") { SME_ACLE_FUNC(svwrite_ver_za64, _f64, _m)(0, slice_base, pg, zn); } @@ -1353,7 +1353,7 @@ void test_svwrite_ver_za64_f64(uint32_t slice_base, svbool_t pg, svfloat64_t zn) // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv2f64(i32 7, i32 [[ADD]], [[TMP0]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_ver_za64_f64_1(uint32_t slice_base, svbool_t pg, svfloat64_t zn) __arm_streaming __arm_shared_za { +void test_svwrite_ver_za64_f64_1(uint32_t slice_base, svbool_t pg, svfloat64_t zn) 
__arm_streaming __arm_inout("za") { uint32_t slice = slice_base + 1; SME_ACLE_FUNC(svwrite_ver_za64, _f64, _m)(7, slice, pg, zn); } @@ -1370,7 +1370,7 @@ void test_svwrite_ver_za64_f64_1(uint32_t slice_base, svbool_t pg, svfloat64_t z // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv16i8(i32 0, i32 [[SLICE_BASE]], [[PG]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_ver_za128_s8(uint32_t slice_base, svbool_t pg, svint8_t zn) __arm_streaming __arm_shared_za { +void test_svwrite_ver_za128_s8(uint32_t slice_base, svbool_t pg, svint8_t zn) __arm_streaming __arm_inout("za") { SME_ACLE_FUNC(svwrite_ver_za128, _s8, _m)(0, slice_base, pg, zn); } @@ -1386,7 +1386,7 @@ void test_svwrite_ver_za128_s8(uint32_t slice_base, svbool_t pg, svint8_t zn) __ // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv16i8(i32 15, i32 [[SLICE_BASE]], [[PG]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_ver_za128_s8_1(uint32_t slice_base, svbool_t pg, svint8_t zn) __arm_streaming __arm_shared_za { +void test_svwrite_ver_za128_s8_1(uint32_t slice_base, svbool_t pg, svint8_t zn) __arm_streaming __arm_inout("za") { SME_ACLE_FUNC(svwrite_ver_za128, _s8, _m)(15, slice_base, pg, zn); } @@ -1404,7 +1404,7 @@ void test_svwrite_ver_za128_s8_1(uint32_t slice_base, svbool_t pg, svint8_t zn) // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv8i16(i32 0, i32 [[SLICE_BASE]], [[TMP0]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_ver_za128_s16(uint32_t slice_base, svbool_t pg, svint16_t zn) __arm_streaming __arm_shared_za { +void test_svwrite_ver_za128_s16(uint32_t slice_base, svbool_t pg, svint16_t zn) __arm_streaming __arm_inout("za") { SME_ACLE_FUNC(svwrite_ver_za128, _s16, _m)(0, slice_base, pg, zn); } @@ -1422,7 +1422,7 @@ void test_svwrite_ver_za128_s16(uint32_t slice_base, svbool_t pg, svint16_t zn) // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv8i16(i32 15, i32 [[SLICE_BASE]], [[TMP0]], [[ZN]]) 
// CHECK-CXX-NEXT: ret void // -void test_svwrite_ver_za128_s16_1(uint32_t slice_base, svbool_t pg, svint16_t zn) __arm_streaming __arm_shared_za { +void test_svwrite_ver_za128_s16_1(uint32_t slice_base, svbool_t pg, svint16_t zn) __arm_streaming __arm_inout("za") { SME_ACLE_FUNC(svwrite_ver_za128, _s16, _m)(15, slice_base, pg, zn); } @@ -1440,7 +1440,7 @@ void test_svwrite_ver_za128_s16_1(uint32_t slice_base, svbool_t pg, svint16_t zn // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv4i32(i32 0, i32 [[SLICE_BASE]], [[TMP0]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_ver_za128_s32(uint32_t slice_base, svbool_t pg, svint32_t zn) __arm_streaming __arm_shared_za { +void test_svwrite_ver_za128_s32(uint32_t slice_base, svbool_t pg, svint32_t zn) __arm_streaming __arm_inout("za") { SME_ACLE_FUNC(svwrite_ver_za128, _s32, _m)(0, slice_base, pg, zn); } @@ -1458,7 +1458,7 @@ void test_svwrite_ver_za128_s32(uint32_t slice_base, svbool_t pg, svint32_t zn) // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv4i32(i32 15, i32 [[SLICE_BASE]], [[TMP0]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_ver_za128_s32_1(uint32_t slice_base, svbool_t pg, svint32_t zn) __arm_streaming __arm_shared_za { +void test_svwrite_ver_za128_s32_1(uint32_t slice_base, svbool_t pg, svint32_t zn) __arm_streaming __arm_inout("za") { SME_ACLE_FUNC(svwrite_ver_za128, _s32, _m)(15, slice_base, pg, zn); } @@ -1476,7 +1476,7 @@ void test_svwrite_ver_za128_s32_1(uint32_t slice_base, svbool_t pg, svint32_t zn // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv2i64(i32 0, i32 [[SLICE_BASE]], [[TMP0]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_ver_za128_s64(uint32_t slice_base, svbool_t pg, svint64_t zn) __arm_streaming __arm_shared_za { +void test_svwrite_ver_za128_s64(uint32_t slice_base, svbool_t pg, svint64_t zn) __arm_streaming __arm_inout("za") { SME_ACLE_FUNC(svwrite_ver_za128, _s64, _m)(0, slice_base, pg, zn); } 
@@ -1494,7 +1494,7 @@ void test_svwrite_ver_za128_s64(uint32_t slice_base, svbool_t pg, svint64_t zn) // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv2i64(i32 15, i32 [[SLICE_BASE]], [[TMP0]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_ver_za128_s64_1(uint32_t slice_base, svbool_t pg, svint64_t zn) __arm_streaming __arm_shared_za { +void test_svwrite_ver_za128_s64_1(uint32_t slice_base, svbool_t pg, svint64_t zn) __arm_streaming __arm_inout("za") { SME_ACLE_FUNC(svwrite_ver_za128, _s64, _m)(15, slice_base, pg, zn); } @@ -1510,7 +1510,7 @@ void test_svwrite_ver_za128_s64_1(uint32_t slice_base, svbool_t pg, svint64_t zn // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv16i8(i32 0, i32 [[SLICE_BASE]], [[PG]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_ver_za128_u8(uint32_t slice_base, svbool_t pg, svuint8_t zn) __arm_streaming __arm_shared_za { +void test_svwrite_ver_za128_u8(uint32_t slice_base, svbool_t pg, svuint8_t zn) __arm_streaming __arm_inout("za") { SME_ACLE_FUNC(svwrite_ver_za128, _u8, _m)(0, slice_base, pg, zn); } @@ -1526,7 +1526,7 @@ void test_svwrite_ver_za128_u8(uint32_t slice_base, svbool_t pg, svuint8_t zn) _ // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv16i8(i32 15, i32 [[SLICE_BASE]], [[PG]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_ver_za128_u8_1(uint32_t slice_base, svbool_t pg, svuint8_t zn) __arm_streaming __arm_shared_za { +void test_svwrite_ver_za128_u8_1(uint32_t slice_base, svbool_t pg, svuint8_t zn) __arm_streaming __arm_inout("za") { SME_ACLE_FUNC(svwrite_ver_za128, _u8, _m)(15, slice_base, pg, zn); } @@ -1544,7 +1544,7 @@ void test_svwrite_ver_za128_u8_1(uint32_t slice_base, svbool_t pg, svuint8_t zn) // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv8i16(i32 0, i32 [[SLICE_BASE]], [[TMP0]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_ver_za128_u16(uint32_t slice_base, svbool_t pg, svuint16_t zn) 
__arm_streaming __arm_shared_za { +void test_svwrite_ver_za128_u16(uint32_t slice_base, svbool_t pg, svuint16_t zn) __arm_streaming __arm_inout("za") { SME_ACLE_FUNC(svwrite_ver_za128, _u16, _m)(0, slice_base, pg, zn); } @@ -1562,7 +1562,7 @@ void test_svwrite_ver_za128_u16(uint32_t slice_base, svbool_t pg, svuint16_t zn) // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv8i16(i32 15, i32 [[SLICE_BASE]], [[TMP0]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_ver_za128_u16_1(uint32_t slice_base, svbool_t pg, svuint16_t zn) __arm_streaming __arm_shared_za { +void test_svwrite_ver_za128_u16_1(uint32_t slice_base, svbool_t pg, svuint16_t zn) __arm_streaming __arm_inout("za") { SME_ACLE_FUNC(svwrite_ver_za128, _u16, _m)(15, slice_base, pg, zn); } @@ -1580,7 +1580,7 @@ void test_svwrite_ver_za128_u16_1(uint32_t slice_base, svbool_t pg, svuint16_t z // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv4i32(i32 0, i32 [[SLICE_BASE]], [[TMP0]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_ver_za128_u32(uint32_t slice_base, svbool_t pg, svuint32_t zn) __arm_streaming __arm_shared_za { +void test_svwrite_ver_za128_u32(uint32_t slice_base, svbool_t pg, svuint32_t zn) __arm_streaming __arm_inout("za") { SME_ACLE_FUNC(svwrite_ver_za128, _u32, _m)(0, slice_base, pg, zn); } @@ -1598,7 +1598,7 @@ void test_svwrite_ver_za128_u32(uint32_t slice_base, svbool_t pg, svuint32_t zn) // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv4i32(i32 15, i32 [[SLICE_BASE]], [[TMP0]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_ver_za128_u32_1(uint32_t slice_base, svbool_t pg, svuint32_t zn) __arm_streaming __arm_shared_za { +void test_svwrite_ver_za128_u32_1(uint32_t slice_base, svbool_t pg, svuint32_t zn) __arm_streaming __arm_inout("za") { SME_ACLE_FUNC(svwrite_ver_za128, _u32, _m)(15, slice_base, pg, zn); } @@ -1616,7 +1616,7 @@ void test_svwrite_ver_za128_u32_1(uint32_t slice_base, svbool_t pg, svuint32_t z 
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv2i64(i32 0, i32 [[SLICE_BASE]], [[TMP0]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_ver_za128_u64(uint32_t slice_base, svbool_t pg, svuint64_t zn) __arm_streaming __arm_shared_za { +void test_svwrite_ver_za128_u64(uint32_t slice_base, svbool_t pg, svuint64_t zn) __arm_streaming __arm_inout("za") { SME_ACLE_FUNC(svwrite_ver_za128, _u64, _m)(0, slice_base, pg, zn); } @@ -1634,7 +1634,7 @@ void test_svwrite_ver_za128_u64(uint32_t slice_base, svbool_t pg, svuint64_t zn) // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv2i64(i32 15, i32 [[SLICE_BASE]], [[TMP0]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_ver_za128_u64_1(uint32_t slice_base, svbool_t pg, svuint64_t zn) __arm_streaming __arm_shared_za { +void test_svwrite_ver_za128_u64_1(uint32_t slice_base, svbool_t pg, svuint64_t zn) __arm_streaming __arm_inout("za") { SME_ACLE_FUNC(svwrite_ver_za128, _u64, _m)(15, slice_base, pg, zn); } @@ -1652,7 +1652,7 @@ void test_svwrite_ver_za128_u64_1(uint32_t slice_base, svbool_t pg, svuint64_t z // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv8f16(i32 0, i32 [[SLICE_BASE]], [[TMP0]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_ver_za128_f16(uint32_t slice_base, svbool_t pg, svfloat16_t zn) __arm_streaming __arm_shared_za { +void test_svwrite_ver_za128_f16(uint32_t slice_base, svbool_t pg, svfloat16_t zn) __arm_streaming __arm_inout("za") { SME_ACLE_FUNC(svwrite_ver_za128, _f16, _m)(0, slice_base, pg, zn); } @@ -1670,7 +1670,7 @@ void test_svwrite_ver_za128_f16(uint32_t slice_base, svbool_t pg, svfloat16_t zn // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv8f16(i32 15, i32 [[SLICE_BASE]], [[TMP0]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_ver_za128_f16_1(uint32_t slice_base, svbool_t pg, svfloat16_t zn) __arm_streaming __arm_shared_za { +void test_svwrite_ver_za128_f16_1(uint32_t slice_base, 
svbool_t pg, svfloat16_t zn) __arm_streaming __arm_inout("za") { SME_ACLE_FUNC(svwrite_ver_za128, _f16, _m)(15, slice_base, pg, zn); } @@ -1688,7 +1688,7 @@ void test_svwrite_ver_za128_f16_1(uint32_t slice_base, svbool_t pg, svfloat16_t // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv8bf16(i32 0, i32 [[SLICE_BASE]], [[TMP0]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_ver_za128_bf16(uint32_t slice_base, svbool_t pg, svbfloat16_t zn) __arm_streaming __arm_shared_za { +void test_svwrite_ver_za128_bf16(uint32_t slice_base, svbool_t pg, svbfloat16_t zn) __arm_streaming __arm_inout("za") { SME_ACLE_FUNC(svwrite_ver_za128, _bf16, _m)(0, slice_base, pg, zn); } @@ -1706,7 +1706,7 @@ void test_svwrite_ver_za128_bf16(uint32_t slice_base, svbool_t pg, svbfloat16_t // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv8bf16(i32 15, i32 [[SLICE_BASE]], [[TMP0]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_ver_za128_bf16_1(uint32_t slice_base, svbool_t pg, svbfloat16_t zn) __arm_streaming __arm_shared_za { +void test_svwrite_ver_za128_bf16_1(uint32_t slice_base, svbool_t pg, svbfloat16_t zn) __arm_streaming __arm_inout("za") { SME_ACLE_FUNC(svwrite_ver_za128, _bf16, _m)(15, slice_base, pg, zn); } @@ -1724,7 +1724,7 @@ void test_svwrite_ver_za128_bf16_1(uint32_t slice_base, svbool_t pg, svbfloat16_ // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv4f32(i32 0, i32 [[SLICE_BASE]], [[TMP0]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_ver_za128_f32(uint32_t slice_base, svbool_t pg, svfloat32_t zn) __arm_streaming __arm_shared_za { +void test_svwrite_ver_za128_f32(uint32_t slice_base, svbool_t pg, svfloat32_t zn) __arm_streaming __arm_inout("za") { SME_ACLE_FUNC(svwrite_ver_za128, _f32, _m)(0, slice_base, pg, zn); } @@ -1742,7 +1742,7 @@ void test_svwrite_ver_za128_f32(uint32_t slice_base, svbool_t pg, svfloat32_t zn // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv4f32(i32 
15, i32 [[SLICE_BASE]], [[TMP0]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_ver_za128_f32_1(uint32_t slice_base, svbool_t pg, svfloat32_t zn) __arm_streaming __arm_shared_za { +void test_svwrite_ver_za128_f32_1(uint32_t slice_base, svbool_t pg, svfloat32_t zn) __arm_streaming __arm_inout("za") { SME_ACLE_FUNC(svwrite_ver_za128, _f32, _m)(15, slice_base, pg, zn); } @@ -1760,7 +1760,7 @@ void test_svwrite_ver_za128_f32_1(uint32_t slice_base, svbool_t pg, svfloat32_t // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv2f64(i32 0, i32 [[SLICE_BASE]], [[TMP0]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_ver_za128_f64(uint32_t slice_base, svbool_t pg, svfloat64_t zn) __arm_streaming __arm_shared_za { +void test_svwrite_ver_za128_f64(uint32_t slice_base, svbool_t pg, svfloat64_t zn) __arm_streaming __arm_inout("za") { SME_ACLE_FUNC(svwrite_ver_za128, _f64, _m)(0, slice_base, pg, zn); } @@ -1778,7 +1778,7 @@ void test_svwrite_ver_za128_f64(uint32_t slice_base, svbool_t pg, svfloat64_t zn // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv2f64(i32 15, i32 [[SLICE_BASE]], [[TMP0]], [[ZN]]) // CHECK-CXX-NEXT: ret void // -void test_svwrite_ver_za128_f64_1(uint32_t slice_base, svbool_t pg, svfloat64_t zn) __arm_streaming __arm_shared_za { +void test_svwrite_ver_za128_f64_1(uint32_t slice_base, svbool_t pg, svfloat64_t zn) __arm_streaming __arm_inout("za") { SME_ACLE_FUNC(svwrite_ver_za128, _f64, _m)(15, slice_base, pg, zn); } //// NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_zero.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_zero.c index ddd9602369538..658092bc33ff9 100644 --- a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_zero.c +++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_zero.c @@ -18,7 +18,7 @@ // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.zero(i32 0) // CHECK-CXX-NEXT: ret void // -void test_svzero_mask_za(void) __arm_shared_za { +void test_svzero_mask_za(void) __arm_inout("za") { svzero_mask_za(0); } @@ -34,7 +34,7 @@ void test_svzero_mask_za(void) __arm_shared_za { // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.zero(i32 176) // CHECK-CXX-NEXT: ret void // -void test_svzero_mask_za_1(void) __arm_shared_za { +void test_svzero_mask_za_1(void) __arm_inout("za") { svzero_mask_za(176); } @@ -50,7 +50,7 @@ void test_svzero_mask_za_1(void) __arm_shared_za { // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.zero(i32 255) // CHECK-CXX-NEXT: ret void // -void test_svzero_mask_za_2(void) __arm_shared_za { +void test_svzero_mask_za_2(void) __arm_inout("za") { svzero_mask_za(255); } @@ -66,7 +66,7 @@ void test_svzero_mask_za_2(void) __arm_shared_za { // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.zero(i32 255) // CHECK-CXX-NEXT: ret void // -void test_svzero_za(void) __arm_shared_za { +void test_svzero_za(void) __arm_out("za") { svzero_za(); } //// NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_add.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_add.c index 46b4a0386502e..311f965786137 100644 --- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_add.c +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_add.c @@ -36,7 +36,7 @@ // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.single.za.vg1x2.nxv4i32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // -void test_svadd_write_single2_s32(uint32_t slice_base, svint32x2_t zn, svint32_t zm) __arm_streaming __arm_shared_za { +void test_svadd_write_single2_s32(uint32_t slice_base, svint32x2_t zn, svint32_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svadd_write,_single,_za32,_s32,_vg1x2)(slice_base, zn, zm); } @@ -54,7 +54,7 @@ void test_svadd_write_single2_s32(uint32_t slice_base, svint32x2_t zn, svint32_t // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.single.za.vg1x2.nxv4i32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // -void test_svadd_write_single2_u32(uint32_t slice_base, svuint32x2_t zn, svuint32_t zm) __arm_streaming __arm_shared_za { +void test_svadd_write_single2_u32(uint32_t slice_base, svuint32x2_t zn, svuint32_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svadd_write,_single,_za32,_u32,_vg1x2)(slice_base, zn, zm); } @@ -72,7 +72,7 @@ void test_svadd_write_single2_u32(uint32_t slice_base, svuint32x2_t zn, svuint32 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.single.za.vg1x2.nxv2i64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // -void test_svadd_write_single2_s64(uint32_t slice_base, svint64x2_t zn, svint64_t zm) __arm_streaming __arm_shared_za { +void test_svadd_write_single2_s64(uint32_t slice_base, svint64x2_t zn, svint64_t zm) __arm_streaming __arm_inout("za") { 
SVE_ACLE_FUNC(svadd_write,_single,_za64,_s64,_vg1x2)(slice_base, zn, zm); } @@ -90,7 +90,7 @@ void test_svadd_write_single2_s64(uint32_t slice_base, svint64x2_t zn, svint64_t // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.single.za.vg1x2.nxv2i64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // -void test_svadd_write_single2_u64(uint32_t slice_base, svuint64x2_t zn, svuint64_t zm) __arm_streaming __arm_shared_za { +void test_svadd_write_single2_u64(uint32_t slice_base, svuint64x2_t zn, svuint64_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svadd_write,_single,_za64,_u64,_vg1x2)(slice_base, zn, zm); } @@ -114,7 +114,7 @@ void test_svadd_write_single2_u64(uint32_t slice_base, svuint64x2_t zn, svuint64 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.single.za.vg1x4.nxv4i32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // -void test_svadd_write_single4_s32(uint32_t slice_base, svint32x4_t zn, svint32_t zm) __arm_streaming __arm_shared_za { +void test_svadd_write_single4_s32(uint32_t slice_base, svint32x4_t zn, svint32_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svadd_write,_single,_za32,_s32,_vg1x4)(slice_base, zn, zm); } @@ -136,7 +136,7 @@ void test_svadd_write_single4_s32(uint32_t slice_base, svint32x4_t zn, svint32_t // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.single.za.vg1x4.nxv4i32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // -void test_svadd_write_single4_u32(uint32_t slice_base, svuint32x4_t zn, svuint32_t zm) __arm_streaming __arm_shared_za { +void test_svadd_write_single4_u32(uint32_t slice_base, svuint32x4_t zn, svuint32_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svadd_write,_single,_za32,_u32,_vg1x4)(slice_base, zn, zm); } @@ -158,7 +158,7 @@ void test_svadd_write_single4_u32(uint32_t slice_base, svuint32x4_t zn, svuint32 
// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.single.za.vg1x4.nxv2i64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // -void test_svadd_write_single4_s64(uint32_t slice_base, svint64x4_t zn, svint64_t zm) __arm_streaming __arm_shared_za { +void test_svadd_write_single4_s64(uint32_t slice_base, svint64x4_t zn, svint64_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svadd_write,_single,_za64,_s64,_vg1x4)(slice_base, zn, zm); } @@ -180,7 +180,7 @@ void test_svadd_write_single4_s64(uint32_t slice_base, svint64x4_t zn, svint64_t // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.single.za.vg1x4.nxv2i64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // -void test_svadd_write_single4_u64(uint32_t slice_base, svuint64x4_t zn, svuint64_t zm) __arm_streaming __arm_shared_za { +void test_svadd_write_single4_u64(uint32_t slice_base, svuint64x4_t zn, svuint64_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svadd_write,_single,_za64,_u64,_vg1x4)(slice_base, zn, zm); } @@ -208,7 +208,7 @@ void test_svadd_write_single4_u64(uint32_t slice_base, svuint64x4_t zn, svuint64 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.za.vg1x2.nxv4i32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) // CPP-CHECK-NEXT: ret void // -void test_svadd_write_multi2_s32(uint32_t slice_base, svint32x2_t zn, svint32x2_t zm) __arm_streaming __arm_shared_za { +void test_svadd_write_multi2_s32(uint32_t slice_base, svint32x2_t zn, svint32x2_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svadd_write,,_za32,_s32,_vg1x2)(slice_base, zn, zm); } @@ -230,7 +230,7 @@ void test_svadd_write_multi2_s32(uint32_t slice_base, svint32x2_t zn, svint32x2_ // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.za.vg1x2.nxv4i32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) // CPP-CHECK-NEXT: ret void // -void 
test_svadd_write_multi2_u32(uint32_t slice_base, svuint32x2_t zn, svuint32x2_t zm) __arm_streaming __arm_shared_za { +void test_svadd_write_multi2_u32(uint32_t slice_base, svuint32x2_t zn, svuint32x2_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svadd_write,,_za32,_u32,_vg1x2)(slice_base, zn, zm); } @@ -252,7 +252,7 @@ void test_svadd_write_multi2_u32(uint32_t slice_base, svuint32x2_t zn, svuint32x // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.za.vg1x2.nxv2i64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) // CPP-CHECK-NEXT: ret void // -void test_svadd_write_multi2_s64(uint32_t slice_base, svint64x2_t zn, svint64x2_t zm) __arm_streaming __arm_shared_za { +void test_svadd_write_multi2_s64(uint32_t slice_base, svint64x2_t zn, svint64x2_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svadd_write,,_za64,_s64,_vg1x2)(slice_base, zn, zm); } @@ -274,7 +274,7 @@ void test_svadd_write_multi2_s64(uint32_t slice_base, svint64x2_t zn, svint64x2_ // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.za.vg1x2.nxv2i64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) // CPP-CHECK-NEXT: ret void // -void test_svadd_write_multi2_u64(uint32_t slice_base, svuint64x2_t zn, svuint64x2_t zm) __arm_streaming __arm_shared_za { +void test_svadd_write_multi2_u64(uint32_t slice_base, svuint64x2_t zn, svuint64x2_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svadd_write,,_za64,_u64,_vg1x2)(slice_base, zn, zm); } @@ -306,7 +306,7 @@ void test_svadd_write_multi2_u64(uint32_t slice_base, svuint64x2_t zn, svuint64x // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.za.vg1x4.nxv4i32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) // CPP-CHECK-NEXT: ret void // -void test_svadd_write_multi4_s32(uint32_t slice_base, svint32x4_t zn, svint32x4_t zm) __arm_streaming __arm_shared_za { +void test_svadd_write_multi4_s32(uint32_t slice_base, svint32x4_t zn, 
svint32x4_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svadd_write,,_za32,_s32,_vg1x4)(slice_base, zn, zm); } @@ -336,7 +336,7 @@ void test_svadd_write_multi4_s32(uint32_t slice_base, svint32x4_t zn, svint32x4_ // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.za.vg1x4.nxv4i32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) // CPP-CHECK-NEXT: ret void // -void test_svadd_write_multi4_u32(uint32_t slice_base, svuint32x4_t zn, svuint32x4_t zm) __arm_streaming __arm_shared_za { +void test_svadd_write_multi4_u32(uint32_t slice_base, svuint32x4_t zn, svuint32x4_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svadd_write,,_za32,_u32,_vg1x4)(slice_base, zn, zm); } @@ -366,7 +366,7 @@ void test_svadd_write_multi4_u32(uint32_t slice_base, svuint32x4_t zn, svuint32x // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.za.vg1x4.nxv2i64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) // CPP-CHECK-NEXT: ret void // -void test_svadd_write_multi4_s64(uint32_t slice_base, svint64x4_t zn, svint64x4_t zm) __arm_streaming __arm_shared_za { +void test_svadd_write_multi4_s64(uint32_t slice_base, svint64x4_t zn, svint64x4_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svadd_write,,_za64,_s64,_vg1x4)(slice_base, zn, zm); } @@ -396,7 +396,7 @@ void test_svadd_write_multi4_s64(uint32_t slice_base, svint64x4_t zn, svint64x4_ // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.za.vg1x4.nxv2i64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) // CPP-CHECK-NEXT: ret void // -void test_svadd_write_multi4_u64(uint32_t slice_base, svuint64x4_t zn, svuint64x4_t zm) __arm_streaming __arm_shared_za { +void test_svadd_write_multi4_u64(uint32_t slice_base, svuint64x4_t zn, svuint64x4_t zm) __arm_streaming __arm_inout("za") { 
SVE_ACLE_FUNC(svadd_write,,_za64,_u64,_vg1x4)(slice_base, zn, zm); } @@ -420,7 +420,7 @@ void test_svadd_write_multi4_u64(uint32_t slice_base, svuint64x4_t zn, svuint64x // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.za32.vg1x2.nxv4f32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]]) // CPP-CHECK-NEXT: ret void // -void test_svadd_za32_vg1x2_f32(uint32_t slice_base, svfloat32x2_t zn) __arm_streaming __arm_shared_za { +void test_svadd_za32_vg1x2_f32(uint32_t slice_base, svfloat32x2_t zn) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svadd_za32,,_f32,,_vg1x2)(slice_base, zn); } @@ -438,7 +438,7 @@ void test_svadd_za32_vg1x2_f32(uint32_t slice_base, svfloat32x2_t zn) __arm_stre // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.za32.vg1x2.nxv4i32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]]) // CPP-CHECK-NEXT: ret void // -void test_svadd_za32_vg1x2_s32(uint32_t slice_base, svint32x2_t zn) __arm_streaming __arm_shared_za { +void test_svadd_za32_vg1x2_s32(uint32_t slice_base, svint32x2_t zn) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svadd_za32,,_s32,,_vg1x2)(slice_base , zn); } @@ -456,7 +456,7 @@ void test_svadd_za32_vg1x2_s32(uint32_t slice_base, svint32x2_t zn) __arm_stream // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.za32.vg1x2.nxv4i32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]]) // CPP-CHECK-NEXT: ret void // -void test_svadd_za32_vg1x2_u32(uint32_t slice_base, svuint32x2_t zn) __arm_streaming __arm_shared_za { +void test_svadd_za32_vg1x2_u32(uint32_t slice_base, svuint32x2_t zn) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svadd_za32,,_u32,,_vg1x2)(slice_base, zn); } @@ -474,7 +474,7 @@ void test_svadd_za32_vg1x2_u32(uint32_t slice_base, svuint32x2_t zn) __arm_strea // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.za64.vg1x2.nxv2f64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]]) // CPP-CHECK-NEXT: ret void // -void test_svadd_za64_vg1x2_f64(uint32_t slice_base, svfloat64x2_t zn) __arm_streaming __arm_shared_za { +void 
test_svadd_za64_vg1x2_f64(uint32_t slice_base, svfloat64x2_t zn) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svadd_za64,,_f64,,_vg1x2)(slice_base, zn); } @@ -492,7 +492,7 @@ void test_svadd_za64_vg1x2_f64(uint32_t slice_base, svfloat64x2_t zn) __arm_stre // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.za64.vg1x2.nxv2i64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]]) // CPP-CHECK-NEXT: ret void // -void test_svadd_za64_vg1x2_s64(uint32_t slice_base, svint64x2_t zn) __arm_streaming __arm_shared_za { +void test_svadd_za64_vg1x2_s64(uint32_t slice_base, svint64x2_t zn) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svadd_za64,,_s64,,_vg1x2)(slice_base, zn); } @@ -510,7 +510,7 @@ void test_svadd_za64_vg1x2_s64(uint32_t slice_base, svint64x2_t zn) __arm_stream // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.za64.vg1x2.nxv2i64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]]) // CPP-CHECK-NEXT: ret void // -void test_svadd_za64_vg1x2_u64(uint32_t slice_base, svuint64x2_t zn) __arm_streaming __arm_shared_za { +void test_svadd_za64_vg1x2_u64(uint32_t slice_base, svuint64x2_t zn) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svadd_za64,,_u64,,_vg1x2)(slice_base, zn); } @@ -534,7 +534,7 @@ void test_svadd_za64_vg1x2_u64(uint32_t slice_base, svuint64x2_t zn) __arm_strea // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.za32.vg1x4.nxv4f32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) // CPP-CHECK-NEXT: ret void // -void test_svadd_za32_vg1x4_f32(uint32_t slice_base, svfloat32x4_t zn) __arm_streaming __arm_shared_za { +void test_svadd_za32_vg1x4_f32(uint32_t slice_base, svfloat32x4_t zn) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svadd_za32,,_f32,,_vg1x4)(slice_base, zn); } @@ -556,7 +556,7 @@ void test_svadd_za32_vg1x4_f32(uint32_t slice_base, svfloat32x4_t zn) __arm_stre // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.za32.vg1x4.nxv4i32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) // CPP-CHECK-NEXT: 
ret void // -void test_svadd_za32_vg1x4_s32(uint32_t slice_base, svint32x4_t zn) __arm_streaming __arm_shared_za { +void test_svadd_za32_vg1x4_s32(uint32_t slice_base, svint32x4_t zn) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svadd_za32,,_s32,,_vg1x4)(slice_base, zn); } @@ -578,7 +578,7 @@ void test_svadd_za32_vg1x4_s32(uint32_t slice_base, svint32x4_t zn) __arm_stream // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.za32.vg1x4.nxv4i32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) // CPP-CHECK-NEXT: ret void // -void test_svadd_za32_vg1x4_u32(uint32_t slice_base, svuint32x4_t zn) __arm_streaming __arm_shared_za { +void test_svadd_za32_vg1x4_u32(uint32_t slice_base, svuint32x4_t zn) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svadd_za32,,_u32,,_vg1x4)(slice_base, zn); } @@ -600,7 +600,7 @@ void test_svadd_za32_vg1x4_u32(uint32_t slice_base, svuint32x4_t zn) __arm_strea // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.za64.vg1x4.nxv2f64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) // CPP-CHECK-NEXT: ret void // -void test_svadd_za64_vg1x4_f64(uint32_t slice_base, svfloat64x4_t zn) __arm_streaming __arm_shared_za { +void test_svadd_za64_vg1x4_f64(uint32_t slice_base, svfloat64x4_t zn) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svadd_za64,,_f64,,_vg1x4)(slice_base, zn); } @@ -622,7 +622,7 @@ void test_svadd_za64_vg1x4_f64(uint32_t slice_base, svfloat64x4_t zn) __arm_stre // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.za64.vg1x4.nxv2i64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) // CPP-CHECK-NEXT: ret void // -void test_svadd_za64_vg1x4_s64(uint32_t slice_base, svint64x4_t zn) __arm_streaming __arm_shared_za { +void test_svadd_za64_vg1x4_s64(uint32_t slice_base, svint64x4_t zn) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svadd_za64,,_s64,,_vg1x4)(slice_base, zn); } @@ -644,6 +644,6 @@ void test_svadd_za64_vg1x4_s64(uint32_t slice_base, svint64x4_t zn) __arm_stream 
// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.za64.vg1x4.nxv2i64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) // CPP-CHECK-NEXT: ret void // -void test_svadd_za64_vg1x4_u64(uint32_t slice_base, svuint64x4_t zn) __arm_streaming __arm_shared_za { +void test_svadd_za64_vg1x4_u64(uint32_t slice_base, svuint64x4_t zn) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svadd_za64,,_u64,,_vg1x4)(slice_base, zn); } diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_bmop.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_bmop.c index 67a330625884f..e6415c66fbb48 100644 --- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_bmop.c +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_bmop.c @@ -33,7 +33,7 @@ // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.bmopa.za32.nxv4i32(i32 3, [[TMP0]], [[TMP1]], [[ZN:%.*]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // -void test_svbmopa_u32(svbool_t pn, svbool_t pm, svuint32_t zn, svuint32_t zm) __arm_streaming __arm_shared_za { +void test_svbmopa_u32(svbool_t pn, svbool_t pm, svuint32_t zn, svuint32_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svbmopa_za32,_u32,_m,)(3, pn, pm, zn, zm); } @@ -51,7 +51,7 @@ void test_svbmopa_u32(svbool_t pn, svbool_t pm, svuint32_t zn, svuint32_t zm) __ // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.bmopa.za32.nxv4i32(i32 3, [[TMP0]], [[TMP1]], [[ZN:%.*]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // -void test_svbmopa_s32(svbool_t pn, svbool_t pm, svint32_t zn, svint32_t zm) __arm_streaming __arm_shared_za { +void test_svbmopa_s32(svbool_t pn, svbool_t pm, svint32_t zn, svint32_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svbmopa_za32,_s32,_m,)(3, pn, pm, zn, zm); } @@ -71,7 +71,7 @@ void test_svbmopa_s32(svbool_t pn, svbool_t pm, svint32_t zn, svint32_t zm) __ar // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.bmops.za32.nxv4i32(i32 3, [[TMP0]], [[TMP1]], [[ZN:%.*]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // -void 
test_svbmops_u32(svbool_t pn, svbool_t pm, svuint32_t zn, svuint32_t zm) __arm_streaming __arm_shared_za { +void test_svbmops_u32(svbool_t pn, svbool_t pm, svuint32_t zn, svuint32_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svbmops_za32,_u32,_m,)(3, pn, pm, zn, zm); } @@ -89,6 +89,6 @@ void test_svbmops_u32(svbool_t pn, svbool_t pm, svuint32_t zn, svuint32_t zm) __ // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.bmops.za32.nxv4i32(i32 3, [[TMP0]], [[TMP1]], [[ZN:%.*]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // -void test_svbmops_s32(svbool_t pn, svbool_t pm, svint32_t zn, svint32_t zm) __arm_streaming __arm_shared_za { +void test_svbmops_s32(svbool_t pn, svbool_t pm, svint32_t zn, svint32_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svbmops_za32,_s32,_m,)(3, pn, pm, zn, zm); } diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_fp_dots.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_fp_dots.c index ff4176530710a..c4651f4d83e1f 100644 --- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_fp_dots.c +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_fp_dots.c @@ -35,7 +35,7 @@ // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fdot.za32.vg1x2.nxv8f16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) // CPP-CHECK-NEXT: ret void // -void test_svdot_multi_za32_vg1x2_f16(uint32_t slice_base, svfloat16x2_t zn, svfloat16x2_t zm) __arm_streaming __arm_shared_za { +void test_svdot_multi_za32_vg1x2_f16(uint32_t slice_base, svfloat16x2_t zn, svfloat16x2_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svdot_za32,,,_f16,_vg1x2)(slice_base, zn, zm); } @@ -65,7 +65,7 @@ void test_svdot_multi_za32_vg1x2_f16(uint32_t slice_base, svfloat16x2_t zn, svfl // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fdot.za32.vg1x4.nxv8f16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) // CPP-CHECK-NEXT: ret void // -void 
test_svdot_multi_za32_vg1x4_f16(uint32_t slice_base, svfloat16x4_t zn, svfloat16x4_t zm) __arm_streaming __arm_shared_za { +void test_svdot_multi_za32_vg1x4_f16(uint32_t slice_base, svfloat16x4_t zn, svfloat16x4_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svdot_za32,,,_f16,_vg1x4)(slice_base, zn, zm); } @@ -86,7 +86,7 @@ void test_svdot_multi_za32_vg1x4_f16(uint32_t slice_base, svfloat16x4_t zn, svfl // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fdot.single.za32.vg1x2.nxv8f16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // -void test_svdot_single_za32_vg1x2_f16(uint32_t slice_base, svfloat16x2_t zn, svfloat16_t zm) __arm_streaming __arm_shared_za { +void test_svdot_single_za32_vg1x2_f16(uint32_t slice_base, svfloat16x2_t zn, svfloat16_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svdot_single_za32,,_f16,,_vg1x2)(slice_base, zn, zm); } @@ -108,7 +108,7 @@ void test_svdot_single_za32_vg1x2_f16(uint32_t slice_base, svfloat16x2_t zn, svf // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fdot.single.za32.vg1x4.nxv8f16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // -void test_svdot_single_za32_vg1x4_f16(uint32_t slice_base, svfloat16x4_t zn, svfloat16_t zm) __arm_streaming __arm_shared_za { +void test_svdot_single_za32_vg1x4_f16(uint32_t slice_base, svfloat16x4_t zn, svfloat16_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svdot_single_za32,,_f16,,_vg1x4)(slice_base, zn, zm); } @@ -129,7 +129,7 @@ void test_svdot_single_za32_vg1x4_f16(uint32_t slice_base, svfloat16x4_t zn, svf // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fdot.lane.za32.vg1x2.nxv8f16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]], i32 3) // CPP-CHECK-NEXT: ret void // -void test_svdot_lane_za32_vg1x2_f16(uint32_t slice_base, svfloat16x2_t zn, svfloat16_t zm) __arm_streaming __arm_shared_za { +void test_svdot_lane_za32_vg1x2_f16(uint32_t slice_base, 
svfloat16x2_t zn, svfloat16_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svdot_lane_za32,,,_f16,_vg1x2)(slice_base, zn, zm, 3); } @@ -151,7 +151,7 @@ void test_svdot_lane_za32_vg1x2_f16(uint32_t slice_base, svfloat16x2_t zn, svflo // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fdot.lane.za32.vg1x4.nxv8f16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]], i32 3) // CPP-CHECK-NEXT: ret void // -void test_svdot_lane_za32_vg1x4_f16(uint32_t slice_base, svfloat16x4_t zn, svfloat16_t zm) __arm_streaming __arm_shared_za { +void test_svdot_lane_za32_vg1x4_f16(uint32_t slice_base, svfloat16x4_t zn, svfloat16_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svdot_lane_za32,,,_f16,_vg1x4)(slice_base, zn, zm, 3); } @@ -176,7 +176,7 @@ void test_svdot_lane_za32_vg1x4_f16(uint32_t slice_base, svfloat16x4_t zn, svflo // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fdot.za32.vg1x2.nxv8bf16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) // CPP-CHECK-NEXT: ret void // -void test_svdot_multi_za32_vg1x2_bf16(uint32_t slice_base, svbfloat16x2_t zn, svbfloat16x2_t zm) __arm_streaming __arm_shared_za { +void test_svdot_multi_za32_vg1x2_bf16(uint32_t slice_base, svbfloat16x2_t zn, svbfloat16x2_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svdot_za32,,,_bf16,_vg1x2)(slice_base, zn, zm); } @@ -206,7 +206,7 @@ void test_svdot_multi_za32_vg1x2_bf16(uint32_t slice_base, svbfloat16x2_t zn, sv // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fdot.za32.vg1x4.nxv8bf16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) // CPP-CHECK-NEXT: ret void // -void test_svdot_multi_za32_vg1x4_bf16(uint32_t slice_base, svbfloat16x4_t zn, svbfloat16x4_t zm) __arm_streaming __arm_shared_za { +void test_svdot_multi_za32_vg1x4_bf16(uint32_t slice_base, svbfloat16x4_t zn, svbfloat16x4_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svdot_za32,,,_bf16,_vg1x4)(slice_base, 
zn, zm); } @@ -227,7 +227,7 @@ void test_svdot_multi_za32_vg1x4_bf16(uint32_t slice_base, svbfloat16x4_t zn, sv // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fdot.single.za32.vg1x2.nxv8bf16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // -void test_svdot_single_za32_vg1x2_bf16(uint32_t slice_base, svbfloat16x2_t zn, svbfloat16_t zm) __arm_streaming __arm_shared_za { +void test_svdot_single_za32_vg1x2_bf16(uint32_t slice_base, svbfloat16x2_t zn, svbfloat16_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svdot_single_za32,,_bf16,,_vg1x2)(slice_base, zn, zm); } @@ -249,7 +249,7 @@ void test_svdot_single_za32_vg1x2_bf16(uint32_t slice_base, svbfloat16x2_t zn, s // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fdot.single.za32.vg1x4.nxv8bf16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // -void test_svdot_single_za32_vg1x4_bf16(uint32_t slice_base, svbfloat16x4_t zn, svbfloat16_t zm) __arm_streaming __arm_shared_za { +void test_svdot_single_za32_vg1x4_bf16(uint32_t slice_base, svbfloat16x4_t zn, svbfloat16_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svdot_single_za32,,_bf16,,_vg1x4)(slice_base, zn, zm); } @@ -270,7 +270,7 @@ void test_svdot_single_za32_vg1x4_bf16(uint32_t slice_base, svbfloat16x4_t zn, s // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fdot.lane.za32.vg1x2.nxv8bf16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]], i32 3) // CPP-CHECK-NEXT: ret void // -void test_svdot_lane_za32_vg1x2_bf16(uint32_t slice_base, svbfloat16x2_t zn, svbfloat16_t zm) __arm_streaming __arm_shared_za { +void test_svdot_lane_za32_vg1x2_bf16(uint32_t slice_base, svbfloat16x2_t zn, svbfloat16_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svdot_lane_za32,,_bf16,,_vg1x2)(slice_base, zn, zm, 3); } @@ -292,6 +292,6 @@ void test_svdot_lane_za32_vg1x2_bf16(uint32_t slice_base, svbfloat16x2_t zn, svb // CPP-CHECK-NEXT: tail call void 
@llvm.aarch64.sme.fdot.lane.za32.vg1x4.nxv8bf16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]], i32 3) // CPP-CHECK-NEXT: ret void // -void test_svdot_lane_za32_vg1x4_bf16(uint32_t slice_base, svbfloat16x4_t zn, svbfloat16_t zm) __arm_streaming __arm_shared_za { +void test_svdot_lane_za32_vg1x4_bf16(uint32_t slice_base, svbfloat16x4_t zn, svbfloat16_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svdot_lane_za32,,_bf16,,_vg1x4)(slice_base, zn, zm, 3); } diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_int_dots.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_int_dots.c index 0d85071b7fc3e..6e0e9433bae2f 100644 --- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_int_dots.c +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_int_dots.c @@ -35,7 +35,7 @@ // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.udot.za32.vg1x2.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) // CPP-CHECK-NEXT: ret void // -void test_svdot_multi_za32_vg1x2_u16(uint32_t slice_base, svuint16x2_t zn, svuint16x2_t zm) __arm_streaming __arm_shared_za { +void test_svdot_multi_za32_vg1x2_u16(uint32_t slice_base, svuint16x2_t zn, svuint16x2_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svdot_za32,,,_u16,_vg1x2)(slice_base, zn, zm); } @@ -65,7 +65,7 @@ void test_svdot_multi_za32_vg1x2_u16(uint32_t slice_base, svuint16x2_t zn, svuin // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.udot.za32.vg1x4.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) // CPP-CHECK-NEXT: ret void // -void test_svdot_multi_za32_vg1x4_u16(uint32_t slice_base, svuint16x4_t zn, svuint16x4_t zm) __arm_streaming __arm_shared_za { +void test_svdot_multi_za32_vg1x4_u16(uint32_t slice_base, svuint16x4_t zn, svuint16x4_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svdot_za32,,,_u16,_vg1x4)(slice_base, zn, zm); } @@ -87,7 +87,7 @@ void 
test_svdot_multi_za32_vg1x4_u16(uint32_t slice_base, svuint16x4_t zn, svuin // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.udot.za32.vg1x2.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) // CPP-CHECK-NEXT: ret void // -void test_svdot_multi_za32_vg1x2_u8(uint32_t slice_base, svuint8x2_t zn, svuint8x2_t zm) __arm_streaming __arm_shared_za { +void test_svdot_multi_za32_vg1x2_u8(uint32_t slice_base, svuint8x2_t zn, svuint8x2_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svdot_za32,,,_u8,_vg1x2)(slice_base, zn, zm); } @@ -117,7 +117,7 @@ void test_svdot_multi_za32_vg1x2_u8(uint32_t slice_base, svuint8x2_t zn, svuint8 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.udot.za32.vg1x4.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) // CPP-CHECK-NEXT: ret void // -void test_svdot_multi_za32_vg1x4_u8(uint32_t slice_base, svuint8x4_t zn, svuint8x4_t zm) __arm_streaming __arm_shared_za { +void test_svdot_multi_za32_vg1x4_u8(uint32_t slice_base, svuint8x4_t zn, svuint8x4_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svdot_za32,,,_u8,_vg1x4)(slice_base, zn, zm); } @@ -139,7 +139,7 @@ void test_svdot_multi_za32_vg1x4_u8(uint32_t slice_base, svuint8x4_t zn, svuint8 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.udot.za64.vg1x2.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) // CPP-CHECK-NEXT: ret void // -void test_svdot_multi_za64_vg1x2_u16(uint32_t slice_base, svuint16x2_t zn, svuint16x2_t zm) __arm_streaming __arm_shared_za { +void test_svdot_multi_za64_vg1x2_u16(uint32_t slice_base, svuint16x2_t zn, svuint16x2_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svdot_za64,,,_u16,_vg1x2)(slice_base, zn, zm); } @@ -169,7 +169,7 @@ void test_svdot_multi_za64_vg1x2_u16(uint32_t slice_base, svuint16x2_t zn, svuin // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.udot.za64.vg1x4.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], 
[[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) // CPP-CHECK-NEXT: ret void // -void test_svdot_multi_za64_vg1x4_u16(uint32_t slice_base, svuint16x4_t zn, svuint16x4_t zm) __arm_streaming __arm_shared_za { +void test_svdot_multi_za64_vg1x4_u16(uint32_t slice_base, svuint16x4_t zn, svuint16x4_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svdot_za64,,,_u16,_vg1x4)(slice_base, zn, zm); } @@ -194,7 +194,7 @@ void test_svdot_multi_za64_vg1x4_u16(uint32_t slice_base, svuint16x4_t zn, svuin // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sdot.za32.vg1x2.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) // CPP-CHECK-NEXT: ret void // -void test_svdot_multi_za32_vg1x2_s16(uint32_t slice_base, svint16x2_t zn, svint16x2_t zm) __arm_streaming __arm_shared_za { +void test_svdot_multi_za32_vg1x2_s16(uint32_t slice_base, svint16x2_t zn, svint16x2_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svdot_za32,,,_s16,_vg1x2)(slice_base, zn, zm); } @@ -224,7 +224,7 @@ void test_svdot_multi_za32_vg1x2_s16(uint32_t slice_base, svint16x2_t zn, svint1 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sdot.za32.vg1x4.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) // CPP-CHECK-NEXT: ret void // -void test_svdot_multi_za32_vg1x4_s16(uint32_t slice_base, svint16x4_t zn, svint16x4_t zm) __arm_streaming __arm_shared_za { +void test_svdot_multi_za32_vg1x4_s16(uint32_t slice_base, svint16x4_t zn, svint16x4_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svdot_za32,,,_s16,_vg1x4)(slice_base, zn, zm); } @@ -246,7 +246,7 @@ void test_svdot_multi_za32_vg1x4_s16(uint32_t slice_base, svint16x4_t zn, svint1 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sdot.za32.vg1x2.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) // CPP-CHECK-NEXT: ret void // -void test_svdot_multi_za32_vg1x2_s8(uint32_t slice_base, svint8x2_t zn, svint8x2_t zm) __arm_streaming 
__arm_shared_za { +void test_svdot_multi_za32_vg1x2_s8(uint32_t slice_base, svint8x2_t zn, svint8x2_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svdot_za32,,,_s8,_vg1x2)(slice_base, zn, zm); } @@ -276,7 +276,7 @@ void test_svdot_multi_za32_vg1x2_s8(uint32_t slice_base, svint8x2_t zn, svint8x2 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sdot.za32.vg1x4.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) // CPP-CHECK-NEXT: ret void // -void test_svdot_multi_za32_vg1x4_s8(uint32_t slice_base, svint8x4_t zn, svint8x4_t zm) __arm_streaming __arm_shared_za { +void test_svdot_multi_za32_vg1x4_s8(uint32_t slice_base, svint8x4_t zn, svint8x4_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svdot_za32,,,_s8,_vg1x4)(slice_base, zn, zm); } @@ -298,7 +298,7 @@ void test_svdot_multi_za32_vg1x4_s8(uint32_t slice_base, svint8x4_t zn, svint8x4 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sdot.za64.vg1x2.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) // CPP-CHECK-NEXT: ret void // -void test_svdot_multi_za64_vg1x2_s16(uint32_t slice_base, svint16x2_t zn, svint16x2_t zm) __arm_streaming __arm_shared_za { +void test_svdot_multi_za64_vg1x2_s16(uint32_t slice_base, svint16x2_t zn, svint16x2_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svdot_za64,,,_s16,_vg1x2)(slice_base, zn, zm); } @@ -328,7 +328,7 @@ void test_svdot_multi_za64_vg1x2_s16(uint32_t slice_base, svint16x2_t zn, svint1 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sdot.za64.vg1x4.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) // CPP-CHECK-NEXT: ret void // -void test_svdot_multi_za64_vg1x4_s16(uint32_t slice_base, svint16x4_t zn, svint16x4_t zm) __arm_streaming __arm_shared_za { +void test_svdot_multi_za64_vg1x4_s16(uint32_t slice_base, svint16x4_t zn, svint16x4_t zm) __arm_streaming __arm_inout("za") { 
SVE_ACLE_FUNC(svdot_za64,,,_s16,_vg1x4)(slice_base, zn, zm); } @@ -349,7 +349,7 @@ void test_svdot_multi_za64_vg1x4_s16(uint32_t slice_base, svint16x4_t zn, svint1 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.udot.single.za32.vg1x2.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // -void test_svdot_single_za32_vg1x2_u16(uint32_t slice_base, svuint16x2_t zn, svuint16_t zm) __arm_streaming __arm_shared_za { +void test_svdot_single_za32_vg1x2_u16(uint32_t slice_base, svuint16x2_t zn, svuint16_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svdot_single_za32,,_u16,,_vg1x2)(slice_base, zn, zm); } @@ -371,7 +371,7 @@ void test_svdot_single_za32_vg1x2_u16(uint32_t slice_base, svuint16x2_t zn, svui // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.udot.single.za32.vg1x4.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // -void test_svdot_single_za32_vg1x4_u16(uint32_t slice_base, svuint16x4_t zn, svuint16_t zm) __arm_streaming __arm_shared_za { +void test_svdot_single_za32_vg1x4_u16(uint32_t slice_base, svuint16x4_t zn, svuint16_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svdot_single_za32,,_u16,,_vg1x4)(slice_base, zn, zm); } @@ -389,7 +389,7 @@ void test_svdot_single_za32_vg1x4_u16(uint32_t slice_base, svuint16x4_t zn, svui // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.udot.single.za32.vg1x2.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // -void test_svdot_single_za32_vg1x2_u8(uint32_t slice_base, svuint8x2_t zn, svuint8_t zm) __arm_streaming __arm_shared_za { +void test_svdot_single_za32_vg1x2_u8(uint32_t slice_base, svuint8x2_t zn, svuint8_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svdot_single_za32,,_u8,,_vg1x2)(slice_base, zn, zm); } @@ -411,7 +411,7 @@ void test_svdot_single_za32_vg1x2_u8(uint32_t slice_base, svuint8x2_t zn, svuint // CPP-CHECK-NEXT: tail call void 
@llvm.aarch64.sme.udot.single.za32.vg1x4.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // -void test_svdot_single_za32_vg1x4_u8(uint32_t slice_base, svuint8x4_t zn, svuint8_t zm) __arm_streaming __arm_shared_za { +void test_svdot_single_za32_vg1x4_u8(uint32_t slice_base, svuint8x4_t zn, svuint8_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svdot_single_za32,,_u8,,_vg1x4)(slice_base, zn, zm); } @@ -429,7 +429,7 @@ void test_svdot_single_za32_vg1x4_u8(uint32_t slice_base, svuint8x4_t zn, svuint // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.udot.single.za64.vg1x2.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // -void test_svdot_single_za64_vg1x2_u16(uint32_t slice_base, svuint16x2_t zn, svuint16_t zm) __arm_streaming __arm_shared_za { +void test_svdot_single_za64_vg1x2_u16(uint32_t slice_base, svuint16x2_t zn, svuint16_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svdot_single_za64,,_u16,,_vg1x2)(slice_base, zn, zm); } @@ -451,7 +451,7 @@ void test_svdot_single_za64_vg1x2_u16(uint32_t slice_base, svuint16x2_t zn, svui // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.udot.single.za64.vg1x4.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // -void test_svdot_single_za64_vg1x4_u16(uint32_t slice_base, svuint16x4_t zn, svuint16_t zm) __arm_streaming __arm_shared_za { +void test_svdot_single_za64_vg1x4_u16(uint32_t slice_base, svuint16x4_t zn, svuint16_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svdot_single_za64,,_u16,,_vg1x4)(slice_base, zn, zm); } @@ -472,7 +472,7 @@ void test_svdot_single_za64_vg1x4_u16(uint32_t slice_base, svuint16x4_t zn, svui // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sdot.single.za32.vg1x2.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // -void 
test_svdot_single_za32_vg1x2_s16(uint32_t slice_base, svint16x2_t zn, svint16_t zm) __arm_streaming __arm_shared_za { +void test_svdot_single_za32_vg1x2_s16(uint32_t slice_base, svint16x2_t zn, svint16_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svdot_single_za32,,_s16,,_vg1x2)(slice_base, zn, zm); } @@ -494,7 +494,7 @@ void test_svdot_single_za32_vg1x2_s16(uint32_t slice_base, svint16x2_t zn, svint // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sdot.single.za32.vg1x4.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // -void test_svdot_single_za32_vg1x4_s16(uint32_t slice_base, svint16x4_t zn, svint16_t zm) __arm_streaming __arm_shared_za { +void test_svdot_single_za32_vg1x4_s16(uint32_t slice_base, svint16x4_t zn, svint16_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svdot_single_za32,,_s16,,_vg1x4)(slice_base, zn, zm); } @@ -512,7 +512,7 @@ void test_svdot_single_za32_vg1x4_s16(uint32_t slice_base, svint16x4_t zn, svint // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sdot.single.za32.vg1x2.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // -void test_svdot_single_za32_vg1x2_s8(uint32_t slice_base, svint8x2_t zn, svint8_t zm) __arm_streaming __arm_shared_za { +void test_svdot_single_za32_vg1x2_s8(uint32_t slice_base, svint8x2_t zn, svint8_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svdot_single_za32,,_s8,,_vg1x2)(slice_base, zn, zm); } @@ -534,7 +534,7 @@ void test_svdot_single_za32_vg1x2_s8(uint32_t slice_base, svint8x2_t zn, svint8_ // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sdot.single.za32.vg1x4.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // -void test_svdot_single_za32_vg1x4_s8(uint32_t slice_base, svint8x4_t zn, svint8_t zm) __arm_streaming __arm_shared_za { +void test_svdot_single_za32_vg1x4_s8(uint32_t slice_base, svint8x4_t zn, 
svint8_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svdot_single_za32,,_s8,,_vg1x4)(slice_base, zn, zm); } @@ -552,7 +552,7 @@ void test_svdot_single_za32_vg1x4_s8(uint32_t slice_base, svint8x4_t zn, svint8_ // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sdot.single.za64.vg1x2.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // -void test_svdot_single_za64_vg1x2_s16(uint32_t slice_base, svint16x2_t zn, svint16_t zm) __arm_streaming __arm_shared_za { +void test_svdot_single_za64_vg1x2_s16(uint32_t slice_base, svint16x2_t zn, svint16_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svdot_single_za64,,_s16,,_vg1x2)(slice_base, zn, zm); } @@ -574,7 +574,7 @@ void test_svdot_single_za64_vg1x2_s16(uint32_t slice_base, svint16x2_t zn, svint // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sdot.single.za64.vg1x4.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // -void test_svdot_single_za64_vg1x4_s16(uint32_t slice_base, svint16x4_t zn, svint16_t zm) __arm_streaming __arm_shared_za { +void test_svdot_single_za64_vg1x4_s16(uint32_t slice_base, svint16x4_t zn, svint16_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svdot_single_za64,,_s16,,_vg1x4)(slice_base, zn, zm); } @@ -594,7 +594,7 @@ void test_svdot_single_za64_vg1x4_s16(uint32_t slice_base, svint16x4_t zn, svint // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.udot.lane.za32.vg1x2.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]], i32 3) // CPP-CHECK-NEXT: ret void // -void test_svdot_lane_za32_vg1x2_u16(uint32_t slice_base, svuint16x2_t zn, svuint16_t zm) __arm_streaming __arm_shared_za { +void test_svdot_lane_za32_vg1x2_u16(uint32_t slice_base, svuint16x2_t zn, svuint16_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svdot_lane_za32,,_u16,,_vg1x2)(slice_base, zn, zm, 3); } @@ -616,7 +616,7 @@ void test_svdot_lane_za32_vg1x2_u16(uint32_t slice_base, 
svuint16x2_t zn, svuint // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.udot.lane.za32.vg1x4.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]], i32 3) // CPP-CHECK-NEXT: ret void // -void test_svdot_lane_za32_vg1x4_u16(uint32_t slice_base, svuint16x4_t zn, svuint16_t zm) __arm_streaming __arm_shared_za { +void test_svdot_lane_za32_vg1x4_u16(uint32_t slice_base, svuint16x4_t zn, svuint16_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svdot_lane_za32,,_u16,,_vg1x4)(slice_base, zn, zm, 3); } @@ -634,7 +634,7 @@ void test_svdot_lane_za32_vg1x4_u16(uint32_t slice_base, svuint16x4_t zn, svuint // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.udot.lane.za32.vg1x2.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]], i32 3) // CPP-CHECK-NEXT: ret void // -void test_svdot_lane_za32_vg1x2_u8(uint32_t slice_base, svuint8x2_t zn, svuint8_t zm) __arm_streaming __arm_shared_za { +void test_svdot_lane_za32_vg1x2_u8(uint32_t slice_base, svuint8x2_t zn, svuint8_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svdot_lane_za32,,_u8,,_vg1x2)(slice_base, zn, zm, 3); } @@ -656,7 +656,7 @@ void test_svdot_lane_za32_vg1x2_u8(uint32_t slice_base, svuint8x2_t zn, svuint8_ // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.udot.lane.za32.vg1x4.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]], i32 3) // CPP-CHECK-NEXT: ret void // -void test_svdot_lane_za32_vg1x4_u8(uint32_t slice_base, svuint8x4_t zn, svuint8_t zm) __arm_streaming __arm_shared_za { +void test_svdot_lane_za32_vg1x4_u8(uint32_t slice_base, svuint8x4_t zn, svuint8_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svdot_lane_za32,,_u8,,_vg1x4)(slice_base, zn, zm, 3); } @@ -674,7 +674,7 @@ void test_svdot_lane_za32_vg1x4_u8(uint32_t slice_base, svuint8x4_t zn, svuint8_ // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.udot.lane.za64.vg1x2.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]], i32 1) // 
CPP-CHECK-NEXT: ret void // -void test_svdot_lane_za64_vg1x2_u16(uint32_t slice_base, svuint16x2_t zn, svuint16_t zm) __arm_streaming __arm_shared_za { +void test_svdot_lane_za64_vg1x2_u16(uint32_t slice_base, svuint16x2_t zn, svuint16_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svdot_lane_za64,,,_u16,_vg1x2)(slice_base, zn, zm, 1); } @@ -696,7 +696,7 @@ void test_svdot_lane_za64_vg1x2_u16(uint32_t slice_base, svuint16x2_t zn, svuint // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.udot.lane.za64.vg1x4.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]], i32 1) // CPP-CHECK-NEXT: ret void // -void test_svdot_lane_za64_vg1x4_u16(uint32_t slice_base, svuint16x4_t zn, svuint16_t zm) __arm_streaming __arm_shared_za { +void test_svdot_lane_za64_vg1x4_u16(uint32_t slice_base, svuint16x4_t zn, svuint16_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svdot_lane_za64,,,_u16,_vg1x4)(slice_base, zn, zm, 1); } @@ -717,7 +717,7 @@ void test_svdot_lane_za64_vg1x4_u16(uint32_t slice_base, svuint16x4_t zn, svuint // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sdot.lane.za32.vg1x2.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]], i32 3) // CPP-CHECK-NEXT: ret void // -void test_svdot_lane_za32_vg1x2_s16(uint32_t slice_base, svint16x2_t zn, svint16_t zm) __arm_streaming __arm_shared_za { +void test_svdot_lane_za32_vg1x2_s16(uint32_t slice_base, svint16x2_t zn, svint16_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svdot_lane_za32,,,_s16,_vg1x2)(slice_base, zn, zm, 3); } @@ -739,7 +739,7 @@ void test_svdot_lane_za32_vg1x2_s16(uint32_t slice_base, svint16x2_t zn, svint16 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sdot.lane.za32.vg1x4.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]], i32 3) // CPP-CHECK-NEXT: ret void // -void test_svdot_lane_za32_vg1x4_s16(uint32_t slice_base, svint16x4_t zn, svint16_t zm) __arm_streaming __arm_shared_za { +void 
test_svdot_lane_za32_vg1x4_s16(uint32_t slice_base, svint16x4_t zn, svint16_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svdot_lane_za32,,,_s16,_vg1x4)(slice_base, zn, zm, 3); } @@ -757,7 +757,7 @@ void test_svdot_lane_za32_vg1x4_s16(uint32_t slice_base, svint16x4_t zn, svint16 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sdot.lane.za32.vg1x2.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]], i32 3) // CPP-CHECK-NEXT: ret void // -void test_svdot_lane_za32_vg1x2_s8(uint32_t slice_base, svint8x2_t zn, svint8_t zm) __arm_streaming __arm_shared_za { +void test_svdot_lane_za32_vg1x2_s8(uint32_t slice_base, svint8x2_t zn, svint8_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svdot_lane_za32,,,_s8,_vg1x2)(slice_base, zn, zm, 3); } @@ -779,7 +779,7 @@ void test_svdot_lane_za32_vg1x2_s8(uint32_t slice_base, svint8x2_t zn, svint8_t // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sdot.lane.za32.vg1x4.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]], i32 3) // CPP-CHECK-NEXT: ret void // -void test_svdot_lane_za32_vg1x4_s8(uint32_t slice_base, svint8x4_t zn, svint8_t zm) __arm_streaming __arm_shared_za { +void test_svdot_lane_za32_vg1x4_s8(uint32_t slice_base, svint8x4_t zn, svint8_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svdot_lane_za32,,,_s8,_vg1x4)(slice_base, zn, zm, 3); } @@ -797,7 +797,7 @@ void test_svdot_lane_za32_vg1x4_s8(uint32_t slice_base, svint8x4_t zn, svint8_t // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sdot.lane.za64.vg1x2.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]], i32 1) // CPP-CHECK-NEXT: ret void // -void test_svdot_lane_za64_vg1x2_s16(uint32_t slice_base, svint16x2_t zn, svint16_t zm) __arm_streaming __arm_shared_za { +void test_svdot_lane_za64_vg1x2_s16(uint32_t slice_base, svint16x2_t zn, svint16_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svdot_lane_za64,,,_s16,_vg1x2)(slice_base, zn, zm, 1); } @@ -819,7 +819,7 @@ void 
test_svdot_lane_za64_vg1x2_s16(uint32_t slice_base, svint16x2_t zn, svint16 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sdot.lane.za64.vg1x4.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]], i32 1) // CPP-CHECK-NEXT: ret void // -void test_svdot_lane_za64_vg1x4_s16(uint32_t slice_base, svint16x4_t zn, svint16_t zm) __arm_streaming __arm_shared_za { +void test_svdot_lane_za64_vg1x4_s16(uint32_t slice_base, svint16x4_t zn, svint16_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svdot_lane_za64,,,_s16,_vg1x4)(slice_base, zn, zm, 1); } @@ -844,7 +844,7 @@ void test_svdot_lane_za64_vg1x4_s16(uint32_t slice_base, svint16x4_t zn, svint16 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.usdot.za32.vg1x2.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) // CPP-CHECK-NEXT: ret void // -void test_svusdot_multi_za32_vg1x2_u8(uint32_t slice_base, svuint8x2_t zn, svint8x2_t zm) __arm_streaming __arm_shared_za { +void test_svusdot_multi_za32_vg1x2_u8(uint32_t slice_base, svuint8x2_t zn, svint8x2_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svusdot_za32,,,_u8,_vg1x2)(slice_base, zn, zm); } @@ -874,7 +874,7 @@ void test_svusdot_multi_za32_vg1x2_u8(uint32_t slice_base, svuint8x2_t zn, svint // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.usdot.za32.vg1x4.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) // CPP-CHECK-NEXT: ret void // -void test_svusdot_multi_za32_vg1x4_u8(uint32_t slice_base, svuint8x4_t zn, svint8x4_t zm) __arm_streaming __arm_shared_za { +void test_svusdot_multi_za32_vg1x4_u8(uint32_t slice_base, svuint8x4_t zn, svint8x4_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svusdot_za32,,,_u8,_vg1x4)(slice_base, zn, zm); } @@ -895,7 +895,7 @@ void test_svusdot_multi_za32_vg1x4_u8(uint32_t slice_base, svuint8x4_t zn, svint // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.usdot.single.za32.vg1x2.nxv16i8(i32 
[[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // -void test_svusdot_single_za32_vg1x2_u8(uint32_t slice_base, svuint8x2_t zn, svint8_t zm) __arm_streaming __arm_shared_za { +void test_svusdot_single_za32_vg1x2_u8(uint32_t slice_base, svuint8x2_t zn, svint8_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svusdot_single_za32,,_u8,,_vg1x2)(slice_base, zn, zm); } @@ -917,7 +917,7 @@ void test_svusdot_single_za32_vg1x2_u8(uint32_t slice_base, svuint8x2_t zn, svin // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.usdot.single.za32.vg1x4.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // -void test_svusdot_single_za32_vg1x4_u8(uint32_t slice_base, svuint8x4_t zn, svint8_t zm) __arm_streaming __arm_shared_za { +void test_svusdot_single_za32_vg1x4_u8(uint32_t slice_base, svuint8x4_t zn, svint8_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svusdot_single_za32,,_u8,,_vg1x4)(slice_base, zn, zm); } @@ -937,7 +937,7 @@ void test_svusdot_single_za32_vg1x4_u8(uint32_t slice_base, svuint8x4_t zn, svin // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.usdot.lane.za32.vg1x2.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]], i32 3) // CPP-CHECK-NEXT: ret void // -void test_svusdot_lane_za32_vg1x2_u8(uint32_t slice_base, svuint8x2_t zn, svint8_t zm) __arm_streaming __arm_shared_za { +void test_svusdot_lane_za32_vg1x2_u8(uint32_t slice_base, svuint8x2_t zn, svint8_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svusdot_lane_za32,,_u8,,_vg1x2)(slice_base, zn, zm, 3); } @@ -959,7 +959,7 @@ void test_svusdot_lane_za32_vg1x2_u8(uint32_t slice_base, svuint8x2_t zn, svint8 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.usdot.lane.za32.vg1x4.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]], i32 3) // CPP-CHECK-NEXT: ret void // -void test_svusdot_lane_za32_vg1x4_u8(uint32_t slice_base, svuint8x4_t zn, svint8_t zm) 
__arm_streaming __arm_shared_za { +void test_svusdot_lane_za32_vg1x4_u8(uint32_t slice_base, svuint8x4_t zn, svint8_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svusdot_lane_za32,,_u8,,_vg1x4)(slice_base, zn, zm, 3); } @@ -980,7 +980,7 @@ void test_svusdot_lane_za32_vg1x4_u8(uint32_t slice_base, svuint8x4_t zn, svint8 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sudot.single.za32.vg1x2.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // -void test_svsudot_single_za32_vg1x2_s8(uint32_t slice_base, svint8x2_t zn, svuint8_t zm) __arm_streaming __arm_shared_za { +void test_svsudot_single_za32_vg1x2_s8(uint32_t slice_base, svint8x2_t zn, svuint8_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svsudot_single_za32,,_s8,,_vg1x2)(slice_base, zn, zm); } @@ -1002,7 +1002,7 @@ void test_svsudot_single_za32_vg1x2_s8(uint32_t slice_base, svint8x2_t zn, svuin // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sudot.single.za32.vg1x4.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // -void test_svsudot_single_za32_vg1x4_s8(uint32_t slice_base, svint8x4_t zn, svuint8_t zm) __arm_streaming __arm_shared_za { +void test_svsudot_single_za32_vg1x4_s8(uint32_t slice_base, svint8x4_t zn, svuint8_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svsudot_single_za32,,_s8,,_vg1x4)(slice_base, zn, zm); } @@ -1026,7 +1026,7 @@ void test_svsudot_single_za32_vg1x4_s8(uint32_t slice_base, svint8x4_t zn, svuin // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.usdot.za32.vg1x2.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP2]], [[TMP3]], [[TMP0]], [[TMP1]]) // CPP-CHECK-NEXT: ret void // -void test_svsudot_multi_za32_vg1x2_s8(uint32_t slice_base, svint8x2_t zn, svuint8x2_t zm) __arm_streaming __arm_shared_za { +void test_svsudot_multi_za32_vg1x2_s8(uint32_t slice_base, svint8x2_t zn, svuint8x2_t zm) __arm_streaming __arm_inout("za") { 
SVE_ACLE_FUNC(svsudot_za32,,_s8,,_vg1x2)(slice_base, zn, zm); } @@ -1056,7 +1056,7 @@ void test_svsudot_multi_za32_vg1x2_s8(uint32_t slice_base, svint8x2_t zn, svuint // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.usdot.za32.vg1x4.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) // CPP-CHECK-NEXT: ret void // -void test_svsudot_multi_za32_vg1x4_s8(uint32_t slice_base, svint8x4_t zn, svuint8x4_t zm) __arm_streaming __arm_shared_za { +void test_svsudot_multi_za32_vg1x4_s8(uint32_t slice_base, svint8x4_t zn, svuint8x4_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svsudot_za32,,_s8,,_vg1x4)(slice_base, zn, zm); } @@ -1076,7 +1076,7 @@ void test_svsudot_multi_za32_vg1x4_s8(uint32_t slice_base, svint8x4_t zn, svuint // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sudot.lane.za32.vg1x2.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]], i32 3) // CPP-CHECK-NEXT: ret void // -void test_svsudot_lane_za32_vg1x2_s8(uint32_t slice_base, svint8x2_t zn, svuint8_t zm) __arm_streaming __arm_shared_za { +void test_svsudot_lane_za32_vg1x2_s8(uint32_t slice_base, svint8x2_t zn, svuint8_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svsudot_lane_za32,,_s8,,_vg1x2)(slice_base, zn, zm, 3); } @@ -1098,6 +1098,6 @@ void test_svsudot_lane_za32_vg1x2_s8(uint32_t slice_base, svint8x2_t zn, svuint8 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sudot.lane.za32.vg1x4.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]], i32 3) // CPP-CHECK-NEXT: ret void // -void test_svsudot_lane_za32_vg1x4_s8(uint32_t slice_base, svint8x4_t zn, svuint8_t zm) __arm_streaming __arm_shared_za { +void test_svsudot_lane_za32_vg1x4_s8(uint32_t slice_base, svint8x4_t zn, svuint8_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svsudot_lane_za32,,_s8,,_vg1x4)(slice_base, zn, zm, 3); } diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_ldr_str_zt.c 
b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_ldr_str_zt.c index 83fbd6e5855ca..2e3cbd33d1e2c 100644 --- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_ldr_str_zt.c +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_ldr_str_zt.c @@ -20,7 +20,7 @@ // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.ldr.zt(i32 0, ptr [[BASE:%.*]]) // CPP-CHECK-NEXT: ret void // -void test_svldr_zt(const void *base) __arm_streaming_compatible __arm_shared_za { +void test_svldr_zt(const void *base) __arm_streaming_compatible __arm_out("za") { svldr_zt(0, base); } @@ -36,6 +36,6 @@ void test_svldr_zt(const void *base) __arm_streaming_compatible __arm_shared_za // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.str.zt(i32 0, ptr [[BASE:%.*]]) // CPP-CHECK-NEXT: ret void // -void test_svstr_zt(void *base) __arm_streaming_compatible __arm_shared_za __arm_preserves_za { +void test_svstr_zt(void *base) __arm_streaming_compatible __arm_in("za") { svstr_zt(0, base); } diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti2_lane_zt.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti2_lane_zt.c index cb34db3695b24..a31c6d982100b 100644 --- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti2_lane_zt.c +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti2_lane_zt.c @@ -19,7 +19,7 @@ // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.luti2.lane.zt.nxv16i8(i32 0, [[ZN:%.*]], i32 15) // CPP-CHECK-NEXT: ret [[TMP0]] // -svuint8_t test_svluti2_lane_zt_u8(svuint8_t zn) __arm_streaming __arm_shared_za __arm_preserves_za { +svuint8_t test_svluti2_lane_zt_u8(svuint8_t zn) __arm_streaming __arm_in("za") { return svluti2_lane_zt_u8(0, zn, 15); } @@ -34,7 +34,7 @@ svuint8_t test_svluti2_lane_zt_u8(svuint8_t zn) __arm_streaming __arm_shared_za // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.luti2.lane.zt.nxv16i8(i32 0, [[ZN:%.*]], i32 15) // CPP-CHECK-NEXT: ret [[TMP0]] // -svint8_t 
test_svluti2_lane_zt_s8(svuint8_t zn) __arm_streaming __arm_shared_za __arm_preserves_za { +svint8_t test_svluti2_lane_zt_s8(svuint8_t zn) __arm_streaming __arm_in("za") { return svluti2_lane_zt_s8(0, zn, 15); } @@ -48,7 +48,7 @@ svint8_t test_svluti2_lane_zt_s8(svuint8_t zn) __arm_streaming __arm_shared_za _ // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.luti2.lane.zt.nxv8i16(i32 0, [[ZN:%.*]], i32 15) // CPP-CHECK-NEXT: ret [[TMP0]] // -svuint16_t test_svluti2_lane_zt_u16(svuint8_t zn) __arm_streaming __arm_shared_za __arm_preserves_za { +svuint16_t test_svluti2_lane_zt_u16(svuint8_t zn) __arm_streaming __arm_in("za") { return svluti2_lane_zt_u16(0, zn, 15); } @@ -63,7 +63,7 @@ svuint16_t test_svluti2_lane_zt_u16(svuint8_t zn) __arm_streaming __arm_shared_z // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.luti2.lane.zt.nxv8i16(i32 0, [[ZN:%.*]], i32 15) // CPP-CHECK-NEXT: ret [[TMP0]] // -svint16_t test_svluti2_lane_zt_s16(svuint8_t zn) __arm_streaming __arm_shared_za __arm_preserves_za { +svint16_t test_svluti2_lane_zt_s16(svuint8_t zn) __arm_streaming __arm_in("za") { return svluti2_lane_zt_s16(0, zn, 15); } @@ -77,7 +77,7 @@ svint16_t test_svluti2_lane_zt_s16(svuint8_t zn) __arm_streaming __arm_shared_za // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.luti2.lane.zt.nxv8f16(i32 0, [[ZN:%.*]], i32 15) // CPP-CHECK-NEXT: ret [[TMP0]] // -svfloat16_t test_svluti2_lane_zt_f16(svuint8_t zn) __arm_streaming __arm_shared_za __arm_preserves_za { +svfloat16_t test_svluti2_lane_zt_f16(svuint8_t zn) __arm_streaming __arm_in("za") { return svluti2_lane_zt_f16(0, zn, 15); } @@ -91,7 +91,7 @@ svfloat16_t test_svluti2_lane_zt_f16(svuint8_t zn) __arm_streaming __arm_shared_ // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.luti2.lane.zt.nxv8bf16(i32 0, [[ZN:%.*]], i32 15) // CPP-CHECK-NEXT: ret [[TMP0]] // -svbfloat16_t test_svluti2_lane_zt_bf16(svuint8_t zn) __arm_streaming __arm_shared_za __arm_preserves_za { 
+svbfloat16_t test_svluti2_lane_zt_bf16(svuint8_t zn) __arm_streaming __arm_in("za") { return svluti2_lane_zt_bf16(0, zn, 15); } @@ -105,7 +105,7 @@ svbfloat16_t test_svluti2_lane_zt_bf16(svuint8_t zn) __arm_streaming __arm_share // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.luti2.lane.zt.nxv4i32(i32 0, [[ZN:%.*]], i32 15) // CPP-CHECK-NEXT: ret [[TMP0]] // -svuint32_t test_svluti2_lane_zt_u32(svuint8_t zn) __arm_streaming __arm_shared_za __arm_preserves_za { +svuint32_t test_svluti2_lane_zt_u32(svuint8_t zn) __arm_streaming __arm_in("za") { return svluti2_lane_zt_u32(0, zn, 15); } @@ -119,7 +119,7 @@ svuint32_t test_svluti2_lane_zt_u32(svuint8_t zn) __arm_streaming __arm_shared_z // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.luti2.lane.zt.nxv4i32(i32 0, [[ZN:%.*]], i32 15) // CPP-CHECK-NEXT: ret [[TMP0]] // -svint32_t test_svluti2_lane_zt_s32(svuint8_t zn) __arm_streaming __arm_shared_za __arm_preserves_za { +svint32_t test_svluti2_lane_zt_s32(svuint8_t zn) __arm_streaming __arm_in("za") { return svluti2_lane_zt_s32(0, zn, 15); } @@ -133,6 +133,6 @@ svint32_t test_svluti2_lane_zt_s32(svuint8_t zn) __arm_streaming __arm_shared_za // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.luti2.lane.zt.nxv4f32(i32 0, [[ZN:%.*]], i32 15) // CPP-CHECK-NEXT: ret [[TMP0]] // -svfloat32_t test_svluti2_lane_zt_f32(svuint8_t zn) __arm_streaming __arm_shared_za __arm_preserves_za { +svfloat32_t test_svluti2_lane_zt_f32(svuint8_t zn) __arm_streaming __arm_in("za") { return svluti2_lane_zt_f32(0, zn, 15); } diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti2_lane_zt_x2.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti2_lane_zt_x2.c index 04f37af46767a..db44f52a37bf0 100644 --- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti2_lane_zt_x2.c +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti2_lane_zt_x2.c @@ -26,7 +26,7 @@ // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call 
@llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) // CPP-CHECK-NEXT: ret [[TMP4]] // -svuint8x2_t test_svluti2_lane_zt_u8(svuint8_t zn) __arm_streaming __arm_shared_za __arm_preserves_za { +svuint8x2_t test_svluti2_lane_zt_u8(svuint8_t zn) __arm_streaming __arm_in("za") { return svluti2_lane_zt_u8_x2(0, zn, 7); } @@ -49,7 +49,7 @@ svuint8x2_t test_svluti2_lane_zt_u8(svuint8_t zn) __arm_streaming __arm_shared_z // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) // CPP-CHECK-NEXT: ret [[TMP4]] // -svint8x2_t test_svluti2_lane_zt_s8(svuint8_t zn) __arm_streaming __arm_shared_za __arm_preserves_za { +svint8x2_t test_svluti2_lane_zt_s8(svuint8_t zn) __arm_streaming __arm_in("za") { return svluti2_lane_zt_s8_x2(0, zn, 7); } @@ -71,7 +71,7 @@ svint8x2_t test_svluti2_lane_zt_s8(svuint8_t zn) __arm_streaming __arm_shared_za // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) // CPP-CHECK-NEXT: ret [[TMP4]] // -svuint16x2_t test_svluti2_lane_zt_u16(svuint8_t zn) __arm_streaming __arm_shared_za __arm_preserves_za { +svuint16x2_t test_svluti2_lane_zt_u16(svuint8_t zn) __arm_streaming __arm_in("za") { return svluti2_lane_zt_u16_x2(0, zn, 7); } @@ -94,7 +94,7 @@ svuint16x2_t test_svluti2_lane_zt_u16(svuint8_t zn) __arm_streaming __arm_shared // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) // CPP-CHECK-NEXT: ret [[TMP4]] // -svint16x2_t test_svluti2_lane_zt_s16(svuint8_t zn) __arm_streaming __arm_shared_za __arm_preserves_za { +svint16x2_t test_svluti2_lane_zt_s16(svuint8_t zn) __arm_streaming __arm_in("za") { return svluti2_lane_zt_s16_x2(0, zn, 7); } @@ -116,7 +116,7 @@ svint16x2_t test_svluti2_lane_zt_s16(svuint8_t zn) __arm_streaming __arm_shared_ // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) // CPP-CHECK-NEXT: ret [[TMP4]] // 
-svfloat16x2_t test_svluti2_lane_zt_f16(svuint8_t zn) __arm_streaming __arm_shared_za __arm_preserves_za { +svfloat16x2_t test_svluti2_lane_zt_f16(svuint8_t zn) __arm_streaming __arm_in("za") { return svluti2_lane_zt_f16_x2(0, zn, 7); } @@ -138,7 +138,7 @@ svfloat16x2_t test_svluti2_lane_zt_f16(svuint8_t zn) __arm_streaming __arm_share // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) // CPP-CHECK-NEXT: ret [[TMP4]] // -svbfloat16x2_t test_svluti2_lane_zt_bf16(svuint8_t zn) __arm_streaming __arm_shared_za __arm_preserves_za { +svbfloat16x2_t test_svluti2_lane_zt_bf16(svuint8_t zn) __arm_streaming __arm_in("za") { return svluti2_lane_zt_bf16_x2(0, zn, 7); } @@ -160,7 +160,7 @@ svbfloat16x2_t test_svluti2_lane_zt_bf16(svuint8_t zn) __arm_streaming __arm_sha // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) // CPP-CHECK-NEXT: ret [[TMP4]] // -svuint32x2_t test_svluti2_lane_zt_u32(svuint8_t zn) __arm_streaming __arm_shared_za __arm_preserves_za { +svuint32x2_t test_svluti2_lane_zt_u32(svuint8_t zn) __arm_streaming __arm_in("za") { return svluti2_lane_zt_u32_x2(0, zn, 7); } @@ -182,7 +182,7 @@ svuint32x2_t test_svluti2_lane_zt_u32(svuint8_t zn) __arm_streaming __arm_shared // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) // CPP-CHECK-NEXT: ret [[TMP4]] // -svint32x2_t test_svluti2_lane_zt_s32(svuint8_t zn) __arm_streaming __arm_shared_za __arm_preserves_za { +svint32x2_t test_svluti2_lane_zt_s32(svuint8_t zn) __arm_streaming __arm_in("za") { return svluti2_lane_zt_s32_x2(0, zn, 7); } @@ -204,6 +204,6 @@ svint32x2_t test_svluti2_lane_zt_s32(svuint8_t zn) __arm_streaming __arm_shared_ // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) // CPP-CHECK-NEXT: ret [[TMP4]] // -svfloat32x2_t test_svluti2_lane_zt_f32(svuint8_t zn) __arm_streaming 
__arm_shared_za __arm_preserves_za { +svfloat32x2_t test_svluti2_lane_zt_f32(svuint8_t zn) __arm_streaming __arm_in("za") { return svluti2_lane_zt_f32_x2(0, zn, 7); } diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti2_lane_zt_x4.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti2_lane_zt_x4.c index 8c38d829a7f4c..23b2c6cc51283 100644 --- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti2_lane_zt_x4.c +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti2_lane_zt_x4.c @@ -34,7 +34,7 @@ // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) // CPP-CHECK-NEXT: ret [[TMP8]] // -svuint8x4_t test_svluti2_lane_zt_u8(svuint8_t zn) __arm_streaming __arm_shared_za __arm_preserves_za { +svuint8x4_t test_svluti2_lane_zt_u8(svuint8_t zn) __arm_streaming __arm_in("za") { return svluti2_lane_zt_u8_x4(0, zn, 3); } @@ -65,7 +65,7 @@ svuint8x4_t test_svluti2_lane_zt_u8(svuint8_t zn) __arm_streaming __arm_shared_z // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) // CPP-CHECK-NEXT: ret [[TMP8]] // -svint8x4_t test_svluti2_lane_zt_s8(svuint8_t zn) __arm_streaming __arm_shared_za __arm_preserves_za { +svint8x4_t test_svluti2_lane_zt_s8(svuint8_t zn) __arm_streaming __arm_in("za") { return svluti2_lane_zt_s8_x4(0, zn, 3); } @@ -95,7 +95,7 @@ svint8x4_t test_svluti2_lane_zt_s8(svuint8_t zn) __arm_streaming __arm_shared_za // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) // CPP-CHECK-NEXT: ret [[TMP8]] // -svuint16x4_t test_svluti2_lane_zt_u16(svuint8_t zn) __arm_streaming __arm_shared_za __arm_preserves_za { +svuint16x4_t test_svluti2_lane_zt_u16(svuint8_t zn) __arm_streaming __arm_in("za") { return svluti2_lane_zt_u16_x4(0, zn, 3); } @@ -125,7 +125,7 @@ svuint16x4_t test_svluti2_lane_zt_u16(svuint8_t zn) __arm_streaming __arm_shared // CPP-CHECK-NEXT: [[TMP8:%.*]] 
= tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) // CPP-CHECK-NEXT: ret [[TMP8]] // -svint16x4_t test_svluti2_lane_zt_s16(svuint8_t zn) __arm_streaming __arm_shared_za __arm_preserves_za { +svint16x4_t test_svluti2_lane_zt_s16(svuint8_t zn) __arm_streaming __arm_in("za") { return svluti2_lane_zt_s16_x4(0, zn, 3); } @@ -155,7 +155,7 @@ svint16x4_t test_svluti2_lane_zt_s16(svuint8_t zn) __arm_streaming __arm_shared_ // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) // CPP-CHECK-NEXT: ret [[TMP8]] // -svfloat16x4_t test_svluti2_lane_zt_f16(svuint8_t zn) __arm_streaming __arm_shared_za __arm_preserves_za { +svfloat16x4_t test_svluti2_lane_zt_f16(svuint8_t zn) __arm_streaming __arm_in("za") { return svluti2_lane_zt_f16_x4(0, zn, 3); } @@ -185,7 +185,7 @@ svfloat16x4_t test_svluti2_lane_zt_f16(svuint8_t zn) __arm_streaming __arm_share // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) // CPP-CHECK-NEXT: ret [[TMP8]] // -svbfloat16x4_t test_svluti2_lane_zt_bf16(svuint8_t zn) __arm_streaming __arm_shared_za __arm_preserves_za { +svbfloat16x4_t test_svluti2_lane_zt_bf16(svuint8_t zn) __arm_streaming __arm_in("za") { return svluti2_lane_zt_bf16_x4(0, zn, 3); } @@ -215,7 +215,7 @@ svbfloat16x4_t test_svluti2_lane_zt_bf16(svuint8_t zn) __arm_streaming __arm_sha // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) // CPP-CHECK-NEXT: ret [[TMP8]] // -svuint32x4_t test_svluti2_lane_zt_u32(svuint8_t zn) __arm_streaming __arm_shared_za __arm_preserves_za { +svuint32x4_t test_svluti2_lane_zt_u32(svuint8_t zn) __arm_streaming __arm_in("za") { return svluti2_lane_zt_u32_x4(0, zn, 3); } @@ -245,7 +245,7 @@ svuint32x4_t test_svluti2_lane_zt_u32(svuint8_t zn) __arm_streaming __arm_shared // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], 
[[TMP7]], i64 12) // CPP-CHECK-NEXT: ret [[TMP8]] // -svint32x4_t test_svluti2_lane_zt_s32(svuint8_t zn) __arm_streaming __arm_shared_za __arm_preserves_za { +svint32x4_t test_svluti2_lane_zt_s32(svuint8_t zn) __arm_streaming __arm_in("za") { return svluti2_lane_zt_s32_x4(0, zn, 3); } @@ -275,6 +275,6 @@ svint32x4_t test_svluti2_lane_zt_s32(svuint8_t zn) __arm_streaming __arm_shared_ // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) // CPP-CHECK-NEXT: ret [[TMP8]] // -svfloat32x4_t test_svluti2_lane_zt_f32(svuint8_t zn) __arm_streaming __arm_shared_za __arm_preserves_za { +svfloat32x4_t test_svluti2_lane_zt_f32(svuint8_t zn) __arm_streaming __arm_in("za") { return svluti2_lane_zt_f32_x4(0, zn, 3); } diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti4_lane_zt.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti4_lane_zt.c index 9815b0e825b30..c949cee1124ec 100644 --- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti4_lane_zt.c +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti4_lane_zt.c @@ -19,7 +19,7 @@ // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.luti4.lane.zt.nxv16i8(i32 0, [[ZN:%.*]], i32 7) // CPP-CHECK-NEXT: ret [[TMP0]] // -svuint8_t test_svluti4_lane_zt_u8(svuint8_t zn) __arm_streaming __arm_shared_za __arm_preserves_za { +svuint8_t test_svluti4_lane_zt_u8(svuint8_t zn) __arm_streaming __arm_in("za") { return svluti4_lane_zt_u8(0, zn, 7); } @@ -34,7 +34,7 @@ svuint8_t test_svluti4_lane_zt_u8(svuint8_t zn) __arm_streaming __arm_shared_za // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.luti4.lane.zt.nxv16i8(i32 0, [[ZN:%.*]], i32 7) // CPP-CHECK-NEXT: ret [[TMP0]] // -svint8_t test_svluti4_lane_zt_s8(svuint8_t zn) __arm_streaming __arm_shared_za __arm_preserves_za { +svint8_t test_svluti4_lane_zt_s8(svuint8_t zn) __arm_streaming __arm_in("za") { return svluti4_lane_zt_s8(0, zn, 7); } @@ -48,7 +48,7 @@ svint8_t 
test_svluti4_lane_zt_s8(svuint8_t zn) __arm_streaming __arm_shared_za _ // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.luti4.lane.zt.nxv8i16(i32 0, [[ZN:%.*]], i32 7) // CPP-CHECK-NEXT: ret [[TMP0]] // -svuint16_t test_svluti4_lane_zt_u16(svuint8_t zn) __arm_streaming __arm_shared_za __arm_preserves_za { +svuint16_t test_svluti4_lane_zt_u16(svuint8_t zn) __arm_streaming __arm_in("za") { return svluti4_lane_zt_u16(0, zn, 7); } @@ -62,7 +62,7 @@ svuint16_t test_svluti4_lane_zt_u16(svuint8_t zn) __arm_streaming __arm_shared_z // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.luti4.lane.zt.nxv8i16(i32 0, [[ZN:%.*]], i32 7) // CPP-CHECK-NEXT: ret [[TMP0]] // -svint16_t test_svluti4_lane_zt_s16(svuint8_t zn) __arm_streaming __arm_shared_za __arm_preserves_za { +svint16_t test_svluti4_lane_zt_s16(svuint8_t zn) __arm_streaming __arm_in("za") { return svluti4_lane_zt_s16(0, zn, 7); } @@ -76,7 +76,7 @@ svint16_t test_svluti4_lane_zt_s16(svuint8_t zn) __arm_streaming __arm_shared_za // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.luti4.lane.zt.nxv8f16(i32 0, [[ZN:%.*]], i32 7) // CPP-CHECK-NEXT: ret [[TMP0]] // -svfloat16_t test_svluti4_lane_zt_f16(svuint8_t zn) __arm_streaming __arm_shared_za __arm_preserves_za { +svfloat16_t test_svluti4_lane_zt_f16(svuint8_t zn) __arm_streaming __arm_in("za") { return svluti4_lane_zt_f16(0, zn, 7); } @@ -90,7 +90,7 @@ svfloat16_t test_svluti4_lane_zt_f16(svuint8_t zn) __arm_streaming __arm_shared_ // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.luti4.lane.zt.nxv8bf16(i32 0, [[ZN:%.*]], i32 7) // CPP-CHECK-NEXT: ret [[TMP0]] // -svbfloat16_t test_svluti4_lane_zt_bf16(svuint8_t zn) __arm_streaming __arm_shared_za __arm_preserves_za { +svbfloat16_t test_svluti4_lane_zt_bf16(svuint8_t zn) __arm_streaming __arm_in("za") { return svluti4_lane_zt_bf16(0, zn, 7); } @@ -104,7 +104,7 @@ svbfloat16_t test_svluti4_lane_zt_bf16(svuint8_t zn) __arm_streaming __arm_share // CPP-CHECK-NEXT: 
[[TMP0:%.*]] = tail call @llvm.aarch64.sme.luti4.lane.zt.nxv4i32(i32 0, [[ZN:%.*]], i32 7) // CPP-CHECK-NEXT: ret [[TMP0]] // -svuint32_t test_svluti4_lane_zt_u32(svuint8_t zn) __arm_streaming __arm_shared_za __arm_preserves_za { +svuint32_t test_svluti4_lane_zt_u32(svuint8_t zn) __arm_streaming __arm_in("za") { return svluti4_lane_zt_u32(0, zn, 7); } @@ -118,7 +118,7 @@ svuint32_t test_svluti4_lane_zt_u32(svuint8_t zn) __arm_streaming __arm_shared_z // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.luti4.lane.zt.nxv4i32(i32 0, [[ZN:%.*]], i32 7) // CPP-CHECK-NEXT: ret [[TMP0]] // -svint32_t test_svluti4_lane_zt_s32(svuint8_t zn) __arm_streaming __arm_shared_za __arm_preserves_za { +svint32_t test_svluti4_lane_zt_s32(svuint8_t zn) __arm_streaming __arm_in("za") { return svluti4_lane_zt_s32(0, zn, 7); } @@ -132,6 +132,6 @@ svint32_t test_svluti4_lane_zt_s32(svuint8_t zn) __arm_streaming __arm_shared_za // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.luti4.lane.zt.nxv4f32(i32 0, [[ZN:%.*]], i32 7) // CPP-CHECK-NEXT: ret [[TMP0]] // -svfloat32_t test_svluti4_lane_zt_f32(svuint8_t zn) __arm_streaming __arm_shared_za __arm_preserves_za { +svfloat32_t test_svluti4_lane_zt_f32(svuint8_t zn) __arm_streaming __arm_in("za") { return svluti4_lane_zt_f32(0, zn, 7); } diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti4_lane_zt_x2.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti4_lane_zt_x2.c index 4c181dd9123c5..61affd86d9119 100644 --- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti4_lane_zt_x2.c +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti4_lane_zt_x2.c @@ -26,7 +26,7 @@ // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) // CPP-CHECK-NEXT: ret [[TMP4]] // -svuint8x2_t test_svluti4_lane_zt_u8(svuint8_t zn) __arm_streaming __arm_shared_za __arm_preserves_za { +svuint8x2_t test_svluti4_lane_zt_u8(svuint8_t zn) __arm_streaming 
__arm_in("za") { return svluti4_lane_zt_u8_x2(0, zn, 3); } @@ -49,7 +49,7 @@ svuint8x2_t test_svluti4_lane_zt_u8(svuint8_t zn) __arm_streaming __arm_shared_z // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) // CPP-CHECK-NEXT: ret [[TMP4]] // -svint8x2_t test_svluti4_lane_zt_s8(svuint8_t zn) __arm_streaming __arm_shared_za __arm_preserves_za { +svint8x2_t test_svluti4_lane_zt_s8(svuint8_t zn) __arm_streaming __arm_in("za") { return svluti4_lane_zt_s8_x2(0, zn, 3); } @@ -71,7 +71,7 @@ svint8x2_t test_svluti4_lane_zt_s8(svuint8_t zn) __arm_streaming __arm_shared_za // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) // CPP-CHECK-NEXT: ret [[TMP4]] // -svuint16x2_t test_svluti4_lane_zt_u16(svuint8_t zn) __arm_streaming __arm_shared_za __arm_preserves_za { +svuint16x2_t test_svluti4_lane_zt_u16(svuint8_t zn) __arm_streaming __arm_in("za") { return svluti4_lane_zt_u16_x2(0, zn, 3); } @@ -94,7 +94,7 @@ svuint16x2_t test_svluti4_lane_zt_u16(svuint8_t zn) __arm_streaming __arm_shared // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) // CPP-CHECK-NEXT: ret [[TMP4]] // -svint16x2_t test_svluti4_lane_zt_s16(svuint8_t zn) __arm_streaming __arm_shared_za __arm_preserves_za { +svint16x2_t test_svluti4_lane_zt_s16(svuint8_t zn) __arm_streaming __arm_in("za") { return svluti4_lane_zt_s16_x2(0, zn, 3); } @@ -116,7 +116,7 @@ svint16x2_t test_svluti4_lane_zt_s16(svuint8_t zn) __arm_streaming __arm_shared_ // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) // CPP-CHECK-NEXT: ret [[TMP4]] // -svfloat16x2_t test_svluti4_lane_zt_f16(svuint8_t zn) __arm_streaming __arm_shared_za __arm_preserves_za { +svfloat16x2_t test_svluti4_lane_zt_f16(svuint8_t zn) __arm_streaming __arm_in("za") { return svluti4_lane_zt_f16_x2(0, zn, 3); } @@ -138,7 +138,7 @@ svfloat16x2_t 
test_svluti4_lane_zt_f16(svuint8_t zn) __arm_streaming __arm_share // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) // CPP-CHECK-NEXT: ret [[TMP4]] // -svbfloat16x2_t test_svluti4_lane_zt_bf16(svuint8_t zn) __arm_streaming __arm_shared_za __arm_preserves_za { +svbfloat16x2_t test_svluti4_lane_zt_bf16(svuint8_t zn) __arm_streaming __arm_in("za") { return svluti4_lane_zt_bf16_x2(0, zn, 3); } @@ -160,7 +160,7 @@ svbfloat16x2_t test_svluti4_lane_zt_bf16(svuint8_t zn) __arm_streaming __arm_sha // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) // CPP-CHECK-NEXT: ret [[TMP4]] // -svuint32x2_t test_svluti4_lane_zt_u32(svuint8_t zn) __arm_streaming __arm_shared_za __arm_preserves_za { +svuint32x2_t test_svluti4_lane_zt_u32(svuint8_t zn) __arm_streaming __arm_in("za") { return svluti4_lane_zt_u32_x2(0, zn, 3); } @@ -182,7 +182,7 @@ svuint32x2_t test_svluti4_lane_zt_u32(svuint8_t zn) __arm_streaming __arm_shared // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) // CPP-CHECK-NEXT: ret [[TMP4]] // -svint32x2_t test_svluti4_lane_zt_s32(svuint8_t zn) __arm_streaming __arm_shared_za __arm_preserves_za { +svint32x2_t test_svluti4_lane_zt_s32(svuint8_t zn) __arm_streaming __arm_in("za") { return svluti4_lane_zt_s32_x2(0, zn, 3); } @@ -204,6 +204,6 @@ svint32x2_t test_svluti4_lane_zt_s32(svuint8_t zn) __arm_streaming __arm_shared_ // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) // CPP-CHECK-NEXT: ret [[TMP4]] // -svfloat32x2_t test_svluti4_lane_zt_f32(svuint8_t zn) __arm_streaming __arm_shared_za __arm_preserves_za { +svfloat32x2_t test_svluti4_lane_zt_f32(svuint8_t zn) __arm_streaming __arm_in("za") { return svluti4_lane_zt_f32_x2(0, zn, 3); } diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti4_lane_zt_x4.c 
b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti4_lane_zt_x4.c index 9baccef888d58..7b478aa2b6ad4 100644 --- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti4_lane_zt_x4.c +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti4_lane_zt_x4.c @@ -36,7 +36,7 @@ // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) // CPP-CHECK-NEXT: ret [[TMP8]] // -svuint16x4_t test_svluti4_lane_zt_u16(svuint8_t zn) __arm_streaming __arm_shared_za __arm_preserves_za { +svuint16x4_t test_svluti4_lane_zt_u16(svuint8_t zn) __arm_streaming __arm_in("za") { return svluti4_lane_zt_u16_x4(0, zn, 1); } @@ -68,7 +68,7 @@ svuint16x4_t test_svluti4_lane_zt_u16(svuint8_t zn) __arm_streaming __arm_shared // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) // CPP-CHECK-NEXT: ret [[TMP8]] // -svfloat16x4_t test_svluti4_lane_zt_f16(svuint8_t zn) __arm_streaming __arm_shared_za __arm_preserves_za { +svfloat16x4_t test_svluti4_lane_zt_f16(svuint8_t zn) __arm_streaming __arm_in("za") { return svluti4_lane_zt_f16_x4(0, zn, 1); } @@ -100,7 +100,7 @@ svfloat16x4_t test_svluti4_lane_zt_f16(svuint8_t zn) __arm_streaming __arm_share // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) // CPP-CHECK-NEXT: ret [[TMP8]] // -svbfloat16x4_t test_svluti4_lane_zt_bf16(svuint8_t zn) __arm_streaming __arm_shared_za __arm_preserves_za { +svbfloat16x4_t test_svluti4_lane_zt_bf16(svuint8_t zn) __arm_streaming __arm_in("za") { return svluti4_lane_zt_bf16_x4(0, zn, 1); } @@ -132,7 +132,7 @@ svbfloat16x4_t test_svluti4_lane_zt_bf16(svuint8_t zn) __arm_streaming __arm_sha // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) // CPP-CHECK-NEXT: ret [[TMP8]] // -svint16x4_t test_svluti4_lane_zt_s16(svuint8_t zn) __arm_streaming __arm_shared_za __arm_preserves_za { 
+svint16x4_t test_svluti4_lane_zt_s16(svuint8_t zn) __arm_streaming __arm_in("za") { return svluti4_lane_zt_s16_x4(0, zn, 1); } @@ -164,7 +164,7 @@ svint16x4_t test_svluti4_lane_zt_s16(svuint8_t zn) __arm_streaming __arm_shared_ // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) // CPP-CHECK-NEXT: ret [[TMP8]] // -svuint32x4_t test_svluti4_lane_zt_u32(svuint8_t zn) __arm_streaming __arm_shared_za __arm_preserves_za { +svuint32x4_t test_svluti4_lane_zt_u32(svuint8_t zn) __arm_streaming __arm_in("za") { return svluti4_lane_zt_u32_x4(0, zn, 1); } @@ -196,7 +196,7 @@ svuint32x4_t test_svluti4_lane_zt_u32(svuint8_t zn) __arm_streaming __arm_shared // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) // CPP-CHECK-NEXT: ret [[TMP8]] // -svint32x4_t test_svluti4_lane_zt_s32(svuint8_t zn) __arm_streaming __arm_shared_za __arm_preserves_za { +svint32x4_t test_svluti4_lane_zt_s32(svuint8_t zn) __arm_streaming __arm_in("za") { return svluti4_lane_zt_s32_x4(0, zn, 1); } @@ -228,6 +228,6 @@ svint32x4_t test_svluti4_lane_zt_s32(svuint8_t zn) __arm_streaming __arm_shared_ // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) // CPP-CHECK-NEXT: ret [[TMP8]] // -svfloat32x4_t test_svluti4_lane_zt_f32(svuint8_t zn) __arm_streaming __arm_shared_za __arm_preserves_za { +svfloat32x4_t test_svluti4_lane_zt_f32(svuint8_t zn) __arm_streaming __arm_in("za") { return svluti4_lane_zt_f32_x4(0, zn, 1); } diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mla.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mla.c index 2679f9cc8dfd0..18c4b77ac3292 100644 --- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mla.c +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mla.c @@ -35,7 +35,7 @@ // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmla.vg1x2.nxv4f32(i32 [[SLICE_BASE:%.*]], [[TMP0]], 
[[TMP1]], [[TMP2]], [[TMP3]]) // CPP-CHECK-NEXT: ret void // -void test_svmla2_f32(uint32_t slice_base, svfloat32x2_t zn, svfloat32x2_t zm) __arm_streaming __arm_shared_za { +void test_svmla2_f32(uint32_t slice_base, svfloat32x2_t zn, svfloat32x2_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svmla_za32,_f32,_vg1x2,,)(slice_base, zn, zm); } @@ -65,7 +65,7 @@ void test_svmla2_f32(uint32_t slice_base, svfloat32x2_t zn, svfloat32x2_t zm) __ // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmla.vg1x4.nxv4f32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) // CPP-CHECK-NEXT: ret void // -void test_svmla4_f32(uint32_t slice_base, svfloat32x4_t zn, svfloat32x4_t zm) __arm_streaming __arm_shared_za { +void test_svmla4_f32(uint32_t slice_base, svfloat32x4_t zn, svfloat32x4_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svmla_za32,_f32,_vg1x4,,)(slice_base, zn, zm); } @@ -85,7 +85,7 @@ void test_svmla4_f32(uint32_t slice_base, svfloat32x4_t zn, svfloat32x4_t zm) __ // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmla.single.vg1x2.nxv4f32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // -void test_svmla_single2_f32(uint32_t slice_base, svfloat32x2_t zn, svfloat32_t zm) __arm_streaming __arm_shared_za { +void test_svmla_single2_f32(uint32_t slice_base, svfloat32x2_t zn, svfloat32_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svmla,_single,_za32,_f32,_vg1x2)(slice_base, zn, zm); } @@ -107,7 +107,7 @@ void test_svmla_single2_f32(uint32_t slice_base, svfloat32x2_t zn, svfloat32_t z // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmla.single.vg1x4.nxv4f32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // -void test_svmla_single4_f32(uint32_t slice_base, svfloat32x4_t zn, svfloat32_t zm) __arm_streaming __arm_shared_za { +void test_svmla_single4_f32(uint32_t slice_base, svfloat32x4_t zn, 
svfloat32_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svmla,_single,_za32,_f32,_vg1x4)(slice_base, zn, zm); } @@ -127,7 +127,7 @@ void test_svmla_single4_f32(uint32_t slice_base, svfloat32x4_t zn, svfloat32_t z // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmla.lane.vg1x2.nxv4f32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]], i32 3) // CPP-CHECK-NEXT: ret void // -void test_svmla_lane2_f32(uint32_t slice_base, svfloat32x2_t zn, svfloat32_t zm) __arm_streaming __arm_shared_za { +void test_svmla_lane2_f32(uint32_t slice_base, svfloat32x2_t zn, svfloat32_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svmla_lane_za32,_f32,_vg1x2,,)(slice_base, zn, zm, 3); } @@ -149,7 +149,7 @@ void test_svmla_lane2_f32(uint32_t slice_base, svfloat32x2_t zn, svfloat32_t zm) // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmla.lane.vg1x4.nxv4f32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]], i32 3) // CPP-CHECK-NEXT: ret void // -void test_svmla_lane4_f32(uint32_t slice_base, svfloat32x4_t zn, svfloat32_t zm) __arm_streaming __arm_shared_za { +void test_svmla_lane4_f32(uint32_t slice_base, svfloat32x4_t zn, svfloat32_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svmla_lane_za32,_f32,_vg1x4,,)(slice_base, zn, zm, 3); } @@ -173,7 +173,7 @@ void test_svmla_lane4_f32(uint32_t slice_base, svfloat32x4_t zn, svfloat32_t zm) // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmla.vg1x2.nxv2f64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) // CPP-CHECK-NEXT: ret void // -void test_svmla2_f64(uint32_t slice_base, svfloat64x2_t zn, svfloat64x2_t zm) __arm_streaming __arm_shared_za { +void test_svmla2_f64(uint32_t slice_base, svfloat64x2_t zn, svfloat64x2_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svmla_za64,_f64,_vg1x2,,)(slice_base, zn, zm); } @@ -203,7 +203,7 @@ void test_svmla2_f64(uint32_t slice_base, svfloat64x2_t zn, svfloat64x2_t zm) __ // CPP-CHECK-NEXT: tail call void 
@llvm.aarch64.sme.fmla.vg1x4.nxv2f64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) // CPP-CHECK-NEXT: ret void // -void test_svmla4_f64(uint32_t slice_base, svfloat64x4_t zn, svfloat64x4_t zm) __arm_streaming __arm_shared_za { +void test_svmla4_f64(uint32_t slice_base, svfloat64x4_t zn, svfloat64x4_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svmla_za64,_f64,_vg1x4,,)(slice_base, zn, zm); } @@ -223,7 +223,7 @@ void test_svmla4_f64(uint32_t slice_base, svfloat64x4_t zn, svfloat64x4_t zm) __ // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmla.single.vg1x2.nxv2f64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // -void test_svmla_single2_f64(uint32_t slice_base, svfloat64x2_t zn, svfloat64_t zm) __arm_streaming __arm_shared_za { +void test_svmla_single2_f64(uint32_t slice_base, svfloat64x2_t zn, svfloat64_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svmla,_single,_za64,_f64,_vg1x2)(slice_base, zn, zm); } @@ -245,7 +245,7 @@ void test_svmla_single2_f64(uint32_t slice_base, svfloat64x2_t zn, svfloat64_t z // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmla.single.vg1x4.nxv2f64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // -void test_svmla_single4_f64(uint32_t slice_base, svfloat64x4_t zn, svfloat64_t zm) __arm_streaming __arm_shared_za { +void test_svmla_single4_f64(uint32_t slice_base, svfloat64x4_t zn, svfloat64_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svmla,_single,_za64,_f64,_vg1x4)(slice_base, zn, zm); } @@ -265,7 +265,7 @@ void test_svmla_single4_f64(uint32_t slice_base, svfloat64x4_t zn, svfloat64_t z // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmla.lane.vg1x2.nxv2f64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]], i32 1) // CPP-CHECK-NEXT: ret void // -void test_svmla_lane2_f64(uint32_t slice_base, svfloat64x2_t zn, svfloat64_t zm) __arm_streaming 
__arm_shared_za { +void test_svmla_lane2_f64(uint32_t slice_base, svfloat64x2_t zn, svfloat64_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svmla_lane_za64,_f64,_vg1x2,,)(slice_base, zn, zm, 1); } @@ -287,6 +287,6 @@ void test_svmla_lane2_f64(uint32_t slice_base, svfloat64x2_t zn, svfloat64_t zm) // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmla.lane.vg1x4.nxv2f64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]], i32 1) // CPP-CHECK-NEXT: ret void // -void test_svmla_lane4_f64(uint32_t slice_base, svfloat64x4_t zn, svfloat64_t zm) __arm_streaming __arm_shared_za { +void test_svmla_lane4_f64(uint32_t slice_base, svfloat64x4_t zn, svfloat64_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svmla_lane_za64,_f64,_vg1x4,,)(slice_base, zn, zm, 1); } diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mlal.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mlal.c index 032830512987a..4b785cb86df64 100644 --- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mlal.c +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mlal.c @@ -35,7 +35,7 @@ // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlal.vg2x2.nxv8f16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) // CPP-CHECK-NEXT: ret void // -void test_svmla2_f16(uint32_t slice_base, svfloat16x2_t zn, svfloat16x2_t zm) __arm_streaming __arm_shared_za +void test_svmla2_f16(uint32_t slice_base, svfloat16x2_t zn, svfloat16x2_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svmla_za32,_f16,_vg2x2,,)(slice_base, zn, zm); } @@ -58,7 +58,7 @@ void test_svmla2_f16(uint32_t slice_base, svfloat16x2_t zn, svfloat16x2_t zm) __ // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlal.vg2x2.nxv8bf16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) // CPP-CHECK-NEXT: ret void // -void test_svmla2_bf16(uint32_t slice_base, svbfloat16x2_t zn, svbfloat16x2_t zm) __arm_streaming __arm_shared_za +void test_svmla2_bf16(uint32_t 
slice_base, svbfloat16x2_t zn, svbfloat16x2_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svmla_za32,_bf16,_vg2x2,,)(slice_base, zn, zm); } @@ -81,7 +81,7 @@ void test_svmla2_bf16(uint32_t slice_base, svbfloat16x2_t zn, svbfloat16x2_t zm) // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umlal.vg2x2.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) // CPP-CHECK-NEXT: ret void // -void test_svmla2_u16(uint32_t slice_base, svuint16x2_t zn, svuint16x2_t zm) __arm_streaming __arm_shared_za +void test_svmla2_u16(uint32_t slice_base, svuint16x2_t zn, svuint16x2_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svmla_za32,_u16,_vg2x2,,)(slice_base, zn, zm); } @@ -104,7 +104,7 @@ void test_svmla2_u16(uint32_t slice_base, svuint16x2_t zn, svuint16x2_t zm) __ar // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smlal.vg2x2.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) // CPP-CHECK-NEXT: ret void // -void test_svmla2_s16(uint32_t slice_base, svint16x2_t zn, svint16x2_t zm) __arm_streaming __arm_shared_za +void test_svmla2_s16(uint32_t slice_base, svint16x2_t zn, svint16x2_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svmla_za32,_s16,_vg2x2,,)(slice_base, zn, zm); } @@ -135,7 +135,7 @@ void test_svmla2_s16(uint32_t slice_base, svint16x2_t zn, svint16x2_t zm) __arm_ // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlal.vg2x4.nxv8f16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) // CPP-CHECK-NEXT: ret void // -void test_svmla4_f16(uint32_t slice_base, svfloat16x4_t zn, svfloat16x4_t zm) __arm_streaming __arm_shared_za +void test_svmla4_f16(uint32_t slice_base, svfloat16x4_t zn, svfloat16x4_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svmla_za32,_f16,_vg2x4,,)(slice_base, zn, zm); } @@ -166,7 +166,7 @@ void test_svmla4_f16(uint32_t slice_base, svfloat16x4_t zn, svfloat16x4_t zm) __ // CPP-CHECK-NEXT: tail call void 
@llvm.aarch64.sme.fmlal.vg2x4.nxv8bf16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) // CPP-CHECK-NEXT: ret void // -void test_svmla4_bf16(uint32_t slice_base, svbfloat16x4_t zn, svbfloat16x4_t zm) __arm_streaming __arm_shared_za +void test_svmla4_bf16(uint32_t slice_base, svbfloat16x4_t zn, svbfloat16x4_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svmla_za32,_bf16,_vg2x4,,)(slice_base, zn, zm); } @@ -197,7 +197,7 @@ void test_svmla4_bf16(uint32_t slice_base, svbfloat16x4_t zn, svbfloat16x4_t zm) // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umlal.vg2x4.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) // CPP-CHECK-NEXT: ret void // -void test_svmla4_u16(uint32_t slice_base, svuint16x4_t zn, svuint16x4_t zm) __arm_streaming __arm_shared_za +void test_svmla4_u16(uint32_t slice_base, svuint16x4_t zn, svuint16x4_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svmla_za32,_u16,_vg2x4,,)(slice_base, zn, zm); } @@ -228,7 +228,7 @@ void test_svmla4_u16(uint32_t slice_base, svuint16x4_t zn, svuint16x4_t zm) __ar // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smlal.vg2x4.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) // CPP-CHECK-NEXT: ret void // -void test_svmla4_s16(uint32_t slice_base, svint16x4_t zn, svint16x4_t zm) __arm_streaming __arm_shared_za +void test_svmla4_s16(uint32_t slice_base, svint16x4_t zn, svint16x4_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svmla_za32,_s16,_vg2x4,,)(slice_base, zn, zm); } @@ -245,7 +245,7 @@ void test_svmla4_s16(uint32_t slice_base, svint16x4_t zn, svint16x4_t zm) __arm_ // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlal.single.vg2x1.nxv8f16(i32 [[SLICE_BASE:%.*]], [[ZN:%.*]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // -void test_svmla_single1_f16(uint32_t slice_base, svfloat16_t zn, svfloat16_t zm) 
__arm_streaming __arm_shared_za +void test_svmla_single1_f16(uint32_t slice_base, svfloat16_t zn, svfloat16_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svmla_za32,_f16,_vg2x1,,)(slice_base, zn, zm); } @@ -260,7 +260,7 @@ void test_svmla_single1_f16(uint32_t slice_base, svfloat16_t zn, svfloat16_t zm) // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlal.single.vg2x1.nxv8bf16(i32 [[SLICE_BASE:%.*]], [[ZN:%.*]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // -void test_svmla_single1_bf16(uint32_t slice_base, svbfloat16_t zn, svbfloat16_t zm) __arm_streaming __arm_shared_za +void test_svmla_single1_bf16(uint32_t slice_base, svbfloat16_t zn, svbfloat16_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svmla_za32,_bf16,_vg2x1,,)(slice_base, zn, zm); } @@ -275,7 +275,7 @@ void test_svmla_single1_bf16(uint32_t slice_base, svbfloat16_t zn, svbfloat16_t // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umlal.single.vg2x1.nxv8i16(i32 [[SLICE_BASE:%.*]], [[ZN:%.*]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // -void test_svmla_single1_u16(uint32_t slice_base, svuint16_t zn, svuint16_t zm) __arm_streaming __arm_shared_za +void test_svmla_single1_u16(uint32_t slice_base, svuint16_t zn, svuint16_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svmla_za32,_u16,_vg2x1,,)(slice_base, zn, zm); } @@ -290,7 +290,7 @@ void test_svmla_single1_u16(uint32_t slice_base, svuint16_t zn, svuint16_t zm) _ // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smlal.single.vg2x1.nxv8i16(i32 [[SLICE_BASE:%.*]], [[ZN:%.*]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // -void test_svmla_single1_s16(uint32_t slice_base, svint16_t zn, svint16_t zm) __arm_streaming __arm_shared_za +void test_svmla_single1_s16(uint32_t slice_base, svint16_t zn, svint16_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svmla_za32,_s16,_vg2x1,,)(slice_base, zn, zm); } @@ -309,7 +309,7 @@ void test_svmla_single1_s16(uint32_t slice_base, svint16_t zn, svint16_t zm) __a // CPP-CHECK-NEXT: tail call 
void @llvm.aarch64.sme.fmlal.single.vg2x2.nxv8f16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // -void test_svmla_single2_f16(uint32_t slice_base, svfloat16x2_t zn, svfloat16_t zm) __arm_streaming __arm_shared_za +void test_svmla_single2_f16(uint32_t slice_base, svfloat16x2_t zn, svfloat16_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svmla,_single,_za32,_f16,_vg2x2)(slice_base, zn, zm); } @@ -328,7 +328,7 @@ void test_svmla_single2_f16(uint32_t slice_base, svfloat16x2_t zn, svfloat16_t z // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlal.single.vg2x2.nxv8bf16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // -void test_svmla_single2_bf16(uint32_t slice_base, svbfloat16x2_t zn, svbfloat16_t zm) __arm_streaming __arm_shared_za +void test_svmla_single2_bf16(uint32_t slice_base, svbfloat16x2_t zn, svbfloat16_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svmla,_single,_za32,_bf16,_vg2x2)(slice_base, zn, zm); } @@ -347,7 +347,7 @@ void test_svmla_single2_bf16(uint32_t slice_base, svbfloat16x2_t zn, svbfloat16_ // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umlal.single.vg2x2.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // -void test_svmla_single2_u16(uint32_t slice_base, svuint16x2_t zn, svuint16_t zm) __arm_streaming __arm_shared_za +void test_svmla_single2_u16(uint32_t slice_base, svuint16x2_t zn, svuint16_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svmla,_single,_za32,_u16,_vg2x2)(slice_base, zn, zm); } @@ -366,7 +366,7 @@ void test_svmla_single2_u16(uint32_t slice_base, svuint16x2_t zn, svuint16_t zm) // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smlal.single.vg2x2.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // -void test_svmla_single2_s16(uint32_t slice_base, svint16x2_t zn, svint16_t zm) __arm_streaming __arm_shared_za +void 
test_svmla_single2_s16(uint32_t slice_base, svint16x2_t zn, svint16_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svmla,_single,_za32,_s16,_vg2x2)(slice_base, zn, zm); } @@ -389,7 +389,7 @@ void test_svmla_single2_s16(uint32_t slice_base, svint16x2_t zn, svint16_t zm) _ // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlal.single.vg2x4.nxv8f16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // -void test_svmla_single4_f16(uint32_t slice_base, svfloat16x4_t zn, svfloat16_t zm) __arm_streaming __arm_shared_za +void test_svmla_single4_f16(uint32_t slice_base, svfloat16x4_t zn, svfloat16_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svmla,_single,_za32,_f16,_vg2x4)(slice_base, zn, zm); } @@ -412,7 +412,7 @@ void test_svmla_single4_f16(uint32_t slice_base, svfloat16x4_t zn, svfloat16_t z // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlal.single.vg2x4.nxv8bf16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // -void test_svmla_single4_bf16(uint32_t slice_base, svbfloat16x4_t zn, svbfloat16_t zm) __arm_streaming __arm_shared_za +void test_svmla_single4_bf16(uint32_t slice_base, svbfloat16x4_t zn, svbfloat16_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svmla,_single,_za32,_bf16,_vg2x4)(slice_base, zn, zm); } @@ -435,7 +435,7 @@ void test_svmla_single4_bf16(uint32_t slice_base, svbfloat16x4_t zn, svbfloat16_ // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umlal.single.vg2x4.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // -void test_svmla_single4_u16(uint32_t slice_base, svuint16x4_t zn, svuint16_t zm) __arm_streaming __arm_shared_za +void test_svmla_single4_u16(uint32_t slice_base, svuint16x4_t zn, svuint16_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svmla,_single,_za32,_u16,_vg2x4)(slice_base, zn, zm); } @@ -458,7 +458,7 @@ void 
test_svmla_single4_u16(uint32_t slice_base, svuint16x4_t zn, svuint16_t zm) // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smlal.single.vg2x4.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // -void test_svmla_single4_s16(uint32_t slice_base, svint16x4_t zn, svint16_t zm) __arm_streaming __arm_shared_za +void test_svmla_single4_s16(uint32_t slice_base, svint16x4_t zn, svint16_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svmla,_single,_za32,_s16,_vg2x4)(slice_base, zn, zm); } @@ -477,7 +477,7 @@ void test_svmla_single4_s16(uint32_t slice_base, svint16x4_t zn, svint16_t zm) _ // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlal.lane.vg2x1.nxv8f16(i32 [[SLICE_BASE:%.*]], [[ZN:%.*]], [[ZM:%.*]], i32 7) // CPP-CHECK-NEXT: ret void // -void test_svmla_lane1_f16(uint32_t slice_base, svfloat16_t zn, svfloat16_t zm) __arm_streaming __arm_shared_za +void test_svmla_lane1_f16(uint32_t slice_base, svfloat16_t zn, svfloat16_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svmla_lane_za32,_f16,_vg2x1,,)(slice_base, zn, zm, 7); } @@ -492,7 +492,7 @@ void test_svmla_lane1_f16(uint32_t slice_base, svfloat16_t zn, svfloat16_t zm) _ // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlal.lane.vg2x1.nxv8bf16(i32 [[SLICE_BASE:%.*]], [[ZN:%.*]], [[ZM:%.*]], i32 7) // CPP-CHECK-NEXT: ret void // -void test_svmla_lane1_bf16(uint32_t slice_base, svbfloat16_t zn, svbfloat16_t zm) __arm_streaming __arm_shared_za +void test_svmla_lane1_bf16(uint32_t slice_base, svbfloat16_t zn, svbfloat16_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svmla_lane_za32,_bf16,_vg2x1,,)(slice_base, zn, zm, 7); } @@ -507,7 +507,7 @@ void test_svmla_lane1_bf16(uint32_t slice_base, svbfloat16_t zn, svbfloat16_t zm // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umlal.lane.vg2x1.nxv8i16(i32 [[SLICE_BASE:%.*]], [[ZN:%.*]], [[ZM:%.*]], i32 7) // CPP-CHECK-NEXT: ret void // -void test_svmla_lane1_u16(uint32_t 
slice_base, svuint16_t zn, svuint16_t zm) __arm_streaming __arm_shared_za +void test_svmla_lane1_u16(uint32_t slice_base, svuint16_t zn, svuint16_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svmla_lane_za32,_u16,_vg2x1,,)(slice_base, zn, zm, 7); } @@ -522,7 +522,7 @@ void test_svmla_lane1_u16(uint32_t slice_base, svuint16_t zn, svuint16_t zm) __a // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smlal.lane.vg2x1.nxv8i16(i32 [[SLICE_BASE:%.*]], [[ZN:%.*]], [[ZM:%.*]], i32 7) // CPP-CHECK-NEXT: ret void // -void test_svmla_lane1_s16(uint32_t slice_base, svint16_t zn, svint16_t zm) __arm_streaming __arm_shared_za +void test_svmla_lane1_s16(uint32_t slice_base, svint16_t zn, svint16_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svmla_lane_za32,_s16,_vg2x1,,)(slice_base, zn, zm, 7); } @@ -541,7 +541,7 @@ void test_svmla_lane1_s16(uint32_t slice_base, svint16_t zn, svint16_t zm) __arm // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlal.lane.vg2x2.nxv8f16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]], i32 7) // CPP-CHECK-NEXT: ret void // -void test_svmla_lane2_f16(uint32_t slice_base, svfloat16x2_t zn, svfloat16_t zm) __arm_streaming __arm_shared_za +void test_svmla_lane2_f16(uint32_t slice_base, svfloat16x2_t zn, svfloat16_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svmla_lane_za32,_f16,_vg2x2,,)(slice_base, zn, zm, 7); } @@ -560,7 +560,7 @@ void test_svmla_lane2_f16(uint32_t slice_base, svfloat16x2_t zn, svfloat16_t zm) // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlal.lane.vg2x2.nxv8bf16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]], i32 7) // CPP-CHECK-NEXT: ret void // -void test_svmla_lane2_bf16(uint32_t slice_base, svbfloat16x2_t zn, svbfloat16_t zm) __arm_streaming __arm_shared_za +void test_svmla_lane2_bf16(uint32_t slice_base, svbfloat16x2_t zn, svbfloat16_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svmla_lane_za32,_bf16,_vg2x2,,)(slice_base, zn, zm, 7); } @@ -579,7 +579,7 @@ void 
test_svmla_lane2_bf16(uint32_t slice_base, svbfloat16x2_t zn, svbfloat16_t // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umlal.lane.vg2x2.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]], i32 7) // CPP-CHECK-NEXT: ret void // -void test_svmla_lane2_u16(uint32_t slice_base, svuint16x2_t zn, svuint16_t zm) __arm_streaming __arm_shared_za +void test_svmla_lane2_u16(uint32_t slice_base, svuint16x2_t zn, svuint16_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svmla_lane_za32,_u16,_vg2x2,,)(slice_base, zn, zm, 7); } @@ -598,7 +598,7 @@ void test_svmla_lane2_u16(uint32_t slice_base, svuint16x2_t zn, svuint16_t zm) _ // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smlal.lane.vg2x2.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]], i32 7) // CPP-CHECK-NEXT: ret void // -void test_svmla_lane2_s16(uint32_t slice_base, svint16x2_t zn, svint16_t zm) __arm_streaming __arm_shared_za +void test_svmla_lane2_s16(uint32_t slice_base, svint16x2_t zn, svint16_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svmla_lane_za32,_s16,_vg2x2,,)(slice_base, zn, zm, 7); } @@ -621,7 +621,7 @@ void test_svmla_lane2_s16(uint32_t slice_base, svint16x2_t zn, svint16_t zm) __a // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlal.lane.vg2x4.nxv8f16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]], i32 7) // CPP-CHECK-NEXT: ret void // -void test_svmla_lane4_f16(uint32_t slice_base, svfloat16x4_t zn, svfloat16_t zm) __arm_streaming __arm_shared_za +void test_svmla_lane4_f16(uint32_t slice_base, svfloat16x4_t zn, svfloat16_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svmla_lane_za32,_f16,_vg2x4,,)(slice_base, zn, zm, 7); } @@ -644,7 +644,7 @@ void test_svmla_lane4_f16(uint32_t slice_base, svfloat16x4_t zn, svfloat16_t zm) // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlal.lane.vg2x4.nxv8bf16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]], i32 7) // CPP-CHECK-NEXT: ret void // 
-void test_svmla_lane4_bf16(uint32_t slice_base, svbfloat16x4_t zn, svbfloat16_t zm) __arm_streaming __arm_shared_za +void test_svmla_lane4_bf16(uint32_t slice_base, svbfloat16x4_t zn, svbfloat16_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svmla_lane_za32,_bf16,_vg2x4,,)(slice_base, zn, zm, 7); } @@ -667,7 +667,7 @@ void test_svmla_lane4_bf16(uint32_t slice_base, svbfloat16x4_t zn, svbfloat16_t // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umlal.lane.vg2x4.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]], i32 7) // CPP-CHECK-NEXT: ret void // -void test_svmla_lane4_u16(uint32_t slice_base, svuint16x4_t zn, svuint16_t zm) __arm_streaming __arm_shared_za +void test_svmla_lane4_u16(uint32_t slice_base, svuint16x4_t zn, svuint16_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svmla_lane_za32,_u16,_vg2x4,,)(slice_base, zn, zm, 7); } @@ -690,7 +690,7 @@ void test_svmla_lane4_u16(uint32_t slice_base, svuint16x4_t zn, svuint16_t zm) _ // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smlal.lane.vg2x4.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]], i32 7) // CPP-CHECK-NEXT: ret void // -void test_svmla_lane4_s16(uint32_t slice_base, svint16x4_t zn, svint16_t zm) __arm_streaming __arm_shared_za +void test_svmla_lane4_s16(uint32_t slice_base, svint16x4_t zn, svint16_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svmla_lane_za32,_s16,_vg2x4,,)(slice_base, zn, zm, 7); } diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mlall.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mlall.c index 44e715e9581b7..50cbe3e4bac5b 100644 --- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mlall.c +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mlall.c @@ -31,7 +31,7 @@ // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smla.za32.single.vg4x1.nxv16i8(i32 [[SLICE_BASE:%.*]], [[ZN:%.*]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // -void 
test_svmla_single_x1_s8(uint32_t slice_base, svint8_t zn, svint8_t zm) __arm_streaming __arm_shared_za +void test_svmla_single_x1_s8(uint32_t slice_base, svint8_t zn, svint8_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svmla_za32,_s8,_vg4x1,,)(slice_base, zn, zm); } @@ -46,7 +46,7 @@ void test_svmla_single_x1_s8(uint32_t slice_base, svint8_t zn, svint8_t zm) __ar // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smla.za64.single.vg4x1.nxv8i16(i32 [[SLICE_BASE:%.*]], [[ZN:%.*]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // -void test_svmla_single_x1_s16(uint32_t slice_base, svint16_t zn, svint16_t zm) __arm_streaming __arm_shared_za +void test_svmla_single_x1_s16(uint32_t slice_base, svint16_t zn, svint16_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svmla_za64,_s16,_vg4x1,,)(slice_base, zn, zm); } @@ -61,7 +61,7 @@ void test_svmla_single_x1_s16(uint32_t slice_base, svint16_t zn, svint16_t zm) _ // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umla.za32.single.vg4x1.nxv16i8(i32 [[SLICE_BASE:%.*]], [[ZN:%.*]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // -void test_uvmlal_single_x1_u8(uint32_t slice_base, svuint8_t zn, svuint8_t zm) __arm_streaming __arm_shared_za +void test_uvmlal_single_x1_u8(uint32_t slice_base, svuint8_t zn, svuint8_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svmla_za32,_u8,_vg4x1,,)(slice_base, zn, zm); } @@ -76,7 +76,7 @@ void test_uvmlal_single_x1_u8(uint32_t slice_base, svuint8_t zn, svuint8_t zm) _ // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umla.za64.single.vg4x1.nxv8i16(i32 [[SLICE_BASE:%.*]], [[ZN:%.*]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // -void test_uvmlal_single_x1_u16(uint32_t slice_base, svuint16_t zn, svuint16_t zm) __arm_streaming __arm_shared_za +void test_uvmlal_single_x1_u16(uint32_t slice_base, svuint16_t zn, svuint16_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svmla_za64,_u16,_vg4x1,,)(slice_base, zn, zm); } @@ -93,7 +93,7 @@ void test_uvmlal_single_x1_u16(uint32_t 
slice_base, svuint16_t zn, svuint16_t zm // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smls.za32.single.vg4x1.nxv16i8(i32 [[SLICE_BASE:%.*]], [[ZN:%.*]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // -void test_svmls_single_x1_s8(uint32_t slice_base, svint8_t zn, svint8_t zm) __arm_streaming __arm_shared_za +void test_svmls_single_x1_s8(uint32_t slice_base, svint8_t zn, svint8_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svmls_za32,_s8,_vg4x1,,)(slice_base, zn, zm); } @@ -108,7 +108,7 @@ void test_svmls_single_x1_s8(uint32_t slice_base, svint8_t zn, svint8_t zm) __ar // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smls.za64.single.vg4x1.nxv8i16(i32 [[SLICE_BASE:%.*]], [[ZN:%.*]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // -void test_svmls_single_x1_s16(uint32_t slice_base, svint16_t zn, svint16_t zm) __arm_streaming __arm_shared_za +void test_svmls_single_x1_s16(uint32_t slice_base, svint16_t zn, svint16_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svmls_za64,_s16,_vg4x1,,)(slice_base, zn, zm); } @@ -123,7 +123,7 @@ void test_svmls_single_x1_s16(uint32_t slice_base, svint16_t zn, svint16_t zm) _ // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umls.za32.single.vg4x1.nxv16i8(i32 [[SLICE_BASE:%.*]], [[ZN:%.*]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // -void test_uvmlsl_single_x1_u8(uint32_t slice_base, svuint8_t zn, svuint8_t zm) __arm_streaming __arm_shared_za +void test_uvmlsl_single_x1_u8(uint32_t slice_base, svuint8_t zn, svuint8_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svmls_za32,_u8,_vg4x1,,)(slice_base, zn, zm); } @@ -138,7 +138,7 @@ void test_uvmlsl_single_x1_u8(uint32_t slice_base, svuint8_t zn, svuint8_t zm) _ // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umls.za64.single.vg4x1.nxv8i16(i32 [[SLICE_BASE:%.*]], [[ZN:%.*]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // -void test_uvmlsl_single_x1_u16(uint32_t slice_base, svuint16_t zn, svuint16_t zm) __arm_streaming __arm_shared_za +void 
test_uvmlsl_single_x1_u16(uint32_t slice_base, svuint16_t zn, svuint16_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svmls_za64,_u16,_vg4x1,,)(slice_base, zn, zm); } @@ -155,7 +155,7 @@ void test_uvmlsl_single_x1_u16(uint32_t slice_base, svuint16_t zn, svuint16_t zm // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.usmla.za32.single.vg4x1.nxv16i8(i32 [[SLICE_BASE:%.*]], [[ZM:%.*]], [[ZN:%.*]]) // CPP-CHECK-NEXT: ret void // -void test_sumlall_single_x1_s8(uint32_t slice_base, svint8_t zn, svuint8_t zm) __arm_streaming __arm_shared_za +void test_sumlall_single_x1_s8(uint32_t slice_base, svint8_t zn, svuint8_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svsumla_za32,_s8,_vg4x1,,)(slice_base, zn, zm); } @@ -172,7 +172,7 @@ void test_sumlall_single_x1_s8(uint32_t slice_base, svint8_t zn, svuint8_t zm) _ // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.usmla.za32.single.vg4x1.nxv16i8(i32 [[SLICE_BASE:%.*]], [[ZN:%.*]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // -void test_usmlall_single_x1_u8(uint32_t slice_base, svuint8_t zn, svint8_t zm) __arm_streaming __arm_shared_za +void test_usmlall_single_x1_u8(uint32_t slice_base, svuint8_t zn, svint8_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svusmla_za32,_u8,_vg4x1,,)(slice_base, zn, zm); } @@ -197,7 +197,7 @@ void test_usmlall_single_x1_u8(uint32_t slice_base, svuint8_t zn, svint8_t zm) _ // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smla.za32.single.vg4x2.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // -void test_svmla_single_x2_s8(uint32_t slice_base, svint8x2_t zn, svint8_t zm) __arm_streaming __arm_shared_za +void test_svmla_single_x2_s8(uint32_t slice_base, svint8x2_t zn, svint8_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svmla,_single,_za32,_s8,_vg4x2)(slice_base, zn, zm); } @@ -216,7 +216,7 @@ void test_svmla_single_x2_s8(uint32_t slice_base, svint8x2_t zn, svint8_t zm) __ // CPP-CHECK-NEXT: tail call void 
@llvm.aarch64.sme.smla.za64.single.vg4x2.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // -void test_svmla_single_x2_s16(uint32_t slice_base, svint16x2_t zn, svint16_t zm) __arm_streaming __arm_shared_za +void test_svmla_single_x2_s16(uint32_t slice_base, svint16x2_t zn, svint16_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svmla,_single,_za64,_s16,_vg4x2)(slice_base, zn, zm); } @@ -235,7 +235,7 @@ void test_svmla_single_x2_s16(uint32_t slice_base, svint16x2_t zn, svint16_t zm) // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umla.za32.single.vg4x2.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // -void test_svmla_single_x2_u8(uint32_t slice_base, svuint8x2_t zn, svuint8_t zm) __arm_streaming __arm_shared_za +void test_svmla_single_x2_u8(uint32_t slice_base, svuint8x2_t zn, svuint8_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svmla,_single,_za32,_u8,_vg4x2)(slice_base, zn, zm); } @@ -254,7 +254,7 @@ void test_svmla_single_x2_u8(uint32_t slice_base, svuint8x2_t zn, svuint8_t zm) // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umla.za64.single.vg4x2.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // -void test_svmla_single_x2_u16(uint32_t slice_base, svuint16x2_t zn, svuint16_t zm) __arm_streaming __arm_shared_za +void test_svmla_single_x2_u16(uint32_t slice_base, svuint16x2_t zn, svuint16_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svmla,_single,_za64,_u16,_vg4x2)(slice_base, zn, zm); } @@ -275,7 +275,7 @@ void test_svmla_single_x2_u16(uint32_t slice_base, svuint16x2_t zn, svuint16_t z // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smls.za32.single.vg4x2.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // -void test_svmls_single_x2_s8(uint32_t slice_base, svint8x2_t zn, svint8_t zm) __arm_streaming __arm_shared_za +void 
test_svmls_single_x2_s8(uint32_t slice_base, svint8x2_t zn, svint8_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svmls,_single,_za32,_s8,_vg4x2)(slice_base, zn, zm); } @@ -294,7 +294,7 @@ void test_svmls_single_x2_s8(uint32_t slice_base, svint8x2_t zn, svint8_t zm) __ // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smls.za64.single.vg4x2.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // -void test_svmls_single_x2_s16(uint32_t slice_base, svint16x2_t zn, svint16_t zm) __arm_streaming __arm_shared_za +void test_svmls_single_x2_s16(uint32_t slice_base, svint16x2_t zn, svint16_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svmls,_single,_za64,_s16,_vg4x2)(slice_base, zn, zm); } @@ -313,7 +313,7 @@ void test_svmls_single_x2_s16(uint32_t slice_base, svint16x2_t zn, svint16_t zm) // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umls.za32.single.vg4x2.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // -void test_svmls_single_x2_u8(uint32_t slice_base, svuint8x2_t zn, svuint8_t zm) __arm_streaming __arm_shared_za +void test_svmls_single_x2_u8(uint32_t slice_base, svuint8x2_t zn, svuint8_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svmls,_single,_za32,_u8,_vg4x2)(slice_base, zn, zm); } @@ -332,7 +332,7 @@ void test_svmls_single_x2_u8(uint32_t slice_base, svuint8x2_t zn, svuint8_t zm) // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umls.za64.single.vg4x2.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // -void test_svmls_single_x2_u16(uint32_t slice_base, svuint16x2_t zn, svuint16_t zm) __arm_streaming __arm_shared_za +void test_svmls_single_x2_u16(uint32_t slice_base, svuint16x2_t zn, svuint16_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svmls,_single,_za64,_u16,_vg4x2)(slice_base, zn, zm); } @@ -353,7 +353,7 @@ void test_svmls_single_x2_u16(uint32_t slice_base, svuint16x2_t zn, svuint16_t z 
// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sumla.za32.single.vg4x2.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // -void test_svsumla_single_x2_s8(uint32_t slice_base, svint8x2_t zn, svuint8_t zm) __arm_streaming __arm_shared_za +void test_svsumla_single_x2_s8(uint32_t slice_base, svint8x2_t zn, svuint8_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svsumla,_single,_za32,_s8,_vg4x2)(slice_base, zn, zm); } @@ -374,7 +374,7 @@ void test_svsumla_single_x2_s8(uint32_t slice_base, svint8x2_t zn, svuint8_t zm) // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.usmla.za32.single.vg4x2.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // -void test_usmlall_single_x2_u8(uint32_t slice_base, svuint8x2_t zn, svint8_t zm) __arm_streaming __arm_shared_za +void test_usmlall_single_x2_u8(uint32_t slice_base, svuint8x2_t zn, svint8_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svusmla,_single,_za32,_u8,_vg4x2)(slice_base, zn, zm); } @@ -403,7 +403,7 @@ void test_usmlall_single_x2_u8(uint32_t slice_base, svuint8x2_t zn, svint8_t zm) // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smla.za32.single.vg4x4.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // -void test_svmla_single_x4_s8(uint32_t slice_base, svint8x4_t zn, svint8_t zm) __arm_streaming __arm_shared_za +void test_svmla_single_x4_s8(uint32_t slice_base, svint8x4_t zn, svint8_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svmla,_single,_za32,_s8,_vg4x4)(slice_base, zn, zm); } @@ -426,7 +426,7 @@ void test_svmla_single_x4_s8(uint32_t slice_base, svint8x4_t zn, svint8_t zm) __ // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smla.za64.single.vg4x4.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // -void test_svmla_single_x4_s16(uint32_t slice_base, svint16x4_t zn, 
svint16_t zm) __arm_streaming __arm_shared_za +void test_svmla_single_x4_s16(uint32_t slice_base, svint16x4_t zn, svint16_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svmla,_single,_za64,_s16,_vg4x4)(slice_base, zn, zm); } @@ -449,7 +449,7 @@ void test_svmla_single_x4_s16(uint32_t slice_base, svint16x4_t zn, svint16_t zm) // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umla.za32.single.vg4x4.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // -void test_svmla_single_x4_u8(uint32_t slice_base, svuint8x4_t zn, svuint8_t zm) __arm_streaming __arm_shared_za +void test_svmla_single_x4_u8(uint32_t slice_base, svuint8x4_t zn, svuint8_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svmla,_single,_za32,_u8,_vg4x4)(slice_base, zn, zm); } @@ -472,7 +472,7 @@ void test_svmla_single_x4_u8(uint32_t slice_base, svuint8x4_t zn, svuint8_t zm) // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umla.za64.single.vg4x4.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // -void test_svmla_single_x4_u16(uint32_t slice_base, svuint16x4_t zn, svuint16_t zm) __arm_streaming __arm_shared_za +void test_svmla_single_x4_u16(uint32_t slice_base, svuint16x4_t zn, svuint16_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svmla,_single,_za64,_u16,_vg4x4)(slice_base, zn, zm); } @@ -497,7 +497,7 @@ void test_svmla_single_x4_u16(uint32_t slice_base, svuint16x4_t zn, svuint16_t z // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smls.za32.single.vg4x4.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // -void test_svmls_single_x4_s8(uint32_t slice_base, svint8x4_t zn, svint8_t zm) __arm_streaming __arm_shared_za +void test_svmls_single_x4_s8(uint32_t slice_base, svint8x4_t zn, svint8_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svmls,_single,_za32,_s8,_vg4x4)(slice_base, zn, 
zm); } @@ -520,7 +520,7 @@ void test_svmls_single_x4_s8(uint32_t slice_base, svint8x4_t zn, svint8_t zm) __ // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smls.za64.single.vg4x4.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // -void test_svmls_single_x4_s16(uint32_t slice_base, svint16x4_t zn, svint16_t zm) __arm_streaming __arm_shared_za +void test_svmls_single_x4_s16(uint32_t slice_base, svint16x4_t zn, svint16_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svmls,_single,_za64,_s16,_vg4x4)(slice_base, zn, zm); } @@ -543,7 +543,7 @@ void test_svmls_single_x4_s16(uint32_t slice_base, svint16x4_t zn, svint16_t zm) // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umls.za32.single.vg4x4.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // -void test_svmls_single_x4_u8(uint32_t slice_base, svuint8x4_t zn, svuint8_t zm) __arm_streaming __arm_shared_za +void test_svmls_single_x4_u8(uint32_t slice_base, svuint8x4_t zn, svuint8_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svmls,_single,_za32,_u8,_vg4x4)(slice_base, zn, zm); } @@ -566,7 +566,7 @@ void test_svmls_single_x4_u8(uint32_t slice_base, svuint8x4_t zn, svuint8_t zm) // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umls.za64.single.vg4x4.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // -void test_svmls_single_x4_u16(uint32_t slice_base, svuint16x4_t zn, svuint16_t zm) __arm_streaming __arm_shared_za +void test_svmls_single_x4_u16(uint32_t slice_base, svuint16x4_t zn, svuint16_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svmls,_single,_za64,_u16,_vg4x4)(slice_base, zn, zm); } @@ -591,7 +591,7 @@ void test_svmls_single_x4_u16(uint32_t slice_base, svuint16x4_t zn, svuint16_t z // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sumla.za32.single.vg4x4.nxv16i8(i32 [[SLICE_BASE:%.*]], 
[[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // -void test_svsumla_single_x4_s8(uint32_t slice_base, svint8x4_t zn, svuint8_t zm) __arm_streaming __arm_shared_za +void test_svsumla_single_x4_s8(uint32_t slice_base, svint8x4_t zn, svuint8_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svsumla,_single,_za32,_s8,_vg4x4)(slice_base, zn, zm); } @@ -616,7 +616,7 @@ void test_svsumla_single_x4_s8(uint32_t slice_base, svint8x4_t zn, svuint8_t zm) // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.usmla.za32.single.vg4x4.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // -void test_usmlall_single_x4_u8(uint32_t slice_base, svuint8x4_t zn, svint8_t zm) __arm_streaming __arm_shared_za +void test_usmlall_single_x4_u8(uint32_t slice_base, svuint8x4_t zn, svint8_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svusmla,_single,_za32,_u8,_vg4x4)(slice_base, zn, zm); } @@ -645,7 +645,7 @@ void test_usmlall_single_x4_u8(uint32_t slice_base, svuint8x4_t zn, svint8_t zm) // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smla.za32.vg4x2.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) // CPP-CHECK-NEXT: ret void // -void test_mlal_multi_x2_s8(uint32_t slice_base, svint8x2_t zn, svint8x2_t zm) __arm_streaming __arm_shared_za +void test_mlal_multi_x2_s8(uint32_t slice_base, svint8x2_t zn, svint8x2_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svmla_za32,_s8,_vg4x2,,)(slice_base, zn, zm); } @@ -668,7 +668,7 @@ void test_mlal_multi_x2_s8(uint32_t slice_base, svint8x2_t zn, svint8x2_t zm) __ // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smla.za64.vg4x2.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) // CPP-CHECK-NEXT: ret void // -void test_mlal_multi_x2_s16(uint32_t slice_base, svint16x2_t zn, svint16x2_t zm) __arm_streaming __arm_shared_za +void test_mlal_multi_x2_s16(uint32_t slice_base, svint16x2_t zn, 
svint16x2_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svmla_za64,_s16,_vg4x2,,)(slice_base, zn, zm); } @@ -691,7 +691,7 @@ void test_mlal_multi_x2_s16(uint32_t slice_base, svint16x2_t zn, svint16x2_t zm) // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umla.za32.vg4x2.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) // CPP-CHECK-NEXT: ret void // -void test_mlal_multi_x2_u8(uint32_t slice_base, svuint8x2_t zn, svuint8x2_t zm) __arm_streaming __arm_shared_za +void test_mlal_multi_x2_u8(uint32_t slice_base, svuint8x2_t zn, svuint8x2_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svmla_za32,_u8,_vg4x2,,)(slice_base, zn, zm); } @@ -714,7 +714,7 @@ void test_mlal_multi_x2_u8(uint32_t slice_base, svuint8x2_t zn, svuint8x2_t zm) // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umla.za64.vg4x2.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) // CPP-CHECK-NEXT: ret void // -void test_mlal_multi_x2_u16(uint32_t slice_base, svuint16x2_t zn, svuint16x2_t zm) __arm_streaming __arm_shared_za +void test_mlal_multi_x2_u16(uint32_t slice_base, svuint16x2_t zn, svuint16x2_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svmla_za64,_u16,_vg4x2,,)(slice_base, zn, zm); } @@ -739,7 +739,7 @@ void test_mlal_multi_x2_u16(uint32_t slice_base, svuint16x2_t zn, svuint16x2_t z // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smls.za32.vg4x2.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) // CPP-CHECK-NEXT: ret void // -void test_mlsl_multi_x2_s8(uint32_t slice_base, svint8x2_t zn, svint8x2_t zm) __arm_streaming __arm_shared_za +void test_mlsl_multi_x2_s8(uint32_t slice_base, svint8x2_t zn, svint8x2_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svmls_za32,_s8,_vg4x2,,)(slice_base, zn, zm); } @@ -762,7 +762,7 @@ void test_mlsl_multi_x2_s8(uint32_t slice_base, svint8x2_t zn, svint8x2_t zm) __ // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smls.za64.vg4x2.nxv8i16(i32 
[[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) // CPP-CHECK-NEXT: ret void // -void test_mlsl_multi_x2_s16(uint32_t slice_base, svint16x2_t zn, svint16x2_t zm) __arm_streaming __arm_shared_za +void test_mlsl_multi_x2_s16(uint32_t slice_base, svint16x2_t zn, svint16x2_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svmls_za64,_s16,_vg4x2,,)(slice_base, zn, zm); } @@ -785,7 +785,7 @@ void test_mlsl_multi_x2_s16(uint32_t slice_base, svint16x2_t zn, svint16x2_t zm) // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umls.za32.vg4x2.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) // CPP-CHECK-NEXT: ret void // -void test_mlsl_multi_x2_u8(uint32_t slice_base, svuint8x2_t zn, svuint8x2_t zm) __arm_streaming __arm_shared_za +void test_mlsl_multi_x2_u8(uint32_t slice_base, svuint8x2_t zn, svuint8x2_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svmls_za32,_u8,_vg4x2,,)(slice_base, zn, zm); } @@ -808,7 +808,7 @@ void test_mlsl_multi_x2_u8(uint32_t slice_base, svuint8x2_t zn, svuint8x2_t zm) // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umls.za64.vg4x2.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) // CPP-CHECK-NEXT: ret void // -void test_mlsl_multi_x2_u16(uint32_t slice_base, svuint16x2_t zn, svuint16x2_t zm) __arm_streaming __arm_shared_za +void test_mlsl_multi_x2_u16(uint32_t slice_base, svuint16x2_t zn, svuint16x2_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svmls_za64,_u16,_vg4x2,,)(slice_base, zn, zm); } @@ -833,7 +833,7 @@ void test_mlsl_multi_x2_u16(uint32_t slice_base, svuint16x2_t zn, svuint16x2_t z // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.usmla.za32.vg4x2.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP2]], [[TMP3]], [[TMP0]], [[TMP1]]) // CPP-CHECK-NEXT: ret void // -void test_sumlal_multi_x2_s8(uint32_t slice_base, svint8x2_t zn, svuint8x2_t zm) __arm_streaming __arm_shared_za +void test_sumlal_multi_x2_s8(uint32_t slice_base, svint8x2_t zn, svuint8x2_t zm) 
__arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svsumla_za32,_s8,_vg4x2,,)(slice_base, zn, zm); } @@ -858,7 +858,7 @@ void test_sumlal_multi_x2_s8(uint32_t slice_base, svint8x2_t zn, svuint8x2_t zm) // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.usmla.za32.vg4x2.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) // CPP-CHECK-NEXT: ret void // -void test_usmlal_multi_x2_u8(uint32_t slice_base, svuint8x2_t zn, svint8x2_t zm) __arm_streaming __arm_shared_za +void test_usmlal_multi_x2_u8(uint32_t slice_base, svuint8x2_t zn, svint8x2_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svusmla_za32,_u8,_vg4x2,,)(slice_base, zn, zm); } @@ -895,7 +895,7 @@ void test_usmlal_multi_x2_u8(uint32_t slice_base, svuint8x2_t zn, svint8x2_t zm) // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smla.za32.vg4x4.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) // CPP-CHECK-NEXT: ret void // -void test_mlal_multi_x4_s8(uint32_t slice_base, svint8x4_t zn, svint8x4_t zm) __arm_streaming __arm_shared_za +void test_mlal_multi_x4_s8(uint32_t slice_base, svint8x4_t zn, svint8x4_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svmla_za32,_s8,_vg4x4,,)(slice_base, zn, zm); } @@ -926,7 +926,7 @@ void test_mlal_multi_x4_s8(uint32_t slice_base, svint8x4_t zn, svint8x4_t zm) __ // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smla.za64.vg4x4.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) // CPP-CHECK-NEXT: ret void // -void test_mlal_multi_x4_s16(uint32_t slice_base, svint16x4_t zn, svint16x4_t zm) __arm_streaming __arm_shared_za +void test_mlal_multi_x4_s16(uint32_t slice_base, svint16x4_t zn, svint16x4_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svmla_za64,_s16,_vg4x4,,)(slice_base, zn, zm); } @@ -957,7 +957,7 @@ void test_mlal_multi_x4_s16(uint32_t slice_base, svint16x4_t zn, svint16x4_t zm) // CPP-CHECK-NEXT: 
tail call void @llvm.aarch64.sme.umla.za32.vg4x4.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) // CPP-CHECK-NEXT: ret void // -void test_mlal_multi_x4_u8(uint32_t slice_base, svuint8x4_t zn, svuint8x4_t zm) __arm_streaming __arm_shared_za +void test_mlal_multi_x4_u8(uint32_t slice_base, svuint8x4_t zn, svuint8x4_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svmla_za32,_u8,_vg4x4,,)(slice_base, zn, zm); } @@ -988,7 +988,7 @@ void test_mlal_multi_x4_u8(uint32_t slice_base, svuint8x4_t zn, svuint8x4_t zm) // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umla.za64.vg4x4.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) // CPP-CHECK-NEXT: ret void // -void test_mlal_multi_x4_u16(uint32_t slice_base, svuint16x4_t zn, svuint16x4_t zm) __arm_streaming __arm_shared_za +void test_mlal_multi_x4_u16(uint32_t slice_base, svuint16x4_t zn, svuint16x4_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svmla_za64,_u16,_vg4x4,,)(slice_base, zn, zm); } @@ -1021,7 +1021,7 @@ void test_mlal_multi_x4_u16(uint32_t slice_base, svuint16x4_t zn, svuint16x4_t z // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smls.za32.vg4x4.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) // CPP-CHECK-NEXT: ret void // -void test_mlsl_multi_x4_s8(uint32_t slice_base, svint8x4_t zn, svint8x4_t zm) __arm_streaming __arm_shared_za +void test_mlsl_multi_x4_s8(uint32_t slice_base, svint8x4_t zn, svint8x4_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svmls_za32,_s8,_vg4x4,,)(slice_base, zn, zm); } @@ -1052,7 +1052,7 @@ void test_mlsl_multi_x4_s8(uint32_t slice_base, svint8x4_t zn, svint8x4_t zm) __ // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smls.za64.vg4x4.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) // CPP-CHECK-NEXT: 
ret void // -void test_mlsl_multi_x4_s16(uint32_t slice_base, svint16x4_t zn, svint16x4_t zm) __arm_streaming __arm_shared_za +void test_mlsl_multi_x4_s16(uint32_t slice_base, svint16x4_t zn, svint16x4_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svmls_za64,_s16,_vg4x4,,)(slice_base, zn, zm); } @@ -1083,7 +1083,7 @@ void test_mlsl_multi_x4_s16(uint32_t slice_base, svint16x4_t zn, svint16x4_t zm) // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umls.za32.vg4x4.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) // CPP-CHECK-NEXT: ret void // -void test_mlsl_multi_x4_u8(uint32_t slice_base, svuint8x4_t zn, svuint8x4_t zm) __arm_streaming __arm_shared_za +void test_mlsl_multi_x4_u8(uint32_t slice_base, svuint8x4_t zn, svuint8x4_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svmls_za32,_u8,_vg4x4,,)(slice_base, zn, zm); } @@ -1114,7 +1114,7 @@ void test_mlsl_multi_x4_u8(uint32_t slice_base, svuint8x4_t zn, svuint8x4_t zm) // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umls.za64.vg4x4.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) // CPP-CHECK-NEXT: ret void // -void test_mlsl_multi_x4_u16(uint32_t slice_base, svuint16x4_t zn, svuint16x4_t zm) __arm_streaming __arm_shared_za +void test_mlsl_multi_x4_u16(uint32_t slice_base, svuint16x4_t zn, svuint16x4_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svmls_za64,_u16,_vg4x4,,)(slice_base, zn, zm); } @@ -1147,7 +1147,7 @@ void test_mlsl_multi_x4_u16(uint32_t slice_base, svuint16x4_t zn, svuint16x4_t z // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.usmla.za32.vg4x4.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) // CPP-CHECK-NEXT: ret void // -void test_sumlal_multi_x4_s8(uint32_t slice_base, svint8x4_t zn, svuint8x4_t zm) __arm_streaming __arm_shared_za +void test_sumlal_multi_x4_s8(uint32_t 
slice_base, svint8x4_t zn, svuint8x4_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svsumla_za32,_s8,_vg4x4,,)(slice_base, zn, zm); } @@ -1180,7 +1180,7 @@ void test_sumlal_multi_x4_s8(uint32_t slice_base, svint8x4_t zn, svuint8x4_t zm) // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.usmla.za32.vg4x4.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) // CPP-CHECK-NEXT: ret void // -void test_usmlal_multi_x4_u8(uint32_t slice_base, svuint8x4_t zn, svint8x4_t zm) __arm_streaming __arm_shared_za +void test_usmlal_multi_x4_u8(uint32_t slice_base, svuint8x4_t zn, svint8x4_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svusmla_za32,_u8,_vg4x4,,)(slice_base, zn, zm); } @@ -1201,7 +1201,7 @@ void test_usmlal_multi_x4_u8(uint32_t slice_base, svuint8x4_t zn, svint8x4_t zm) // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smla.za32.lane.vg4x1.nxv16i8(i32 [[SLICE_BASE:%.*]], [[ZN:%.*]], [[ZM:%.*]], i32 15) // CPP-CHECK-NEXT: ret void // -void test_smlal_lane_x1_s8(uint32_t slice_base, svint8_t zn, svint8_t zm) __arm_streaming __arm_shared_za +void test_smlal_lane_x1_s8(uint32_t slice_base, svint8_t zn, svint8_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svmla_lane_za32,_s8,_vg4x1,,)(slice_base, zn, zm, 15); } @@ -1216,7 +1216,7 @@ void test_smlal_lane_x1_s8(uint32_t slice_base, svint8_t zn, svint8_t zm) __arm_ // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smla.za64.lane.vg4x1.nxv8i16(i32 [[SLICE_BASE:%.*]], [[ZN:%.*]], [[ZM:%.*]], i32 7) // CPP-CHECK-NEXT: ret void // -void test_smlal_lane_x1_s16(uint32_t slice_base, svint16_t zn, svint16_t zm) __arm_streaming __arm_shared_za +void test_smlal_lane_x1_s16(uint32_t slice_base, svint16_t zn, svint16_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svmla_lane_za64,_s16,_vg4x1,,)(slice_base, zn, zm, 7); } @@ -1231,7 +1231,7 @@ void test_smlal_lane_x1_s16(uint32_t slice_base, svint16_t zn, svint16_t zm) __a // 
CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umla.za32.lane.vg4x1.nxv16i8(i32 [[SLICE_BASE:%.*]], [[ZN:%.*]], [[ZM:%.*]], i32 15) // CPP-CHECK-NEXT: ret void // -void test_smlal_lane_x1_u8(uint32_t slice_base, svuint8_t zn, svuint8_t zm) __arm_streaming __arm_shared_za +void test_smlal_lane_x1_u8(uint32_t slice_base, svuint8_t zn, svuint8_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svmla_lane_za32,_u8,_vg4x1,,)(slice_base, zn, zm, 15); } @@ -1246,7 +1246,7 @@ void test_smlal_lane_x1_u8(uint32_t slice_base, svuint8_t zn, svuint8_t zm) __ar // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umla.za64.lane.vg4x1.nxv8i16(i32 [[SLICE_BASE:%.*]], [[ZN:%.*]], [[ZM:%.*]], i32 7) // CPP-CHECK-NEXT: ret void // -void test_smlal_lane_x1_u16(uint32_t slice_base, svuint16_t zn, svuint16_t zm) __arm_streaming __arm_shared_za +void test_smlal_lane_x1_u16(uint32_t slice_base, svuint16_t zn, svuint16_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svmla_lane_za64,_u16,_vg4x1,,)(slice_base, zn, zm, 7); } @@ -1263,7 +1263,7 @@ void test_smlal_lane_x1_u16(uint32_t slice_base, svuint16_t zn, svuint16_t zm) _ // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smls.za32.lane.vg4x1.nxv16i8(i32 [[SLICE_BASE:%.*]], [[ZN:%.*]], [[ZM:%.*]], i32 15) // CPP-CHECK-NEXT: ret void // -void test_smlsl_lane_x1_s8(uint32_t slice_base, svint8_t zn, svint8_t zm) __arm_streaming __arm_shared_za +void test_smlsl_lane_x1_s8(uint32_t slice_base, svint8_t zn, svint8_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svmls_lane_za32,_s8,_vg4x1,,)(slice_base, zn, zm, 15); } @@ -1278,7 +1278,7 @@ void test_smlsl_lane_x1_s8(uint32_t slice_base, svint8_t zn, svint8_t zm) __arm_ // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smls.za64.lane.vg4x1.nxv8i16(i32 [[SLICE_BASE:%.*]], [[ZN:%.*]], [[ZM:%.*]], i32 7) // CPP-CHECK-NEXT: ret void // -void test_smlsl_lane_x1_s16(uint32_t slice_base, svint16_t zn, svint16_t zm) __arm_streaming __arm_shared_za +void 
test_smlsl_lane_x1_s16(uint32_t slice_base, svint16_t zn, svint16_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svmls_lane_za64,_s16,_vg4x1,,)(slice_base, zn, zm, 7); } @@ -1293,7 +1293,7 @@ void test_smlsl_lane_x1_s16(uint32_t slice_base, svint16_t zn, svint16_t zm) __a // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umls.za32.lane.vg4x1.nxv16i8(i32 [[SLICE_BASE:%.*]], [[ZN:%.*]], [[ZM:%.*]], i32 15) // CPP-CHECK-NEXT: ret void // -void test_smlsl_lane_x1_u8(uint32_t slice_base, svuint8_t zn, svuint8_t zm) __arm_streaming __arm_shared_za +void test_smlsl_lane_x1_u8(uint32_t slice_base, svuint8_t zn, svuint8_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svmls_lane_za32,_u8,_vg4x1,,)(slice_base, zn, zm, 15); } @@ -1308,7 +1308,7 @@ void test_smlsl_lane_x1_u8(uint32_t slice_base, svuint8_t zn, svuint8_t zm) __ar // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umls.za64.lane.vg4x1.nxv8i16(i32 [[SLICE_BASE:%.*]], [[ZN:%.*]], [[ZM:%.*]], i32 7) // CPP-CHECK-NEXT: ret void // -void test_smlsl_lane_x1_u16(uint32_t slice_base, svuint16_t zn, svuint16_t zm) __arm_streaming __arm_shared_za +void test_smlsl_lane_x1_u16(uint32_t slice_base, svuint16_t zn, svuint16_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svmls_lane_za64,_u16,_vg4x1,,)(slice_base, zn, zm, 7); } @@ -1325,7 +1325,7 @@ void test_smlsl_lane_x1_u16(uint32_t slice_base, svuint16_t zn, svuint16_t zm) _ // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sumla.za32.lane.vg4x1.nxv16i8(i32 [[SLICE_BASE:%.*]], [[ZN:%.*]], [[ZM:%.*]], i32 15) // CPP-CHECK-NEXT: ret void // -void test_sumlall_lane_x1_s8(uint32_t slice_base, svint8_t zn, svuint8_t zm) __arm_streaming __arm_shared_za +void test_sumlall_lane_x1_s8(uint32_t slice_base, svint8_t zn, svuint8_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svsumla_lane_za32,_s8,_vg4x1,,)(slice_base, zn, zm, 15); } @@ -1342,7 +1342,7 @@ void test_sumlall_lane_x1_s8(uint32_t slice_base, svint8_t zn, svuint8_t zm) __a // 
CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.usmla.za32.lane.vg4x1.nxv16i8(i32 [[SLICE_BASE:%.*]], [[ZN:%.*]], [[ZM:%.*]], i32 15) // CPP-CHECK-NEXT: ret void // -void test_usmlall_lane_x1_u8(uint32_t slice_base, svuint8_t zn, svint8_t zm) __arm_streaming __arm_shared_za +void test_usmlall_lane_x1_u8(uint32_t slice_base, svuint8_t zn, svint8_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svusmla_lane_za32,_u8,_vg4x1,,)(slice_base, zn, zm, 15); } @@ -1367,7 +1367,7 @@ void test_usmlall_lane_x1_u8(uint32_t slice_base, svuint8_t zn, svint8_t zm) __a // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smla.za32.lane.vg4x2.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]], i32 15) // CPP-CHECK-NEXT: ret void // -void test_smlal_lane_x2_s8(uint32_t slice_base, svint8x2_t zn, svint8_t zm) __arm_streaming __arm_shared_za +void test_smlal_lane_x2_s8(uint32_t slice_base, svint8x2_t zn, svint8_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svmla_lane_za32,_s8,_vg4x2,,)(slice_base, zn, zm, 15); } @@ -1386,7 +1386,7 @@ void test_smlal_lane_x2_s8(uint32_t slice_base, svint8x2_t zn, svint8_t zm) __ar // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smla.za64.lane.vg4x2.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]], i32 7) // CPP-CHECK-NEXT: ret void // -void test_smlal_lane_x2_s16(uint32_t slice_base, svint16x2_t zn, svint16_t zm) __arm_streaming __arm_shared_za +void test_smlal_lane_x2_s16(uint32_t slice_base, svint16x2_t zn, svint16_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svmla_lane_za64,_s16,_vg4x2,,)(slice_base, zn, zm, 7); } @@ -1405,7 +1405,7 @@ void test_smlal_lane_x2_s16(uint32_t slice_base, svint16x2_t zn, svint16_t zm) _ // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umla.za32.lane.vg4x2.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]], i32 15) // CPP-CHECK-NEXT: ret void // -void test_smlal_lane_x2_u8(uint32_t slice_base, svuint8x2_t zn, svuint8_t zm) __arm_streaming 
__arm_shared_za +void test_smlal_lane_x2_u8(uint32_t slice_base, svuint8x2_t zn, svuint8_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svmla_lane_za32,_u8,_vg4x2,,)(slice_base, zn, zm, 15); } @@ -1424,7 +1424,7 @@ void test_smlal_lane_x2_u8(uint32_t slice_base, svuint8x2_t zn, svuint8_t zm) __ // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umla.za64.lane.vg4x2.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]], i32 7) // CPP-CHECK-NEXT: ret void // -void test_smlal_lane_x2_u16(uint32_t slice_base, svuint16x2_t zn, svuint16_t zm) __arm_streaming __arm_shared_za +void test_smlal_lane_x2_u16(uint32_t slice_base, svuint16x2_t zn, svuint16_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svmla_lane_za64,_u16,_vg4x2,,)(slice_base, zn, zm, 7); } @@ -1445,7 +1445,7 @@ void test_smlal_lane_x2_u16(uint32_t slice_base, svuint16x2_t zn, svuint16_t zm) // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smls.za32.lane.vg4x2.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]], i32 15) // CPP-CHECK-NEXT: ret void // -void test_smlsl_lane_x2_s8(uint32_t slice_base, svint8x2_t zn, svint8_t zm) __arm_streaming __arm_shared_za +void test_smlsl_lane_x2_s8(uint32_t slice_base, svint8x2_t zn, svint8_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svmls_lane_za32,_s8,_vg4x2,,)(slice_base, zn, zm, 15); } @@ -1464,7 +1464,7 @@ void test_smlsl_lane_x2_s8(uint32_t slice_base, svint8x2_t zn, svint8_t zm) __ar // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smls.za64.lane.vg4x2.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]], i32 7) // CPP-CHECK-NEXT: ret void // -void test_smlsl_lane_x2_s16(uint32_t slice_base, svint16x2_t zn, svint16_t zm) __arm_streaming __arm_shared_za +void test_smlsl_lane_x2_s16(uint32_t slice_base, svint16x2_t zn, svint16_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svmls_lane_za64,_s16,_vg4x2,,)(slice_base, zn, zm, 7); } @@ -1483,7 +1483,7 @@ void test_smlsl_lane_x2_s16(uint32_t 
slice_base, svint16x2_t zn, svint16_t zm) _ // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umls.za32.lane.vg4x2.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]], i32 15) // CPP-CHECK-NEXT: ret void // -void test_smlsl_lane_x2_u8(uint32_t slice_base, svuint8x2_t zn, svuint8_t zm) __arm_streaming __arm_shared_za +void test_smlsl_lane_x2_u8(uint32_t slice_base, svuint8x2_t zn, svuint8_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svmls_lane_za32,_u8,_vg4x2,,)(slice_base, zn, zm, 15); } @@ -1502,7 +1502,7 @@ void test_smlsl_lane_x2_u8(uint32_t slice_base, svuint8x2_t zn, svuint8_t zm) __ // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umls.za64.lane.vg4x2.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]], i32 7) // CPP-CHECK-NEXT: ret void // -void test_smlsl_lane_x2_u16(uint32_t slice_base, svuint16x2_t zn, svuint16_t zm) __arm_streaming __arm_shared_za +void test_smlsl_lane_x2_u16(uint32_t slice_base, svuint16x2_t zn, svuint16_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svmls_lane_za64,_u16,_vg4x2,,)(slice_base, zn, zm, 7); } @@ -1523,7 +1523,7 @@ void test_smlsl_lane_x2_u16(uint32_t slice_base, svuint16x2_t zn, svuint16_t zm) // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sumla.za32.lane.vg4x2.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]], i32 15) // CPP-CHECK-NEXT: ret void // -void test_sumlall_lane_x2_s8(uint32_t slice_base, svint8x2_t zn, svuint8_t zm) __arm_streaming __arm_shared_za +void test_sumlall_lane_x2_s8(uint32_t slice_base, svint8x2_t zn, svuint8_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svsumla_lane_za32,_s8,_vg4x2,,)(slice_base, zn, zm, 15); } @@ -1542,7 +1542,7 @@ void test_sumlall_lane_x2_s8(uint32_t slice_base, svint8x2_t zn, svuint8_t zm) _ // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.usmla.za32.lane.vg4x2.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]], i32 15) // CPP-CHECK-NEXT: ret void // -void test_usmlall_lane_x2_u8(uint32_t 
slice_base, svuint8x2_t zn, svint8_t zm) __arm_streaming __arm_shared_za +void test_usmlall_lane_x2_u8(uint32_t slice_base, svuint8x2_t zn, svint8_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svusmla_lane_za32,_u8,_vg4x2,,)(slice_base, zn, zm, 15); } @@ -1571,7 +1571,7 @@ void test_usmlall_lane_x2_u8(uint32_t slice_base, svuint8x2_t zn, svint8_t zm) _ // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smla.za32.lane.vg4x4.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]], i32 15) // CPP-CHECK-NEXT: ret void // -void test_smlal_lane_x4_s8(uint32_t slice_base, svint8x4_t zn, svint8_t zm) __arm_streaming __arm_shared_za +void test_smlal_lane_x4_s8(uint32_t slice_base, svint8x4_t zn, svint8_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svmla_lane_za32,_s8,_vg4x4,,)(slice_base, zn, zm, 15); } @@ -1594,7 +1594,7 @@ void test_smlal_lane_x4_s8(uint32_t slice_base, svint8x4_t zn, svint8_t zm) __ar // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smla.za64.lane.vg4x4.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]], i32 7) // CPP-CHECK-NEXT: ret void // -void test_smlal_lane_x4_s16(uint32_t slice_base, svint16x4_t zn, svint16_t zm) __arm_streaming __arm_shared_za +void test_smlal_lane_x4_s16(uint32_t slice_base, svint16x4_t zn, svint16_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svmla_lane_za64,_s16,_vg4x4,,)(slice_base, zn, zm, 7); } @@ -1617,7 +1617,7 @@ void test_smlal_lane_x4_s16(uint32_t slice_base, svint16x4_t zn, svint16_t zm) _ // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umla.za32.lane.vg4x4.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]], i32 15) // CPP-CHECK-NEXT: ret void // -void test_smlal_lane_x4_u8(uint32_t slice_base, svuint8x4_t zn, svuint8_t zm) __arm_streaming __arm_shared_za +void test_smlal_lane_x4_u8(uint32_t slice_base, svuint8x4_t zn, svuint8_t zm) __arm_streaming __arm_inout("za") { 
SVE_ACLE_FUNC(svmla_lane_za32,_u8,_vg4x4,,)(slice_base, zn, zm, 15); } @@ -1640,7 +1640,7 @@ void test_smlal_lane_x4_u8(uint32_t slice_base, svuint8x4_t zn, svuint8_t zm) __ // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umla.za64.lane.vg4x4.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]], i32 7) // CPP-CHECK-NEXT: ret void // -void test_smlal_lane_x4_u16(uint32_t slice_base, svuint16x4_t zn, svuint16_t zm) __arm_streaming __arm_shared_za +void test_smlal_lane_x4_u16(uint32_t slice_base, svuint16x4_t zn, svuint16_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svmla_lane_za64,_u16,_vg4x4,,)(slice_base, zn, zm, 7); } @@ -1665,7 +1665,7 @@ void test_smlal_lane_x4_u16(uint32_t slice_base, svuint16x4_t zn, svuint16_t zm) // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smls.za32.lane.vg4x4.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]], i32 15) // CPP-CHECK-NEXT: ret void // -void test_smlsl_lane_x4_s8(uint32_t slice_base, svint8x4_t zn, svint8_t zm) __arm_streaming __arm_shared_za +void test_smlsl_lane_x4_s8(uint32_t slice_base, svint8x4_t zn, svint8_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svmls_lane_za32,_s8,_vg4x4,,)(slice_base, zn, zm, 15); } @@ -1688,7 +1688,7 @@ void test_smlsl_lane_x4_s8(uint32_t slice_base, svint8x4_t zn, svint8_t zm) __ar // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smls.za64.lane.vg4x4.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]], i32 7) // CPP-CHECK-NEXT: ret void // -void test_smlsl_lane_x4_s16(uint32_t slice_base, svint16x4_t zn, svint16_t zm) __arm_streaming __arm_shared_za +void test_smlsl_lane_x4_s16(uint32_t slice_base, svint16x4_t zn, svint16_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svmls_lane_za64,_s16,_vg4x4,,)(slice_base, zn, zm, 7); } @@ -1711,7 +1711,7 @@ void test_smlsl_lane_x4_s16(uint32_t slice_base, svint16x4_t zn, svint16_t zm) _ // CPP-CHECK-NEXT: tail call void 
@llvm.aarch64.sme.umls.za32.lane.vg4x4.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]], i32 15) // CPP-CHECK-NEXT: ret void // -void test_smlsl_lane_x4_u8(uint32_t slice_base, svuint8x4_t zn, svuint8_t zm) __arm_streaming __arm_shared_za +void test_smlsl_lane_x4_u8(uint32_t slice_base, svuint8x4_t zn, svuint8_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svmls_lane_za32,_u8,_vg4x4,,)(slice_base, zn, zm, 15); } @@ -1734,7 +1734,7 @@ void test_smlsl_lane_x4_u8(uint32_t slice_base, svuint8x4_t zn, svuint8_t zm) __ // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umls.za64.lane.vg4x4.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]], i32 7) // CPP-CHECK-NEXT: ret void // -void test_smlsl_lane_x4_u16(uint32_t slice_base, svuint16x4_t zn, svuint16_t zm) __arm_streaming __arm_shared_za +void test_smlsl_lane_x4_u16(uint32_t slice_base, svuint16x4_t zn, svuint16_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svmls_lane_za64,_u16,_vg4x4,,)(slice_base, zn, zm, 7); } @@ -1759,7 +1759,7 @@ void test_smlsl_lane_x4_u16(uint32_t slice_base, svuint16x4_t zn, svuint16_t zm) // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sumla.za32.lane.vg4x4.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]], i32 15) // CPP-CHECK-NEXT: ret void // -void test_sumlall_lane_x4_s8(uint32_t slice_base, svint8x4_t zn, svuint8_t zm) __arm_streaming __arm_shared_za +void test_sumlall_lane_x4_s8(uint32_t slice_base, svint8x4_t zn, svuint8_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svsumla_lane_za32,_s8,_vg4x4,,)(slice_base, zn, zm, 15); } @@ -1784,7 +1784,7 @@ void test_sumlall_lane_x4_s8(uint32_t slice_base, svint8x4_t zn, svuint8_t zm) _ // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.usmla.za32.lane.vg4x4.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]], i32 15) // CPP-CHECK-NEXT: ret void // -void test_usmlall_lane_x4_s8(uint32_t 
slice_base, svuint8x4_t zn, svint8_t zm) __arm_streaming __arm_shared_za +void test_usmlall_lane_x4_s8(uint32_t slice_base, svuint8x4_t zn, svint8_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svusmla_lane_za32,_u8,_vg4x4,,)(slice_base, zn, zm, 15); } diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mls.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mls.c index fd4bf390b8232..98eaa929cfb77 100644 --- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mls.c +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mls.c @@ -35,7 +35,7 @@ // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmls.vg1x2.nxv4f32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) // CPP-CHECK-NEXT: ret void // -void test_svmls2_f32(uint32_t slice_base, svfloat32x2_t zn, svfloat32x2_t zm) __arm_streaming __arm_shared_za { +void test_svmls2_f32(uint32_t slice_base, svfloat32x2_t zn, svfloat32x2_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svmls_za32,_f32,_vg1x2,,)(slice_base, zn, zm); } @@ -65,7 +65,7 @@ void test_svmls2_f32(uint32_t slice_base, svfloat32x2_t zn, svfloat32x2_t zm) __ // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmls.vg1x4.nxv4f32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) // CPP-CHECK-NEXT: ret void // -void test_svmls4_f32(uint32_t slice_base, svfloat32x4_t zn, svfloat32x4_t zm) __arm_streaming __arm_shared_za { +void test_svmls4_f32(uint32_t slice_base, svfloat32x4_t zn, svfloat32x4_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svmls_za32,_f32,_vg1x4,,)(slice_base, zn, zm); } @@ -85,7 +85,7 @@ void test_svmls4_f32(uint32_t slice_base, svfloat32x4_t zn, svfloat32x4_t zm) __ // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmls.single.vg1x2.nxv4f32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // -void test_svmls_single2_f32(uint32_t slice_base, svfloat32x2_t zn, svfloat32_t zm) 
__arm_streaming __arm_shared_za { +void test_svmls_single2_f32(uint32_t slice_base, svfloat32x2_t zn, svfloat32_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svmls,_single,_za32,_f32,_vg1x2)(slice_base, zn, zm); } @@ -107,7 +107,7 @@ void test_svmls_single2_f32(uint32_t slice_base, svfloat32x2_t zn, svfloat32_t z // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmls.single.vg1x4.nxv4f32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // -void test_svmls_single4_f32(uint32_t slice_base, svfloat32x4_t zn, svfloat32_t zm) __arm_streaming __arm_shared_za { +void test_svmls_single4_f32(uint32_t slice_base, svfloat32x4_t zn, svfloat32_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svmls,_single,_za32,_f32,_vg1x4)(slice_base, zn, zm); } @@ -127,7 +127,7 @@ void test_svmls_single4_f32(uint32_t slice_base, svfloat32x4_t zn, svfloat32_t z // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmls.lane.vg1x2.nxv4f32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]], i32 3) // CPP-CHECK-NEXT: ret void // -void test_svmls_lane2_f32(uint32_t slice_base, svfloat32x2_t zn, svfloat32_t zm) __arm_streaming __arm_shared_za { +void test_svmls_lane2_f32(uint32_t slice_base, svfloat32x2_t zn, svfloat32_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svmls_lane_za32,_f32,_vg1x2,,)(slice_base, zn, zm, 3); } @@ -149,7 +149,7 @@ void test_svmls_lane2_f32(uint32_t slice_base, svfloat32x2_t zn, svfloat32_t zm) // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmls.lane.vg1x4.nxv4f32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]], i32 3) // CPP-CHECK-NEXT: ret void // -void test_svmls_lane4_f32(uint32_t slice_base, svfloat32x4_t zn, svfloat32_t zm) __arm_streaming __arm_shared_za { +void test_svmls_lane4_f32(uint32_t slice_base, svfloat32x4_t zn, svfloat32_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svmls_lane_za32,_f32,_vg1x4,,)(slice_base, zn, zm, 3); } @@ -173,7 
+173,7 @@ void test_svmls_lane4_f32(uint32_t slice_base, svfloat32x4_t zn, svfloat32_t zm) // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmls.vg1x2.nxv2f64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) // CPP-CHECK-NEXT: ret void // -void test_svmls2_f64(uint32_t slice_base, svfloat64x2_t zn, svfloat64x2_t zm) __arm_streaming __arm_shared_za { +void test_svmls2_f64(uint32_t slice_base, svfloat64x2_t zn, svfloat64x2_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svmls_za64,_f64,_vg1x2,,)(slice_base, zn, zm); } @@ -203,7 +203,7 @@ void test_svmls2_f64(uint32_t slice_base, svfloat64x2_t zn, svfloat64x2_t zm) __ // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmls.vg1x4.nxv2f64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) // CPP-CHECK-NEXT: ret void // -void test_svmls4_f64(uint32_t slice_base, svfloat64x4_t zn, svfloat64x4_t zm) __arm_streaming __arm_shared_za { +void test_svmls4_f64(uint32_t slice_base, svfloat64x4_t zn, svfloat64x4_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svmls_za64,_f64,_vg1x4,,)(slice_base, zn, zm); } @@ -223,7 +223,7 @@ void test_svmls4_f64(uint32_t slice_base, svfloat64x4_t zn, svfloat64x4_t zm) __ // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmls.single.vg1x2.nxv2f64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // -void test_svmls_single2_f64(uint32_t slice_base, svfloat64x2_t zn, svfloat64_t zm) __arm_streaming __arm_shared_za { +void test_svmls_single2_f64(uint32_t slice_base, svfloat64x2_t zn, svfloat64_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svmls,_single,_za64,_f64,_vg1x2)(slice_base, zn, zm); } @@ -245,7 +245,7 @@ void test_svmls_single2_f64(uint32_t slice_base, svfloat64x2_t zn, svfloat64_t z // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmls.single.vg1x4.nxv2f64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret 
void // -void test_svmls_single4_f64(uint32_t slice_base, svfloat64x4_t zn, svfloat64_t zm) __arm_streaming __arm_shared_za { +void test_svmls_single4_f64(uint32_t slice_base, svfloat64x4_t zn, svfloat64_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svmls,_single,_za64,_f64,_vg1x4)(slice_base, zn, zm); } @@ -265,7 +265,7 @@ void test_svmls_single4_f64(uint32_t slice_base, svfloat64x4_t zn, svfloat64_t z // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmls.lane.vg1x2.nxv2f64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]], i32 1) // CPP-CHECK-NEXT: ret void // -void test_svmls_lane2_f64(uint32_t slice_base, svfloat64x2_t zn, svfloat64_t zm) __arm_streaming __arm_shared_za { +void test_svmls_lane2_f64(uint32_t slice_base, svfloat64x2_t zn, svfloat64_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svmls_lane_za64,_f64,_vg1x2,,)(slice_base, zn, zm, 1); } @@ -287,6 +287,6 @@ void test_svmls_lane2_f64(uint32_t slice_base, svfloat64x2_t zn, svfloat64_t zm) // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmls.lane.vg1x4.nxv2f64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]], i32 1) // CPP-CHECK-NEXT: ret void // -void test_svmls_lane4_f64(uint32_t slice_base, svfloat64x4_t zn, svfloat64_t zm) __arm_streaming __arm_shared_za { +void test_svmls_lane4_f64(uint32_t slice_base, svfloat64x4_t zn, svfloat64_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svmls_lane_za64,_f64,_vg1x4,,)(slice_base, zn, zm, 1); } diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mlsl.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mlsl.c index afc7db797c9df..ea685079ebbca 100644 --- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mlsl.c +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mlsl.c @@ -35,7 +35,7 @@ // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlsl.vg2x2.nxv8f16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) // CPP-CHECK-NEXT: ret void // -void 
test_svmls2_f16(uint32_t slice_base, svfloat16x2_t zn, svfloat16x2_t zm) __arm_streaming __arm_shared_za +void test_svmls2_f16(uint32_t slice_base, svfloat16x2_t zn, svfloat16x2_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svmls_za32,_f16,_vg2x2,,)(slice_base, zn, zm); } @@ -58,7 +58,7 @@ void test_svmls2_f16(uint32_t slice_base, svfloat16x2_t zn, svfloat16x2_t zm) __ // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlsl.vg2x2.nxv8bf16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) // CPP-CHECK-NEXT: ret void // -void test_svmls2_bf16(uint32_t slice_base, svbfloat16x2_t zn, svbfloat16x2_t zm) __arm_streaming __arm_shared_za +void test_svmls2_bf16(uint32_t slice_base, svbfloat16x2_t zn, svbfloat16x2_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svmls_za32,_bf16,_vg2x2,,)(slice_base, zn, zm); } @@ -81,7 +81,7 @@ void test_svmls2_bf16(uint32_t slice_base, svbfloat16x2_t zn, svbfloat16x2_t zm) // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umlsl.vg2x2.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) // CPP-CHECK-NEXT: ret void // -void test_svmls2_u16(uint32_t slice_base, svuint16x2_t zn, svuint16x2_t zm) __arm_streaming __arm_shared_za +void test_svmls2_u16(uint32_t slice_base, svuint16x2_t zn, svuint16x2_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svmls_za32,_u16,_vg2x2,,)(slice_base, zn, zm); } @@ -104,7 +104,7 @@ void test_svmls2_u16(uint32_t slice_base, svuint16x2_t zn, svuint16x2_t zm) __ar // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smlsl.vg2x2.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) // CPP-CHECK-NEXT: ret void // -void test_svmls2_s16(uint32_t slice_base, svint16x2_t zn, svint16x2_t zm) __arm_streaming __arm_shared_za +void test_svmls2_s16(uint32_t slice_base, svint16x2_t zn, svint16x2_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svmls_za32,_s16,_vg2x2,,)(slice_base, zn, zm); } @@ -135,7 +135,7 @@ void test_svmls2_s16(uint32_t 
slice_base, svint16x2_t zn, svint16x2_t zm) __arm_ // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlsl.vg2x4.nxv8f16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) // CPP-CHECK-NEXT: ret void // -void test_svmls4_f16(uint32_t slice_base, svfloat16x4_t zn, svfloat16x4_t zm) __arm_streaming __arm_shared_za +void test_svmls4_f16(uint32_t slice_base, svfloat16x4_t zn, svfloat16x4_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svmls_za32,_f16,_vg2x4,,)(slice_base, zn, zm); } @@ -166,7 +166,7 @@ void test_svmls4_f16(uint32_t slice_base, svfloat16x4_t zn, svfloat16x4_t zm) __ // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlsl.vg2x4.nxv8bf16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) // CPP-CHECK-NEXT: ret void // -void test_svmls4_bf16(uint32_t slice_base, svbfloat16x4_t zn, svbfloat16x4_t zm) __arm_streaming __arm_shared_za +void test_svmls4_bf16(uint32_t slice_base, svbfloat16x4_t zn, svbfloat16x4_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svmls_za32,_bf16,_vg2x4,,)(slice_base, zn, zm); } @@ -197,7 +197,7 @@ void test_svmls4_bf16(uint32_t slice_base, svbfloat16x4_t zn, svbfloat16x4_t zm) // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umlsl.vg2x4.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) // CPP-CHECK-NEXT: ret void // -void test_svmls4_u16(uint32_t slice_base, svuint16x4_t zn, svuint16x4_t zm) __arm_streaming __arm_shared_za +void test_svmls4_u16(uint32_t slice_base, svuint16x4_t zn, svuint16x4_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svmls_za32,_u16,_vg2x4,,)(slice_base, zn, zm); } @@ -228,7 +228,7 @@ void test_svmls4_u16(uint32_t slice_base, svuint16x4_t zn, svuint16x4_t zm) __ar // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smlsl.vg2x4.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], 
[[TMP5]], [[TMP6]], [[TMP7]]) // CPP-CHECK-NEXT: ret void // -void test_svmls4_s16(uint32_t slice_base, svint16x4_t zn, svint16x4_t zm) __arm_streaming __arm_shared_za +void test_svmls4_s16(uint32_t slice_base, svint16x4_t zn, svint16x4_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svmls_za32,_s16,_vg2x4,,)(slice_base, zn, zm); } @@ -245,7 +245,7 @@ void test_svmls4_s16(uint32_t slice_base, svint16x4_t zn, svint16x4_t zm) __arm_ // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlsl.single.vg2x1.nxv8f16(i32 [[SLICE_BASE:%.*]], [[ZN:%.*]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // -void test_svmls_single1_f16(uint32_t slice_base, svfloat16_t zn, svfloat16_t zm) __arm_streaming __arm_shared_za +void test_svmls_single1_f16(uint32_t slice_base, svfloat16_t zn, svfloat16_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svmls_za32,_f16,_vg2x1,,)(slice_base, zn, zm); } @@ -260,7 +260,7 @@ void test_svmls_single1_f16(uint32_t slice_base, svfloat16_t zn, svfloat16_t zm) // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlsl.single.vg2x1.nxv8bf16(i32 [[SLICE_BASE:%.*]], [[ZN:%.*]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // -void test_svmls_single1_bf16(uint32_t slice_base, svbfloat16_t zn, svbfloat16_t zm) __arm_streaming __arm_shared_za +void test_svmls_single1_bf16(uint32_t slice_base, svbfloat16_t zn, svbfloat16_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svmls_za32,_bf16,_vg2x1,,)(slice_base, zn, zm); } @@ -275,7 +275,7 @@ void test_svmls_single1_bf16(uint32_t slice_base, svbfloat16_t zn, svbfloat16_t // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umlsl.single.vg2x1.nxv8i16(i32 [[SLICE_BASE:%.*]], [[ZN:%.*]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // -void test_svmls_single1_u16(uint32_t slice_base, svuint16_t zn, svuint16_t zm) __arm_streaming __arm_shared_za +void test_svmls_single1_u16(uint32_t slice_base, svuint16_t zn, svuint16_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svmls_za32,_u16,_vg2x1,,)(slice_base, zn, 
zm); } @@ -290,7 +290,7 @@ void test_svmls_single1_u16(uint32_t slice_base, svuint16_t zn, svuint16_t zm) _ // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smlsl.single.vg2x1.nxv8i16(i32 [[SLICE_BASE:%.*]], [[ZN:%.*]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // -void test_svmls_single1_s16(uint32_t slice_base, svint16_t zn, svint16_t zm) __arm_streaming __arm_shared_za +void test_svmls_single1_s16(uint32_t slice_base, svint16_t zn, svint16_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svmls_za32,_s16,_vg2x1,,)(slice_base, zn, zm); } @@ -309,7 +309,7 @@ void test_svmls_single1_s16(uint32_t slice_base, svint16_t zn, svint16_t zm) __a // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlsl.single.vg2x2.nxv8f16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // -void test_svmls_single2_f16(uint32_t slice_base, svfloat16x2_t zn, svfloat16_t zm) __arm_streaming __arm_shared_za +void test_svmls_single2_f16(uint32_t slice_base, svfloat16x2_t zn, svfloat16_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svmls,_single,_za32,_f16,_vg2x2)(slice_base, zn, zm); } @@ -328,7 +328,7 @@ void test_svmls_single2_f16(uint32_t slice_base, svfloat16x2_t zn, svfloat16_t z // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlsl.single.vg2x2.nxv8bf16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // -void test_svmls_single2_bf16(uint32_t slice_base, svbfloat16x2_t zn, svbfloat16_t zm) __arm_streaming __arm_shared_za +void test_svmls_single2_bf16(uint32_t slice_base, svbfloat16x2_t zn, svbfloat16_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svmls,_single,_za32,_bf16,_vg2x2)(slice_base, zn, zm); } @@ -347,7 +347,7 @@ void test_svmls_single2_bf16(uint32_t slice_base, svbfloat16x2_t zn, svbfloat16_ // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umlsl.single.vg2x2.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // -void 
test_svmls_single2_u16(uint32_t slice_base, svuint16x2_t zn, svuint16_t zm) __arm_streaming __arm_shared_za +void test_svmls_single2_u16(uint32_t slice_base, svuint16x2_t zn, svuint16_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svmls,_single,_za32,_u16,_vg2x2)(slice_base, zn, zm); } @@ -366,7 +366,7 @@ void test_svmls_single2_u16(uint32_t slice_base, svuint16x2_t zn, svuint16_t zm) // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smlsl.single.vg2x2.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // -void test_svmls_single2_s16(uint32_t slice_base, svint16x2_t zn, svint16_t zm) __arm_streaming __arm_shared_za +void test_svmls_single2_s16(uint32_t slice_base, svint16x2_t zn, svint16_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svmls,_single,_za32,_s16,_vg2x2)(slice_base, zn, zm); } @@ -389,7 +389,7 @@ void test_svmls_single2_s16(uint32_t slice_base, svint16x2_t zn, svint16_t zm) _ // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlsl.single.vg2x4.nxv8f16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // -void test_svmls_single4_f16(uint32_t slice_base, svfloat16x4_t zn, svfloat16_t zm) __arm_streaming __arm_shared_za +void test_svmls_single4_f16(uint32_t slice_base, svfloat16x4_t zn, svfloat16_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svmls,_single,_za32,_f16,_vg2x4)(slice_base, zn, zm); } @@ -412,7 +412,7 @@ void test_svmls_single4_f16(uint32_t slice_base, svfloat16x4_t zn, svfloat16_t z // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlsl.single.vg2x4.nxv8bf16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // -void test_svmls_single4_bf16(uint32_t slice_base, svbfloat16x4_t zn, svbfloat16_t zm) __arm_streaming __arm_shared_za +void test_svmls_single4_bf16(uint32_t slice_base, svbfloat16x4_t zn, svbfloat16_t zm) __arm_streaming __arm_inout("za") { 
SVE_ACLE_FUNC(svmls,_single,_za32,_bf16,_vg2x4)(slice_base, zn, zm); } @@ -435,7 +435,7 @@ void test_svmls_single4_bf16(uint32_t slice_base, svbfloat16x4_t zn, svbfloat16_ // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umlsl.single.vg2x4.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // -void test_svmls_single4_u16(uint32_t slice_base, svuint16x4_t zn, svuint16_t zm) __arm_streaming __arm_shared_za +void test_svmls_single4_u16(uint32_t slice_base, svuint16x4_t zn, svuint16_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svmls,_single,_za32,_u16,_vg2x4)(slice_base, zn, zm); } @@ -458,7 +458,7 @@ void test_svmls_single4_u16(uint32_t slice_base, svuint16x4_t zn, svuint16_t zm) // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smlsl.single.vg2x4.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // -void test_svmls_single4_s16(uint32_t slice_base, svint16x4_t zn, svint16_t zm) __arm_streaming __arm_shared_za +void test_svmls_single4_s16(uint32_t slice_base, svint16x4_t zn, svint16_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svmls,_single,_za32,_s16,_vg2x4)(slice_base, zn, zm); } @@ -477,7 +477,7 @@ void test_svmls_single4_s16(uint32_t slice_base, svint16x4_t zn, svint16_t zm) _ // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlsl.lane.vg2x1.nxv8f16(i32 [[SLICE_BASE:%.*]], [[ZN:%.*]], [[ZM:%.*]], i32 7) // CPP-CHECK-NEXT: ret void // -void test_svmls_lane1_f16(uint32_t slice_base, svfloat16_t zn, svfloat16_t zm) __arm_streaming __arm_shared_za +void test_svmls_lane1_f16(uint32_t slice_base, svfloat16_t zn, svfloat16_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svmls_lane_za32,_f16,_vg2x1,,)(slice_base, zn, zm, 7); } @@ -492,7 +492,7 @@ void test_svmls_lane1_f16(uint32_t slice_base, svfloat16_t zn, svfloat16_t zm) _ // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlsl.lane.vg2x1.nxv8bf16(i32 
[[SLICE_BASE:%.*]], [[ZN:%.*]], [[ZM:%.*]], i32 7) // CPP-CHECK-NEXT: ret void // -void test_svmls_lane1_bf16(uint32_t slice_base, svbfloat16_t zn, svbfloat16_t zm) __arm_streaming __arm_shared_za +void test_svmls_lane1_bf16(uint32_t slice_base, svbfloat16_t zn, svbfloat16_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svmls_lane_za32,_bf16,_vg2x1,,)(slice_base, zn, zm, 7); } @@ -507,7 +507,7 @@ void test_svmls_lane1_bf16(uint32_t slice_base, svbfloat16_t zn, svbfloat16_t zm // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umlsl.lane.vg2x1.nxv8i16(i32 [[SLICE_BASE:%.*]], [[ZN:%.*]], [[ZM:%.*]], i32 7) // CPP-CHECK-NEXT: ret void // -void test_svmls_lane1_u16(uint32_t slice_base, svuint16_t zn, svuint16_t zm) __arm_streaming __arm_shared_za +void test_svmls_lane1_u16(uint32_t slice_base, svuint16_t zn, svuint16_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svmls_lane_za32,_u16,_vg2x1,,)(slice_base, zn, zm, 7); } @@ -522,7 +522,7 @@ void test_svmls_lane1_u16(uint32_t slice_base, svuint16_t zn, svuint16_t zm) __a // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smlsl.lane.vg2x1.nxv8i16(i32 [[SLICE_BASE:%.*]], [[ZN:%.*]], [[ZM:%.*]], i32 7) // CPP-CHECK-NEXT: ret void // -void test_svmls_lane1_s16(uint32_t slice_base, svint16_t zn, svint16_t zm) __arm_streaming __arm_shared_za +void test_svmls_lane1_s16(uint32_t slice_base, svint16_t zn, svint16_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svmls_lane_za32,_s16,_vg2x1,,)(slice_base, zn, zm, 7); } @@ -541,7 +541,7 @@ void test_svmls_lane1_s16(uint32_t slice_base, svint16_t zn, svint16_t zm) __arm // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlsl.lane.vg2x2.nxv8f16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]], i32 7) // CPP-CHECK-NEXT: ret void // -void test_svmls_lane2_f16(uint32_t slice_base, svfloat16x2_t zn, svfloat16_t zm) __arm_streaming __arm_shared_za +void test_svmls_lane2_f16(uint32_t slice_base, svfloat16x2_t zn, svfloat16_t zm) __arm_streaming 
__arm_inout("za") { SVE_ACLE_FUNC(svmls_lane_za32,_f16,_vg2x2,,)(slice_base, zn, zm, 7); } @@ -560,7 +560,7 @@ void test_svmls_lane2_f16(uint32_t slice_base, svfloat16x2_t zn, svfloat16_t zm) // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlsl.lane.vg2x2.nxv8bf16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]], i32 7) // CPP-CHECK-NEXT: ret void // -void test_svmls_lane2_bf16(uint32_t slice_base, svbfloat16x2_t zn, svbfloat16_t zm) __arm_streaming __arm_shared_za +void test_svmls_lane2_bf16(uint32_t slice_base, svbfloat16x2_t zn, svbfloat16_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svmls_lane_za32,_bf16,_vg2x2,,)(slice_base, zn, zm, 7); } @@ -579,7 +579,7 @@ void test_svmls_lane2_bf16(uint32_t slice_base, svbfloat16x2_t zn, svbfloat16_t // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umlsl.lane.vg2x2.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]], i32 7) // CPP-CHECK-NEXT: ret void // -void test_svmls_lane2_u16(uint32_t slice_base, svuint16x2_t zn, svuint16_t zm) __arm_streaming __arm_shared_za +void test_svmls_lane2_u16(uint32_t slice_base, svuint16x2_t zn, svuint16_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svmls_lane_za32,_u16,_vg2x2,,)(slice_base, zn, zm, 7); } @@ -598,7 +598,7 @@ void test_svmls_lane2_u16(uint32_t slice_base, svuint16x2_t zn, svuint16_t zm) _ // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smlsl.lane.vg2x2.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]], i32 7) // CPP-CHECK-NEXT: ret void // -void test_svmls_lane2_s16(uint32_t slice_base, svint16x2_t zn, svint16_t zm) __arm_streaming __arm_shared_za +void test_svmls_lane2_s16(uint32_t slice_base, svint16x2_t zn, svint16_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svmls_lane_za32,_s16,_vg2x2,,)(slice_base, zn, zm, 7); } @@ -621,7 +621,7 @@ void test_svmls_lane2_s16(uint32_t slice_base, svint16x2_t zn, svint16_t zm) __a // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlsl.lane.vg2x4.nxv8f16(i32 
[[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]], i32 7) // CPP-CHECK-NEXT: ret void // -void test_svmls_lane4_f16(uint32_t slice_base, svfloat16x4_t zn, svfloat16_t zm) __arm_streaming __arm_shared_za +void test_svmls_lane4_f16(uint32_t slice_base, svfloat16x4_t zn, svfloat16_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svmls_lane_za32,_f16,_vg2x4,,)(slice_base, zn, zm, 7); } @@ -644,7 +644,7 @@ void test_svmls_lane4_f16(uint32_t slice_base, svfloat16x4_t zn, svfloat16_t zm) // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlsl.lane.vg2x4.nxv8bf16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]], i32 7) // CPP-CHECK-NEXT: ret void // -void test_svmls_lane4_bf16(uint32_t slice_base, svbfloat16x4_t zn, svbfloat16_t zm) __arm_streaming __arm_shared_za +void test_svmls_lane4_bf16(uint32_t slice_base, svbfloat16x4_t zn, svbfloat16_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svmls_lane_za32,_bf16,_vg2x4,,)(slice_base, zn, zm, 7); } @@ -667,7 +667,7 @@ void test_svmls_lane4_bf16(uint32_t slice_base, svbfloat16x4_t zn, svbfloat16_t // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umlsl.lane.vg2x4.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]], i32 7) // CPP-CHECK-NEXT: ret void // -void test_svmls_lane4_u16(uint32_t slice_base, svuint16x4_t zn, svuint16_t zm) __arm_streaming __arm_shared_za +void test_svmls_lane4_u16(uint32_t slice_base, svuint16x4_t zn, svuint16_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svmls_lane_za32,_u16,_vg2x4,,)(slice_base, zn, zm, 7); } @@ -690,7 +690,7 @@ void test_svmls_lane4_u16(uint32_t slice_base, svuint16x4_t zn, svuint16_t zm) _ // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smlsl.lane.vg2x4.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]], i32 7) // CPP-CHECK-NEXT: ret void // -void test_svmls_lane4_s16(uint32_t slice_base, svint16x4_t zn, svint16_t zm) __arm_streaming 
__arm_shared_za +void test_svmls_lane4_s16(uint32_t slice_base, svint16x4_t zn, svint16_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svmls_lane_za32,_s16,_vg2x4,,)(slice_base, zn, zm, 7); } diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mop.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mop.c index eef99eb1b75a3..a2904dc1a6025 100644 --- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mop.c +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mop.c @@ -33,7 +33,7 @@ // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smopa.za32.nxv8i16(i32 3, [[TMP0]], [[TMP1]], [[ZN:%.*]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // -void test_svmopa_s16(svbool_t pn, svbool_t pm, svint16_t zn, svint16_t zm) __arm_streaming __arm_shared_za { +void test_svmopa_s16(svbool_t pn, svbool_t pm, svint16_t zn, svint16_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svmopa_za32,_s16,_m,)(3, pn, pm, zn, zm); } @@ -51,7 +51,7 @@ void test_svmopa_s16(svbool_t pn, svbool_t pm, svint16_t zn, svint16_t zm) __arm // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umopa.za32.nxv8i16(i32 3, [[TMP0]], [[TMP1]], [[ZN:%.*]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // -void test_svmopa_u16(svbool_t pn, svbool_t pm, svuint16_t zn, svuint16_t zm) __arm_streaming __arm_shared_za { +void test_svmopa_u16(svbool_t pn, svbool_t pm, svuint16_t zn, svuint16_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svmopa_za32,_u16,_m,)(3, pn, pm, zn, zm); } @@ -71,7 +71,7 @@ void test_svmopa_u16(svbool_t pn, svbool_t pm, svuint16_t zn, svuint16_t zm) __a // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smops.za32.nxv8i16(i32 3, [[TMP0]], [[TMP1]], [[ZN:%.*]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // -void test_svmops_s16(svbool_t pn, svbool_t pm, svint16_t zn, svint16_t zm) __arm_streaming __arm_shared_za { +void test_svmops_s16(svbool_t pn, svbool_t pm, svint16_t zn, svint16_t zm) __arm_streaming __arm_inout("za") { 
SVE_ACLE_FUNC(svmops_za32,_s16,_m,)(3, pn, pm, zn, zm); } @@ -89,6 +89,6 @@ void test_svmops_s16(svbool_t pn, svbool_t pm, svint16_t zn, svint16_t zm) __arm // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umops.za32.nxv8i16(i32 3, [[TMP0]], [[TMP1]], [[ZN:%.*]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // -void test_svmops_u16(svbool_t pn, svbool_t pm, svuint16_t zn, svuint16_t zm) __arm_streaming __arm_shared_za { +void test_svmops_u16(svbool_t pn, svbool_t pm, svuint16_t zn, svuint16_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svmops_za32,_u16,_m,)(3, pn, pm, zn, zm); } diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_read.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_read.c index 583a7fc815472..028a3d7b155dd 100644 --- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_read.c +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_read.c @@ -25,7 +25,7 @@ // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) // CPP-CHECK-NEXT: ret [[TMP4]] // -svuint8x2_t test_svread_ver_za8_u8_vg2(uint32_t base) __arm_streaming __arm_shared_za __arm_preserves_za { +svuint8x2_t test_svread_ver_za8_u8_vg2(uint32_t base) __arm_streaming __arm_in("za") { return svread_ver_za8_u8_vg2(0, base); } @@ -47,7 +47,7 @@ svuint8x2_t test_svread_ver_za8_u8_vg2(uint32_t base) __arm_streaming __arm_shar // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) // CPP-CHECK-NEXT: ret [[TMP4]] // -svint8x2_t test_svread_ver_za8_s8_vg2(uint32_t base) __arm_streaming __arm_shared_za __arm_preserves_za { +svint8x2_t test_svread_ver_za8_s8_vg2(uint32_t base) __arm_streaming __arm_in("za") { return svread_ver_za8_s8_vg2(0, base); } @@ -69,7 +69,7 @@ svint8x2_t test_svread_ver_za8_s8_vg2(uint32_t base) __arm_streaming __arm_share // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) // 
CPP-CHECK-NEXT: ret [[TMP4]] // -svuint8x2_t test_svread_hor_za8_u8_vg2(uint32_t base) __arm_streaming __arm_shared_za __arm_preserves_za { +svuint8x2_t test_svread_hor_za8_u8_vg2(uint32_t base) __arm_streaming __arm_in("za") { return svread_hor_za8_u8_vg2(0, base); } @@ -91,7 +91,7 @@ svuint8x2_t test_svread_hor_za8_u8_vg2(uint32_t base) __arm_streaming __arm_shar // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) // CPP-CHECK-NEXT: ret [[TMP4]] // -svint8x2_t test_svread_hor_za8_s8_vg2(uint32_t base) __arm_streaming __arm_shared_za __arm_preserves_za { +svint8x2_t test_svread_hor_za8_s8_vg2(uint32_t base) __arm_streaming __arm_in("za") { return svread_hor_za8_s8_vg2(0, base); } @@ -121,7 +121,7 @@ svint8x2_t test_svread_hor_za8_s8_vg2(uint32_t base) __arm_streaming __arm_share // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) // CPP-CHECK-NEXT: ret [[TMP8]] // -svuint8x4_t test_svread_hor_za8_u8_vg4(uint32_t base) __arm_streaming __arm_shared_za __arm_preserves_za { +svuint8x4_t test_svread_hor_za8_u8_vg4(uint32_t base) __arm_streaming __arm_in("za") { return svread_hor_za8_u8_vg4(0, base); } @@ -151,7 +151,7 @@ svuint8x4_t test_svread_hor_za8_u8_vg4(uint32_t base) __arm_streaming __arm_shar // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) // CPP-CHECK-NEXT: ret [[TMP8]] // -svint8x4_t test_svread_hor_za8_s8_vg4(uint32_t base) __arm_streaming __arm_shared_za __arm_preserves_za { +svint8x4_t test_svread_hor_za8_s8_vg4(uint32_t base) __arm_streaming __arm_in("za") { return svread_hor_za8_s8_vg4(0, base); } @@ -181,7 +181,7 @@ svint8x4_t test_svread_hor_za8_s8_vg4(uint32_t base) __arm_streaming __arm_share // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) // CPP-CHECK-NEXT: ret [[TMP8]] // -svuint8x4_t 
test_svread_ver_za8_u8_vg4(uint32_t base) __arm_streaming __arm_shared_za __arm_preserves_za { +svuint8x4_t test_svread_ver_za8_u8_vg4(uint32_t base) __arm_streaming __arm_in("za") { return svread_ver_za8_u8_vg4(0, base); } @@ -211,7 +211,7 @@ svuint8x4_t test_svread_ver_za8_u8_vg4(uint32_t base) __arm_streaming __arm_shar // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) // CPP-CHECK-NEXT: ret [[TMP8]] // -svint8x4_t test_svread_ver_za8_s8_vg4(uint32_t base) __arm_streaming __arm_shared_za __arm_preserves_za { +svint8x4_t test_svread_ver_za8_s8_vg4(uint32_t base) __arm_streaming __arm_in("za") { return svread_ver_za8_s8_vg4(0, base); } @@ -233,7 +233,7 @@ svint8x4_t test_svread_ver_za8_s8_vg4(uint32_t base) __arm_streaming __arm_share // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) // CPP-CHECK-NEXT: ret [[TMP4]] // -svuint16x2_t test_svread_hor_za16_u16_vg2(uint32_t base) __arm_streaming __arm_shared_za __arm_preserves_za { +svuint16x2_t test_svread_hor_za16_u16_vg2(uint32_t base) __arm_streaming __arm_in("za") { return svread_hor_za16_u16_vg2(1, base); } @@ -255,7 +255,7 @@ svuint16x2_t test_svread_hor_za16_u16_vg2(uint32_t base) __arm_streaming __arm_s // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) // CPP-CHECK-NEXT: ret [[TMP4]] // -svbfloat16x2_t test_svread_hor_za16_bf16_vg2(uint32_t base) __arm_streaming __arm_shared_za __arm_preserves_za { +svbfloat16x2_t test_svread_hor_za16_bf16_vg2(uint32_t base) __arm_streaming __arm_in("za") { return svread_hor_za16_bf16_vg2(1, base); } @@ -277,7 +277,7 @@ svbfloat16x2_t test_svread_hor_za16_bf16_vg2(uint32_t base) __arm_streaming __ar // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) // CPP-CHECK-NEXT: ret [[TMP4]] // -svfloat16x2_t test_svread_hor_za16_f16_vg2(uint32_t base) 
__arm_streaming __arm_shared_za __arm_preserves_za { +svfloat16x2_t test_svread_hor_za16_f16_vg2(uint32_t base) __arm_streaming __arm_in("za") { return svread_hor_za16_f16_vg2(1, base); } @@ -299,7 +299,7 @@ svfloat16x2_t test_svread_hor_za16_f16_vg2(uint32_t base) __arm_streaming __arm_ // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) // CPP-CHECK-NEXT: ret [[TMP4]] // -svint16x2_t test_svread_hor_za16_s16_vg2(uint32_t base) __arm_streaming __arm_shared_za __arm_preserves_za { +svint16x2_t test_svread_hor_za16_s16_vg2(uint32_t base) __arm_streaming __arm_in("za") { return svread_hor_za16_s16_vg2(1, base); } @@ -321,7 +321,7 @@ svint16x2_t test_svread_hor_za16_s16_vg2(uint32_t base) __arm_streaming __arm_sh // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) // CPP-CHECK-NEXT: ret [[TMP4]] // -svuint16x2_t test_svread_ver_za16_u16_vg2(uint32_t base) __arm_streaming __arm_shared_za __arm_preserves_za { +svuint16x2_t test_svread_ver_za16_u16_vg2(uint32_t base) __arm_streaming __arm_in("za") { return svread_ver_za16_u16_vg2(1, base); } @@ -343,7 +343,7 @@ svuint16x2_t test_svread_ver_za16_u16_vg2(uint32_t base) __arm_streaming __arm_s // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) // CPP-CHECK-NEXT: ret [[TMP4]] // -svbfloat16x2_t test_svread_ver_za16_bf16_vg2(uint32_t base) __arm_streaming __arm_shared_za __arm_preserves_za { +svbfloat16x2_t test_svread_ver_za16_bf16_vg2(uint32_t base) __arm_streaming __arm_in("za") { return svread_ver_za16_bf16_vg2(1, base); } @@ -365,7 +365,7 @@ svbfloat16x2_t test_svread_ver_za16_bf16_vg2(uint32_t base) __arm_streaming __ar // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) // CPP-CHECK-NEXT: ret [[TMP4]] // -svfloat16x2_t test_svread_ver_za16_f16_vg2(uint32_t base) __arm_streaming 
__arm_shared_za __arm_preserves_za { +svfloat16x2_t test_svread_ver_za16_f16_vg2(uint32_t base) __arm_streaming __arm_in("za") { return svread_ver_za16_f16_vg2(1, base); } @@ -387,7 +387,7 @@ svfloat16x2_t test_svread_ver_za16_f16_vg2(uint32_t base) __arm_streaming __arm_ // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) // CPP-CHECK-NEXT: ret [[TMP4]] // -svint16x2_t test_svread_ver_za16_s16_vg2(uint32_t base) __arm_streaming __arm_shared_za __arm_preserves_za { +svint16x2_t test_svread_ver_za16_s16_vg2(uint32_t base) __arm_streaming __arm_in("za") { return svread_ver_za16_s16_vg2(1, base); } @@ -417,7 +417,7 @@ svint16x2_t test_svread_ver_za16_s16_vg2(uint32_t base) __arm_streaming __arm_sh // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) // CPP-CHECK-NEXT: ret [[TMP8]] // -svuint16x4_t test_svread_hor_za16_u16_vg4(uint32_t base) __arm_streaming __arm_shared_za __arm_preserves_za { +svuint16x4_t test_svread_hor_za16_u16_vg4(uint32_t base) __arm_streaming __arm_in("za") { return svread_hor_za16_u16_vg4(1, base); } @@ -447,7 +447,7 @@ svuint16x4_t test_svread_hor_za16_u16_vg4(uint32_t base) __arm_streaming __arm_s // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) // CPP-CHECK-NEXT: ret [[TMP8]] // -svbfloat16x4_t test_svread_hor_za16_bf16_vg4(uint32_t base) __arm_streaming __arm_shared_za __arm_preserves_za { +svbfloat16x4_t test_svread_hor_za16_bf16_vg4(uint32_t base) __arm_streaming __arm_in("za") { return svread_hor_za16_bf16_vg4(1, base); } @@ -477,7 +477,7 @@ svbfloat16x4_t test_svread_hor_za16_bf16_vg4(uint32_t base) __arm_streaming __ar // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) // CPP-CHECK-NEXT: ret [[TMP8]] // -svfloat16x4_t test_svread_hor_za16_f16_vg4(uint32_t base) __arm_streaming __arm_shared_za 
__arm_preserves_za { +svfloat16x4_t test_svread_hor_za16_f16_vg4(uint32_t base) __arm_streaming __arm_in("za") { return svread_hor_za16_f16_vg4(1, base); } @@ -507,7 +507,7 @@ svfloat16x4_t test_svread_hor_za16_f16_vg4(uint32_t base) __arm_streaming __arm_ // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) // CPP-CHECK-NEXT: ret [[TMP8]] // -svint16x4_t test_svread_hor_za16_s16_vg4(uint32_t base) __arm_streaming __arm_shared_za __arm_preserves_za { +svint16x4_t test_svread_hor_za16_s16_vg4(uint32_t base) __arm_streaming __arm_in("za") { return svread_hor_za16_s16_vg4(1, base); } @@ -537,7 +537,7 @@ svint16x4_t test_svread_hor_za16_s16_vg4(uint32_t base) __arm_streaming __arm_sh // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) // CPP-CHECK-NEXT: ret [[TMP8]] // -svuint16x4_t test_svread_ver_za16_u16_vg4(uint32_t base) __arm_streaming __arm_shared_za __arm_preserves_za { +svuint16x4_t test_svread_ver_za16_u16_vg4(uint32_t base) __arm_streaming __arm_in("za") { return svread_ver_za16_u16_vg4(1, base); } @@ -567,7 +567,7 @@ svuint16x4_t test_svread_ver_za16_u16_vg4(uint32_t base) __arm_streaming __arm_s // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) // CPP-CHECK-NEXT: ret [[TMP8]] // -svbfloat16x4_t test_svread_ver_za16_bf16_vg4(uint32_t base) __arm_streaming __arm_shared_za __arm_preserves_za { +svbfloat16x4_t test_svread_ver_za16_bf16_vg4(uint32_t base) __arm_streaming __arm_in("za") { return svread_ver_za16_bf16_vg4(1, base); } @@ -597,7 +597,7 @@ svbfloat16x4_t test_svread_ver_za16_bf16_vg4(uint32_t base) __arm_streaming __ar // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) // CPP-CHECK-NEXT: ret [[TMP8]] // -svfloat16x4_t test_svread_ver_za16_f16_vg4(uint32_t base) __arm_streaming __arm_shared_za __arm_preserves_za { 
+svfloat16x4_t test_svread_ver_za16_f16_vg4(uint32_t base) __arm_streaming __arm_in("za") { return svread_ver_za16_f16_vg4(1, base); } @@ -627,7 +627,7 @@ svfloat16x4_t test_svread_ver_za16_f16_vg4(uint32_t base) __arm_streaming __arm_ // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) // CPP-CHECK-NEXT: ret [[TMP8]] // -svint16x4_t test_svread_ver_za16_s16_vg4(uint32_t base) __arm_streaming __arm_shared_za __arm_preserves_za { +svint16x4_t test_svread_ver_za16_s16_vg4(uint32_t base) __arm_streaming __arm_in("za") { return svread_ver_za16_s16_vg4(1, base); } @@ -649,7 +649,7 @@ svint16x4_t test_svread_ver_za16_s16_vg4(uint32_t base) __arm_streaming __arm_sh // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) // CPP-CHECK-NEXT: ret [[TMP4]] // -svuint32x2_t test_svread_hor_za32_u32_vg2(uint32_t base) __arm_streaming __arm_shared_za __arm_preserves_za { +svuint32x2_t test_svread_hor_za32_u32_vg2(uint32_t base) __arm_streaming __arm_in("za") { return svread_hor_za32_u32_vg2(3, base); } @@ -671,7 +671,7 @@ svuint32x2_t test_svread_hor_za32_u32_vg2(uint32_t base) __arm_streaming __arm_s // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) // CPP-CHECK-NEXT: ret [[TMP4]] // -svfloat32x2_t test_svread_hor_za32_f32_vg2(uint32_t base) __arm_streaming __arm_shared_za __arm_preserves_za { +svfloat32x2_t test_svread_hor_za32_f32_vg2(uint32_t base) __arm_streaming __arm_in("za") { return svread_hor_za32_f32_vg2(3, base); } @@ -693,7 +693,7 @@ svfloat32x2_t test_svread_hor_za32_f32_vg2(uint32_t base) __arm_streaming __arm_ // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) // CPP-CHECK-NEXT: ret [[TMP4]] // -svint32x2_t test_svread_hor_za32_s32_vg2(uint32_t base) __arm_streaming __arm_shared_za __arm_preserves_za { +svint32x2_t 
test_svread_hor_za32_s32_vg2(uint32_t base) __arm_streaming __arm_in("za") { return svread_hor_za32_s32_vg2(3, base); } @@ -715,7 +715,7 @@ svint32x2_t test_svread_hor_za32_s32_vg2(uint32_t base) __arm_streaming __arm_sh // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) // CPP-CHECK-NEXT: ret [[TMP4]] // -svuint32x2_t test_svread_ver_za32_u32_vg2(uint32_t base) __arm_streaming __arm_shared_za __arm_preserves_za { +svuint32x2_t test_svread_ver_za32_u32_vg2(uint32_t base) __arm_streaming __arm_in("za") { return svread_ver_za32_u32_vg2(3, base); } @@ -737,7 +737,7 @@ svuint32x2_t test_svread_ver_za32_u32_vg2(uint32_t base) __arm_streaming __arm_s // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) // CPP-CHECK-NEXT: ret [[TMP4]] // -svfloat32x2_t test_svread_ver_za32_f32_vg2(uint32_t base) __arm_streaming __arm_shared_za __arm_preserves_za { +svfloat32x2_t test_svread_ver_za32_f32_vg2(uint32_t base) __arm_streaming __arm_in("za") { return svread_ver_za32_f32_vg2(3, base); } @@ -759,7 +759,7 @@ svfloat32x2_t test_svread_ver_za32_f32_vg2(uint32_t base) __arm_streaming __arm_ // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) // CPP-CHECK-NEXT: ret [[TMP4]] // -svint32x2_t test_svread_ver_za32_s32_vg2(uint32_t base) __arm_streaming __arm_shared_za __arm_preserves_za { +svint32x2_t test_svread_ver_za32_s32_vg2(uint32_t base) __arm_streaming __arm_in("za") { return svread_ver_za32_s32_vg2(3, base); } @@ -789,7 +789,7 @@ svint32x2_t test_svread_ver_za32_s32_vg2(uint32_t base) __arm_streaming __arm_sh // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) // CPP-CHECK-NEXT: ret [[TMP8]] // -svuint32x4_t test_svread_hor_za32_u32_vg4(uint32_t base) __arm_streaming __arm_shared_za __arm_preserves_za { +svuint32x4_t test_svread_hor_za32_u32_vg4(uint32_t 
base) __arm_streaming __arm_in("za") { return svread_hor_za32_u32_vg4(3, base); } @@ -819,7 +819,7 @@ svuint32x4_t test_svread_hor_za32_u32_vg4(uint32_t base) __arm_streaming __arm_s // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) // CPP-CHECK-NEXT: ret [[TMP8]] // -svfloat32x4_t test_svread_hor_za32_f32_vg4(uint32_t base) __arm_streaming __arm_shared_za __arm_preserves_za { +svfloat32x4_t test_svread_hor_za32_f32_vg4(uint32_t base) __arm_streaming __arm_in("za") { return svread_hor_za32_f32_vg4(3, base); } @@ -849,7 +849,7 @@ svfloat32x4_t test_svread_hor_za32_f32_vg4(uint32_t base) __arm_streaming __arm_ // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) // CPP-CHECK-NEXT: ret [[TMP8]] // -svint32x4_t test_svread_hor_za32_s32_vg4(uint32_t base) __arm_streaming __arm_shared_za __arm_preserves_za { +svint32x4_t test_svread_hor_za32_s32_vg4(uint32_t base) __arm_streaming __arm_in("za") { return svread_hor_za32_s32_vg4(3, base); } @@ -879,7 +879,7 @@ svint32x4_t test_svread_hor_za32_s32_vg4(uint32_t base) __arm_streaming __arm_sh // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) // CPP-CHECK-NEXT: ret [[TMP8]] // -svuint32x4_t test_svread_ver_za32_u32_vg4(uint32_t base) __arm_streaming __arm_shared_za __arm_preserves_za { +svuint32x4_t test_svread_ver_za32_u32_vg4(uint32_t base) __arm_streaming __arm_in("za") { return svread_ver_za32_u32_vg4(3, base); } @@ -909,7 +909,7 @@ svuint32x4_t test_svread_ver_za32_u32_vg4(uint32_t base) __arm_streaming __arm_s // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) // CPP-CHECK-NEXT: ret [[TMP8]] // -svfloat32x4_t test_svread_ver_za32_f32_vg4(uint32_t base) __arm_streaming __arm_shared_za __arm_preserves_za { +svfloat32x4_t test_svread_ver_za32_f32_vg4(uint32_t base) __arm_streaming 
__arm_in("za") { return svread_ver_za32_f32_vg4(3, base); } @@ -939,7 +939,7 @@ svfloat32x4_t test_svread_ver_za32_f32_vg4(uint32_t base) __arm_streaming __arm_ // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) // CPP-CHECK-NEXT: ret [[TMP8]] // -svint32x4_t test_svread_ver_za32_s32_vg4(uint32_t base) __arm_streaming __arm_shared_za __arm_preserves_za { +svint32x4_t test_svread_ver_za32_s32_vg4(uint32_t base) __arm_streaming __arm_in("za") { return svread_ver_za32_s32_vg4(3, base); } @@ -961,7 +961,7 @@ svint32x4_t test_svread_ver_za32_s32_vg4(uint32_t base) __arm_streaming __arm_sh // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) // CPP-CHECK-NEXT: ret [[TMP4]] // -svuint64x2_t test_svread_hor_za64_u64_vg2(uint32_t base) __arm_streaming __arm_shared_za __arm_preserves_za { +svuint64x2_t test_svread_hor_za64_u64_vg2(uint32_t base) __arm_streaming __arm_in("za") { return svread_hor_za64_u64_vg2(7, base); } @@ -983,7 +983,7 @@ svuint64x2_t test_svread_hor_za64_u64_vg2(uint32_t base) __arm_streaming __arm_s // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) // CPP-CHECK-NEXT: ret [[TMP4]] // -svfloat64x2_t test_svread_hor_za64_f64_vg2(uint32_t base) __arm_streaming __arm_shared_za __arm_preserves_za { +svfloat64x2_t test_svread_hor_za64_f64_vg2(uint32_t base) __arm_streaming __arm_in("za") { return svread_hor_za64_f64_vg2(7, base); } @@ -1005,7 +1005,7 @@ svfloat64x2_t test_svread_hor_za64_f64_vg2(uint32_t base) __arm_streaming __arm_ // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) // CPP-CHECK-NEXT: ret [[TMP4]] // -svint64x2_t test_svread_hor_za64_s64_vg2(uint32_t base) __arm_streaming __arm_shared_za __arm_preserves_za { +svint64x2_t test_svread_hor_za64_s64_vg2(uint32_t base) __arm_streaming __arm_in("za") { return 
svread_hor_za64_s64_vg2(7, base); } @@ -1027,7 +1027,7 @@ svint64x2_t test_svread_hor_za64_s64_vg2(uint32_t base) __arm_streaming __arm_sh // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) // CPP-CHECK-NEXT: ret [[TMP4]] // -svuint64x2_t test_svread_ver_za64_u64_vg2(uint32_t base) __arm_streaming __arm_shared_za __arm_preserves_za { +svuint64x2_t test_svread_ver_za64_u64_vg2(uint32_t base) __arm_streaming __arm_in("za") { return svread_ver_za64_u64_vg2(7, base); } @@ -1049,7 +1049,7 @@ svuint64x2_t test_svread_ver_za64_u64_vg2(uint32_t base) __arm_streaming __arm_s // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) // CPP-CHECK-NEXT: ret [[TMP4]] // -svfloat64x2_t test_svread_ver_za64_f64_vg2(uint32_t base) __arm_streaming __arm_shared_za __arm_preserves_za { +svfloat64x2_t test_svread_ver_za64_f64_vg2(uint32_t base) __arm_streaming __arm_in("za") { return svread_ver_za64_f64_vg2(7, base); } @@ -1071,7 +1071,7 @@ svfloat64x2_t test_svread_ver_za64_f64_vg2(uint32_t base) __arm_streaming __arm_ // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) // CPP-CHECK-NEXT: ret [[TMP4]] // -svint64x2_t test_svread_ver_za64_s64_vg2(uint32_t base) __arm_streaming __arm_shared_za __arm_preserves_za { +svint64x2_t test_svread_ver_za64_s64_vg2(uint32_t base) __arm_streaming __arm_in("za") { return svread_ver_za64_s64_vg2(7, base); } @@ -1101,7 +1101,7 @@ svint64x2_t test_svread_ver_za64_s64_vg2(uint32_t base) __arm_streaming __arm_sh // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) // CPP-CHECK-NEXT: ret [[TMP8]] // -svuint64x4_t test_svread_hor_za64_u64_vg4(uint32_t base) __arm_streaming __arm_shared_za __arm_preserves_za { +svuint64x4_t test_svread_hor_za64_u64_vg4(uint32_t base) __arm_streaming __arm_in("za") { return svread_hor_za64_u64_vg4(7, base); } 
@@ -1131,7 +1131,7 @@ svuint64x4_t test_svread_hor_za64_u64_vg4(uint32_t base) __arm_streaming __arm_s // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) // CPP-CHECK-NEXT: ret [[TMP8]] // -svfloat64x4_t test_svread_hor_za64_f64_vg4(uint32_t base) __arm_streaming __arm_shared_za __arm_preserves_za { +svfloat64x4_t test_svread_hor_za64_f64_vg4(uint32_t base) __arm_streaming __arm_in("za") { return svread_hor_za64_f64_vg4(7, base); } @@ -1161,7 +1161,7 @@ svfloat64x4_t test_svread_hor_za64_f64_vg4(uint32_t base) __arm_streaming __arm_ // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) // CPP-CHECK-NEXT: ret [[TMP8]] // -svint64x4_t test_svread_hor_za64_s64_vg4(uint32_t base) __arm_streaming __arm_shared_za __arm_preserves_za { +svint64x4_t test_svread_hor_za64_s64_vg4(uint32_t base) __arm_streaming __arm_in("za") { return svread_hor_za64_s64_vg4(7, base); } @@ -1191,7 +1191,7 @@ svint64x4_t test_svread_hor_za64_s64_vg4(uint32_t base) __arm_streaming __arm_sh // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) // CPP-CHECK-NEXT: ret [[TMP8]] // -svuint64x4_t test_svread_ver_za64_u64_vg4(uint32_t base) __arm_streaming __arm_shared_za __arm_preserves_za { +svuint64x4_t test_svread_ver_za64_u64_vg4(uint32_t base) __arm_streaming __arm_in("za") { return svread_ver_za64_u64_vg4(7, base); } @@ -1221,7 +1221,7 @@ svuint64x4_t test_svread_ver_za64_u64_vg4(uint32_t base) __arm_streaming __arm_s // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) // CPP-CHECK-NEXT: ret [[TMP8]] // -svfloat64x4_t test_svread_ver_za64_f64_vg4(uint32_t base) __arm_streaming __arm_shared_za __arm_preserves_za { +svfloat64x4_t test_svread_ver_za64_f64_vg4(uint32_t base) __arm_streaming __arm_in("za") { return svread_ver_za64_f64_vg4(7, base); } @@ -1251,7 +1251,7 @@ 
svfloat64x4_t test_svread_ver_za64_f64_vg4(uint32_t base) __arm_streaming __arm_ // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) // CPP-CHECK-NEXT: ret [[TMP8]] // -svint64x4_t test_svread_ver_za64_s64_vg4(uint32_t base) __arm_streaming __arm_shared_za __arm_preserves_za { +svint64x4_t test_svread_ver_za64_s64_vg4(uint32_t base) __arm_streaming __arm_in("za") { return svread_ver_za64_s64_vg4(7, base); } @@ -1273,7 +1273,7 @@ svint64x4_t test_svread_ver_za64_s64_vg4(uint32_t base) __arm_streaming __arm_sh // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) // CPP-CHECK-NEXT: ret [[TMP4]] // -svint8x2_t test_svread_za8_s8_vg1x2(uint32_t base) __arm_streaming __arm_shared_za __arm_preserves_za { +svint8x2_t test_svread_za8_s8_vg1x2(uint32_t base) __arm_streaming __arm_in("za") { return svread_za8_s8_vg1x2(base); } @@ -1295,7 +1295,7 @@ svint8x2_t test_svread_za8_s8_vg1x2(uint32_t base) __arm_streaming __arm_shared_ // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) // CPP-CHECK-NEXT: ret [[TMP4]] // -svuint8x2_t test_svread_za8_u8_vg1x2(uint32_t base) __arm_streaming __arm_shared_za __arm_preserves_za { +svuint8x2_t test_svread_za8_u8_vg1x2(uint32_t base) __arm_streaming __arm_in("za") { return svread_za8_u8_vg1x2(base); } @@ -1317,7 +1317,7 @@ svuint8x2_t test_svread_za8_u8_vg1x2(uint32_t base) __arm_streaming __arm_shared // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) // CPP-CHECK-NEXT: ret [[TMP4]] // -svint16x2_t test_svread_za16_s16_vg1x2(uint32_t base) __arm_streaming __arm_shared_za __arm_preserves_za { +svint16x2_t test_svread_za16_s16_vg1x2(uint32_t base) __arm_streaming __arm_in("za") { return svread_za16_s16_vg1x2(base); } @@ -1339,7 +1339,7 @@ svint16x2_t test_svread_za16_s16_vg1x2(uint32_t base) __arm_streaming __arm_shar 
// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) // CPP-CHECK-NEXT: ret [[TMP4]] // -svuint16x2_t test_svread_za16_u16_vg1x2(uint32_t base) __arm_streaming __arm_shared_za __arm_preserves_za { +svuint16x2_t test_svread_za16_u16_vg1x2(uint32_t base) __arm_streaming __arm_in("za") { return svread_za16_u16_vg1x2(base); } @@ -1361,7 +1361,7 @@ svuint16x2_t test_svread_za16_u16_vg1x2(uint32_t base) __arm_streaming __arm_sha // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) // CPP-CHECK-NEXT: ret [[TMP4]] // -svbfloat16x2_t test_svread_za16_bf16_vg1x2(uint32_t base) __arm_streaming __arm_shared_za __arm_preserves_za { +svbfloat16x2_t test_svread_za16_bf16_vg1x2(uint32_t base) __arm_streaming __arm_in("za") { return svread_za16_bf16_vg1x2(base); } @@ -1383,7 +1383,7 @@ svbfloat16x2_t test_svread_za16_bf16_vg1x2(uint32_t base) __arm_streaming __arm_ // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) // CPP-CHECK-NEXT: ret [[TMP4]] // -svfloat16x2_t test_svread_za16_f16_vg1x2(uint32_t base) __arm_streaming __arm_shared_za __arm_preserves_za { +svfloat16x2_t test_svread_za16_f16_vg1x2(uint32_t base) __arm_streaming __arm_in("za") { return svread_za16_f16_vg1x2(base); } @@ -1405,7 +1405,7 @@ svfloat16x2_t test_svread_za16_f16_vg1x2(uint32_t base) __arm_streaming __arm_sh // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) // CPP-CHECK-NEXT: ret [[TMP4]] // -svint32x2_t test_svread_za32_s32_vg1x2(uint32_t base) __arm_streaming __arm_shared_za __arm_preserves_za { +svint32x2_t test_svread_za32_s32_vg1x2(uint32_t base) __arm_streaming __arm_in("za") { return svread_za32_s32_vg1x2(base); } @@ -1427,7 +1427,7 @@ svint32x2_t test_svread_za32_s32_vg1x2(uint32_t base) __arm_streaming __arm_shar // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call 
@llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) // CPP-CHECK-NEXT: ret [[TMP4]] // -svuint32x2_t test_svread_za32_u32_vg1x2(uint32_t base) __arm_streaming __arm_shared_za __arm_preserves_za { +svuint32x2_t test_svread_za32_u32_vg1x2(uint32_t base) __arm_streaming __arm_in("za") { return svread_za32_u32_vg1x2(base); } @@ -1449,7 +1449,7 @@ svuint32x2_t test_svread_za32_u32_vg1x2(uint32_t base) __arm_streaming __arm_sha // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) // CPP-CHECK-NEXT: ret [[TMP4]] // -svfloat32x2_t test_svread_za32_f32_vg1x2(uint32_t base) __arm_streaming __arm_shared_za __arm_preserves_za { +svfloat32x2_t test_svread_za32_f32_vg1x2(uint32_t base) __arm_streaming __arm_in("za") { return svread_za32_f32_vg1x2(base); } @@ -1471,7 +1471,7 @@ svfloat32x2_t test_svread_za32_f32_vg1x2(uint32_t base) __arm_streaming __arm_sh // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) // CPP-CHECK-NEXT: ret [[TMP4]] // -svuint64x2_t test_svread_za64_u64_vg1x2(uint32_t base) __arm_streaming __arm_shared_za __arm_preserves_za { +svuint64x2_t test_svread_za64_u64_vg1x2(uint32_t base) __arm_streaming __arm_in("za") { return svread_za64_u64_vg1x2(base); } @@ -1493,7 +1493,7 @@ svuint64x2_t test_svread_za64_u64_vg1x2(uint32_t base) __arm_streaming __arm_sha // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) // CPP-CHECK-NEXT: ret [[TMP4]] // -svfloat64x2_t test_svread_za64_f64_vg1x2(uint32_t base) __arm_streaming __arm_shared_za __arm_preserves_za { +svfloat64x2_t test_svread_za64_f64_vg1x2(uint32_t base) __arm_streaming __arm_in("za") { return svread_za64_f64_vg1x2(base); } @@ -1515,7 +1515,7 @@ svfloat64x2_t test_svread_za64_f64_vg1x2(uint32_t base) __arm_streaming __arm_sh // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) // 
CPP-CHECK-NEXT: ret [[TMP4]] // -svint64x2_t test_svread_za64_s64_vg1x2(uint32_t base) __arm_streaming __arm_shared_za __arm_preserves_za { +svint64x2_t test_svread_za64_s64_vg1x2(uint32_t base) __arm_streaming __arm_in("za") { return svread_za64_s64_vg1x2(base); } @@ -1545,7 +1545,7 @@ svint64x2_t test_svread_za64_s64_vg1x2(uint32_t base) __arm_streaming __arm_shar // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) // CPP-CHECK-NEXT: ret [[TMP8]] // -svint8x4_t test_svread_za8_s8_vg1x4(uint32_t base) __arm_streaming __arm_shared_za __arm_preserves_za { +svint8x4_t test_svread_za8_s8_vg1x4(uint32_t base) __arm_streaming __arm_in("za") { return svread_za8_s8_vg1x4(base); } @@ -1575,7 +1575,7 @@ svint8x4_t test_svread_za8_s8_vg1x4(uint32_t base) __arm_streaming __arm_shared_ // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) // CPP-CHECK-NEXT: ret [[TMP8]] // -svuint8x4_t test_svread_za8_u8_vg1x4(uint32_t base) __arm_streaming __arm_shared_za __arm_preserves_za { +svuint8x4_t test_svread_za8_u8_vg1x4(uint32_t base) __arm_streaming __arm_in("za") { return svread_za8_u8_vg1x4(base); } @@ -1605,7 +1605,7 @@ svuint8x4_t test_svread_za8_u8_vg1x4(uint32_t base) __arm_streaming __arm_shared // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) // CPP-CHECK-NEXT: ret [[TMP8]] // -svint16x4_t test_svread_za16_s16_vg1x4(uint32_t base) __arm_streaming __arm_shared_za __arm_preserves_za { +svint16x4_t test_svread_za16_s16_vg1x4(uint32_t base) __arm_streaming __arm_in("za") { return svread_za16_s16_vg1x4(base); } @@ -1635,7 +1635,7 @@ svint16x4_t test_svread_za16_s16_vg1x4(uint32_t base) __arm_streaming __arm_shar // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) // CPP-CHECK-NEXT: ret [[TMP8]] // -svuint16x4_t test_svread_za16_u16_vg1x4(uint32_t base) 
__arm_streaming __arm_shared_za __arm_preserves_za { +svuint16x4_t test_svread_za16_u16_vg1x4(uint32_t base) __arm_streaming __arm_in("za") { return svread_za16_u16_vg1x4(base); } @@ -1665,7 +1665,7 @@ svuint16x4_t test_svread_za16_u16_vg1x4(uint32_t base) __arm_streaming __arm_sha // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) // CPP-CHECK-NEXT: ret [[TMP8]] // -svbfloat16x4_t test_svread_za16_bf16_vg1x4(uint32_t base) __arm_streaming __arm_shared_za __arm_preserves_za { +svbfloat16x4_t test_svread_za16_bf16_vg1x4(uint32_t base) __arm_streaming __arm_in("za") { return svread_za16_bf16_vg1x4(base); } @@ -1695,7 +1695,7 @@ svbfloat16x4_t test_svread_za16_bf16_vg1x4(uint32_t base) __arm_streaming __arm_ // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) // CPP-CHECK-NEXT: ret [[TMP8]] // -svfloat16x4_t test_svread_za16_f16_vg1x4(uint32_t base) __arm_streaming __arm_shared_za __arm_preserves_za { +svfloat16x4_t test_svread_za16_f16_vg1x4(uint32_t base) __arm_streaming __arm_in("za") { return svread_za16_f16_vg1x4(base); } @@ -1725,7 +1725,7 @@ svfloat16x4_t test_svread_za16_f16_vg1x4(uint32_t base) __arm_streaming __arm_sh // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) // CPP-CHECK-NEXT: ret [[TMP8]] // -svint32x4_t test_svread_za32_s32_vg1x4(uint32_t base) __arm_streaming __arm_shared_za __arm_preserves_za { +svint32x4_t test_svread_za32_s32_vg1x4(uint32_t base) __arm_streaming __arm_in("za") { return svread_za32_s32_vg1x4(base); } @@ -1755,7 +1755,7 @@ svint32x4_t test_svread_za32_s32_vg1x4(uint32_t base) __arm_streaming __arm_shar // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) // CPP-CHECK-NEXT: ret [[TMP8]] // -svuint32x4_t test_svread_za32_u32_vg1x4(uint32_t base) __arm_streaming __arm_shared_za __arm_preserves_za { 
+svuint32x4_t test_svread_za32_u32_vg1x4(uint32_t base) __arm_streaming __arm_in("za") { return svread_za32_u32_vg1x4(base); } @@ -1785,7 +1785,7 @@ svuint32x4_t test_svread_za32_u32_vg1x4(uint32_t base) __arm_streaming __arm_sha // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) // CPP-CHECK-NEXT: ret [[TMP8]] // -svfloat32x4_t test_svread_za32_f32_vg1x4(uint32_t base) __arm_streaming __arm_shared_za __arm_preserves_za { +svfloat32x4_t test_svread_za32_f32_vg1x4(uint32_t base) __arm_streaming __arm_in("za") { return svread_za32_f32_vg1x4(base); } @@ -1815,7 +1815,7 @@ svfloat32x4_t test_svread_za32_f32_vg1x4(uint32_t base) __arm_streaming __arm_sh // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) // CPP-CHECK-NEXT: ret [[TMP8]] // -svuint64x4_t test_svread_za64_u64_vg1x4(uint32_t base) __arm_streaming __arm_shared_za __arm_preserves_za { +svuint64x4_t test_svread_za64_u64_vg1x4(uint32_t base) __arm_streaming __arm_in("za") { return svread_za64_u64_vg1x4(base); } @@ -1845,7 +1845,7 @@ svuint64x4_t test_svread_za64_u64_vg1x4(uint32_t base) __arm_streaming __arm_sha // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) // CPP-CHECK-NEXT: ret [[TMP8]] // -svfloat64x4_t test_svread_za64_f64_vg1x4(uint32_t base) __arm_streaming __arm_shared_za __arm_preserves_za { +svfloat64x4_t test_svread_za64_f64_vg1x4(uint32_t base) __arm_streaming __arm_in("za") { return svread_za64_f64_vg1x4(base); } @@ -1875,6 +1875,6 @@ svfloat64x4_t test_svread_za64_f64_vg1x4(uint32_t base) __arm_streaming __arm_sh // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) // CPP-CHECK-NEXT: ret [[TMP8]] // -svint64x4_t test_svread_za64_s64_vg1x4(uint32_t base) __arm_streaming __arm_shared_za __arm_preserves_za { +svint64x4_t test_svread_za64_s64_vg1x4(uint32_t base) 
__arm_streaming __arm_in("za") { return svread_za64_s64_vg1x4(base); } diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_sub.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_sub.c index 8c9a0a489ebf9..68913c5da4f75 100644 --- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_sub.c +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_sub.c @@ -36,7 +36,7 @@ // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.write.single.za.vg1x2.nxv4i32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // -void test_svsub_write_single2_s32(uint32_t slice_base, svint32x2_t zn, svint32_t zm) __arm_streaming __arm_shared_za { +void test_svsub_write_single2_s32(uint32_t slice_base, svint32x2_t zn, svint32_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svsub_write,_single,_za32,_s32,_vg1x2)(slice_base, zn, zm); } @@ -54,7 +54,7 @@ void test_svsub_write_single2_s32(uint32_t slice_base, svint32x2_t zn, svint32_t // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.write.single.za.vg1x2.nxv4i32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // -void test_svsub_write_single2_u32(uint32_t slice_base, svuint32x2_t zn, svuint32_t zm) __arm_streaming __arm_shared_za { +void test_svsub_write_single2_u32(uint32_t slice_base, svuint32x2_t zn, svuint32_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svsub_write,_single,_za32,_u32,_vg1x2)(slice_base, zn, zm); } @@ -72,7 +72,7 @@ void test_svsub_write_single2_u32(uint32_t slice_base, svuint32x2_t zn, svuint32 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.write.single.za.vg1x2.nxv2i64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // -void test_svsub_write_single2_s64(uint32_t slice_base, svint64x2_t zn, svint64_t zm) __arm_streaming __arm_shared_za { +void test_svsub_write_single2_s64(uint32_t slice_base, svint64x2_t zn, svint64_t zm) __arm_streaming __arm_inout("za") { 
SVE_ACLE_FUNC(svsub_write,_single,_za64,_s64,_vg1x2)(slice_base, zn, zm); } @@ -90,7 +90,7 @@ void test_svsub_write_single2_s64(uint32_t slice_base, svint64x2_t zn, svint64_t // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.write.single.za.vg1x2.nxv2i64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // -void test_svsub_write_single2_u64(uint32_t slice_base, svuint64x2_t zn, svuint64_t zm) __arm_streaming __arm_shared_za { +void test_svsub_write_single2_u64(uint32_t slice_base, svuint64x2_t zn, svuint64_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svsub_write,_single,_za64,_u64,_vg1x2)(slice_base, zn, zm); } @@ -114,7 +114,7 @@ void test_svsub_write_single2_u64(uint32_t slice_base, svuint64x2_t zn, svuint64 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.write.single.za.vg1x4.nxv4i32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // -void test_svsub_write_single4_s32(uint32_t slice_base, svint32x4_t zn, svint32_t zm) __arm_streaming __arm_shared_za { +void test_svsub_write_single4_s32(uint32_t slice_base, svint32x4_t zn, svint32_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svsub_write,_single,_za32,_s32,_vg1x4)(slice_base, zn, zm); } @@ -136,7 +136,7 @@ void test_svsub_write_single4_s32(uint32_t slice_base, svint32x4_t zn, svint32_t // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.write.single.za.vg1x4.nxv4i32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // -void test_svsub_write_single4_u32(uint32_t slice_base, svuint32x4_t zn, svuint32_t zm) __arm_streaming __arm_shared_za { +void test_svsub_write_single4_u32(uint32_t slice_base, svuint32x4_t zn, svuint32_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svsub_write,_single,_za32,_u32,_vg1x4)(slice_base, zn, zm); } @@ -158,7 +158,7 @@ void test_svsub_write_single4_u32(uint32_t slice_base, svuint32x4_t zn, svuint32 
// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.write.single.za.vg1x4.nxv2i64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // -void test_svsub_write_single4_s64(uint32_t slice_base, svint64x4_t zn, svint64_t zm) __arm_streaming __arm_shared_za { +void test_svsub_write_single4_s64(uint32_t slice_base, svint64x4_t zn, svint64_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svsub_write,_single,_za64,_s64,_vg1x4)(slice_base, zn, zm); } @@ -180,7 +180,7 @@ void test_svsub_write_single4_s64(uint32_t slice_base, svint64x4_t zn, svint64_t // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.write.single.za.vg1x4.nxv2i64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // -void test_svsub_write_single4_u64(uint32_t slice_base, svuint64x4_t zn, svuint64_t zm) __arm_streaming __arm_shared_za { +void test_svsub_write_single4_u64(uint32_t slice_base, svuint64x4_t zn, svuint64_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svsub_write,_single,_za64,_u64,_vg1x4)(slice_base, zn, zm); } @@ -208,7 +208,7 @@ void test_svsub_write_single4_u64(uint32_t slice_base, svuint64x4_t zn, svuint64 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.write.za.vg1x2.nxv4i32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) // CPP-CHECK-NEXT: ret void // -void test_svsub_write_multi2_s32(uint32_t slice_base, svint32x2_t zn, svint32x2_t zm) __arm_streaming __arm_shared_za { +void test_svsub_write_multi2_s32(uint32_t slice_base, svint32x2_t zn, svint32x2_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svsub_write,,_za32,_s32,_vg1x2)(slice_base, zn, zm); } @@ -230,7 +230,7 @@ void test_svsub_write_multi2_s32(uint32_t slice_base, svint32x2_t zn, svint32x2_ // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.write.za.vg1x2.nxv4i32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) // CPP-CHECK-NEXT: ret void // -void 
test_svsub_write_multi2_u32(uint32_t slice_base, svuint32x2_t zn, svuint32x2_t zm) __arm_streaming __arm_shared_za { +void test_svsub_write_multi2_u32(uint32_t slice_base, svuint32x2_t zn, svuint32x2_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svsub_write,,_za32,_u32,_vg1x2)(slice_base, zn, zm); } @@ -252,7 +252,7 @@ void test_svsub_write_multi2_u32(uint32_t slice_base, svuint32x2_t zn, svuint32x // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.write.za.vg1x2.nxv2i64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) // CPP-CHECK-NEXT: ret void // -void test_svsub_write_multi2_s64(uint32_t slice_base, svint64x2_t zn, svint64x2_t zm) __arm_streaming __arm_shared_za { +void test_svsub_write_multi2_s64(uint32_t slice_base, svint64x2_t zn, svint64x2_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svsub_write,,_za64,_s64,_vg1x2)(slice_base, zn, zm); } @@ -274,7 +274,7 @@ void test_svsub_write_multi2_s64(uint32_t slice_base, svint64x2_t zn, svint64x2_ // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.write.za.vg1x2.nxv2i64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) // CPP-CHECK-NEXT: ret void // -void test_svsub_write_multi2_u64(uint32_t slice_base, svuint64x2_t zn, svuint64x2_t zm) __arm_streaming __arm_shared_za { +void test_svsub_write_multi2_u64(uint32_t slice_base, svuint64x2_t zn, svuint64x2_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svsub_write,,_za64,_u64,_vg1x2)(slice_base, zn, zm); } @@ -306,7 +306,7 @@ void test_svsub_write_multi2_u64(uint32_t slice_base, svuint64x2_t zn, svuint64x // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.write.za.vg1x4.nxv4i32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) // CPP-CHECK-NEXT: ret void // -void test_svsub_write_multi4_s32(uint32_t slice_base, svint32x4_t zn, svint32x4_t zm) __arm_streaming __arm_shared_za { +void test_svsub_write_multi4_s32(uint32_t slice_base, svint32x4_t zn, 
svint32x4_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svsub_write,,_za32,_s32,_vg1x4)(slice_base, zn, zm); } @@ -336,7 +336,7 @@ void test_svsub_write_multi4_s32(uint32_t slice_base, svint32x4_t zn, svint32x4_ // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.write.za.vg1x4.nxv4i32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) // CPP-CHECK-NEXT: ret void // -void test_svsub_write_multi4_u32(uint32_t slice_base, svuint32x4_t zn, svuint32x4_t zm) __arm_streaming __arm_shared_za { +void test_svsub_write_multi4_u32(uint32_t slice_base, svuint32x4_t zn, svuint32x4_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svsub_write,,_za32,_u32,_vg1x4)(slice_base, zn, zm); } @@ -366,7 +366,7 @@ void test_svsub_write_multi4_u32(uint32_t slice_base, svuint32x4_t zn, svuint32x // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.write.za.vg1x4.nxv2i64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) // CPP-CHECK-NEXT: ret void // -void test_svsub_write_multi4_s64(uint32_t slice_base, svint64x4_t zn, svint64x4_t zm) __arm_streaming __arm_shared_za { +void test_svsub_write_multi4_s64(uint32_t slice_base, svint64x4_t zn, svint64x4_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svsub_write,,_za64,_s64,_vg1x4)(slice_base, zn, zm); } @@ -396,7 +396,7 @@ void test_svsub_write_multi4_s64(uint32_t slice_base, svint64x4_t zn, svint64x4_ // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.write.za.vg1x4.nxv2i64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) // CPP-CHECK-NEXT: ret void // -void test_svsub_write_multi4_u64(uint32_t slice_base, svuint64x4_t zn, svuint64x4_t zm) __arm_streaming __arm_shared_za { +void test_svsub_write_multi4_u64(uint32_t slice_base, svuint64x4_t zn, svuint64x4_t zm) __arm_streaming __arm_inout("za") { 
SVE_ACLE_FUNC(svsub_write,,_za64,_u64,_vg1x4)(slice_base, zn, zm); } @@ -420,7 +420,7 @@ void test_svsub_write_multi4_u64(uint32_t slice_base, svuint64x4_t zn, svuint64x // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.za32.vg1x2.nxv4f32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]]) // CPP-CHECK-NEXT: ret void // -void test_svsub_za32_vg1x2_f32(uint32_t slice_base, svfloat32x2_t zn) __arm_streaming __arm_shared_za { +void test_svsub_za32_vg1x2_f32(uint32_t slice_base, svfloat32x2_t zn) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svsub_za32,,_f32,,_vg1x2)(slice_base, zn); } @@ -438,7 +438,7 @@ void test_svsub_za32_vg1x2_f32(uint32_t slice_base, svfloat32x2_t zn) __arm_stre // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.za32.vg1x2.nxv4i32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]]) // CPP-CHECK-NEXT: ret void // -void test_svsub_za32_vg1x2_s32(uint32_t slice_base, svint32x2_t zn) __arm_streaming __arm_shared_za { +void test_svsub_za32_vg1x2_s32(uint32_t slice_base, svint32x2_t zn) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svsub_za32,,_s32,,_vg1x2)(slice_base , zn); } @@ -456,7 +456,7 @@ void test_svsub_za32_vg1x2_s32(uint32_t slice_base, svint32x2_t zn) __arm_stream // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.za32.vg1x2.nxv4i32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]]) // CPP-CHECK-NEXT: ret void // -void test_svsub_za32_vg1x2_u32(uint32_t slice_base, svuint32x2_t zn) __arm_streaming __arm_shared_za { +void test_svsub_za32_vg1x2_u32(uint32_t slice_base, svuint32x2_t zn) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svsub_za32,,_u32,,_vg1x2)(slice_base, zn); } @@ -474,7 +474,7 @@ void test_svsub_za32_vg1x2_u32(uint32_t slice_base, svuint32x2_t zn) __arm_strea // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.za64.vg1x2.nxv2f64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]]) // CPP-CHECK-NEXT: ret void // -void test_svsub_za64_vg1x2_f64(uint32_t slice_base, svfloat64x2_t zn) __arm_streaming __arm_shared_za { +void 
test_svsub_za64_vg1x2_f64(uint32_t slice_base, svfloat64x2_t zn) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svsub_za64,,_f64,,_vg1x2)(slice_base, zn); } @@ -492,7 +492,7 @@ void test_svsub_za64_vg1x2_f64(uint32_t slice_base, svfloat64x2_t zn) __arm_stre // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.za64.vg1x2.nxv2i64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]]) // CPP-CHECK-NEXT: ret void // -void test_svsub_za64_vg1x2_s64(uint32_t slice_base, svint64x2_t zn) __arm_streaming __arm_shared_za { +void test_svsub_za64_vg1x2_s64(uint32_t slice_base, svint64x2_t zn) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svsub_za64,,_s64,,_vg1x2)(slice_base, zn); } @@ -510,7 +510,7 @@ void test_svsub_za64_vg1x2_s64(uint32_t slice_base, svint64x2_t zn) __arm_stream // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.za64.vg1x2.nxv2i64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]]) // CPP-CHECK-NEXT: ret void // -void test_svsub_za64_vg1x2_u64(uint32_t slice_base, svuint64x2_t zn) __arm_streaming __arm_shared_za { +void test_svsub_za64_vg1x2_u64(uint32_t slice_base, svuint64x2_t zn) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svsub_za64,,_u64,,_vg1x2)(slice_base, zn); } @@ -534,7 +534,7 @@ void test_svsub_za64_vg1x2_u64(uint32_t slice_base, svuint64x2_t zn) __arm_strea // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.za32.vg1x4.nxv4f32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) // CPP-CHECK-NEXT: ret void // -void test_svsub_za32_vg1x4_f32(uint32_t slice_base, svfloat32x4_t zn) __arm_streaming __arm_shared_za { +void test_svsub_za32_vg1x4_f32(uint32_t slice_base, svfloat32x4_t zn) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svsub_za32,,_f32,,_vg1x4)(slice_base, zn); } @@ -556,7 +556,7 @@ void test_svsub_za32_vg1x4_f32(uint32_t slice_base, svfloat32x4_t zn) __arm_stre // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.za32.vg1x4.nxv4i32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) // CPP-CHECK-NEXT: 
ret void // -void test_svsub_za32_vg1x4_s32(uint32_t slice_base, svint32x4_t zn) __arm_streaming __arm_shared_za { +void test_svsub_za32_vg1x4_s32(uint32_t slice_base, svint32x4_t zn) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svsub_za32,,_s32,,_vg1x4)(slice_base, zn); } @@ -578,7 +578,7 @@ void test_svsub_za32_vg1x4_s32(uint32_t slice_base, svint32x4_t zn) __arm_stream // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.za32.vg1x4.nxv4i32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) // CPP-CHECK-NEXT: ret void // -void test_svsub_za32_vg1x4_u32(uint32_t slice_base, svuint32x4_t zn) __arm_streaming __arm_shared_za { +void test_svsub_za32_vg1x4_u32(uint32_t slice_base, svuint32x4_t zn) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svsub_za32,,_u32,,_vg1x4)(slice_base, zn); } @@ -600,7 +600,7 @@ void test_svsub_za32_vg1x4_u32(uint32_t slice_base, svuint32x4_t zn) __arm_strea // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.za64.vg1x4.nxv2f64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) // CPP-CHECK-NEXT: ret void // -void test_svsub_za64_vg1x4_f64(uint32_t slice_base, svfloat64x4_t zn) __arm_streaming __arm_shared_za { +void test_svsub_za64_vg1x4_f64(uint32_t slice_base, svfloat64x4_t zn) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svsub_za64,,_f64,,_vg1x4)(slice_base, zn); } @@ -622,7 +622,7 @@ void test_svsub_za64_vg1x4_f64(uint32_t slice_base, svfloat64x4_t zn) __arm_stre // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.za64.vg1x4.nxv2i64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) // CPP-CHECK-NEXT: ret void // -void test_svsub_za64_vg1x4_s64(uint32_t slice_base, svint64x4_t zn) __arm_streaming __arm_shared_za { +void test_svsub_za64_vg1x4_s64(uint32_t slice_base, svint64x4_t zn) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svsub_za64,,_s64,,_vg1x4)(slice_base, zn); } @@ -644,6 +644,6 @@ void test_svsub_za64_vg1x4_s64(uint32_t slice_base, svint64x4_t zn) __arm_stream 
// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.za64.vg1x4.nxv2i64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) // CPP-CHECK-NEXT: ret void // -void test_svsub_za64_vg1x4_u64(uint32_t slice_base, svuint64x4_t zn) __arm_streaming __arm_shared_za { +void test_svsub_za64_vg1x4_u64(uint32_t slice_base, svuint64x4_t zn) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svsub_za64,,_u64,,_vg1x4)(slice_base, zn); } diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vdot.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vdot.c index fb313d4cebd72..9aa993f68b163 100644 --- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vdot.c +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vdot.c @@ -28,7 +28,7 @@ // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fvdot.lane.za32.vg1x2.nxv8bf16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]], i32 3) // CPP-CHECK-NEXT: ret void // -void test_svvdot_lane_za32_bf16_vg1x2(uint32_t slice_base, svbfloat16x2_t zn, svbfloat16_t zm) __arm_streaming __arm_shared_za { +void test_svvdot_lane_za32_bf16_vg1x2(uint32_t slice_base, svbfloat16x2_t zn, svbfloat16_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svvdot_lane_za32,_bf16,_vg1x2)(slice_base, zn, zm, 3); } @@ -46,7 +46,7 @@ void test_svvdot_lane_za32_bf16_vg1x2(uint32_t slice_base, svbfloat16x2_t zn, sv // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fvdot.lane.za32.vg1x2.nxv8f16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]], i32 3) // CPP-CHECK-NEXT: ret void // -void test_svvdot_lane_za32_f16_vg1x2(uint32_t slice_base, svfloat16x2_t zn, svfloat16_t zm) __arm_streaming __arm_shared_za { +void test_svvdot_lane_za32_f16_vg1x2(uint32_t slice_base, svfloat16x2_t zn, svfloat16_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svvdot_lane_za32,_f16,_vg1x2)(slice_base, zn, zm, 3); } @@ -64,7 +64,7 @@ void test_svvdot_lane_za32_f16_vg1x2(uint32_t slice_base, svfloat16x2_t zn, svfl // CPP-CHECK-NEXT: tail 
call void @llvm.aarch64.sme.svdot.lane.za32.vg1x2.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]], i32 3) // CPP-CHECK-NEXT: ret void // -void test_svvdot_lane_za32_s16_vg1x2(uint32_t slice_base, svint16x2_t zn, svint16_t zm) __arm_streaming __arm_shared_za { +void test_svvdot_lane_za32_s16_vg1x2(uint32_t slice_base, svint16x2_t zn, svint16_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svvdot_lane_za32,_s16,_vg1x2)(slice_base, zn, zm, 3); } @@ -82,7 +82,7 @@ void test_svvdot_lane_za32_s16_vg1x2(uint32_t slice_base, svint16x2_t zn, svint1 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.uvdot.lane.za32.vg1x2.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]], i32 3) // CPP-CHECK-NEXT: ret void // -void test_svvdot_lane_za32_u16_vg1x2(uint32_t slice_base, svuint16x2_t zn, svuint16_t zm) __arm_streaming __arm_shared_za { +void test_svvdot_lane_za32_u16_vg1x2(uint32_t slice_base, svuint16x2_t zn, svuint16_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svvdot_lane_za32,_u16,_vg1x2)(slice_base, zn, zm, 3); } @@ -104,7 +104,7 @@ void test_svvdot_lane_za32_u16_vg1x2(uint32_t slice_base, svuint16x2_t zn, svuin // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.svdot.lane.za32.vg1x4.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]], i32 3) // CPP-CHECK-NEXT: ret void // -void test_svvdot_lane_za32_s8_vg1x4(uint32_t slice_base, svint8x4_t zn, svint8_t zm) __arm_streaming __arm_shared_za { +void test_svvdot_lane_za32_s8_vg1x4(uint32_t slice_base, svint8x4_t zn, svint8_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svvdot_lane_za32,_s8,_vg1x4)(slice_base, zn, zm, 3); } @@ -126,7 +126,7 @@ void test_svvdot_lane_za32_s8_vg1x4(uint32_t slice_base, svint8x4_t zn, svint8_t // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.uvdot.lane.za32.vg1x4.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]], i32 3) // CPP-CHECK-NEXT: ret void // -void 
test_svvdot_lane_za32_u8_vg1x4(uint32_t slice_base, svuint8x4_t zn, svuint8_t zm) __arm_streaming __arm_shared_za { +void test_svvdot_lane_za32_u8_vg1x4(uint32_t slice_base, svuint8x4_t zn, svuint8_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svvdot_lane_za32,_u8,_vg1x4)(slice_base, zn, zm, 3); } @@ -148,7 +148,7 @@ void test_svvdot_lane_za32_u8_vg1x4(uint32_t slice_base, svuint8x4_t zn, svuint8 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.svdot.lane.za64.vg1x4.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]], i32 1) // CPP-CHECK-NEXT: ret void // -void test_svvdot_lane_za64_s16_vg1x4(uint32_t slice_base, svint16x4_t zn, svint16_t zm) __arm_streaming __arm_shared_za { +void test_svvdot_lane_za64_s16_vg1x4(uint32_t slice_base, svint16x4_t zn, svint16_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svvdot_lane_za64,_s16,_vg1x4)(slice_base, zn, zm, 1); } @@ -170,7 +170,7 @@ void test_svvdot_lane_za64_s16_vg1x4(uint32_t slice_base, svint16x4_t zn, svint1 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.uvdot.lane.za64.vg1x4.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]], i32 1) // CPP-CHECK-NEXT: ret void // -void test_svvdot_lane_za64_u16_vg1x4(uint32_t slice_base, svuint16x4_t zn, svuint16_t zm) __arm_streaming __arm_shared_za { +void test_svvdot_lane_za64_u16_vg1x4(uint32_t slice_base, svuint16x4_t zn, svuint16_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svvdot_lane_za64,_u16,_vg1x4)(slice_base, zn, zm, 1); } @@ -193,7 +193,7 @@ void test_svvdot_lane_za64_u16_vg1x4(uint32_t slice_base, svuint16x4_t zn, svuin // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.suvdot.lane.za32.vg1x4.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]], i32 3) // CPP-CHECK-NEXT: ret void // -void test_svsuvdot_lane_za32_s8_vg1x4(uint32_t slice_base, svint8x4_t zn, svint8_t zm) __arm_streaming __arm_shared_za { +void 
test_svsuvdot_lane_za32_s8_vg1x4(uint32_t slice_base, svint8x4_t zn, svint8_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svsuvdot_lane_za32,_s8,_vg1x4)(slice_base, zn, zm, 3); } @@ -216,7 +216,7 @@ void test_svsuvdot_lane_za32_s8_vg1x4(uint32_t slice_base, svint8x4_t zn, svint8 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.usvdot.lane.za32.vg1x4.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]], i32 3) // CPP-CHECK-NEXT: ret void // -void test_svusvdot_lane_za32_u8_vg1x4(uint32_t slice_base, svuint8x4_t zn, svuint8_t zm) __arm_streaming __arm_shared_za { +void test_svusvdot_lane_za32_u8_vg1x4(uint32_t slice_base, svuint8x4_t zn, svuint8_t zm) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svusvdot_lane_za32,_u8,_vg1x4)(slice_base, zn, zm, 3); } diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_write.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_write.c index 14b0371bce574..bf8e12b6b4969 100644 --- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_write.c +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_write.c @@ -30,7 +30,7 @@ // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg2.nxv16i8(i32 0, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) // CPP-CHECK-NEXT: ret void // -void test_svwrite_ver_za8_u8_vg2(uint32_t base, svuint8x2_t val) __arm_streaming __arm_shared_za { +void test_svwrite_ver_za8_u8_vg2(uint32_t base, svuint8x2_t val) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svwrite_ver_za8,_u8,_vg2,)(0, base, val); } @@ -48,7 +48,7 @@ void test_svwrite_ver_za8_u8_vg2(uint32_t base, svuint8x2_t val) __arm_streaming // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg2.nxv16i8(i32 0, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) // CPP-CHECK-NEXT: ret void // -void test_svwrite_ver_za8_s8_vg2(uint32_t base, svint8x2_t val) __arm_streaming __arm_shared_za { +void test_svwrite_ver_za8_s8_vg2(uint32_t base, svint8x2_t val) __arm_streaming __arm_inout("za") { 
SVE_ACLE_FUNC(svwrite_ver_za8,_s8,_vg2,)(0, base, val); } @@ -66,7 +66,7 @@ void test_svwrite_ver_za8_s8_vg2(uint32_t base, svint8x2_t val) __arm_streaming // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg2.nxv16i8(i32 0, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) // CPP-CHECK-NEXT: ret void // -void test_svwrite_hor_za8_u8_vg2(uint32_t base, svuint8x2_t val) __arm_streaming __arm_shared_za { +void test_svwrite_hor_za8_u8_vg2(uint32_t base, svuint8x2_t val) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svwrite_hor_za8,_u8,_vg2,)(0, base, val); } @@ -84,7 +84,7 @@ void test_svwrite_hor_za8_u8_vg2(uint32_t base, svuint8x2_t val) __arm_streaming // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg2.nxv16i8(i32 0, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) // CPP-CHECK-NEXT: ret void // -void test_svwrite_hor_za8_s8_vg2(uint32_t base, svint8x2_t val) __arm_streaming __arm_shared_za { +void test_svwrite_hor_za8_s8_vg2(uint32_t base, svint8x2_t val) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svwrite_hor_za8,_s8,_vg2,)(0, base, val); } @@ -106,7 +106,7 @@ void test_svwrite_hor_za8_s8_vg2(uint32_t base, svint8x2_t val) __arm_streaming // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg4.nxv16i8(i32 0, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) // CPP-CHECK-NEXT: ret void // -void test_svwrite_hor_za8_u8_vg4(uint32_t base, svuint8x4_t val) __arm_streaming __arm_shared_za { +void test_svwrite_hor_za8_u8_vg4(uint32_t base, svuint8x4_t val) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svwrite_hor_za8,_u8,_vg4,)(0, base, val); } @@ -128,7 +128,7 @@ void test_svwrite_hor_za8_u8_vg4(uint32_t base, svuint8x4_t val) __arm_streaming // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg4.nxv16i8(i32 0, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) // CPP-CHECK-NEXT: ret void // -void test_svwrite_hor_za8_s8_vg4(uint32_t base, svint8x4_t val) __arm_streaming __arm_shared_za { +void 
test_svwrite_hor_za8_s8_vg4(uint32_t base, svint8x4_t val) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svwrite_hor_za8,_s8,_vg4,)(0, base, val); } @@ -150,7 +150,7 @@ void test_svwrite_hor_za8_s8_vg4(uint32_t base, svint8x4_t val) __arm_streaming // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg4.nxv16i8(i32 0, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) // CPP-CHECK-NEXT: ret void // -void test_svwrite_ver_za8_u8_vg4(uint32_t base, svuint8x4_t val) __arm_streaming __arm_shared_za { +void test_svwrite_ver_za8_u8_vg4(uint32_t base, svuint8x4_t val) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svwrite_ver_za8,_u8,_vg4,)(0, base, val); } @@ -172,7 +172,7 @@ void test_svwrite_ver_za8_u8_vg4(uint32_t base, svuint8x4_t val) __arm_streaming // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg4.nxv16i8(i32 0, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) // CPP-CHECK-NEXT: ret void // -void test_svwrite_ver_za8_s8_vg4(uint32_t base, svint8x4_t val) __arm_streaming __arm_shared_za { +void test_svwrite_ver_za8_s8_vg4(uint32_t base, svint8x4_t val) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svwrite_ver_za8,_s8,_vg4,)(0, base, val); } @@ -190,7 +190,7 @@ void test_svwrite_ver_za8_s8_vg4(uint32_t base, svint8x4_t val) __arm_streaming // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg2.nxv8i16(i32 1, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) // CPP-CHECK-NEXT: ret void // -void test_svwrite_hor_za16_u16_vg2(uint32_t base, svuint16x2_t val) __arm_streaming __arm_shared_za { +void test_svwrite_hor_za16_u16_vg2(uint32_t base, svuint16x2_t val) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svwrite_hor_za16,_u16,_vg2,)(1, base, val); } @@ -208,7 +208,7 @@ void test_svwrite_hor_za16_u16_vg2(uint32_t base, svuint16x2_t val) __arm_stream // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg2.nxv8bf16(i32 1, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) // CPP-CHECK-NEXT: ret void // -void 
test_svwrite_hor_za16_bf16_vg2(uint32_t base, svbfloat16x2_t val) __arm_streaming __arm_shared_za { +void test_svwrite_hor_za16_bf16_vg2(uint32_t base, svbfloat16x2_t val) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svwrite_hor_za16,_bf16,_vg2,)(1, base, val); } @@ -226,7 +226,7 @@ void test_svwrite_hor_za16_bf16_vg2(uint32_t base, svbfloat16x2_t val) __arm_str // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg2.nxv8f16(i32 1, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) // CPP-CHECK-NEXT: ret void // -void test_svwrite_hor_za16_f16_vg2(uint32_t base, svfloat16x2_t val) __arm_streaming __arm_shared_za { +void test_svwrite_hor_za16_f16_vg2(uint32_t base, svfloat16x2_t val) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svwrite_hor_za16,_f16,_vg2,)(1, base, val); } @@ -244,7 +244,7 @@ void test_svwrite_hor_za16_f16_vg2(uint32_t base, svfloat16x2_t val) __arm_strea // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg2.nxv8i16(i32 1, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) // CPP-CHECK-NEXT: ret void // -void test_svwrite_hor_za16_s16_vg2(uint32_t base, svint16x2_t val) __arm_streaming __arm_shared_za { +void test_svwrite_hor_za16_s16_vg2(uint32_t base, svint16x2_t val) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svwrite_hor_za16,_s16,_vg2,)(1, base, val); } @@ -262,7 +262,7 @@ void test_svwrite_hor_za16_s16_vg2(uint32_t base, svint16x2_t val) __arm_streami // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg2.nxv8i16(i32 1, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) // CPP-CHECK-NEXT: ret void // -void test_svwrite_ver_za16_u16_vg2(uint32_t base, svuint16x2_t val) __arm_streaming __arm_shared_za { +void test_svwrite_ver_za16_u16_vg2(uint32_t base, svuint16x2_t val) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svwrite_ver_za16,_u16,_vg2,)(1, base, val); } @@ -280,7 +280,7 @@ void test_svwrite_ver_za16_u16_vg2(uint32_t base, svuint16x2_t val) __arm_stream // CPP-CHECK-NEXT: tail call void 
@llvm.aarch64.sme.write.ver.vg2.nxv8bf16(i32 1, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) // CPP-CHECK-NEXT: ret void // -void test_svwrite_ver_za16_bf16_vg2(uint32_t base, svbfloat16x2_t val) __arm_streaming __arm_shared_za { +void test_svwrite_ver_za16_bf16_vg2(uint32_t base, svbfloat16x2_t val) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svwrite_ver_za16,_bf16,_vg2,)(1, base, val); } @@ -298,7 +298,7 @@ void test_svwrite_ver_za16_bf16_vg2(uint32_t base, svbfloat16x2_t val) __arm_str // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg2.nxv8f16(i32 1, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) // CPP-CHECK-NEXT: ret void // -void test_svwrite_ver_za16_f16_vg2(uint32_t base, svfloat16x2_t val) __arm_streaming __arm_shared_za { +void test_svwrite_ver_za16_f16_vg2(uint32_t base, svfloat16x2_t val) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svwrite_ver_za16,_f16,_vg2,)(1, base, val); } @@ -316,7 +316,7 @@ void test_svwrite_ver_za16_f16_vg2(uint32_t base, svfloat16x2_t val) __arm_strea // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg2.nxv8i16(i32 1, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) // CPP-CHECK-NEXT: ret void // -void test_svwrite_ver_za16_s16_vg2(uint32_t base, svint16x2_t val) __arm_streaming __arm_shared_za { +void test_svwrite_ver_za16_s16_vg2(uint32_t base, svint16x2_t val) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svwrite_ver_za16,_s16,_vg2,)(1, base, val); } @@ -338,7 +338,7 @@ void test_svwrite_ver_za16_s16_vg2(uint32_t base, svint16x2_t val) __arm_streami // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg4.nxv8i16(i32 1, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) // CPP-CHECK-NEXT: ret void // -void test_svwrite_hor_za16_u16_vg4(uint32_t base, svuint16x4_t val) __arm_streaming __arm_shared_za { +void test_svwrite_hor_za16_u16_vg4(uint32_t base, svuint16x4_t val) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svwrite_hor_za16,_u16,_vg4,)(1, base, val); } @@ -360,7 +360,7 @@ void 
test_svwrite_hor_za16_u16_vg4(uint32_t base, svuint16x4_t val) __arm_stream // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg4.nxv8bf16(i32 1, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) // CPP-CHECK-NEXT: ret void // -void test_svwrite_hor_za16_bf16_vg4(uint32_t base, svbfloat16x4_t val) __arm_streaming __arm_shared_za { +void test_svwrite_hor_za16_bf16_vg4(uint32_t base, svbfloat16x4_t val) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svwrite_hor_za16,_bf16,_vg4,)(1, base, val); } @@ -382,7 +382,7 @@ void test_svwrite_hor_za16_bf16_vg4(uint32_t base, svbfloat16x4_t val) __arm_str // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg4.nxv8f16(i32 1, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) // CPP-CHECK-NEXT: ret void // -void test_svwrite_hor_za16_f16_vg4(uint32_t base, svfloat16x4_t val) __arm_streaming __arm_shared_za { +void test_svwrite_hor_za16_f16_vg4(uint32_t base, svfloat16x4_t val) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svwrite_hor_za16,_f16,_vg4,)(1, base, val); } @@ -404,7 +404,7 @@ void test_svwrite_hor_za16_f16_vg4(uint32_t base, svfloat16x4_t val) __arm_strea // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg4.nxv8i16(i32 1, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) // CPP-CHECK-NEXT: ret void // -void test_svwrite_hor_za16_s16_vg4(uint32_t base, svint16x4_t val) __arm_streaming __arm_shared_za { +void test_svwrite_hor_za16_s16_vg4(uint32_t base, svint16x4_t val) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svwrite_hor_za16,_s16,_vg4,)(1, base, val); } @@ -426,7 +426,7 @@ void test_svwrite_hor_za16_s16_vg4(uint32_t base, svint16x4_t val) __arm_streami // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg4.nxv8i16(i32 1, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) // CPP-CHECK-NEXT: ret void // -void test_svwrite_ver_za16_u16_vg4(uint32_t base, svuint16x4_t val) __arm_streaming __arm_shared_za { +void 
test_svwrite_ver_za16_u16_vg4(uint32_t base, svuint16x4_t val) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svwrite_ver_za16,_u16,_vg4,)(1, base, val); } @@ -448,7 +448,7 @@ void test_svwrite_ver_za16_u16_vg4(uint32_t base, svuint16x4_t val) __arm_stream // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg4.nxv8bf16(i32 1, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) // CPP-CHECK-NEXT: ret void // -void test_svwrite_ver_za16_bf16_vg4(uint32_t base, svbfloat16x4_t val) __arm_streaming __arm_shared_za { +void test_svwrite_ver_za16_bf16_vg4(uint32_t base, svbfloat16x4_t val) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svwrite_ver_za16,_bf16,_vg4,)(1, base, val); } @@ -470,7 +470,7 @@ void test_svwrite_ver_za16_bf16_vg4(uint32_t base, svbfloat16x4_t val) __arm_str // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg4.nxv8f16(i32 1, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) // CPP-CHECK-NEXT: ret void // -void test_svwrite_ver_za16_f16_vg4(uint32_t base, svfloat16x4_t val) __arm_streaming __arm_shared_za { +void test_svwrite_ver_za16_f16_vg4(uint32_t base, svfloat16x4_t val) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svwrite_ver_za16,_f16,_vg4,)(1, base, val); } @@ -492,7 +492,7 @@ void test_svwrite_ver_za16_f16_vg4(uint32_t base, svfloat16x4_t val) __arm_strea // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg4.nxv8i16(i32 1, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) // CPP-CHECK-NEXT: ret void // -void test_svwrite_ver_za16_s16_vg4(uint32_t base, svint16x4_t val) __arm_streaming __arm_shared_za { +void test_svwrite_ver_za16_s16_vg4(uint32_t base, svint16x4_t val) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svwrite_ver_za16,_s16,_vg4,)(1, base, val); } @@ -510,7 +510,7 @@ void test_svwrite_ver_za16_s16_vg4(uint32_t base, svint16x4_t val) __arm_streami // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg2.nxv4i32(i32 3, i32 [[BASE:%.*]], [[TMP0]], 
[[TMP1]]) // CPP-CHECK-NEXT: ret void // -void test_svwrite_hor_za32_u32_vg2(uint32_t base, svuint32x2_t val) __arm_streaming __arm_shared_za { +void test_svwrite_hor_za32_u32_vg2(uint32_t base, svuint32x2_t val) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svwrite_hor_za32,_u32,_vg2,)(3, base, val); } @@ -528,7 +528,7 @@ void test_svwrite_hor_za32_u32_vg2(uint32_t base, svuint32x2_t val) __arm_stream // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg2.nxv4f32(i32 3, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) // CPP-CHECK-NEXT: ret void // -void test_svwrite_hor_za32_f32_vg2(uint32_t base, svfloat32x2_t val) __arm_streaming __arm_shared_za { +void test_svwrite_hor_za32_f32_vg2(uint32_t base, svfloat32x2_t val) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svwrite_hor_za32,_f32,_vg2,)(3, base, val); } @@ -546,7 +546,7 @@ void test_svwrite_hor_za32_f32_vg2(uint32_t base, svfloat32x2_t val) __arm_strea // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg2.nxv4i32(i32 3, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) // CPP-CHECK-NEXT: ret void // -void test_svwrite_hor_za32_s32_vg2(uint32_t base, svint32x2_t val) __arm_streaming __arm_shared_za { +void test_svwrite_hor_za32_s32_vg2(uint32_t base, svint32x2_t val) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svwrite_hor_za32,_s32,_vg2,)(3, base, val); } @@ -564,7 +564,7 @@ void test_svwrite_hor_za32_s32_vg2(uint32_t base, svint32x2_t val) __arm_streami // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg2.nxv4i32(i32 3, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) // CPP-CHECK-NEXT: ret void // -void test_svwrite_ver_za32_u32_vg2(uint32_t base, svuint32x2_t val) __arm_streaming __arm_shared_za { +void test_svwrite_ver_za32_u32_vg2(uint32_t base, svuint32x2_t val) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svwrite_ver_za32,_u32,_vg2,)(3, base, val); } @@ -582,7 +582,7 @@ void test_svwrite_ver_za32_u32_vg2(uint32_t base, svuint32x2_t val) __arm_stream // CPP-CHECK-NEXT: tail call 
void @llvm.aarch64.sme.write.ver.vg2.nxv4f32(i32 3, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) // CPP-CHECK-NEXT: ret void // -void test_svwrite_ver_za32_f32_vg2(uint32_t base, svfloat32x2_t val) __arm_streaming __arm_shared_za { +void test_svwrite_ver_za32_f32_vg2(uint32_t base, svfloat32x2_t val) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svwrite_ver_za32,_f32,_vg2,)(3, base, val); } @@ -600,7 +600,7 @@ void test_svwrite_ver_za32_f32_vg2(uint32_t base, svfloat32x2_t val) __arm_strea // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg2.nxv4i32(i32 3, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) // CPP-CHECK-NEXT: ret void // -void test_svwrite_ver_za32_s32_vg2(uint32_t base, svint32x2_t val) __arm_streaming __arm_shared_za { +void test_svwrite_ver_za32_s32_vg2(uint32_t base, svint32x2_t val) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svwrite_ver_za32,_s32,_vg2,)(3, base, val); } @@ -622,7 +622,7 @@ void test_svwrite_ver_za32_s32_vg2(uint32_t base, svint32x2_t val) __arm_streami // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg4.nxv4i32(i32 3, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) // CPP-CHECK-NEXT: ret void // -void test_svwrite_hor_za32_u32_vg4(uint32_t base, svuint32x4_t val) __arm_streaming __arm_shared_za { +void test_svwrite_hor_za32_u32_vg4(uint32_t base, svuint32x4_t val) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svwrite_hor_za32,_u32,_vg4,)(3, base, val); } @@ -644,7 +644,7 @@ void test_svwrite_hor_za32_u32_vg4(uint32_t base, svuint32x4_t val) __arm_stream // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg4.nxv4f32(i32 3, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) // CPP-CHECK-NEXT: ret void // -void test_svwrite_hor_za32_f32_vg4(uint32_t base, svfloat32x4_t val) __arm_streaming __arm_shared_za { +void test_svwrite_hor_za32_f32_vg4(uint32_t base, svfloat32x4_t val) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svwrite_hor_za32,_f32,_vg4,)(3, base, val); } @@ -666,7 
+666,7 @@ void test_svwrite_hor_za32_f32_vg4(uint32_t base, svfloat32x4_t val) __arm_strea // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg4.nxv4i32(i32 3, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) // CPP-CHECK-NEXT: ret void // -void test_svwrite_hor_za32_s32_vg4(uint32_t base, svint32x4_t val) __arm_streaming __arm_shared_za { +void test_svwrite_hor_za32_s32_vg4(uint32_t base, svint32x4_t val) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svwrite_hor_za32,_s32,_vg4,)(3, base, val); } @@ -688,7 +688,7 @@ void test_svwrite_hor_za32_s32_vg4(uint32_t base, svint32x4_t val) __arm_streami // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg4.nxv4i32(i32 3, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) // CPP-CHECK-NEXT: ret void // -void test_svwrite_ver_za32_u32_vg4(uint32_t base, svuint32x4_t val) __arm_streaming __arm_shared_za { +void test_svwrite_ver_za32_u32_vg4(uint32_t base, svuint32x4_t val) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svwrite_ver_za32,_u32,_vg4,)(3, base, val); } @@ -710,7 +710,7 @@ void test_svwrite_ver_za32_u32_vg4(uint32_t base, svuint32x4_t val) __arm_stream // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg4.nxv4f32(i32 3, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) // CPP-CHECK-NEXT: ret void // -void test_svwrite_ver_za32_f32_vg4(uint32_t base, svfloat32x4_t val) __arm_streaming __arm_shared_za { +void test_svwrite_ver_za32_f32_vg4(uint32_t base, svfloat32x4_t val) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svwrite_ver_za32,_f32,_vg4,)(3, base, val); } @@ -732,7 +732,7 @@ void test_svwrite_ver_za32_f32_vg4(uint32_t base, svfloat32x4_t val) __arm_strea // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg4.nxv4i32(i32 3, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) // CPP-CHECK-NEXT: ret void // -void test_svwrite_ver_za32_s32_vg4(uint32_t base, svint32x4_t val) __arm_streaming __arm_shared_za { +void 
test_svwrite_ver_za32_s32_vg4(uint32_t base, svint32x4_t val) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svwrite_ver_za32,_s32,_vg4,)(3, base, val); } @@ -750,7 +750,7 @@ void test_svwrite_ver_za32_s32_vg4(uint32_t base, svint32x4_t val) __arm_streami // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg2.nxv2i64(i32 7, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) // CPP-CHECK-NEXT: ret void // -void test_svwrite_hor_za64_u64_vg2(uint32_t base, svuint64x2_t val) __arm_streaming __arm_shared_za { +void test_svwrite_hor_za64_u64_vg2(uint32_t base, svuint64x2_t val) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svwrite_hor_za64,_u64,_vg2,)(7, base, val); } @@ -768,7 +768,7 @@ void test_svwrite_hor_za64_u64_vg2(uint32_t base, svuint64x2_t val) __arm_stream // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg2.nxv2f64(i32 7, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) // CPP-CHECK-NEXT: ret void // -void test_svwrite_hor_za64_f64_vg2(uint32_t base, svfloat64x2_t val) __arm_streaming __arm_shared_za { +void test_svwrite_hor_za64_f64_vg2(uint32_t base, svfloat64x2_t val) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svwrite_hor_za64,_f64,_vg2,)(7, base, val); } @@ -786,7 +786,7 @@ void test_svwrite_hor_za64_f64_vg2(uint32_t base, svfloat64x2_t val) __arm_strea // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg2.nxv2i64(i32 7, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) // CPP-CHECK-NEXT: ret void // -void test_svwrite_hor_za64_s64_vg2(uint32_t base, svint64x2_t val) __arm_streaming __arm_shared_za { +void test_svwrite_hor_za64_s64_vg2(uint32_t base, svint64x2_t val) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svwrite_hor_za64,_s64,_vg2,)(7, base, val); } @@ -804,7 +804,7 @@ void test_svwrite_hor_za64_s64_vg2(uint32_t base, svint64x2_t val) __arm_streami // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg2.nxv2i64(i32 7, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) // CPP-CHECK-NEXT: ret void // -void 
test_svwrite_ver_za64_u64_vg2(uint32_t base, svuint64x2_t val) __arm_streaming __arm_shared_za { +void test_svwrite_ver_za64_u64_vg2(uint32_t base, svuint64x2_t val) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svwrite_ver_za64,_u64,_vg2,)(7, base, val); } @@ -822,7 +822,7 @@ void test_svwrite_ver_za64_u64_vg2(uint32_t base, svuint64x2_t val) __arm_stream // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg2.nxv2f64(i32 7, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) // CPP-CHECK-NEXT: ret void // -void test_svwrite_ver_za64_f64_vg2(uint32_t base, svfloat64x2_t val) __arm_streaming __arm_shared_za { +void test_svwrite_ver_za64_f64_vg2(uint32_t base, svfloat64x2_t val) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svwrite_ver_za64,_f64,_vg2,)(7, base, val); } @@ -840,7 +840,7 @@ void test_svwrite_ver_za64_f64_vg2(uint32_t base, svfloat64x2_t val) __arm_strea // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg2.nxv2i64(i32 7, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) // CPP-CHECK-NEXT: ret void // -void test_svwrite_ver_za64_s64_vg2(uint32_t base, svint64x2_t val) __arm_streaming __arm_shared_za { +void test_svwrite_ver_za64_s64_vg2(uint32_t base, svint64x2_t val) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svwrite_ver_za64,_s64,_vg2,)(7, base, val); } @@ -862,7 +862,7 @@ void test_svwrite_ver_za64_s64_vg2(uint32_t base, svint64x2_t val) __arm_streami // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg4.nxv2i64(i32 7, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) // CPP-CHECK-NEXT: ret void // -void test_svwrite_hor_za64_u64_vg4(uint32_t base, svuint64x4_t val) __arm_streaming __arm_shared_za { +void test_svwrite_hor_za64_u64_vg4(uint32_t base, svuint64x4_t val) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svwrite_hor_za64,_u64,_vg4,)(7, base, val); } @@ -884,7 +884,7 @@ void test_svwrite_hor_za64_u64_vg4(uint32_t base, svuint64x4_t val) __arm_stream // CPP-CHECK-NEXT: tail call void 
@llvm.aarch64.sme.write.ver.vg4.nxv2i64(i32 7, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) // CPP-CHECK-NEXT: ret void // -void test_svwrite_ver_za64_u64_vg4(uint32_t base, svuint64x4_t val) __arm_streaming __arm_shared_za { +void test_svwrite_ver_za64_u64_vg4(uint32_t base, svuint64x4_t val) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svwrite_ver_za64,_u64,_vg4,)(7, base, val); } @@ -906,7 +906,7 @@ void test_svwrite_ver_za64_u64_vg4(uint32_t base, svuint64x4_t val) __arm_stream // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg4.nxv2f64(i32 7, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) // CPP-CHECK-NEXT: ret void // -void test_svwrite_hor_za64_f64_vg4(uint32_t base, svfloat64x4_t val) __arm_streaming __arm_shared_za { +void test_svwrite_hor_za64_f64_vg4(uint32_t base, svfloat64x4_t val) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svwrite_hor_za64,_f64,_vg4,)(7, base, val); } @@ -928,7 +928,7 @@ void test_svwrite_hor_za64_f64_vg4(uint32_t base, svfloat64x4_t val) __arm_strea // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg4.nxv2i64(i32 7, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) // CPP-CHECK-NEXT: ret void // -void test_svwrite_hor_za64_s64_vg4(uint32_t base, svint64x4_t val) __arm_streaming __arm_shared_za { +void test_svwrite_hor_za64_s64_vg4(uint32_t base, svint64x4_t val) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svwrite_hor_za64,_s64,_vg4,)(7, base, val); } @@ -950,7 +950,7 @@ void test_svwrite_hor_za64_s64_vg4(uint32_t base, svint64x4_t val) __arm_streami // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg4.nxv2f64(i32 7, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) // CPP-CHECK-NEXT: ret void // -void test_svwrite_ver_za64_f64_vg4(uint32_t base, svfloat64x4_t val) __arm_streaming __arm_shared_za { +void test_svwrite_ver_za64_f64_vg4(uint32_t base, svfloat64x4_t val) __arm_streaming __arm_inout("za") { 
SVE_ACLE_FUNC(svwrite_ver_za64,_f64,_vg4,)(7, base, val); } @@ -972,7 +972,7 @@ void test_svwrite_ver_za64_f64_vg4(uint32_t base, svfloat64x4_t val) __arm_strea // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg4.nxv2i64(i32 7, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) // CPP-CHECK-NEXT: ret void // -void test_svwrite_ver_za64_s64_vg4(uint32_t base, svint64x4_t val) __arm_streaming __arm_shared_za { +void test_svwrite_ver_za64_s64_vg4(uint32_t base, svint64x4_t val) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svwrite_ver_za64,_s64,_vg4,)(7, base, val); } @@ -990,7 +990,7 @@ void test_svwrite_ver_za64_s64_vg4(uint32_t base, svint64x4_t val) __arm_streami // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x2.nxv16i8(i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) // CPP-CHECK-NEXT: ret void // -void test_svwrite_za8_s8_vg1x2(uint32_t base, svint8x2_t val) __arm_streaming __arm_shared_za { +void test_svwrite_za8_s8_vg1x2(uint32_t base, svint8x2_t val) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svwrite_za8,_s8,_vg1x2,)(base, val); } @@ -1008,7 +1008,7 @@ void test_svwrite_za8_s8_vg1x2(uint32_t base, svint8x2_t val) __arm_streaming __ // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x2.nxv16i8(i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) // CPP-CHECK-NEXT: ret void // -void test_svwrite_za8_u8_vg1x2(uint32_t base, svuint8x2_t val) __arm_streaming __arm_shared_za { +void test_svwrite_za8_u8_vg1x2(uint32_t base, svuint8x2_t val) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svwrite_za8,_u8,_vg1x2,)(base, val); } @@ -1026,7 +1026,7 @@ void test_svwrite_za8_u8_vg1x2(uint32_t base, svuint8x2_t val) __arm_streaming _ // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x2.nxv8i16(i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) // CPP-CHECK-NEXT: ret void // -void test_svwrite_za16_s16_vg1x2(uint32_t base, svint16x2_t val) __arm_streaming __arm_shared_za { +void test_svwrite_za16_s16_vg1x2(uint32_t base, svint16x2_t val) 
__arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svwrite_za16,_s16,_vg1x2,)(base, val); } @@ -1044,7 +1044,7 @@ void test_svwrite_za16_s16_vg1x2(uint32_t base, svint16x2_t val) __arm_streaming // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x2.nxv8i16(i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) // CPP-CHECK-NEXT: ret void // -void test_svwrite_za16_u16_vg1x2(uint32_t base, svuint16x2_t val) __arm_streaming __arm_shared_za { +void test_svwrite_za16_u16_vg1x2(uint32_t base, svuint16x2_t val) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svwrite_za16,_u16,_vg1x2,)(base, val); } @@ -1062,7 +1062,7 @@ void test_svwrite_za16_u16_vg1x2(uint32_t base, svuint16x2_t val) __arm_streamin // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x2.nxv8bf16(i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) // CPP-CHECK-NEXT: ret void // -void test_svwrite_za16_bf16_vg1x2(uint32_t base, svbfloat16x2_t val) __arm_streaming __arm_shared_za { +void test_svwrite_za16_bf16_vg1x2(uint32_t base, svbfloat16x2_t val) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svwrite_za16,_bf16,_vg1x2,)(base, val); } @@ -1080,7 +1080,7 @@ void test_svwrite_za16_bf16_vg1x2(uint32_t base, svbfloat16x2_t val) __arm_strea // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x2.nxv8f16(i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) // CPP-CHECK-NEXT: ret void // -void test_svwrite_za16_f16_vg1x2(uint32_t base, svfloat16x2_t val) __arm_streaming __arm_shared_za { +void test_svwrite_za16_f16_vg1x2(uint32_t base, svfloat16x2_t val) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svwrite_za16,_f16,_vg1x2,)(base, val); } @@ -1098,7 +1098,7 @@ void test_svwrite_za16_f16_vg1x2(uint32_t base, svfloat16x2_t val) __arm_streami // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x2.nxv4i32(i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) // CPP-CHECK-NEXT: ret void // -void test_svwrite_za32_s32_vg1x2(uint32_t base, svint32x2_t val) __arm_streaming __arm_shared_za { +void test_svwrite_za32_s32_vg1x2(uint32_t 
base, svint32x2_t val) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svwrite_za32,_s32,_vg1x2,)(base, val); } @@ -1116,7 +1116,7 @@ void test_svwrite_za32_s32_vg1x2(uint32_t base, svint32x2_t val) __arm_streaming // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x2.nxv4i32(i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) // CPP-CHECK-NEXT: ret void // -void test_svwrite_za32_u32_vg1x2(uint32_t base, svuint32x2_t val) __arm_streaming __arm_shared_za { +void test_svwrite_za32_u32_vg1x2(uint32_t base, svuint32x2_t val) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svwrite_za32,_u32,_vg1x2,)(base, val); } @@ -1134,7 +1134,7 @@ void test_svwrite_za32_u32_vg1x2(uint32_t base, svuint32x2_t val) __arm_streamin // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x2.nxv4f32(i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) // CPP-CHECK-NEXT: ret void // -void test_svwrite_za32_f32_vg1x2(uint32_t base, svfloat32x2_t val) __arm_streaming __arm_shared_za { +void test_svwrite_za32_f32_vg1x2(uint32_t base, svfloat32x2_t val) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svwrite_za32,_f32,_vg1x2,)(base, val); } @@ -1152,7 +1152,7 @@ void test_svwrite_za32_f32_vg1x2(uint32_t base, svfloat32x2_t val) __arm_streami // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x2.nxv2i64(i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) // CPP-CHECK-NEXT: ret void // -void test_svwrite_za64_u64_vg1x2(uint32_t base, svuint64x2_t val) __arm_streaming __arm_shared_za { +void test_svwrite_za64_u64_vg1x2(uint32_t base, svuint64x2_t val) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svwrite_za64,_u64,_vg1x2,)(base, val); } @@ -1170,7 +1170,7 @@ void test_svwrite_za64_u64_vg1x2(uint32_t base, svuint64x2_t val) __arm_streamin // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x2.nxv2f64(i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) // CPP-CHECK-NEXT: ret void // -void test_svwrite_za64_f64_vg1x2(uint32_t base, svfloat64x2_t val) __arm_streaming __arm_shared_za { +void 
test_svwrite_za64_f64_vg1x2(uint32_t base, svfloat64x2_t val) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svwrite_za64,_f64,_vg1x2,)(base, val); } @@ -1188,7 +1188,7 @@ void test_svwrite_za64_f64_vg1x2(uint32_t base, svfloat64x2_t val) __arm_streami // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x2.nxv2i64(i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) // CPP-CHECK-NEXT: ret void // -void test_svwrite_za64_s64_vg1x2(uint32_t base, svint64x2_t val) __arm_streaming __arm_shared_za { +void test_svwrite_za64_s64_vg1x2(uint32_t base, svint64x2_t val) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svwrite_za64,_s64,_vg1x2,)(base, val); } @@ -1210,7 +1210,7 @@ void test_svwrite_za64_s64_vg1x2(uint32_t base, svint64x2_t val) __arm_streaming // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x4.nxv16i8(i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) // CPP-CHECK-NEXT: ret void // -void test_svwrite_za8_s8_vg1x4(uint32_t base, svint8x4_t val) __arm_streaming __arm_shared_za { +void test_svwrite_za8_s8_vg1x4(uint32_t base, svint8x4_t val) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svwrite_za8,_s8,_vg1x4,)(base, val); } @@ -1232,7 +1232,7 @@ void test_svwrite_za8_s8_vg1x4(uint32_t base, svint8x4_t val) __arm_streaming __ // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x4.nxv16i8(i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) // CPP-CHECK-NEXT: ret void // -void test_svwrite_za8_u8_vg1x4(uint32_t base, svuint8x4_t val) __arm_streaming __arm_shared_za { +void test_svwrite_za8_u8_vg1x4(uint32_t base, svuint8x4_t val) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svwrite_za8,_u8,_vg1x4,)(base, val); } @@ -1254,7 +1254,7 @@ void test_svwrite_za8_u8_vg1x4(uint32_t base, svuint8x4_t val) __arm_streaming _ // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x4.nxv8i16(i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) // CPP-CHECK-NEXT: ret void // -void test_svwrite_za16_s16_vg1x4(uint32_t base, 
svint16x4_t val) __arm_streaming __arm_shared_za { +void test_svwrite_za16_s16_vg1x4(uint32_t base, svint16x4_t val) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svwrite_za16,_s16,_vg1x4,)(base, val); } @@ -1276,7 +1276,7 @@ void test_svwrite_za16_s16_vg1x4(uint32_t base, svint16x4_t val) __arm_streaming // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x4.nxv8i16(i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) // CPP-CHECK-NEXT: ret void // -void test_svwrite_za16_u16_vg1x4(uint32_t base, svuint16x4_t val) __arm_streaming __arm_shared_za { +void test_svwrite_za16_u16_vg1x4(uint32_t base, svuint16x4_t val) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svwrite_za16,_u16,_vg1x4,)(base, val); } @@ -1298,7 +1298,7 @@ void test_svwrite_za16_u16_vg1x4(uint32_t base, svuint16x4_t val) __arm_streamin // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x4.nxv8bf16(i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) // CPP-CHECK-NEXT: ret void // -void test_svwrite_za16_bf16_vg1x4(uint32_t base, svbfloat16x4_t val) __arm_streaming __arm_shared_za { +void test_svwrite_za16_bf16_vg1x4(uint32_t base, svbfloat16x4_t val) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svwrite_za16,_bf16,_vg1x4,)(base, val); } @@ -1320,7 +1320,7 @@ void test_svwrite_za16_bf16_vg1x4(uint32_t base, svbfloat16x4_t val) __arm_strea // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x4.nxv8f16(i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) // CPP-CHECK-NEXT: ret void // -void test_svwrite_za16_f16_vg1x4(uint32_t base, svfloat16x4_t val) __arm_streaming __arm_shared_za { +void test_svwrite_za16_f16_vg1x4(uint32_t base, svfloat16x4_t val) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svwrite_za16,_f16,_vg1x4,)(base, val); } @@ -1342,7 +1342,7 @@ void test_svwrite_za16_f16_vg1x4(uint32_t base, svfloat16x4_t val) __arm_streami // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x4.nxv4i32(i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], 
[[TMP2]], [[TMP3]]) // CPP-CHECK-NEXT: ret void // -void test_svwrite_za32_s32_vg1x4(uint32_t base, svint32x4_t val) __arm_streaming __arm_shared_za { +void test_svwrite_za32_s32_vg1x4(uint32_t base, svint32x4_t val) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svwrite_za32,_s32,_vg1x4,)(base, val); } @@ -1364,7 +1364,7 @@ void test_svwrite_za32_s32_vg1x4(uint32_t base, svint32x4_t val) __arm_streaming // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x4.nxv4i32(i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) // CPP-CHECK-NEXT: ret void // -void test_svwrite_za32_u32_vg1x4(uint32_t base, svuint32x4_t val) __arm_streaming __arm_shared_za { +void test_svwrite_za32_u32_vg1x4(uint32_t base, svuint32x4_t val) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svwrite_za32,_u32,_vg1x4,)(base, val); } @@ -1386,7 +1386,7 @@ void test_svwrite_za32_u32_vg1x4(uint32_t base, svuint32x4_t val) __arm_streamin // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x4.nxv4f32(i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) // CPP-CHECK-NEXT: ret void // -void test_svwrite_za32_f32_vg1x4(uint32_t base, svfloat32x4_t val) __arm_streaming __arm_shared_za { +void test_svwrite_za32_f32_vg1x4(uint32_t base, svfloat32x4_t val) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svwrite_za32,_f32,_vg1x4,)(base, val); } @@ -1408,7 +1408,7 @@ void test_svwrite_za32_f32_vg1x4(uint32_t base, svfloat32x4_t val) __arm_streami // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x4.nxv2i64(i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) // CPP-CHECK-NEXT: ret void // -void test_svwrite_za64_u64_vg1x4(uint32_t base, svuint64x4_t val) __arm_streaming __arm_shared_za { +void test_svwrite_za64_u64_vg1x4(uint32_t base, svuint64x4_t val) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svwrite_za64,_u64,_vg1x4,)(base, val); } @@ -1430,7 +1430,7 @@ void test_svwrite_za64_u64_vg1x4(uint32_t base, svuint64x4_t val) __arm_streamin // 
CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x4.nxv2f64(i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) // CPP-CHECK-NEXT: ret void // -void test_svwrite_za64_f64_vg1x4(uint32_t base, svfloat64x4_t val) __arm_streaming __arm_shared_za { +void test_svwrite_za64_f64_vg1x4(uint32_t base, svfloat64x4_t val) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svwrite_za64,_f64,_vg1x4,)(base, val); } @@ -1452,6 +1452,6 @@ void test_svwrite_za64_f64_vg1x4(uint32_t base, svfloat64x4_t val) __arm_streami // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x4.nxv2i64(i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) // CPP-CHECK-NEXT: ret void // -void test_svwrite_za64_s64_vg1x4(uint32_t base, svint64x4_t val) __arm_streaming __arm_shared_za { +void test_svwrite_za64_s64_vg1x4(uint32_t base, svint64x4_t val) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svwrite_za64,_s64,_vg1x4,)(base, val); } diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_zero_zt.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_zero_zt.c index 31e8d6850fb28..081b1a1d2627c 100644 --- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_zero_zt.c +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_zero_zt.c @@ -18,6 +18,6 @@ // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.zero.zt(i32 0) // CPP-CHECK-NEXT: ret void // -void test_svzero_zt(void) __arm_streaming_compatible __arm_shared_za { +void test_svzero_zt(void) __arm_streaming_compatible __arm_out("za") { svzero_zt(0); } diff --git a/clang/test/Modules/aarch64-sme-keywords.cppm b/clang/test/Modules/aarch64-sme-keywords.cppm index 6784aaa01d219..df4dd32b16cff 100644 --- a/clang/test/Modules/aarch64-sme-keywords.cppm +++ b/clang/test/Modules/aarch64-sme-keywords.cppm @@ -13,8 +13,8 @@ export module A; export void f_streaming(void) __arm_streaming { } export void f_streaming_compatible(void) __arm_streaming_compatible { } -export void f_shared_za(void) __arm_shared_za { } 
-export void f_preserves_za(void) __arm_preserves_za { } +export void f_shared_za(void) __arm_inout("za") { } +export void f_preserves_za(void) __arm_preserves("za") { } //--- Use.cpp // expected-no-diagnostics @@ -50,11 +50,11 @@ import A; // CHECK-DAG: attributes #[[STREAMING_DECL]] = {{{.*}} "aarch64_pstate_sm_enabled" {{.*}}} // CHECK-DAG: attributes #[[STREAMING_COMPATIBLE_DECL]] = {{{.*}} "aarch64_pstate_sm_compatible" {{.*}}} // CHECK-DAG: attributes #[[SHARED_ZA_USE]] = { "aarch64_pstate_za_shared" } -// CHECK-DAG: attributes #[[PRESERVES_ZA_USE]] = { "aarch64_pstate_za_preserved" } +// CHECK-DAG: attributes #[[PRESERVES_ZA_USE]] = { "aarch64_pstate_za_preserved" "aarch64_pstate_za_shared" } // CHECK-DAG: attributes #[[STREAMING_USE]] = { "aarch64_pstate_sm_enabled" } // CHECK-DAG: attributes #[[STREAMING_COMPATIBLE_USE]] = { "aarch64_pstate_sm_compatible" } -void f_shared_za_caller(void) __arm_shared_za { +void f_shared_za_caller(void) __arm_inout("za") { f_shared_za(); f_preserves_za(); } diff --git a/clang/test/Parser/c2x-attribute-keywords.c b/clang/test/Parser/c2x-attribute-keywords.c index d8291b710e6db..b88d2b9c23e69 100644 --- a/clang/test/Parser/c2x-attribute-keywords.c +++ b/clang/test/Parser/c2x-attribute-keywords.c @@ -1,60 +1,64 @@ -// RUN: %clang_cc1 -fsyntax-only -triple aarch64-none-linux-gnu -target-feature +sme -verify=expected,notc2x -Wno-strict-prototypes %s -// RUN: %clang_cc1 -fsyntax-only -triple aarch64-none-linux-gnu -target-feature +sme -verify=expected,c2x %s - -enum __arm_streaming E { // expected-error {{'__arm_streaming' only applies to non-K&R-style functions}} - One __arm_streaming, // expected-error {{'__arm_streaming' only applies to non-K&R-style functions}} +// RUN: sed -e "s@ATTR_USE@__arm_streaming@g" -e "s@ATTR_NAME@__arm_streaming@g" %s > %t +// RUN: %clang_cc1 -fsyntax-only -triple aarch64-none-linux-gnu -target-feature +sme -verify=expected,notc2x -Wno-strict-prototypes %t +// RUN: %clang_cc1 -fsyntax-only -triple 
aarch64-none-linux-gnu -target-feature +sme -verify=expected,c2x %t +// RUN: sed -e "s@ATTR_USE@__arm_inout\(\"za\"\)@g" -e "s@ATTR_NAME@__arm_inout@g" %s > %t +// RUN: %clang_cc1 -fsyntax-only -triple aarch64-none-linux-gnu -target-feature +sme -verify=expected,notc2x -Wno-strict-prototypes %t +// RUN: %clang_cc1 -fsyntax-only -triple aarch64-none-linux-gnu -target-feature +sme -verify=expected,c2x %t + +enum ATTR_USE E { // expected-error {{'ATTR_NAME' only applies to non-K&R-style functions}} + One ATTR_USE, // expected-error {{'ATTR_NAME' only applies to non-K&R-style functions}} Two, - Three __arm_streaming // expected-error {{'__arm_streaming' only applies to non-K&R-style functions}} + Three ATTR_USE // expected-error {{'ATTR_NAME' only applies to non-K&R-style functions}} }; -enum __arm_streaming { Four }; // expected-error {{'__arm_streaming' only applies to non-K&R-style functions}} -__arm_streaming enum E2 { Five }; // expected-error {{misplaced '__arm_streaming'}} +enum ATTR_USE { Four }; // expected-error {{'ATTR_NAME' only applies to non-K&R-style functions}} +ATTR_USE enum E2 { Five }; // expected-error {{misplaced 'ATTR_NAME'}} // FIXME: this diagnostic can be improved. -enum { __arm_streaming Six }; // expected-error {{expected identifier}} +enum { ATTR_USE Six }; // expected-error {{expected identifier}} // FIXME: this diagnostic can be improved. 
-enum E3 __arm_streaming { Seven }; // expected-error {{expected identifier or '('}} - -struct __arm_streaming S1 { // expected-error {{'__arm_streaming' only applies to non-K&R-style functions}} - int i __arm_streaming; // expected-error {{'__arm_streaming' only applies to function types}} - int __arm_streaming j; // expected-error {{'__arm_streaming' only applies to function types}} - int k[10] __arm_streaming; // expected-error {{'__arm_streaming' only applies to function types}} - int l __arm_streaming[10]; // expected-error {{'__arm_streaming' only applies to function types}} - __arm_streaming int m, n; // expected-error {{'__arm_streaming' only applies to function types}} - int o __arm_streaming : 12; // expected-error {{'__arm_streaming' only applies to function types}} - int __arm_streaming : 0; // expected-error {{'__arm_streaming' only applies to function types}} - int p, __arm_streaming : 0; // expected-error {{'__arm_streaming' cannot appear here}} - int q, __arm_streaming r; // expected-error {{'__arm_streaming' cannot appear here}} - __arm_streaming int; // expected-error {{'__arm_streaming' cannot appear here}} \ +enum E3 ATTR_USE { Seven }; // expected-error {{expected identifier or '('}} + +struct ATTR_USE S1 { // expected-error {{'ATTR_NAME' only applies to non-K&R-style functions}} + int i ATTR_USE; // expected-error {{'ATTR_NAME' only applies to function types}} + int ATTR_USE j; // expected-error {{'ATTR_NAME' only applies to function types}} + int k[10] ATTR_USE; // expected-error {{'ATTR_NAME' only applies to function types}} + int l ATTR_USE[10]; // expected-error {{'ATTR_NAME' only applies to function types}} + ATTR_USE int m, n; // expected-error {{'ATTR_NAME' only applies to function types}} + int o ATTR_USE : 12; // expected-error {{'ATTR_NAME' only applies to function types}} + int ATTR_USE : 0; // expected-error {{'ATTR_NAME' only applies to function types}} + int p, ATTR_USE : 0; // expected-error {{'ATTR_NAME' cannot appear here}} + 
int q, ATTR_USE r; // expected-error {{'ATTR_NAME' cannot appear here}} + ATTR_USE int; // expected-error {{'ATTR_NAME' cannot appear here}} \ // expected-warning {{declaration does not declare anything}} }; -__arm_streaming struct S2 { int a; }; // expected-error {{misplaced '__arm_streaming'}} -struct S3 __arm_streaming { int a; }; // expected-error {{'__arm_streaming' cannot appear here}} \ - expected-error {{'__arm_streaming' only applies to non-K&R-style functions}} +ATTR_USE struct S2 { int a; }; // expected-error {{misplaced 'ATTR_NAME'}} +struct S3 ATTR_USE { int a; }; // expected-error {{'ATTR_NAME' cannot appear here}} \ + expected-error {{'ATTR_NAME' only applies to non-K&R-style functions}} -union __arm_streaming U { // expected-error {{'__arm_streaming' only applies to non-K&R-style functions}} - double d __arm_streaming; // expected-error {{'__arm_streaming' only applies to function types; type here is 'double'}} - __arm_streaming int i; // expected-error {{'__arm_streaming' only applies to function types; type here is 'int'}} +union ATTR_USE U { // expected-error {{'ATTR_NAME' only applies to non-K&R-style functions}} + double d ATTR_USE; // expected-error {{'ATTR_NAME' only applies to function types; type here is 'double'}} + ATTR_USE int i; // expected-error {{'ATTR_NAME' only applies to function types; type here is 'int'}} }; -__arm_streaming union U2 { double d; }; // expected-error {{misplaced '__arm_streaming'}} -union U3 __arm_streaming { double d; }; // expected-error {{'__arm_streaming' cannot appear here}} \ - expected-error {{'__arm_streaming' only applies to non-K&R-style functions}} +ATTR_USE union U2 { double d; }; // expected-error {{misplaced 'ATTR_NAME'}} +union U3 ATTR_USE { double d; }; // expected-error {{'ATTR_NAME' cannot appear here}} \ + expected-error {{'ATTR_NAME' only applies to non-K&R-style functions}} -struct __arm_streaming IncompleteStruct; // expected-error {{'__arm_streaming' only applies to non-K&R-style functions}} 
-union __arm_streaming IncompleteUnion; // expected-error {{'__arm_streaming' only applies to non-K&R-style functions}} -enum __arm_streaming IncompleteEnum; // expected-error {{'__arm_streaming' only applies to non-K&R-style functions}} +struct ATTR_USE IncompleteStruct; // expected-error {{'ATTR_NAME' only applies to non-K&R-style functions}} +union ATTR_USE IncompleteUnion; // expected-error {{'ATTR_NAME' only applies to non-K&R-style functions}} +enum ATTR_USE IncompleteEnum; // expected-error {{'ATTR_NAME' only applies to non-K&R-style functions}} -__arm_streaming void f1(void); // expected-error {{'__arm_streaming' cannot be applied to a declaration}} -void __arm_streaming f2(void); // expected-error {{'__arm_streaming' only applies to function types}} -void f3 __arm_streaming (void); // expected-error {{'__arm_streaming' cannot be applied to a declaration}} -void f4(void) __arm_streaming; +ATTR_USE void f1(void); // expected-error {{'ATTR_NAME' cannot be applied to a declaration}} +void ATTR_USE f2(void); // expected-error {{'ATTR_NAME' only applies to function types}} +void f3 ATTR_USE (void); // expected-error {{'ATTR_NAME' cannot be applied to a declaration}} +void f4(void) ATTR_USE; -void f5(int i __arm_streaming, __arm_streaming int j, int __arm_streaming k); // expected-error 3 {{'__arm_streaming' only applies to function types}} +void f5(int i ATTR_USE, ATTR_USE int j, int ATTR_USE k); // expected-error 3 {{'ATTR_NAME' only applies to function types}} -void f6(a, b) __arm_streaming int a; int b; { // expected-error {{'__arm_streaming' cannot appear here}} \ +void f6(a, b) ATTR_USE int a; int b; { // expected-error {{'ATTR_NAME' cannot appear here}} \ c2x-warning {{deprecated}} } @@ -63,57 +67,74 @@ void f6(a, b) __arm_streaming int a; int b; { // expected-error {{'__arm_streami // behavior given that we *don't* want to parse it as part of the K&R parameter // declarations. It is disallowed to avoid a parsing ambiguity we already // handle well. 
-int (*f7(a, b))(int, int) __arm_streaming int a; int b; { // c2x-warning {{deprecated}} +int (*f7(a, b))(int, int) ATTR_USE int a; int b; { // c2x-warning {{deprecated}} return 0; } -__arm_streaming int a, b; // expected-error {{'__arm_streaming' only applies to function types}} -int c __arm_streaming, d __arm_streaming; // expected-error 2 {{'__arm_streaming' only applies to function types}} +ATTR_USE int a, b; // expected-error {{'ATTR_NAME' only applies to function types}} +int c ATTR_USE, d ATTR_USE; // expected-error 2 {{'ATTR_NAME' only applies to function types}} -void f8(void) __arm_streaming { - __arm_streaming int i, j; // expected-error {{'__arm_streaming' only applies to function types}} - int k, l __arm_streaming; // expected-error {{'__arm_streaming' only applies to function types}} +void f8(void) ATTR_USE { + ATTR_USE int i, j; // expected-error {{'ATTR_NAME' only applies to function types}} + int k, l ATTR_USE; // expected-error {{'ATTR_NAME' only applies to function types}} } -__arm_streaming void f9(void) { // expected-error {{'__arm_streaming' cannot be applied to a declaration}} - int i[10] __arm_streaming; // expected-error {{'__arm_streaming' only applies to function types}} - int (*fp1)(void)__arm_streaming; - int (*fp2 __arm_streaming)(void); // expected-error {{'__arm_streaming' cannot be applied to a declaration}} +ATTR_USE void f9(void) { // expected-error {{'ATTR_NAME' cannot be applied to a declaration}} + int i[10] ATTR_USE; // expected-error {{'ATTR_NAME' only applies to function types}} + int (*fp1)(void)ATTR_USE; + int (*fp2 ATTR_USE)(void); // expected-error {{'ATTR_NAME' cannot be applied to a declaration}} - int * __arm_streaming *ipp; // expected-error {{'__arm_streaming' only applies to function types}} + int * ATTR_USE *ipp; // expected-error {{'ATTR_NAME' only applies to function types}} } -void f10(int j[static 10] __arm_streaming, int k[*] __arm_streaming); // expected-error 2 {{'__arm_streaming' only applies to function 
types}} +void f10(int j[static 10] ATTR_USE, int k[*] ATTR_USE); // expected-error 2 {{'ATTR_NAME' only applies to function types}} void f11(void) { - __arm_streaming {} // expected-error {{'__arm_streaming' cannot be applied to a statement}} - __arm_streaming if (1) {} // expected-error {{'__arm_streaming' cannot be applied to a statement}} + ATTR_USE {} // expected-error {{'ATTR_NAME' cannot be applied to a statement}} + ATTR_USE if (1) {} // expected-error {{'ATTR_NAME' cannot be applied to a statement}} - __arm_streaming switch (1) { // expected-error {{'__arm_streaming' cannot be applied to a statement}} - __arm_streaming case 1: __arm_streaming break; // expected-error 2 {{'__arm_streaming' cannot be applied to a statement}} - __arm_streaming default: break; // expected-error {{'__arm_streaming' cannot be applied to a statement}} + ATTR_USE switch (1) { // expected-error {{'ATTR_NAME' cannot be applied to a statement}} + ATTR_USE case 1: ATTR_USE break; // expected-error 2 {{'ATTR_NAME' cannot be applied to a statement}} + ATTR_USE default: break; // expected-error {{'ATTR_NAME' cannot be applied to a statement}} } goto foo; - __arm_streaming foo: (void)1; // expected-error {{'__arm_streaming' only applies to non-K&R-style functions}} + ATTR_USE foo: (void)1; // expected-error {{'ATTR_NAME' only applies to non-K&R-style functions}} - __arm_streaming for (;;); // expected-error {{'__arm_streaming' cannot be applied to a statement}} - __arm_streaming while (1); // expected-error {{'__arm_streaming' cannot be applied to a statement}} - __arm_streaming do __arm_streaming { } while(1); // expected-error 2 {{'__arm_streaming' cannot be applied to a statement}} + ATTR_USE for (;;); // expected-error {{'ATTR_NAME' cannot be applied to a statement}} + ATTR_USE while (1); // expected-error {{'ATTR_NAME' cannot be applied to a statement}} + ATTR_USE do ATTR_USE { } while(1); // expected-error 2 {{'ATTR_NAME' cannot be applied to a statement}} - __arm_streaming (void)1; 
// expected-error {{'__arm_streaming' cannot be applied to a statement}} + ATTR_USE (void)1; // expected-error {{'ATTR_NAME' cannot be applied to a statement}} - __arm_streaming; // expected-error {{'__arm_streaming' cannot be applied to a statement}} + ATTR_USE; // expected-error {{'ATTR_NAME' cannot be applied to a statement}} - (void)sizeof(int [4]__arm_streaming); // expected-error {{'__arm_streaming' only applies to function types}} - (void)sizeof(struct __arm_streaming S3 { int a __arm_streaming; }); // expected-error {{'__arm_streaming' only applies to non-K&R-style functions}} \ - // expected-error {{'__arm_streaming' only applies to function types; type here is 'int'}} + (void)sizeof(int [4]ATTR_USE); // expected-error {{'ATTR_NAME' only applies to function types}} + (void)sizeof(struct ATTR_USE S3 { int a ATTR_USE; }); // expected-error {{'ATTR_NAME' only applies to non-K&R-style functions}} \ + // expected-error {{'ATTR_NAME' only applies to function types; type here is 'int'}} - __arm_streaming return; // expected-error {{'__arm_streaming' cannot be applied to a statement}} + ATTR_USE return; // expected-error {{'ATTR_NAME' cannot be applied to a statement}} - __arm_streaming asm (""); // expected-error {{'__arm_streaming' cannot appear here}} + ATTR_USE asm (""); // expected-error {{'ATTR_NAME' cannot appear here}} } -struct __arm_streaming S4 *s; // expected-error {{'__arm_streaming' cannot appear here}} +struct ATTR_USE S4 *s; // expected-error {{'ATTR_NAME' cannot appear here}} struct S5 {}; -int c = sizeof(struct __arm_streaming S5); // expected-error {{'__arm_streaming' cannot appear here}} +int c = sizeof(struct ATTR_USE S5); // expected-error {{'ATTR_NAME' cannot appear here}} + +void invalid_parentheses1() __arm_inout; // expected-error {{expected '(' after ''__arm_inout''}} +void invalid_parentheses2() __arm_inout(; // expected-error {{expected string literal as argument of '__arm_inout' attribute}} +void invalid_parentheses3() __arm_inout((); 
// expected-error {{expected string literal as argument of '__arm_inout' attribute}} +void invalid_parentheses4() __arm_inout); // expected-error {{expected '(' after ''__arm_inout''}} \ + // expected-error {{expected function body after function declarator}} +void invalid_parentheses5() __arm_inout(()); // expected-error {{expected string literal as argument of '__arm_inout' attribute}} +void invalid_parentheses6() __arm_inout("za"; // expected-error {{expected ')'}} +void invalid_parentheses7() __arm_streaming(; // expected-error {{expected parameter declarator}} \ + // expected-error {{expected ')'}} \ + // expected-note {{to match this '('}} \ + // expected-error {{function cannot return function type 'void ()'}} \ + // expected-error {{'__arm_streaming' only applies to function types; type here is 'int ()'}} \ + // expected-warning {{'__arm_streaming' only applies to non-K&R-style functions}} +void invalid_parentheses8() __arm_streaming(); // expected-error {{function cannot return function type 'void ()'}} \ + // expected-error {{'__arm_streaming' only applies to function types; type here is 'int ()'}} \ + // expected-warning {{'__arm_streaming' only applies to non-K&R-style functions}} diff --git a/clang/test/Parser/c2x-attribute-keywords.m b/clang/test/Parser/c2x-attribute-keywords.m index 2296be13cb714..575c88ffffc3d 100644 --- a/clang/test/Parser/c2x-attribute-keywords.m +++ b/clang/test/Parser/c2x-attribute-keywords.m @@ -1,6 +1,6 @@ // RUN: %clang_cc1 -fsyntax-only -triple aarch64-none-linux-gnu -target-feature +sme -verify %s -enum __arm_streaming E1 : int; // expected-error {{'__arm_streaming' only applies to non-K&R-style functions}} +enum __arm_inout("za") E1 : int; // expected-error {{'__arm_inout' only applies to non-K&R-style functions}} @interface Base @end @@ -15,5 +15,5 @@ - (S *) foo; void f(T *t) { - __arm_streaming[[t foo] bar]; // expected-error {{'__arm_streaming' cannot be applied to a statement}} + __arm_inout("za")[[t foo] bar]; // 
expected-error {{'__arm_inout' cannot be applied to a statement}} } diff --git a/clang/test/Parser/cxx0x-keyword-attributes.cpp b/clang/test/Parser/cxx0x-keyword-attributes.cpp index 8d31efac53208..be7423cc7ecee 100644 --- a/clang/test/Parser/cxx0x-keyword-attributes.cpp +++ b/clang/test/Parser/cxx0x-keyword-attributes.cpp @@ -1,4 +1,7 @@ -// RUN: %clang_cc1 -fcxx-exceptions -fdeclspec -fexceptions -fsyntax-only -verify -std=c++11 -Wc++14-compat -Wc++14-extensions -Wc++17-extensions -triple aarch64-none-linux-gnu -target-feature +sme %s +// RUN: sed -e "s@ATTR_USE@__arm_streaming@g" -e "s@ATTR_NAME@__arm_streaming@g" %s > %t +// RUN: %clang_cc1 -fcxx-exceptions -fdeclspec -fexceptions -fsyntax-only -verify -std=c++11 -Wc++14-compat -Wc++14-extensions -Wc++17-extensions -triple aarch64-none-linux-gnu -target-feature +sme -x c++ %t +// RUN: sed -e "s@ATTR_USE@__arm_inout\(\"za\"\)@g" -e "s@ATTR_NAME@__arm_inout@g" %s > %t +// RUN: %clang_cc1 -fcxx-exceptions -fdeclspec -fexceptions -fsyntax-only -verify -std=c++11 -Wc++14-compat -Wc++14-extensions -Wc++17-extensions -triple aarch64-none-linux-gnu -target-feature +sme -x c++ %t // Need std::initializer_list namespace std { @@ -35,136 +38,136 @@ namespace std { // Declaration syntax checks -__arm_streaming int before_attr; // expected-error {{'__arm_streaming' only applies to function types}} -int __arm_streaming between_attr; // expected-error {{'__arm_streaming' only applies to function types}} -const __arm_streaming int between_attr_2 = 0; // expected-error {{'__arm_streaming' cannot appear here}} -int after_attr __arm_streaming; // expected-error {{'__arm_streaming' only applies to function types}} -int * __arm_streaming ptr_attr; // expected-error {{'__arm_streaming' only applies to function types}} -int & __arm_streaming ref_attr = after_attr; // expected-error {{'__arm_streaming' only applies to function types}} -int && __arm_streaming rref_attr = 0; // expected-error {{'__arm_streaming' only applies to function 
types}} -int array_attr [1] __arm_streaming; // expected-error {{'__arm_streaming' only applies to function types}} -void fn_attr () __arm_streaming; -void noexcept_fn_attr () noexcept __arm_streaming; +ATTR_USE int before_attr; // expected-error {{'ATTR_NAME' only applies to function types}} +int ATTR_USE between_attr; // expected-error {{'ATTR_NAME' only applies to function types}} +const ATTR_USE int between_attr_2 = 0; // expected-error {{'ATTR_NAME' cannot appear here}} +int after_attr ATTR_USE; // expected-error {{'ATTR_NAME' only applies to function types}} +int * ATTR_USE ptr_attr; // expected-error {{'ATTR_NAME' only applies to function types}} +int & ATTR_USE ref_attr = after_attr; // expected-error {{'ATTR_NAME' only applies to function types}} +int && ATTR_USE rref_attr = 0; // expected-error {{'ATTR_NAME' only applies to function types}} +int array_attr [1] ATTR_USE; // expected-error {{'ATTR_NAME' only applies to function types}} +void fn_attr () ATTR_USE; +void noexcept_fn_attr () noexcept ATTR_USE; struct MemberFnOrder { - virtual void f() const volatile && noexcept __arm_streaming final = 0; + virtual void f() const volatile && noexcept ATTR_USE final = 0; }; -struct __arm_streaming struct_attr; // expected-error {{'__arm_streaming' only applies to non-K&R-style functions}} -class __arm_streaming class_attr {}; // expected-error {{'__arm_streaming' only applies to non-K&R-style functions}} -union __arm_streaming union_attr; // expected-error {{'__arm_streaming' only applies to non-K&R-style functions}} -enum __arm_streaming E { }; // expected-error {{'__arm_streaming' only applies to non-K&R-style functions}} +struct ATTR_USE struct_attr; // expected-error {{'ATTR_NAME' only applies to non-K&R-style functions}} +class ATTR_USE class_attr {}; // expected-error {{'ATTR_NAME' only applies to non-K&R-style functions}} +union ATTR_USE union_attr; // expected-error {{'ATTR_NAME' only applies to non-K&R-style functions}} +enum ATTR_USE E { }; // 
expected-error {{'ATTR_NAME' only applies to non-K&R-style functions}} namespace test_misplacement { -__arm_streaming struct struct_attr2; // expected-error {{misplaced '__arm_streaming'}} -__arm_streaming class class_attr2; // expected-error {{misplaced '__arm_streaming'}} -__arm_streaming union union_attr2; // expected-error {{misplaced '__arm_streaming'}} -__arm_streaming enum E2 { }; // expected-error {{misplaced '__arm_streaming'}} +ATTR_USE struct struct_attr2; // expected-error {{misplaced 'ATTR_NAME'}} +ATTR_USE class class_attr2; // expected-error {{misplaced 'ATTR_NAME'}} +ATTR_USE union union_attr2; // expected-error {{misplaced 'ATTR_NAME'}} +ATTR_USE enum E2 { }; // expected-error {{misplaced 'ATTR_NAME'}} } // Checks attributes placed at wrong syntactic locations of class specifiers. -class __arm_streaming __arm_streaming // expected-error 2 {{'__arm_streaming' only applies to non-K&R-style functions}} - attr_after_class_name_decl __arm_streaming __arm_streaming; // expected-error {{'__arm_streaming' cannot appear here}} \ - expected-error 2 {{'__arm_streaming' only applies to non-K&R-style functions}} +class ATTR_USE ATTR_USE // expected-error 2 {{'ATTR_NAME' only applies to non-K&R-style functions}} + attr_after_class_name_decl ATTR_USE ATTR_USE; // expected-error {{'ATTR_NAME' cannot appear here}} \ + expected-error 2 {{'ATTR_NAME' only applies to non-K&R-style functions}} -class __arm_streaming __arm_streaming // expected-error 2 {{'__arm_streaming' only applies to non-K&R-style functions}} - attr_after_class_name_definition __arm_streaming __arm_streaming __arm_streaming{}; // expected-error {{'__arm_streaming' cannot appear here}} \ - expected-error 3 {{'__arm_streaming' only applies to non-K&R-style functions}} +class ATTR_USE ATTR_USE // expected-error 2 {{'ATTR_NAME' only applies to non-K&R-style functions}} + attr_after_class_name_definition ATTR_USE ATTR_USE ATTR_USE{}; // expected-error {{'ATTR_NAME' cannot appear here}} \ + expected-error 
3 {{'ATTR_NAME' only applies to non-K&R-style functions}} -class __arm_streaming c {}; // expected-error {{'__arm_streaming' only applies to non-K&R-style functions}} -class c __arm_streaming __arm_streaming x; // expected-error 2 {{'__arm_streaming' only applies to function types}} -class c __arm_streaming __arm_streaming y __arm_streaming __arm_streaming; // expected-error 4 {{'__arm_streaming' only applies to function types}} +class ATTR_USE c {}; // expected-error {{'ATTR_NAME' only applies to non-K&R-style functions}} +class c ATTR_USE ATTR_USE x; // expected-error 2 {{'ATTR_NAME' only applies to function types}} +class c ATTR_USE ATTR_USE y ATTR_USE ATTR_USE; // expected-error 4 {{'ATTR_NAME' only applies to function types}} class c final [(int){0}]; class base {}; -class __arm_streaming __arm_streaming final_class // expected-error 2 {{'__arm_streaming' only applies to non-K&R-style functions}} - __arm_streaming alignas(float) final // expected-error {{'__arm_streaming' cannot appear here}} \ - expected-error {{'__arm_streaming' only applies to non-K&R-style functions}} - __arm_streaming alignas(float) __arm_streaming alignas(float): base{}; // expected-error {{'__arm_streaming' cannot appear here}} +class ATTR_USE ATTR_USE final_class // expected-error 2 {{'ATTR_NAME' only applies to non-K&R-style functions}} + ATTR_USE alignas(float) final // expected-error {{'ATTR_NAME' cannot appear here}} \ + expected-error {{'ATTR_NAME' only applies to non-K&R-style functions}} + ATTR_USE alignas(float) ATTR_USE alignas(float): base{}; // expected-error {{'ATTR_NAME' cannot appear here}} -class __arm_streaming __arm_streaming final_class_another // expected-error 2 {{'__arm_streaming' only applies to non-K&R-style functions}} - __arm_streaming __arm_streaming alignas(16) final // expected-error {{'__arm_streaming' cannot appear here}} \ - expected-error 2 {{'__arm_streaming' only applies to non-K&R-style functions}} - __arm_streaming __arm_streaming alignas(16) 
__arm_streaming{}; // expected-error {{'__arm_streaming' cannot appear here}} +class ATTR_USE ATTR_USE final_class_another // expected-error 2 {{'ATTR_NAME' only applies to non-K&R-style functions}} + ATTR_USE ATTR_USE alignas(16) final // expected-error {{'ATTR_NAME' cannot appear here}} \ + expected-error 2 {{'ATTR_NAME' only applies to non-K&R-style functions}} + ATTR_USE ATTR_USE alignas(16) ATTR_USE{}; // expected-error {{'ATTR_NAME' cannot appear here}} -class after_class_close {} __arm_streaming; // expected-error {{'__arm_streaming' cannot appear here, place it after "class" to apply it to the type declaration}} +class after_class_close {} ATTR_USE; // expected-error {{'ATTR_NAME' cannot appear here, place it after "class" to apply it to the type declaration}} class C {}; -__arm_streaming struct with_init_declarators {} init_declarator; // expected-error {{'__arm_streaming' only applies to function types}} -__arm_streaming struct no_init_declarators; // expected-error {{misplaced '__arm_streaming'}} -template __arm_streaming struct no_init_declarators_template; // expected-error {{'__arm_streaming' cannot appear here}} +ATTR_USE struct with_init_declarators {} init_declarator; // expected-error {{'ATTR_NAME' only applies to function types}} +ATTR_USE struct no_init_declarators; // expected-error {{misplaced 'ATTR_NAME'}} +template ATTR_USE struct no_init_declarators_template; // expected-error {{'ATTR_NAME' cannot appear here}} void fn_with_structs() { - __arm_streaming struct with_init_declarators {} init_declarator; // expected-error {{'__arm_streaming' only applies to function types}} - __arm_streaming struct no_init_declarators; // expected-error {{'__arm_streaming' cannot appear here}} + ATTR_USE struct with_init_declarators {} init_declarator; // expected-error {{'ATTR_NAME' only applies to function types}} + ATTR_USE struct no_init_declarators; // expected-error {{'ATTR_NAME' cannot appear here}} } -__arm_streaming; // expected-error 
{{'__arm_streaming' only applies to non-K&R-style functions}} +ATTR_USE; // expected-error {{'ATTR_NAME' only applies to non-K&R-style functions}} struct ctordtor { - __arm_streaming ctordtor __arm_streaming () __arm_streaming; // expected-error 2 {{'__arm_streaming' cannot be applied to a declaration}} - ctordtor (C) __arm_streaming; - __arm_streaming ~ctordtor __arm_streaming () __arm_streaming; // expected-error 2 {{'__arm_streaming' cannot be applied to a declaration}} + ATTR_USE ctordtor ATTR_USE () ATTR_USE; // expected-error 2 {{'ATTR_NAME' cannot be applied to a declaration}} + ctordtor (C) ATTR_USE; + ATTR_USE ~ctordtor ATTR_USE () ATTR_USE; // expected-error 2 {{'ATTR_NAME' cannot be applied to a declaration}} }; -__arm_streaming ctordtor::ctordtor __arm_streaming () __arm_streaming {} // expected-error 2 {{'__arm_streaming' cannot be applied to a declaration}} -__arm_streaming ctordtor::ctordtor (C) __arm_streaming try {} catch (...) {} // expected-error {{'__arm_streaming' cannot be applied to a declaration}} -__arm_streaming ctordtor::~ctordtor __arm_streaming () __arm_streaming {} // expected-error 2 {{'__arm_streaming' cannot be applied to a declaration}} -extern "C++" __arm_streaming int extern_attr; // expected-error {{'__arm_streaming' only applies to function types}} -template __arm_streaming void template_attr (); // expected-error {{'__arm_streaming' cannot be applied to a declaration}} -__arm_streaming __arm_streaming int __arm_streaming __arm_streaming multi_attr __arm_streaming __arm_streaming; // expected-error 6 {{'__arm_streaming' only applies to function types}} - -int (paren_attr) __arm_streaming; // expected-error {{'__arm_streaming' cannot appear here}} -unsigned __arm_streaming int attr_in_decl_spec; // expected-error {{'__arm_streaming' cannot appear here}} -unsigned __arm_streaming int __arm_streaming const double_decl_spec = 0; // expected-error 2 {{'__arm_streaming' cannot appear here}} +ATTR_USE ctordtor::ctordtor ATTR_USE () 
ATTR_USE {} // expected-error 2 {{'ATTR_NAME' cannot be applied to a declaration}} +ATTR_USE ctordtor::ctordtor (C) ATTR_USE try {} catch (...) {} // expected-error {{'ATTR_NAME' cannot be applied to a declaration}} +ATTR_USE ctordtor::~ctordtor ATTR_USE () ATTR_USE {} // expected-error 2 {{'ATTR_NAME' cannot be applied to a declaration}} +extern "C++" ATTR_USE int extern_attr; // expected-error {{'ATTR_NAME' only applies to function types}} +template ATTR_USE void template_attr (); // expected-error {{'ATTR_NAME' cannot be applied to a declaration}} +ATTR_USE ATTR_USE int ATTR_USE ATTR_USE multi_attr ATTR_USE ATTR_USE; // expected-error 6 {{'ATTR_NAME' only applies to function types}} + +int (paren_attr) ATTR_USE; // expected-error {{'ATTR_NAME' cannot appear here}} +unsigned ATTR_USE int attr_in_decl_spec; // expected-error {{'ATTR_NAME' cannot appear here}} +unsigned ATTR_USE int ATTR_USE const double_decl_spec = 0; // expected-error 2 {{'ATTR_NAME' cannot appear here}} class foo { - void const_after_attr () __arm_streaming const; // expected-error {{expected ';'}} + void const_after_attr () ATTR_USE const; // expected-error {{expected ';'}} }; -extern "C++" __arm_streaming { } // expected-error {{'__arm_streaming' cannot appear here}} -__arm_streaming extern "C++" { } // expected-error {{'__arm_streaming' cannot appear here}} -__arm_streaming template void before_template_attr (); // expected-error {{'__arm_streaming' cannot appear here}} -__arm_streaming namespace ns { int i; } // expected-error {{'__arm_streaming' cannot appear here}} -__arm_streaming static_assert(true, ""); //expected-error {{'__arm_streaming' cannot appear here}} -__arm_streaming asm(""); // expected-error {{'__arm_streaming' cannot appear here}} - -__arm_streaming using ns::i; // expected-warning {{ISO C++}} \ - expected-error {{'__arm_streaming' only applies to non-K&R-style functions}} -__arm_streaming using namespace ns; // expected-error {{'__arm_streaming' only applies to 
non-K&R-style functions}} -namespace __arm_streaming ns2 {} // expected-warning {{attributes on a namespace declaration are a C++17 extension}} \ - expected-error {{'__arm_streaming' only applies to non-K&R-style functions}} - -using __arm_streaming alignas(4)__arm_streaming ns::i; // expected-warning 2 {{ISO C++}} \ - expected-error {{'__arm_streaming' cannot appear here}} \ +extern "C++" ATTR_USE { } // expected-error {{'ATTR_NAME' cannot appear here}} +ATTR_USE extern "C++" { } // expected-error {{'ATTR_NAME' cannot appear here}} +ATTR_USE template void before_template_attr (); // expected-error {{'ATTR_NAME' cannot appear here}} +ATTR_USE namespace ns { int i; } // expected-error {{'ATTR_NAME' cannot appear here}} +ATTR_USE static_assert(true, ""); //expected-error {{'ATTR_NAME' cannot appear here}} +ATTR_USE asm(""); // expected-error {{'ATTR_NAME' cannot appear here}} + +ATTR_USE using ns::i; // expected-warning {{ISO C++}} \ + expected-error {{'ATTR_NAME' only applies to non-K&R-style functions}} +ATTR_USE using namespace ns; // expected-error {{'ATTR_NAME' only applies to non-K&R-style functions}} +namespace ATTR_USE ns2 {} // expected-warning {{attributes on a namespace declaration are a C++17 extension}} \ + expected-error {{'ATTR_NAME' only applies to non-K&R-style functions}} + +using ATTR_USE alignas(4)ATTR_USE ns::i; // expected-warning 2 {{ISO C++}} \ + expected-error {{'ATTR_NAME' cannot appear here}} \ expected-error {{'alignas' attribute only applies to variables, data members and tag types}} \ expected-warning {{ISO C++}} \ - expected-error 2 {{'__arm_streaming' only applies to non-K&R-style functions}} -using __arm_streaming alignas(4) __arm_streaming foobar = int; // expected-error {{'__arm_streaming' cannot appear here}} \ + expected-error 2 {{'ATTR_NAME' only applies to non-K&R-style functions}} +using ATTR_USE alignas(4) ATTR_USE foobar = int; // expected-error {{'ATTR_NAME' cannot appear here}} \ expected-error {{'alignas' attribute only 
applies to}} \ - expected-error 2 {{'__arm_streaming' only applies to function types}} - -__arm_streaming using T = int; // expected-error {{'__arm_streaming' cannot appear here}} -using T __arm_streaming = int; // expected-error {{'__arm_streaming' only applies to function types}} -template using U __arm_streaming = T; // expected-error {{'__arm_streaming' only applies to function types}} -using ns::i __arm_streaming; // expected-warning {{ISO C++}} \ - expected-error {{'__arm_streaming' only applies to non-K&R-style functions}} -using ns::i __arm_streaming, ns::i __arm_streaming; // expected-warning 2 {{ISO C++}} \ + expected-error 2 {{'ATTR_NAME' only applies to function types}} + +ATTR_USE using T = int; // expected-error {{'ATTR_NAME' cannot appear here}} +using T ATTR_USE = int; // expected-error {{'ATTR_NAME' only applies to function types}} +template using U ATTR_USE = T; // expected-error {{'ATTR_NAME' only applies to function types}} +using ns::i ATTR_USE; // expected-warning {{ISO C++}} \ + expected-error {{'ATTR_NAME' only applies to non-K&R-style functions}} +using ns::i ATTR_USE, ns::i ATTR_USE; // expected-warning 2 {{ISO C++}} \ expected-warning {{use of multiple declarators in a single using declaration is a C++17 extension}} \ - expected-error 2 {{'__arm_streaming' only applies to non-K&R-style functions}} + expected-error 2 {{'ATTR_NAME' only applies to non-K&R-style functions}} struct using_in_struct_base { typedef int i, j, k, l; }; struct using_in_struct : using_in_struct_base { - __arm_streaming using using_in_struct_base::i; // expected-warning {{ISO C++}} \ - expected-error {{'__arm_streaming' only applies to non-K&R-style functions}} - using using_in_struct_base::j __arm_streaming; // expected-warning {{ISO C++}} \ - expected-error {{'__arm_streaming' only applies to non-K&R-style functions}} - __arm_streaming using using_in_struct_base::k __arm_streaming, using_in_struct_base::l __arm_streaming; // expected-warning 3 {{ISO C++}} \ + 
ATTR_USE using using_in_struct_base::i; // expected-warning {{ISO C++}} \ + expected-error {{'ATTR_NAME' only applies to non-K&R-style functions}} + using using_in_struct_base::j ATTR_USE; // expected-warning {{ISO C++}} \ + expected-error {{'ATTR_NAME' only applies to non-K&R-style functions}} + ATTR_USE using using_in_struct_base::k ATTR_USE, using_in_struct_base::l ATTR_USE; // expected-warning 3 {{ISO C++}} \ expected-warning {{use of multiple declarators in a single using declaration is a C++17 extension}} \ - expected-error 4 {{'__arm_streaming' only applies to non-K&R-style functions}} + expected-error 4 {{'ATTR_NAME' only applies to non-K&R-style functions}} }; -using __arm_streaming ns::i; // expected-warning {{ISO C++}} \ - expected-error {{'__arm_streaming' cannot appear here}} \ - expected-error {{'__arm_streaming' only applies to non-K&R-style functions}} -using T __arm_streaming = int; // expected-error {{'__arm_streaming' only applies to function types}} +using ATTR_USE ns::i; // expected-warning {{ISO C++}} \ + expected-error {{'ATTR_NAME' cannot appear here}} \ + expected-error {{'ATTR_NAME' only applies to non-K&R-style functions}} +using T ATTR_USE = int; // expected-error {{'ATTR_NAME' only applies to function types}} -auto trailing() -> __arm_streaming const int; // expected-error {{'__arm_streaming' cannot appear here}} -auto trailing() -> const __arm_streaming int; // expected-error {{'__arm_streaming' cannot appear here}} -auto trailing() -> const int __arm_streaming; // expected-error {{'__arm_streaming' only applies to function types}} -auto trailing_2() -> struct struct_attr __arm_streaming; // expected-error {{'__arm_streaming' only applies to function types}} +auto trailing() -> ATTR_USE const int; // expected-error {{'ATTR_NAME' cannot appear here}} +auto trailing() -> const ATTR_USE int; // expected-error {{'ATTR_NAME' cannot appear here}} +auto trailing() -> const int ATTR_USE; // expected-error {{'ATTR_NAME' only applies to function 
types}} +auto trailing_2() -> struct struct_attr ATTR_USE; // expected-error {{'ATTR_NAME' only applies to function types}} namespace N { struct S {}; @@ -172,88 +175,88 @@ namespace N { template struct Template {}; // FIXME: Improve this diagnostic -struct __arm_streaming N::S s; // expected-error {{'__arm_streaming' cannot appear here}} -struct __arm_streaming Template t; // expected-error {{'__arm_streaming' cannot appear here}} -struct __arm_streaming ::template Template u; // expected-error {{'__arm_streaming' cannot appear here}} -template struct __arm_streaming Template; // expected-error {{'__arm_streaming' cannot appear here}} +struct ATTR_USE N::S s; // expected-error {{'ATTR_NAME' cannot appear here}} +struct ATTR_USE Template t; // expected-error {{'ATTR_NAME' cannot appear here}} +struct ATTR_USE ::template Template u; // expected-error {{'ATTR_NAME' cannot appear here}} +template struct ATTR_USE Template; // expected-error {{'ATTR_NAME' cannot appear here}} template struct __attribute__((pure)) Template; // We still allow GNU-style attributes here -template <> struct __arm_streaming Template; // expected-error {{'__arm_streaming' only applies to non-K&R-style functions}} - -enum __arm_streaming E1 {}; // expected-error {{'__arm_streaming' only applies to non-K&R-style functions}} -enum __arm_streaming E2; // expected-error {{forbids forward references}} \ - expected-error {{'__arm_streaming' only applies to non-K&R-style functions}} -enum __arm_streaming E1; // expected-error {{'__arm_streaming' only applies to non-K&R-style functions}} -enum __arm_streaming E3 : int; // expected-error {{'__arm_streaming' only applies to non-K&R-style functions}} -enum __arm_streaming { // expected-error {{'__arm_streaming' only applies to non-K&R-style functions}} - k_123 __arm_streaming = 123 // expected-warning {{attributes on an enumerator declaration are a C++17 extension}} \ - expected-error {{'__arm_streaming' only applies to non-K&R-style functions}} +template 
<> struct ATTR_USE Template; // expected-error {{'ATTR_NAME' only applies to non-K&R-style functions}} + +enum ATTR_USE E1 {}; // expected-error {{'ATTR_NAME' only applies to non-K&R-style functions}} +enum ATTR_USE E2; // expected-error {{forbids forward references}} \ + expected-error {{'ATTR_NAME' only applies to non-K&R-style functions}} +enum ATTR_USE E1; // expected-error {{'ATTR_NAME' only applies to non-K&R-style functions}} +enum ATTR_USE E3 : int; // expected-error {{'ATTR_NAME' only applies to non-K&R-style functions}} +enum ATTR_USE { // expected-error {{'ATTR_NAME' only applies to non-K&R-style functions}} + k_123 ATTR_USE = 123 // expected-warning {{attributes on an enumerator declaration are a C++17 extension}} \ + expected-error {{'ATTR_NAME' only applies to non-K&R-style functions}} }; -enum __arm_streaming E1 e; // expected-error {{'__arm_streaming' cannot appear here}} -enum __arm_streaming class E4 { }; // expected-error {{'__arm_streaming' cannot appear here}} -enum struct __arm_streaming E5; // expected-error {{'__arm_streaming' only applies to non-K&R-style functions}} -enum E6 {} __arm_streaming; // expected-error {{'__arm_streaming' cannot appear here, place it after "enum" to apply it to the type declaration}} +enum ATTR_USE E1 e; // expected-error {{'ATTR_NAME' cannot appear here}} +enum ATTR_USE class E4 { }; // expected-error {{'ATTR_NAME' cannot appear here}} +enum struct ATTR_USE E5; // expected-error {{'ATTR_NAME' only applies to non-K&R-style functions}} +enum E6 {} ATTR_USE; // expected-error {{'ATTR_NAME' cannot appear here, place it after "enum" to apply it to the type declaration}} struct S { - friend int f __arm_streaming (); // expected-error {{'__arm_streaming' cannot appear here}} \ - expected-error {{'__arm_streaming' cannot be applied to a declaration}} - friend int f2 __arm_streaming () {} // expected-error {{'__arm_streaming' cannot be applied to a declaration}} - __arm_streaming friend int g(); // expected-error 
{{'__arm_streaming' cannot appear here}} - __arm_streaming friend int h() { // expected-error {{'__arm_streaming' cannot be applied to a declaration}} + friend int f ATTR_USE (); // expected-error {{'ATTR_NAME' cannot appear here}} \ + expected-error {{'ATTR_NAME' cannot be applied to a declaration}} + friend int f2 ATTR_USE () {} // expected-error {{'ATTR_NAME' cannot be applied to a declaration}} + ATTR_USE friend int g(); // expected-error {{'ATTR_NAME' cannot appear here}} + ATTR_USE friend int h() { // expected-error {{'ATTR_NAME' cannot be applied to a declaration}} } - __arm_streaming friend int f3(), f4(), f5(); // expected-error {{'__arm_streaming' cannot appear here}} - friend int f6 __arm_streaming (), f7 __arm_streaming (), f8 __arm_streaming (); // expected-error3 {{'__arm_streaming' cannot appear here}} \ - expected-error 3 {{'__arm_streaming' cannot be applied to a declaration}} - friend class __arm_streaming C; // expected-error {{'__arm_streaming' cannot appear here}} - __arm_streaming friend class D; // expected-error {{'__arm_streaming' cannot appear here}} - __arm_streaming friend int; // expected-error {{'__arm_streaming' cannot appear here}} + ATTR_USE friend int f3(), f4(), f5(); // expected-error {{'ATTR_NAME' cannot appear here}} + friend int f6 ATTR_USE (), f7 ATTR_USE (), f8 ATTR_USE (); // expected-error3 {{'ATTR_NAME' cannot appear here}} \ + expected-error 3 {{'ATTR_NAME' cannot be applied to a declaration}} + friend class ATTR_USE C; // expected-error {{'ATTR_NAME' cannot appear here}} + ATTR_USE friend class D; // expected-error {{'ATTR_NAME' cannot appear here}} + ATTR_USE friend int; // expected-error {{'ATTR_NAME' cannot appear here}} }; template void tmpl (T) {} -template __arm_streaming void tmpl(char); // expected-error {{'__arm_streaming' cannot appear here}} -template void __arm_streaming tmpl(short); // expected-error {{'__arm_streaming' only applies to function types}} +template ATTR_USE void tmpl(char); // expected-error 
{{'ATTR_NAME' cannot appear here}} +template void ATTR_USE tmpl(short); // expected-error {{'ATTR_NAME' only applies to function types}} // Statement tests void foo () { - __arm_streaming ; // expected-error {{'__arm_streaming' cannot be applied to a statement}} - __arm_streaming { } // expected-error {{'__arm_streaming' cannot be applied to a statement}} - __arm_streaming if (0) { } // expected-error {{'__arm_streaming' cannot be applied to a statement}} - __arm_streaming for (;;); // expected-error {{'__arm_streaming' cannot be applied to a statement}} - __arm_streaming do { // expected-error {{'__arm_streaming' cannot be applied to a statement}} - __arm_streaming continue; // expected-error {{'__arm_streaming' cannot be applied to a statement}} + ATTR_USE ; // expected-error {{'ATTR_NAME' cannot be applied to a statement}} + ATTR_USE { } // expected-error {{'ATTR_NAME' cannot be applied to a statement}} + ATTR_USE if (0) { } // expected-error {{'ATTR_NAME' cannot be applied to a statement}} + ATTR_USE for (;;); // expected-error {{'ATTR_NAME' cannot be applied to a statement}} + ATTR_USE do { // expected-error {{'ATTR_NAME' cannot be applied to a statement}} + ATTR_USE continue; // expected-error {{'ATTR_NAME' cannot be applied to a statement}} } while (0); - __arm_streaming while (0); // expected-error {{'__arm_streaming' cannot be applied to a statement}} + ATTR_USE while (0); // expected-error {{'ATTR_NAME' cannot be applied to a statement}} - __arm_streaming switch (i) { // expected-error {{'__arm_streaming' cannot be applied to a statement}} - __arm_streaming case 0: // expected-error {{'__arm_streaming' cannot be applied to a statement}} - __arm_streaming default: // expected-error {{'__arm_streaming' cannot be applied to a statement}} - __arm_streaming break; // expected-error {{'__arm_streaming' cannot be applied to a statement}} + ATTR_USE switch (i) { // expected-error {{'ATTR_NAME' cannot be applied to a statement}} + ATTR_USE case 0: // 
expected-error {{'ATTR_NAME' cannot be applied to a statement}} + ATTR_USE default: // expected-error {{'ATTR_NAME' cannot be applied to a statement}} + ATTR_USE break; // expected-error {{'ATTR_NAME' cannot be applied to a statement}} } - __arm_streaming goto there; // expected-error {{'__arm_streaming' cannot be applied to a statement}} - __arm_streaming there: // expected-error {{'__arm_streaming' only applies to non-K&R-style functions}} + ATTR_USE goto there; // expected-error {{'ATTR_NAME' cannot be applied to a statement}} + ATTR_USE there: // expected-error {{'ATTR_NAME' only applies to non-K&R-style functions}} - __arm_streaming try { // expected-error {{'__arm_streaming' cannot be applied to a statement}} - } __arm_streaming catch (...) { // expected-error {{'__arm_streaming' cannot appear here}} + ATTR_USE try { // expected-error {{'ATTR_NAME' cannot be applied to a statement}} + } ATTR_USE catch (...) { // expected-error {{'ATTR_NAME' cannot appear here}} } - void bar __arm_streaming (__arm_streaming int i, __arm_streaming int j); // expected-error 2 {{'__arm_streaming' only applies to function types}} \ - expected-error {{'__arm_streaming' cannot be applied to a declaration}} - using FuncType = void (__arm_streaming int); // expected-error {{'__arm_streaming' only applies to function types}} - void baz(__arm_streaming...); // expected-error {{expected parameter declarator}} + void bar ATTR_USE (ATTR_USE int i, ATTR_USE int j); // expected-error 2 {{'ATTR_NAME' only applies to function types}} \ + expected-error {{'ATTR_NAME' cannot be applied to a declaration}} + using FuncType = void (ATTR_USE int); // expected-error {{'ATTR_NAME' only applies to function types}} + void baz(ATTR_USE...); // expected-error {{expected parameter declarator}} - __arm_streaming return; // expected-error {{'__arm_streaming' cannot be applied to a statement}} + ATTR_USE return; // expected-error {{'ATTR_NAME' cannot be applied to a statement}} } // Expression tests void bar 
() { - new int[42]__arm_streaming[5]__arm_streaming{}; // expected-error {{'__arm_streaming' only applies to function types}} + new int[42]ATTR_USE[5]ATTR_USE{}; // expected-error {{'ATTR_NAME' only applies to function types}} } // Condition tests void baz () { - if (__arm_streaming bool b = true) { // expected-error {{'__arm_streaming' only applies to function types}} - switch (__arm_streaming int n { 42 }) { // expected-error {{'__arm_streaming' only applies to function types}} + if (ATTR_USE bool b = true) { // expected-error {{'ATTR_NAME' only applies to function types}} + switch (ATTR_USE int n { 42 }) { // expected-error {{'ATTR_NAME' only applies to function types}} default: - for (__arm_streaming int n = 0; __arm_streaming char b = n < 5; ++b) { // expected-error 2 {{'__arm_streaming' only applies to function types}} + for (ATTR_USE int n = 0; ATTR_USE char b = n < 5; ++b) { // expected-error 2 {{'ATTR_NAME' only applies to function types}} } } } @@ -261,37 +264,37 @@ void baz () { // An attribute can be applied to an expression-statement, such as the first // statement in a for. But it can't be applied to a condition which is an // expression. 
- for (__arm_streaming x = 0; ; ) {} // expected-error {{'__arm_streaming' cannot appear here}} - for (; __arm_streaming x < 5; ) {} // expected-error {{'__arm_streaming' cannot appear here}} - while (__arm_streaming bool k { false }) { // expected-error {{'__arm_streaming' only applies to function types}} + for (ATTR_USE x = 0; ; ) {} // expected-error {{'ATTR_NAME' cannot appear here}} + for (; ATTR_USE x < 5; ) {} // expected-error {{'ATTR_NAME' cannot appear here}} + while (ATTR_USE bool k { false }) { // expected-error {{'ATTR_NAME' only applies to function types}} } - while (__arm_streaming true) { // expected-error {{'__arm_streaming' cannot appear here}} + while (ATTR_USE true) { // expected-error {{'ATTR_NAME' cannot appear here}} } do { - } while (__arm_streaming false); // expected-error {{'__arm_streaming' cannot appear here}} + } while (ATTR_USE false); // expected-error {{'ATTR_NAME' cannot appear here}} - for (__arm_streaming int n : { 1, 2, 3 }) { // expected-error {{'__arm_streaming' only applies to function types}} + for (ATTR_USE int n : { 1, 2, 3 }) { // expected-error {{'ATTR_NAME' only applies to function types}} } } enum class __attribute__((visibility("hidden"))) SecretKeepers { one, /* rest are deprecated */ two, three }; -enum class __arm_streaming EvenMoreSecrets {}; // expected-error {{'__arm_streaming' only applies to non-K&R-style functions}} +enum class ATTR_USE EvenMoreSecrets {}; // expected-error {{'ATTR_NAME' only applies to non-K&R-style functions}} // Forbid attributes on decl specifiers. 
-unsigned __arm_streaming static int __arm_streaming v1; // expected-error {{'__arm_streaming' only applies to function types}} \ - expected-error {{'__arm_streaming' cannot appear here}} -typedef __arm_streaming unsigned long __arm_streaming v2; // expected-error {{'__arm_streaming' only applies to function types}} \ - expected-error {{'__arm_streaming' cannot appear here}} -int __arm_streaming foo(int __arm_streaming x); // expected-error 2 {{'__arm_streaming' only applies to function types}} +unsigned ATTR_USE static int ATTR_USE v1; // expected-error {{'ATTR_NAME' only applies to function types}} \ + expected-error {{'ATTR_NAME' cannot appear here}} +typedef ATTR_USE unsigned long ATTR_USE v2; // expected-error {{'ATTR_NAME' only applies to function types}} \ + expected-error {{'ATTR_NAME' cannot appear here}} +int ATTR_USE foo(int ATTR_USE x); // expected-error 2 {{'ATTR_NAME' only applies to function types}} -__arm_streaming; // expected-error {{'__arm_streaming' only applies to non-K&R-style functions}} +ATTR_USE; // expected-error {{'ATTR_NAME' only applies to non-K&R-style functions}} class A { - A(__arm_streaming int a); // expected-error {{'__arm_streaming' only applies to function types}} + A(ATTR_USE int a); // expected-error {{'ATTR_NAME' only applies to function types}} }; -A::A(__arm_streaming int a) {} // expected-error {{'__arm_streaming' only applies to function types}} +A::A(ATTR_USE int a) {} // expected-error {{'ATTR_NAME' only applies to function types}} template struct TemplateStruct {}; class FriendClassesWithAttributes { @@ -299,47 +302,47 @@ class FriendClassesWithAttributes { template friend class __attribute__((__type_visibility__("default"))) vector; template friend class __declspec(code_seg("foo,whatever")) vector2; // But not C++11 ones - template friend class __arm_streaming vector3; // expected-error {{'__arm_streaming' cannot appear here}} + template friend class ATTR_USE vector3; // expected-error {{'ATTR_NAME' cannot appear 
here}} // Also allowed friend struct __attribute__((__type_visibility__("default"))) TemplateStruct; friend struct __declspec(code_seg("foo,whatever")) TemplateStruct; - friend struct __arm_streaming TemplateStruct; // expected-error {{'__arm_streaming' cannot appear here}} + friend struct ATTR_USE TemplateStruct; // expected-error {{'ATTR_NAME' cannot appear here}} }; // Check ordering: C++11 attributes must appear before GNU attributes. class Ordering { void f1( - int (__arm_streaming __attribute__(()) int n) // expected-error {{'__arm_streaming' only applies to function types}} + int (ATTR_USE __attribute__(()) int n) // expected-error {{'ATTR_NAME' only applies to function types}} ) { } void f2( - int (*)(__arm_streaming __attribute__(()) int n) // expected-error {{'__arm_streaming' only applies to function types}} + int (*)(ATTR_USE __attribute__(()) int n) // expected-error {{'ATTR_NAME' only applies to function types}} ) { } void f3( - int (__attribute__(()) __arm_streaming int n) // expected-error {{'__arm_streaming' cannot appear here}} + int (__attribute__(()) ATTR_USE int n) // expected-error {{'ATTR_NAME' cannot appear here}} ) { } void f4( - int (*)(__attribute__(()) __arm_streaming int n) // expected-error {{'__arm_streaming' cannot appear here}} + int (*)(__attribute__(()) ATTR_USE int n) // expected-error {{'ATTR_NAME' cannot appear here}} ) { } }; namespace base_specs { struct A {}; -struct B : __arm_streaming A {}; // expected-error {{'__arm_streaming' cannot be applied to a base specifier}} -struct C : __arm_streaming virtual A {}; // expected-error {{'__arm_streaming' cannot be applied to a base specifier}} -struct D : __arm_streaming public virtual A {}; // expected-error {{'__arm_streaming' cannot be applied to a base specifier}} -struct E : public __arm_streaming virtual A {}; // expected-error {{'__arm_streaming' cannot appear here}} \ - expected-error {{'__arm_streaming' cannot be applied to a base specifier}} -struct F : virtual 
__arm_streaming public A {}; // expected-error {{'__arm_streaming' cannot appear here}} \ - expected-error {{'__arm_streaming' cannot be applied to a base specifier}} +struct B : ATTR_USE A {}; // expected-error {{'ATTR_NAME' cannot be applied to a base specifier}} +struct C : ATTR_USE virtual A {}; // expected-error {{'ATTR_NAME' cannot be applied to a base specifier}} +struct D : ATTR_USE public virtual A {}; // expected-error {{'ATTR_NAME' cannot be applied to a base specifier}} +struct E : public ATTR_USE virtual A {}; // expected-error {{'ATTR_NAME' cannot appear here}} \ + expected-error {{'ATTR_NAME' cannot be applied to a base specifier}} +struct F : virtual ATTR_USE public A {}; // expected-error {{'ATTR_NAME' cannot appear here}} \ + expected-error {{'ATTR_NAME' cannot be applied to a base specifier}} } -namespace __arm_streaming ns_attr {}; // expected-error {{'__arm_streaming' only applies to non-K&R-style functions}} \ +namespace ATTR_USE ns_attr {}; // expected-error {{'ATTR_NAME' only applies to non-K&R-style functions}} \ expected-warning {{attributes on a namespace declaration are a C++17 extension}} diff --git a/clang/test/Sema/aarch64-incompat-sm-builtin-calls.c b/clang/test/Sema/aarch64-incompat-sm-builtin-calls.c index e63d9f0a84757..476da8534ce76 100644 --- a/clang/test/Sema/aarch64-incompat-sm-builtin-calls.c +++ b/clang/test/Sema/aarch64-incompat-sm-builtin-calls.c @@ -23,7 +23,7 @@ int16x8_t incompat_neon_smc(int16x8_t splat) __arm_streaming_compatible { return (int16x8_t)__builtin_neon_vqaddq_v((int8x16_t)splat, (int8x16_t)splat, 33); } -void incompat_sme_smc(svbool_t pg, void const *ptr) __arm_streaming_compatible __arm_shared_za { +void incompat_sme_smc(svbool_t pg, void const *ptr) __arm_streaming_compatible __arm_inout("za") { // expected-warning@+1 {{builtin call has undefined behaviour when called from a streaming compatible function}} return __builtin_sme_svld1_hor_za128(0, 0, pg, ptr); } @@ -58,7 +58,7 @@ svuint32_t 
incompat_sve2_smc(svbool_t pg, svuint32_t a, int64_t b) __arm_streami return __builtin_sve_svldnt1_gather_u32base_index_u32(pg, a, b); } -void incompat_sme_sm(svbool_t pn, svbool_t pm, svfloat32_t zn, svfloat32_t zm) __arm_shared_za { +void incompat_sme_sm(svbool_t pn, svbool_t pm, svfloat32_t zn, svfloat32_t zm) __arm_inout("za") { // expected-warning@+1 {{builtin call has undefined behaviour when called from a non-streaming function}} svmops_za32_f32_m(0, pn, pm, zn, zm); } diff --git a/clang/test/Sema/aarch64-sme-func-attrs-without-target-feature.cpp b/clang/test/Sema/aarch64-sme-func-attrs-without-target-feature.cpp index b59d67f7f57b8..0a54a94f408b7 100644 --- a/clang/test/Sema/aarch64-sme-func-attrs-without-target-feature.cpp +++ b/clang/test/Sema/aarch64-sme-func-attrs-without-target-feature.cpp @@ -4,16 +4,16 @@ void streaming_compatible_def() __arm_streaming_compatible {} // OK void streaming_def() __arm_streaming { } // expected-error {{function executed in streaming-SVE mode requires 'sme'}} -void shared_za_def() __arm_shared_za { } // expected-error {{function using ZA state requires 'sme'}} -__arm_new_za void new_za_def() { } // expected-error {{function using ZA state requires 'sme'}} +void shared_za_def() __arm_inout("za") { } // expected-error {{function using ZA state requires 'sme'}} +__arm_new("za") void new_za_def() { } // expected-error {{function using ZA state requires 'sme'}} __arm_locally_streaming void locally_streaming_def() { } // expected-error {{function executed in streaming-SVE mode requires 'sme'}} -void streaming_shared_za_def() __arm_streaming __arm_shared_za { } // expected-error {{function executed in streaming-SVE mode requires 'sme'}} +void streaming_shared_za_def() __arm_streaming __arm_inout("za") { } // expected-error {{function executed in streaming-SVE mode requires 'sme'}} // It should work fine when we explicitly add the target("sme") attribute. 
__attribute__((target("sme"))) void streaming_compatible_def_sme_attr() __arm_streaming_compatible {} // OK __attribute__((target("sme"))) void streaming_def_sme_attr() __arm_streaming { } // OK -__attribute__((target("sme"))) void shared_za_def_sme_attr() __arm_shared_za { } // OK -__arm_new_za __attribute__((target("sme"))) void new_za_def_sme_attr() {} // OK +__attribute__((target("sme"))) void shared_za_def_sme_attr() __arm_inout("za") { } // OK +__arm_new("za") __attribute__((target("sme"))) void new_za_def_sme_attr() {} // OK __arm_locally_streaming __attribute__((target("sme"))) void locally_streaming_def_sme_attr() {} // OK // Test that it also works with the target("sme2") attribute. @@ -22,7 +22,7 @@ __attribute__((target("sme2"))) void streaming_def_sme2_attr() __arm_streaming { // No code is generated for declarations, so it should be fine to declare using the attribute. void streaming_compatible_decl() __arm_streaming_compatible; // OK void streaming_decl() __arm_streaming; // OK -void shared_za_decl() __arm_shared_za; // OK +void shared_za_decl() __arm_inout("za"); // OK void non_streaming_decl(); void non_streaming_def(void (*streaming_fn_ptr)(void) __arm_streaming, diff --git a/clang/test/Sema/aarch64-sme-func-attrs.c b/clang/test/Sema/aarch64-sme-func-attrs.c index 73c0934d689e7..b986b0b3de2e1 100644 --- a/clang/test/Sema/aarch64-sme-func-attrs.c +++ b/clang/test/Sema/aarch64-sme-func-attrs.c @@ -6,27 +6,25 @@ void sme_arm_streaming(void) __arm_streaming; void sme_arm_streaming_compatible(void) __arm_streaming_compatible; -__arm_new_za void sme_arm_new_za(void) {} -void sme_arm_shared_za(void) __arm_shared_za; -void sme_arm_preserves_za(void) __arm_preserves_za; +__arm_new("za") void sme_arm_new_za(void) {} +void sme_arm_shared_za(void) __arm_inout("za"); +void sme_arm_preserves_za(void) __arm_preserves("za"); -__arm_new_za void sme_arm_streaming_new_za(void) __arm_streaming {} -void sme_arm_streaming_shared_za(void) __arm_streaming 
__arm_shared_za; -void sme_arm_streaming_preserves_za(void) __arm_streaming __arm_preserves_za; +__arm_new("za") void sme_arm_streaming_new_za(void) __arm_streaming {} +void sme_arm_streaming_shared_za(void) __arm_streaming __arm_inout("za"); +void sme_arm_streaming_preserves_za(void) __arm_streaming __arm_preserves("za"); -__arm_new_za void sme_arm_sc_new_za(void) __arm_streaming_compatible {} -void sme_arm_sc_shared_za(void) __arm_streaming_compatible __arm_shared_za; -void sme_arm_sc_preserves_za(void) __arm_streaming_compatible __arm_preserves_za; - -void sme_arm_shared_preserves_za(void) __arm_shared_za __arm_preserves_za; +__arm_new("za") void sme_arm_sc_new_za(void) __arm_streaming_compatible {} +void sme_arm_sc_shared_za(void) __arm_streaming_compatible __arm_inout("za"); +void sme_arm_sc_preserves_za(void) __arm_streaming_compatible __arm_preserves("za"); __arm_locally_streaming void sme_arm_locally_streaming(void) { } __arm_locally_streaming void sme_arm_streaming_and_locally_streaming(void) __arm_streaming { } __arm_locally_streaming void sme_arm_streaming_and_streaming_compatible(void) __arm_streaming_compatible { } -__arm_locally_streaming __arm_new_za void sme_arm_ls_new_za(void) { } -__arm_locally_streaming void sme_arm_ls_shared_za(void) __arm_shared_za { } -__arm_locally_streaming void sme_arm_ls_preserves_za(void) __arm_preserves_za { } +__arm_locally_streaming __arm_new("za") void sme_arm_ls_new_za(void) { } +__arm_locally_streaming void sme_arm_ls_shared_za(void) __arm_inout("za") { } +__arm_locally_streaming void sme_arm_ls_preserves_za(void) __arm_preserves("za") { } // Valid attributes on function pointers @@ -38,18 +36,14 @@ void streaming_compatible_ptr(void) __arm_streaming_compatible; typedef void (*fptrty2) (void) __arm_streaming_compatible; fptrty2 call_sc_func() { return streaming_compatible_ptr; } -void shared_za_ptr(void) __arm_shared_za; -typedef void (*fptrty3) (void) __arm_shared_za; +void shared_za_ptr(void) __arm_inout("za"); 
+typedef void (*fptrty3) (void) __arm_inout("za"); fptrty3 call_shared_za_func() { return shared_za_ptr; } -void preserves_za_ptr(void) __arm_preserves_za; -typedef void (*fptrty4) (void) __arm_preserves_za; +void preserves_za_ptr(void) __arm_preserves("za"); +typedef void (*fptrty4) (void) __arm_preserves("za"); fptrty4 call_preserve_za_func() { return preserves_za_ptr; } -void shared_preserves_za_ptr(void) __arm_shared_za __arm_preserves_za; -typedef void (*fptrty5) (void) __arm_shared_za __arm_preserves_za; -fptrty5 call_shared_preserve_za_func() { return shared_preserves_za_ptr; } - typedef void (*fptrty6) (void); fptrty6 cast_nza_func_to_normal() { return sme_arm_new_za; } fptrty6 cast_ls_func_to_normal() { return sme_arm_locally_streaming; } @@ -68,13 +62,13 @@ void streaming_mode(void) __arm_streaming __arm_streaming_compatible; // expected-note@+1 {{conflicting attribute is here}} void streaming_compatible(void) __arm_streaming_compatible __arm_streaming; -// expected-cpp-error@+2 {{'__arm_new_za' and '__arm_shared_za' are not compatible}} -// expected-error@+1 {{'__arm_new_za' and '__arm_shared_za' are not compatible}} -__arm_new_za void new_shared_za(void) __arm_shared_za {} +// expected-cpp-error@+2 {{'__arm_new("za")' and '__arm_inout("za")' are not compatible}} +// expected-error@+1 {{'__arm_new("za")' and '__arm_inout("za")' are not compatible}} +__arm_new("za") void new_shared_za(void) __arm_inout("za") {} -// expected-cpp-error@+2 {{'__arm_new_za' and '__arm_preserves_za' are not compatible}} -// expected-error@+1 {{'__arm_new_za' and '__arm_preserves_za' are not compatible}} -__arm_new_za void new_preserves_za(void) __arm_preserves_za {} +// expected-cpp-error@+2 {{'__arm_new("za")' and '__arm_preserves("za")' are not compatible}} +// expected-error@+1 {{'__arm_new("za")' and '__arm_preserves("za")' are not compatible}} +__arm_new("za") void new_preserves_za(void) __arm_preserves("za") {} // Invalid attributes on function pointers @@ -125,24 
+119,25 @@ sc_ptrty return_invalid_fptr_streaming_compatible_normal(n_ptrty f) { return f; // expected-error@+1 {{incompatible function pointer types returning 'sc_ptrty' (aka 'void (*)(void) __arm_streaming_compatible') from a function with result type 'n_ptrty' (aka 'void (*)(void)')}} n_ptrty return_invalid_fptr_normal_streaming_compatible(sc_ptrty f) { return f; } -typedef void (*sz_ptrty) (void) __arm_shared_za; +typedef void (*sz_ptrty) (void) __arm_inout("za"); sz_ptrty return_valid_shared_za_fptr(sz_ptrty f) { return f; } -// expected-cpp-error@+2 {{cannot initialize return object of type 'sz_ptrty' (aka 'void (*)() __arm_shared_za') with an lvalue of type 'n_ptrty' (aka 'void (*)()')}} -// expected-error@+1 {{incompatible function pointer types returning 'n_ptrty' (aka 'void (*)(void)') from a function with result type 'sz_ptrty' (aka 'void (*)(void) __arm_shared_za')}} +// expected-cpp-error@+2 {{cannot initialize return object of type 'sz_ptrty' (aka 'void (*)() __arm_inout("za")') with an lvalue of type 'n_ptrty' (aka 'void (*)()')}} +// expected-error@+1 {{incompatible function pointer types returning 'n_ptrty' (aka 'void (*)(void)') from a function with result type 'sz_ptrty' (aka 'void (*)(void) __arm_inout("za")')}} sz_ptrty return_invalid_fptr_shared_za_normal(n_ptrty f) { return f; } -// expected-cpp-error@+2 {{cannot initialize return object of type 'n_ptrty' (aka 'void (*)()') with an lvalue of type 'sz_ptrty' (aka 'void (*)() __arm_shared_za')}} -// expected-error@+1 {{incompatible function pointer types returning 'sz_ptrty' (aka 'void (*)(void) __arm_shared_za') from a function with result type 'n_ptrty' (aka 'void (*)(void)')}} +// expected-cpp-error@+2 {{cannot initialize return object of type 'n_ptrty' (aka 'void (*)()') with an lvalue of type 'sz_ptrty' (aka 'void (*)() __arm_inout("za")')}} +// expected-error@+1 {{incompatible function pointer types returning 'sz_ptrty' (aka 'void (*)(void) __arm_inout("za")') from a function with result 
type 'n_ptrty' (aka 'void (*)(void)')}} n_ptrty return_invalid_fptr_normal_shared_za(sz_ptrty f) { return f; } -typedef void (*pz_ptrty) (void) __arm_preserves_za; +typedef void (*pz_ptrty) (void) __arm_preserves("za"); pz_ptrty return_valid_preserves_za_fptr(pz_ptrty f) { return f; } -// expected-cpp-error@+2 {{cannot initialize return object of type 'pz_ptrty' (aka 'void (*)() __arm_preserves_za') with an lvalue of type 'n_ptrty' (aka 'void (*)()')}} -// expected-error@+1 {{incompatible function pointer types returning 'n_ptrty' (aka 'void (*)(void)') from a function with result type 'pz_ptrty' (aka 'void (*)(void) __arm_preserves_za')}} +// expected-cpp-error@+2 {{cannot initialize return object of type 'pz_ptrty' (aka 'void (*)() __arm_preserves("za")') with an lvalue of type 'n_ptrty' (aka 'void (*)()')}} +// expected-error@+1 {{incompatible function pointer types returning 'n_ptrty' (aka 'void (*)(void)') from a function with result type 'pz_ptrty' (aka 'void (*)(void) __arm_preserves("za")')}} pz_ptrty return_invalid_fptr_preserves_za_normal(n_ptrty f) { return f; } -// No diagnostics, the preserves_za hint should be dropped silently. 
+// expected-cpp-error@+2 {{cannot initialize return object of type 'n_ptrty' (aka 'void (*)()') with an lvalue of type 'pz_ptrty' (aka 'void (*)() __arm_preserves("za")')}} +// expected-error@+1 {{incompatible function pointer types returning 'pz_ptrty' (aka 'void (*)(void) __arm_preserves("za")') from a function with result type 'n_ptrty' (aka 'void (*)(void)')}} n_ptrty return_invalid_fptr_normal_preserves_za(pz_ptrty f) { return f; } // Test template instantiations @@ -164,21 +159,21 @@ template short templated(short); void redecl(void) __arm_streaming; void redecl(void) __arm_streaming_compatible { } -// expected-error@+5 {{function declared 'void (void) __arm_shared_za' was previously declared 'void (void) __arm_shared_za __arm_preserves_za', which has different SME function attributes}} +// expected-error@+5 {{function declared 'void (void)' was previously declared 'void (void) __arm_preserves("za")', which has different SME function attributes}} // expected-note@+3 {{previous declaration is here}} -// expected-cpp-error@+3 {{function declared 'void () __arm_shared_za' was previously declared 'void () __arm_shared_za __arm_preserves_za', which has different SME function attributes}} +// expected-cpp-error@+3 {{function declared 'void ()' was previously declared 'void () __arm_preserves("za")', which has different SME function attributes}} // expected-cpp-note@+1 {{previous declaration is here}} -void redecl_preserve_za(void) __arm_shared_za __arm_preserves_za;; -void redecl_preserve_za(void) __arm_shared_za {} +void redecl_preserve_za(void) __arm_preserves("za");; +void redecl_preserve_za(void) {} -// expected-error@+5 {{function declared 'void (void) __arm_shared_za __arm_preserves_za' was previously declared 'void (void) __arm_shared_za', which has different SME function attributes}} +// expected-error@+5 {{function declared 'void (void) __arm_preserves("za")' was previously declared 'void (void)', which has different SME function attributes}} // 
expected-note@+3 {{previous declaration is here}} -// expected-cpp-error@+3 {{function declared 'void () __arm_shared_za __arm_preserves_za' was previously declared 'void () __arm_shared_za', which has different SME function attributes}} +// expected-cpp-error@+3 {{function declared 'void () __arm_preserves("za")' was previously declared 'void ()', which has different SME function attributes}} // expected-cpp-note@+1 {{previous declaration is here}} -void redecl_nopreserve_za(void) __arm_shared_za; -void redecl_nopreserve_za(void) __arm_shared_za __arm_preserves_za {} +void redecl_nopreserve_za(void); +void redecl_nopreserve_za(void) __arm_preserves("za") {} -void non_za_definition(void (*shared_za_fn_ptr)(void) __arm_shared_za) { +void non_za_definition(void (*shared_za_fn_ptr)(void) __arm_inout("za"), void (*preserves_za_fn_ptr)(void) __arm_preserves("za")) { sme_arm_new_za(); // OK // expected-error@+2 {{call to a shared ZA function requires the caller to have ZA state}} // expected-cpp-error@+1 {{call to a shared ZA function requires the caller to have ZA state}} @@ -186,43 +181,46 @@ void non_za_definition(void (*shared_za_fn_ptr)(void) __arm_shared_za) { // expected-error@+2 {{call to a shared ZA function requires the caller to have ZA state}} // expected-cpp-error@+1 {{call to a shared ZA function requires the caller to have ZA state}} shared_za_fn_ptr(); + // expected-error@+2 {{call to a shared ZA function requires the caller to have ZA state}} + // expected-cpp-error@+1 {{call to a shared ZA function requires the caller to have ZA state}} + preserves_za_fn_ptr(); } -void shared_za_definition(void (*shared_za_fn_ptr)(void) __arm_shared_za) __arm_shared_za { +void shared_za_definition(void (*shared_za_fn_ptr)(void) __arm_inout("za")) __arm_inout("za") { sme_arm_shared_za(); // OK shared_za_fn_ptr(); // OK } -__arm_new_za void new_za_definition(void (*shared_za_fn_ptr)(void) __arm_shared_za) { +__arm_new("za") void new_za_definition(void 
(*shared_za_fn_ptr)(void) __arm_inout("za")) { sme_arm_shared_za(); // OK shared_za_fn_ptr(); // OK } #ifdef __cplusplus -int shared_za_initializer(void) __arm_shared_za; +int shared_za_initializer(void) __arm_inout("za"); // expected-cpp-error@+1 {{call to a shared ZA function requires the caller to have ZA state}} int global = shared_za_initializer(); struct S { - virtual void shared_za_memberfn(void) __arm_shared_za; + virtual void shared_za_memberfn(void) __arm_inout("za"); }; struct S2 : public S { -// expected-cpp-error@+2 {{virtual function 'shared_za_memberfn' has different attributes ('void ()') than the function it overrides (which has 'void () __arm_shared_za')}} +// expected-cpp-error@+2 {{virtual function 'shared_za_memberfn' has different attributes ('void ()') than the function it overrides (which has 'void () __arm_inout("za")')}} // expected-cpp-note@-5 {{overridden virtual function is here}} - __arm_new_za void shared_za_memberfn(void) override {} + __arm_new("za") void shared_za_memberfn(void) override {} }; -// The '__arm_preserves_za' property cannot be dropped when overriding a virtual -// function. It is however fine for the overriding function to be '__arm_preserves_za' +// The '__arm_preserves("za")' property cannot be dropped when overriding a virtual +// function. It is also an error for the overriding function to be '__arm_preserves("za")' // even though the function that it overrides is not. 
struct S_PreservesZA { - virtual void memberfn(void) __arm_preserves_za; + virtual void memberfn(void) __arm_preserves("za"); }; struct S_Drop_PreservesZA : S_PreservesZA { -// expected-cpp-error@+2 {{virtual function 'memberfn' has different attributes ('void ()') than the function it overrides (which has 'void () __arm_preserves_za')}} +// expected-cpp-error@+2 {{virtual function 'memberfn' has different attributes ('void ()') than the function it overrides (which has 'void () __arm_preserves("za")')}} // expected-cpp-note@-5 {{overridden virtual function is here}} void memberfn(void) override {} }; @@ -230,9 +228,11 @@ struct S_Drop_PreservesZA : S_PreservesZA { struct S_NoPreservesZA { virtual void memberfn(void); }; + struct S_AddPreservesZA : S_NoPreservesZA { -// This is fine, the overridden function just adds more guarantees. - void memberfn(void) __arm_preserves_za override {} +// expected-cpp-error@+2 {{virtual function 'memberfn' has different attributes ('void () __arm_preserves("za")') than the function it overrides (which has 'void ()')}} +// expected-cpp-note@-5 {{overridden virtual function is here}} + void memberfn(void) __arm_preserves("za") override {} }; @@ -258,20 +258,20 @@ struct S3 { }; template <> -struct S3 { +struct S3 { static constexpr int value = 8; }; template <> -struct S3 { +struct S3 { static constexpr int value = 16; }; void normal_func(void) {} void streaming_func(void) __arm_streaming {} void streaming_compatible_func(void) __arm_streaming_compatible {} -void shared_za_func(void) __arm_shared_za {} -void preserves_za_func(void) __arm_preserves_za {} +void shared_za_func(void) __arm_inout("za") {} +void preserves_za_func(void) __arm_preserves("za") {} static_assert(S3::value == 1, "why are we picking the wrong specialization?"); static_assert(S3::value == 2, "why are we picking the wrong specialization?"); @@ -295,8 +295,8 @@ template int test_templated_f(T); template<> constexpr int test_templated_f(void(*)(void)) { return 1; } 
template<> constexpr int test_templated_f(void(*)(void)__arm_streaming) { return 2; } template<> constexpr int test_templated_f(void(*)(void)__arm_streaming_compatible) { return 4; } -template<> constexpr int test_templated_f(void(*)(void)__arm_shared_za) { return 8; } -template<> constexpr int test_templated_f(void(*)(void)__arm_preserves_za) { return 16; } +template<> constexpr int test_templated_f(void(*)(void)__arm_inout("za")) { return 8; } +template<> constexpr int test_templated_f(void(*)(void)__arm_preserves("za")) { return 16; } static_assert(test_templated_f(&normal_func) == 1, "Instantiated to wrong function"); static_assert(test_templated_f(&streaming_func) == 2, "Instantiated to wrong function"); @@ -312,8 +312,8 @@ int invalid_type_for_attribute __arm_streaming; constexpr int overload(void f(void)) { return 1; } constexpr int overload(void f(void) __arm_streaming) { return 2; } constexpr int overload(void f(void) __arm_streaming_compatible) { return 4; } -constexpr int overload(void f(void) __arm_shared_za) { return 8; } -constexpr int overload(void f(void) __arm_preserves_za) { return 16; } +constexpr int overload(void f(void) __arm_inout("za")) { return 8; } +constexpr int overload(void f(void) __arm_preserves("za")) { return 16; } static_assert(overload(&normal_func) == 1, "Overloaded to wrong function"); static_assert(overload(&streaming_func) == 2, "Overloaded to wrong function"); static_assert(overload(&streaming_compatible_func) == 4, "Overloaded to wrong function"); @@ -330,3 +330,73 @@ constexpr X *ptr = 0; static_assert(overload_int(ptr->foo) == 2, "Overloaded to the wrong function after implicit instantiation"); #endif // ifdef __cplusplus + +// expected-cpp-error@+2 {{unknown state ''}} +// expected-error@+1 {{unknown state ''}} +__arm_new("") void invalid_arm_new_empty_string(void); +// expected-cpp-error@+2 {{expected string literal as argument of '__arm_new' attribute}} +// expected-error@+1 {{expected string literal as argument of 
'__arm_new' attribute}} +__arm_new(0) void invalid_arm_new_non_literal_string(void); +// expected-cpp-error@+2 {{unknown state 'unknownstate'}} +// expected-error@+1 {{unknown state 'unknownstate'}} +__arm_new("unknownstate") void invalid_arm_new_unknown_state(void); + +// expected-cpp-error@+2 {{unknown state ''}} +// expected-error@+1 {{unknown state ''}} +void invalid_arm_in_empty_string(void) __arm_in(""); +// expected-cpp-error@+2 {{expected string literal as argument of '__arm_in' attribute}} +// expected-error@+1 {{expected string literal as argument of '__arm_in' attribute}} +void invalid_arm_in_non_literal_string(void) __arm_in(0); +// expected-cpp-error@+2 {{unknown state 'unknownstate'}} +// expected-error@+1 {{unknown state 'unknownstate'}} +void invalid_arm_in_unknown_state(void) __arm_in("unknownstate"); + +void valid_state_attrs_in_in1(void) __arm_in("za"); +void valid_state_attrs_in_in2(void) __arm_in("za", "za"); + +// expected-cpp-error@+2 {{missing state for '__arm_in'}} +// expected-error@+1 {{missing state for '__arm_in'}} +void invalid_state_attrs_no_arg1(void) __arm_in(); +// expected-cpp-error@+2 {{missing state for '__arm_new'}} +// expected-error@+1 {{missing state for '__arm_new'}} +__arm_new() void invalid_state_attrs_no_arg2(void); + +// expected-cpp-error@+2 {{conflicting attributes for state 'za'}} +// expected-error@+1 {{conflicting attributes for state 'za'}} +void conflicting_state_attrs_in_out(void) __arm_in("za") __arm_out("za"); +// expected-cpp-error@+2 {{conflicting attributes for state 'za'}} +// expected-error@+1 {{conflicting attributes for state 'za'}} +void conflicting_state_attrs_in_inout(void) __arm_in("za") __arm_inout("za"); +// expected-cpp-error@+2 {{conflicting attributes for state 'za'}} +// expected-error@+1 {{conflicting attributes for state 'za'}} +void conflicting_state_attrs_in_preserves(void) __arm_in("za") __arm_preserves("za"); + +// expected-cpp-error@+2 {{conflicting attributes for state 'za'}} +// 
expected-error@+1 {{conflicting attributes for state 'za'}} +void conflicting_state_attrs_out_in(void) __arm_out("za") __arm_in("za"); +// expected-cpp-error@+2 {{conflicting attributes for state 'za'}} +// expected-error@+1 {{conflicting attributes for state 'za'}} +void conflicting_state_attrs_out_inout(void) __arm_out("za") __arm_inout("za"); +// expected-cpp-error@+2 {{conflicting attributes for state 'za'}} +// expected-error@+1 {{conflicting attributes for state 'za'}} +void conflicting_state_attrs_out_preserves(void) __arm_out("za") __arm_preserves("za"); + +// expected-cpp-error@+2 {{conflicting attributes for state 'za'}} +// expected-error@+1 {{conflicting attributes for state 'za'}} +void conflicting_state_attrs_inout_in(void) __arm_inout("za") __arm_in("za"); +// expected-cpp-error@+2 {{conflicting attributes for state 'za'}} +// expected-error@+1 {{conflicting attributes for state 'za'}} +void conflicting_state_attrs_inout_out(void) __arm_inout("za") __arm_out("za"); +// expected-cpp-error@+2 {{conflicting attributes for state 'za'}} +// expected-error@+1 {{conflicting attributes for state 'za'}} +void conflicting_state_attrs_inout_preserves(void) __arm_inout("za") __arm_preserves("za"); + +// expected-cpp-error@+2 {{conflicting attributes for state 'za'}} +// expected-error@+1 {{conflicting attributes for state 'za'}} +void conflicting_state_attrs_preserves_in(void) __arm_preserves("za") __arm_in("za"); +// expected-cpp-error@+2 {{conflicting attributes for state 'za'}} +// expected-error@+1 {{conflicting attributes for state 'za'}} +void conflicting_state_attrs_preserves_out(void) __arm_preserves("za") __arm_out("za"); +// expected-cpp-error@+2 {{conflicting attributes for state 'za'}} +// expected-error@+1 {{conflicting attributes for state 'za'}} +void conflicting_state_attrs_preserves_inout(void) __arm_preserves("za") __arm_inout("za"); diff --git a/clang/test/Sema/aarch64-sme-intrinsics/acle_sme_imm.cpp 
b/clang/test/Sema/aarch64-sme-intrinsics/acle_sme_imm.cpp index 529d0d2d1e625..40254a5a0eafa 100644 --- a/clang/test/Sema/aarch64-sme-intrinsics/acle_sme_imm.cpp +++ b/clang/test/Sema/aarch64-sme-intrinsics/acle_sme_imm.cpp @@ -12,7 +12,7 @@ #include -void test_range_0_0(uint32_t slice, svbool_t pg, void *ptr) __arm_streaming __arm_shared_za { +void test_range_0_0(uint32_t slice, svbool_t pg, void *ptr) __arm_streaming __arm_inout("za") { // expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 0]}} SVE_ACLE_FUNC(svld1_hor_za8,,,)(-1, slice, pg, ptr); // expected-error@+1 {{argument value 1 is outside the valid range [0, 0]}} @@ -32,7 +32,7 @@ void test_range_0_0(uint32_t slice, svbool_t pg, void *ptr) __arm_streaming __ar SVE_ACLE_FUNC(svwrite_ver_za8, _s8, _m,)(1, slice, pg, svundef_s8()); } -void test_range_0_1(uint32_t slice, svbool_t pg, void *ptr) __arm_streaming __arm_shared_za { +void test_range_0_1(uint32_t slice, svbool_t pg, void *ptr) __arm_streaming __arm_inout("za") { // expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 1]}} SVE_ACLE_FUNC(svld1_hor_za16,,,)(-1, slice, pg, ptr); // expected-error@+1 {{argument value 2 is outside the valid range [0, 1]}} @@ -52,7 +52,7 @@ void test_range_0_1(uint32_t slice, svbool_t pg, void *ptr) __arm_streaming __ar SVE_ACLE_FUNC(svwrite_ver_za16, _s16, _m,)(2, slice, pg, svundef_s16()); } -void test_range_0_3(uint32_t slice, svbool_t pg, void *ptr) __arm_streaming __arm_shared_za { +void test_range_0_3(uint32_t slice, svbool_t pg, void *ptr) __arm_streaming __arm_inout("za") { // expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 3]}} SVE_ACLE_FUNC(svld1_hor_za32,,,)(-1, slice, pg, ptr); // expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}} @@ -90,7 +90,7 @@ void test_range_0_3(uint32_t slice, svbool_t pg, void *ptr) __arm_streaming __ar SVE_ACLE_FUNC(svusmops_za32, _u8, _m,)(-1, pg, pg, 
svundef_u8(), svundef_s8()); } -void test_range_0_7(uint32_t slice, svbool_t pg, void *ptr) __arm_streaming __arm_shared_za { +void test_range_0_7(uint32_t slice, svbool_t pg, void *ptr) __arm_streaming __arm_inout("za") { // expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 7]}} SVE_ACLE_FUNC(svld1_hor_za64,,,)(-1, slice, pg, ptr); // expected-error@+1 {{argument value 8 is outside the valid range [0, 7]}} @@ -133,7 +133,7 @@ void test_range_0_7(uint32_t slice, svbool_t pg, void *ptr) __arm_streaming __ar SVE_ACLE_FUNC(svmops_za64, _f64, _m,)(-1, pg, pg, svundef_f64(), svundef_f64()); } -void test_range_0_15(uint32_t slice, svbool_t pg, void *ptr) __arm_streaming __arm_shared_za { +void test_range_0_15(uint32_t slice, svbool_t pg, void *ptr) __arm_streaming __arm_inout("za") { // expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 15]}} SVE_ACLE_FUNC(svld1_hor_za128,,,)(-1, slice, pg, ptr); // expected-error@+1 {{argument value 16 is outside the valid range [0, 15]}} @@ -153,14 +153,14 @@ void test_range_0_15(uint32_t slice, svbool_t pg, void *ptr) __arm_streaming __a SVE_ACLE_FUNC(svwrite_ver_za128, _s8, _m,)(16, slice, pg, svundef_s8()); } -void test_range_0_255(svbool_t pg, void *ptr) __arm_streaming __arm_shared_za { +void test_range_0_255(svbool_t pg, void *ptr) __arm_streaming __arm_inout("za") { // expected-error@+1 {{argument value 256 is outside the valid range [0, 255]}} SVE_ACLE_FUNC(svzero_mask_za,,,)(256); // expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 255]}} SVE_ACLE_FUNC(svzero_mask_za,,,)(-1); } -void test_constant(uint64_t u64, svbool_t pg, void *ptr) __arm_streaming __arm_shared_za { +void test_constant(uint64_t u64, svbool_t pg, void *ptr) __arm_streaming __arm_inout("za") { SVE_ACLE_FUNC(svld1_hor_za8,,,)(u64, u64, pg, ptr); // expected-error {{argument to 'svld1_hor_za8' must be a constant integer}} 
SVE_ACLE_FUNC(svst1_hor_za32,,,)(u64, 0, pg, ptr); // expected-error {{argument to 'svst1_hor_za32' must be a constant integer}} SVE_ACLE_FUNC(svld1_hor_vnum_za8,,,)(u64, 0, pg, ptr, u64); // expected-error {{argument to 'svld1_hor_vnum_za8' must be a constant integer}} diff --git a/clang/test/Sema/aarch64-sme-intrinsics/acle_sme_target.c b/clang/test/Sema/aarch64-sme-intrinsics/acle_sme_target.c index 95bb6be2d2d34..f1e858f819602 100644 --- a/clang/test/Sema/aarch64-sme-intrinsics/acle_sme_target.c +++ b/clang/test/Sema/aarch64-sme-intrinsics/acle_sme_target.c @@ -6,21 +6,21 @@ #include __attribute__((target("sme"))) -void test_sme(svbool_t pg, void *ptr) __arm_streaming __arm_shared_za { +void test_sme(svbool_t pg, void *ptr) __arm_streaming __arm_inout("za") { svld1_hor_za8(0, 0, pg, ptr); } __attribute__((target("arch=armv8-a+sme"))) -void test_arch_sme(svbool_t pg, void *ptr) __arm_streaming __arm_shared_za { +void test_arch_sme(svbool_t pg, void *ptr) __arm_streaming __arm_inout("za") { svld1_hor_vnum_za32(0, 0, pg, ptr, 0); } __attribute__((target("+sme"))) -void test_plus_sme(svbool_t pg, void *ptr) __arm_streaming __arm_shared_za { +void test_plus_sme(svbool_t pg, void *ptr) __arm_streaming __arm_inout("za") { svst1_ver_za16(0, 0, pg, ptr); } __attribute__((target("+sme"))) -void undefined(svbool_t pg, void *ptr) __arm_shared_za { +void undefined(svbool_t pg, void *ptr) __arm_inout("za") { svst1_ver_vnum_za64(0, 0, pg, ptr, 0); // expected-warning {{builtin call has undefined behaviour when called from a non-streaming function}} } diff --git a/clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_imm.cpp b/clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_imm.cpp index 5118f743174c2..36103480861c3 100644 --- a/clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_imm.cpp +++ b/clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_imm.cpp @@ -5,7 +5,7 @@ #include -void test_multivector_read(uint32_t base) __arm_streaming __arm_shared_za __arm_preserves_za { +void 
test_multivector_read(uint32_t base) __arm_streaming __arm_in("za") { // Test Tile Range svread_hor_za8_u8_vg2(1, base); // expected-error {{argument value 1 is outside the valid range [0, 0]}} @@ -32,7 +32,7 @@ void test_multivector_read(uint32_t base) __arm_streaming __arm_shared_za __arm_ void test_multivector_write(uint32_t base, svuint8x2_t v8x2, svuint8x4_t v8x4, svuint16x2_t v16x2, svuint16x4_t v16x4, svuint32x2_t v32x2, svuint32x4_t v32x4, - svuint64x2_t v64x2, svuint64x4_t v64x4) __arm_streaming __arm_shared_za { + svuint64x2_t v64x2, svuint64x4_t v64x4) __arm_streaming __arm_inout("za") { // Test Tile Range svwrite_hor_za8_u8_vg2(1, base, v8x2); // expected-error {{argument value 1 is outside the valid range [0, 0]}} @@ -56,7 +56,7 @@ void test_multivector_write(uint32_t base, svuint8x2_t v8x2, svuint8x4_t v8x4, svwrite_ver_za64_u64_vg4(8, base, v64x4); // expected-error {{argument value 8 is outside the valid range [0, 7]}} } -void test_outer_product(svbool_t pred, svint16_t s16, svuint16_t u16, svint32_t s32, svuint32_t u32) __arm_streaming __arm_shared_za { +void test_outer_product(svbool_t pred, svint16_t s16, svuint16_t u16, svint32_t s32, svuint32_t u32) __arm_streaming __arm_inout("za") { // Test Tile Range svmopa_za32_u16_m(4, pred, pred, u16, u16); // expected-error {{argument value 4 is outside the valid range [0, 3]}} svmopa_za32_s16_m(4, pred, pred, s16, s16); // expected-error {{argument value 4 is outside the valid range [0, 3]}} @@ -71,15 +71,15 @@ void test_outer_product(svbool_t pred, svint16_t s16, svuint16_t u16, svint32_t svbmops_za32_s32_m(4, pred, pred, s32, s32); // expected-error {{argument value 4 is outside the valid range [0, 3]}} } -void test_ldr_zt(const void *const_base) __arm_streaming_compatible __arm_shared_za { +void test_ldr_zt(const void *const_base) __arm_streaming_compatible __arm_inout("za") { svldr_zt(1, const_base); // expected-error {{argument value 1 is outside the valid range [0, 0]}} } -void test_str_zt(void 
*base) __arm_streaming_compatible __arm_shared_za __arm_preserves_za { +void test_str_zt(void *base) __arm_streaming_compatible __arm_in("za") { svstr_zt(1, base); // expected-error {{argument value 1 is outside the valid range [0, 0]}} } -void test_svluti2_lane_zt_x4(svuint8_t zn) __arm_streaming __arm_shared_za __arm_preserves_za { +void test_svluti2_lane_zt_x4(svuint8_t zn) __arm_streaming __arm_in("za") { // Test Reg Offset svluti2_lane_zt_u8_x4(1, zn, 0); // expected-error {{argument value 1 is outside the valid range [0, 0]}} // Test index value range @@ -106,7 +106,7 @@ void test_svluti2_lane_zt_x4(svuint8_t zn) __arm_streaming __arm_shared_za __arm svluti2_lane_zt_f32_x4(0, zn, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} } -void test_svluti4_lane_zt_x4(svuint8_t zn) __arm_streaming __arm_shared_za __arm_preserves_za { +void test_svluti4_lane_zt_x4(svuint8_t zn) __arm_streaming __arm_in("za") { // Test Reg Offset svluti4_lane_zt_u16_x4(1, zn, 0); // expected-error {{argument value 1 is outside the valid range [0, 0]}} // Test index value range @@ -129,7 +129,7 @@ void test_svluti4_lane_zt_x4(svuint8_t zn) __arm_streaming __arm_shared_za __arm svluti4_lane_zt_f32_x4(0, zn, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}} } -void test_svluti2_lane_zt(svuint8_t zn_u8) __arm_streaming __arm_shared_za __arm_preserves_za { +void test_svluti2_lane_zt(svuint8_t zn_u8) __arm_streaming __arm_in("za") { // Test Reg Offset svluti2_lane_zt_u8(1, zn_u8, 2); // expected-error {{argument value 1 is outside the valid range [0, 0]}} // Test index value range @@ -156,7 +156,7 @@ void test_svluti2_lane_zt(svuint8_t zn_u8) __arm_streaming __arm_shared_za __arm svluti2_lane_zt_f32(0, zn_u8, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} } -void test_svluti4_lane_zt(svuint8_t zn_u8) __arm_streaming __arm_shared_za __arm_preserves_za { +void test_svluti4_lane_zt(svuint8_t zn_u8) 
__arm_streaming __arm_in("za") { // Test Reg Offset svluti4_lane_zt_u8(1, zn_u8, 2); // expected-error {{argument value 1 is outside the valid range [0, 0]}} // Test index value range @@ -183,7 +183,7 @@ void test_svluti4_lane_zt(svuint8_t zn_u8) __arm_streaming __arm_shared_za __arm svluti4_lane_zt_f32(0, zn_u8, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} } -void test_svluti2_lane_zt_x2(svuint8_t zn_u8) __arm_streaming __arm_shared_za __arm_preserves_za { +void test_svluti2_lane_zt_x2(svuint8_t zn_u8) __arm_streaming __arm_in("za") { // Test Reg Offset svluti2_lane_zt_u8_x2(1, zn_u8, 2); // expected-error {{argument value 1 is outside the valid range [0, 0]}} // Test index value range @@ -210,7 +210,7 @@ void test_svluti2_lane_zt_x2(svuint8_t zn_u8) __arm_streaming __arm_shared_za __ svluti2_lane_zt_f32_x2(0, zn_u8, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} } -void test_svluti4_lane_zt_x2(svuint8_t zn_u8) __arm_streaming __arm_shared_za __arm_preserves_za { +void test_svluti4_lane_zt_x2(svuint8_t zn_u8) __arm_streaming __arm_in("za") { // Test Reg Offset svluti4_lane_zt_u8_x2(1, zn_u8, 2); // expected-error {{argument value 1 is outside the valid range [0, 0]}} // Test index value range @@ -245,7 +245,7 @@ void test_bfmlslb_bad_lane(svfloat32_t zda, svbfloat16_t zn, svbfloat16_t zm) __ void test_multiply_add_sub_long(uint32_t base, svint8_t s8, svuint8_t u8, svint16_t s16, svuint16_t u16, svint8x2_t s8x2, svuint8x2_t u8x2, svint16x2_t s16x2, svuint16x2_t u16x2, - svint8x4_t s8x4, svuint8x4_t u8x4, svint16x4_t s16x4, svuint16x4_t u16x4) __arm_streaming __arm_shared_za { + svint8x4_t s8x4, svuint8x4_t u8x4, svint16x4_t s16x4, svuint16x4_t u16x4) __arm_streaming __arm_inout("za") { svmla_lane_za32_s8_vg4x1(base, s8, s8, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} svmla_lane_za32_u8_vg4x1(base, u8, u8, 16); // expected-error {{argument value 16 is outside the valid 
range [0, 15]}} @@ -292,7 +292,7 @@ void test_vertical_dot_product(uint32_t base, svint16x2_t s16x2, svuint16x2_t u1 svfloat16x2_t f16x2, svbfloat16x2_t bf16x2, svint16_t s16, svuint16_t u16, svint8_t s8, svuint8_t u8, - svfloat16_t f16, svbfloat16_t b16) __arm_streaming __arm_shared_za { + svfloat16_t f16, svbfloat16_t b16) __arm_streaming __arm_inout("za") { // Test lane indices. svvdot_lane_za32_s16_vg1x2(base, s16x2, s16, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} svvdot_lane_za32_u16_vg1x2(base, u16x2, u16, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} @@ -309,7 +309,7 @@ void test_vertical_dot_product(uint32_t base, svint16x2_t s16x2, svuint16x2_t u1 void test_fdot_za32_bad_lane(uint32_t slice_base, svfloat16_t z_f16, svfloat16x2_t z_f16x2, svfloat16x4_t z_f16x4, svbfloat16_t z_bf16, svbfloat16x2_t z_bf16x2, - svbfloat16x4_t z_bf16x4) __arm_streaming __arm_shared_za { + svbfloat16x4_t z_bf16x4) __arm_streaming __arm_inout("za") { // 16-bit float svdot_lane_za32_f16_vg1x2(slice_base, z_f16x2, z_f16, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} svdot_lane_za32_f16_vg1x4(slice_base, z_f16x4, z_f16, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} @@ -325,7 +325,7 @@ void test_svdot_multi_za32_bad_lane(uint32_t slice_base, svuint16_t z_u16, svint16x4_t z_s16x4, svuint8_t z_u8, svuint8x2_t z_u8x2, svuint8x4_t z_u8x4, svint8_t z_s8, svint8x2_t z_s8x2, - svint8x4_t z_s8x4) __arm_streaming __arm_shared_za { + svint8x4_t z_s8x4) __arm_streaming __arm_inout("za") { // Multi, indexed (unsigned) svdot_lane_za32_u16_vg1x2(slice_base, z_u16x2, z_u16, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} svdot_lane_za32_u16_vg1x4(slice_base, z_u16x4, z_u16, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} diff --git a/clang/utils/TableGen/ClangAttrEmitter.cpp 
b/clang/utils/TableGen/ClangAttrEmitter.cpp index 4ec00573e8a9d..c35490cf2e5ae 100644 --- a/clang/utils/TableGen/ClangAttrEmitter.cpp +++ b/clang/utils/TableGen/ClangAttrEmitter.cpp @@ -3547,23 +3547,27 @@ static void GenerateHasAttrSpellingStringSwitch( OS << " .Default(0);\n"; } -// Emits the list of tokens for regular keyword attributes. -void EmitClangAttrTokenKinds(RecordKeeper &Records, raw_ostream &OS) { - emitSourceFileHeader("A list of tokens generated from the attribute" - " definitions", - OS); +// Emits list of regular keyword attributes with info about their arguments. +void EmitClangRegularKeywordAttributeInfo(RecordKeeper &Records, + raw_ostream &OS) { + emitSourceFileHeader( + "A list of regular keyword attributes generated from the attribute" + " definitions", + OS); // Assume for now that the same token is not used in multiple regular // keyword attributes. for (auto *R : Records.getAllDerivedDefinitions("Attr")) - for (const auto &S : GetFlattenedSpellings(*R)) - if (isRegularKeywordAttribute(S)) { - if (!R->getValueAsListOfDefs("Args").empty()) - PrintError(R->getLoc(), - "RegularKeyword attributes with arguments are not " - "yet supported"); - OS << "KEYWORD_ATTRIBUTE(" - << S.getSpellingRecord().getValueAsString("Name") << ")\n"; - } + for (const auto &S : GetFlattenedSpellings(*R)) { + if (!isRegularKeywordAttribute(S)) + continue; + std::vector Args = R->getValueAsListOfDefs("Args"); + bool HasArgs = llvm::any_of( + Args, [](const Record *Arg) { return !Arg->getValueAsBit("Fake"); }); + + OS << "KEYWORD_ATTRIBUTE(" + << S.getSpellingRecord().getValueAsString("Name") << ", " + << (HasArgs ? 
"true" : "false") << ")\n"; + } OS << "#undef KEYWORD_ATTRIBUTE\n"; } diff --git a/clang/utils/TableGen/SveEmitter.cpp b/clang/utils/TableGen/SveEmitter.cpp index 5de2223e71b04..060d79a06af88 100644 --- a/clang/utils/TableGen/SveEmitter.cpp +++ b/clang/utils/TableGen/SveEmitter.cpp @@ -1619,7 +1619,7 @@ void SVEEmitter::createSMEHeader(raw_ostream &OS) { OS << "}\n\n"; OS << "__ai __attribute__((target(\"sme\"))) void svundef_za(void) " - "__arm_streaming_compatible __arm_shared_za " + "__arm_streaming_compatible __arm_out(\"za\") " "{ }\n\n"; createCoreHeaderIntrinsics(OS, *this, ACLEKind::SME); diff --git a/clang/utils/TableGen/TableGen.cpp b/clang/utils/TableGen/TableGen.cpp index 3859555d647fd..158d10e2b3d6b 100644 --- a/clang/utils/TableGen/TableGen.cpp +++ b/clang/utils/TableGen/TableGen.cpp @@ -37,7 +37,7 @@ enum ActionType { GenClangAttrSubjectMatchRuleList, GenClangAttrPCHRead, GenClangAttrPCHWrite, - GenClangAttrTokenKinds, + GenClangRegularKeywordAttributeInfo, GenClangAttrHasAttributeImpl, GenClangAttrSpellingListIndex, GenClangAttrASTVisitor, @@ -150,8 +150,10 @@ cl::opt Action( "Generate clang PCH attribute reader"), clEnumValN(GenClangAttrPCHWrite, "gen-clang-attr-pch-write", "Generate clang PCH attribute writer"), - clEnumValN(GenClangAttrTokenKinds, "gen-clang-attr-token-kinds", - "Generate a list of attribute-related clang tokens"), + clEnumValN(GenClangRegularKeywordAttributeInfo, + "gen-clang-regular-keyword-attr-info", + "Generate a list of regular keyword attributes with info " + "about their arguments"), clEnumValN(GenClangAttrHasAttributeImpl, "gen-clang-attr-has-attribute-impl", "Generate a clang attribute spelling list"), @@ -291,11 +293,14 @@ cl::opt Action( "Generate riscv_vector_builtin_cg.inc for clang"), clEnumValN(GenRISCVVectorBuiltinSema, "gen-riscv-vector-builtin-sema", "Generate riscv_vector_builtin_sema.inc for clang"), - clEnumValN(GenRISCVSiFiveVectorBuiltins, "gen-riscv-sifive-vector-builtins", + 
clEnumValN(GenRISCVSiFiveVectorBuiltins, + "gen-riscv-sifive-vector-builtins", "Generate riscv_sifive_vector_builtins.inc for clang"), - clEnumValN(GenRISCVSiFiveVectorBuiltinCG, "gen-riscv-sifive-vector-builtin-codegen", + clEnumValN(GenRISCVSiFiveVectorBuiltinCG, + "gen-riscv-sifive-vector-builtin-codegen", "Generate riscv_sifive_vector_builtin_cg.inc for clang"), - clEnumValN(GenRISCVSiFiveVectorBuiltinSema, "gen-riscv-sifive-vector-builtin-sema", + clEnumValN(GenRISCVSiFiveVectorBuiltinSema, + "gen-riscv-sifive-vector-builtin-sema", "Generate riscv_sifive_vector_builtin_sema.inc for clang"), clEnumValN(GenAttrDocs, "gen-attr-docs", "Generate attribute documentation"), @@ -355,8 +360,8 @@ bool ClangTableGenMain(raw_ostream &OS, RecordKeeper &Records) { case GenClangAttrPCHWrite: EmitClangAttrPCHWrite(Records, OS); break; - case GenClangAttrTokenKinds: - EmitClangAttrTokenKinds(Records, OS); + case GenClangRegularKeywordAttributeInfo: + EmitClangRegularKeywordAttributeInfo(Records, OS); break; case GenClangAttrHasAttributeImpl: EmitClangAttrHasAttrImpl(Records, OS); diff --git a/clang/utils/TableGen/TableGenBackends.h b/clang/utils/TableGen/TableGenBackends.h index faa0c5d2cff9e..58a4af3c23a67 100644 --- a/clang/utils/TableGen/TableGenBackends.h +++ b/clang/utils/TableGen/TableGenBackends.h @@ -53,8 +53,8 @@ void EmitClangAttrSubjectMatchRuleList(llvm::RecordKeeper &Records, llvm::raw_ostream &OS); void EmitClangAttrPCHRead(llvm::RecordKeeper &Records, llvm::raw_ostream &OS); void EmitClangAttrPCHWrite(llvm::RecordKeeper &Records, llvm::raw_ostream &OS); -void EmitClangAttrTokenKinds(llvm::RecordKeeper &Records, - llvm::raw_ostream &OS); +void EmitClangRegularKeywordAttributeInfo(llvm::RecordKeeper &Records, + llvm::raw_ostream &OS); void EmitClangAttrHasAttrImpl(llvm::RecordKeeper &Records, llvm::raw_ostream &OS); void EmitClangAttrSpellingListIndex(llvm::RecordKeeper &Records,